pipeline analysis-helpers.ts

55% Statements 33/60
56.75% Branches 21/37
100% Functions 14/14
57.69% Lines 30/52
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7x
 
 
 
 
 
 
 
 
 
 
 
 
 
474x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109x
 
 
 
 
109x
 
 
 
 
 
 
109x
 
 
 
 
 
 
109x
 
 
 
 
 
7x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12x
20x
20x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10x
10x
 
10x
 
 
 
 
 
 
 
 
 
 
1865x
1865x
 
 
 
 
 
 
 
 
32x
 
 
 
 
 
 
 
 
29x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410x
 
 
 
 
 
 
 
 
 
 
 
132x
 
 
 
 
 
 
132x
 
 
 
 
 
 
 
 
 
30x
30x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6x
 
 
 
 
 
 
36x
 
 
 
 
 
 
7x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95x
967x
967x
 
66x
 
 
 
 
  // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
 
/**
 * @module Generators/Pipeline/AnalysisHelpers
 * @description Shared utility functions for the analysis pipeline.
 *
 * Provides sanitization, data coercion, markdown formatting, and file I/O
 * helpers consumed by the category-specific analysis builder modules
 * (`analysis-classification.ts`, `analysis-threats.ts`, `analysis-risk.ts`,
 * `analysis-existing.ts`) and the orchestrator (`analysis-stage.ts`).
 */
 
import type { ConfidenceLevel } from '../../types/index.js';
import type { ClassificationInput } from '../../types/political-classification.js';
import type { ThreatAssessmentInput } from '../../types/political-threats.js';
import { atomicWrite } from '../../utils/file-utils.js';
 
import type { AnalysisMethod } from './analysis-stage.js';
 
// ─── Markdown constants ───────────────────────────────────────────────────────
 
/**
 * Placeholder Markdown table row for 6-column tables: six cells, each holding
 * a single em dash.
 */
export const EMPTY_TABLE_ROW_6 = '| — | — | — | — | — | — |';
 
// ─── Sanitization helpers ─────────────────────────────────────────────────────
 
/**
 * Make untrusted text safe to embed in a single Markdown table cell.
 *
 * Applied in order: backslashes are doubled, pipes are backslash-escaped,
 * `&`, `<` and `>` become HTML entities, every run of CR/LF characters is
 * collapsed to one space, and the result is trimmed. Backslashes are handled
 * first so that the escapes added for pipes are not escaped a second time.
 *
 * @param input - Untrusted cell text
 * @returns Text that will not break the table layout or inject markup
 */
export function sanitizeCell(input: string): string {
  // Order matters: each rule runs over the output of the previous one.
  const rules: ReadonlyArray<readonly [RegExp, string]> = [
    [/\\/g, '\\\\'],
    [/\|/g, '\\|'],
    [/&/g, '&amp;'],
    [/</g, '&lt;'],
    [/>/g, '&gt;'],
    [/[\r\n]+/g, ' '],
  ];

  let cell = input;
  for (const [pattern, replacement] of rules) {
    cell = cell.replace(pattern, replacement);
  }
  return cell.trim();
}
 
/**
 * Sanitize a document identifier for safe use as a filesystem filename.
 *
 * The identifier is lowercased, every run of characters outside `[a-z0-9]`
 * becomes a single hyphen, and one leading/trailing hyphen is stripped.
 *
 * - If nothing survives (e.g. the id is empty or purely punctuation), the
 *   result is `anon-<hash>` where `<hash>` is derived from the raw id.
 * - If the slug is longer than 80 characters, it is cut to 72 characters and
 *   a `-<hash>` suffix (at most 7 hash characters) is appended, so two ids
 *   sharing a long common prefix still map to different names.
 *
 * The hash is a deterministic 32-bit rolling hash of the raw `id`, rendered
 * in base 36, so the same id always yields the same filename.
 *
 * @param id - Raw document identifier (e.g. "TA-10-2026-0094", procedure reference)
 * @returns Filesystem-safe identifier string (max 80 chars)
 */
export function sanitizeDocumentId(id: string): string {
  const slug = id
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-/, '')
    .replace(/-$/, '');

  // Computed lazily: most identifiers need no hash at all.
  const hashOfRawId = (): string => {
    let hash = 0;
    for (let i = 0; i < id.length; i++) {
      // `| 0` keeps the accumulator in signed 32-bit range.
      hash = ((hash << 5) - hash + id.charCodeAt(i)) | 0;
    }
    return Math.abs(hash).toString(36);
  };

  if (!slug) {
    return `anon-${hashOfRawId().slice(0, 12)}`;
  }
  if (slug.length > 80) {
    // 72 + '-' + up to 7 hash chars keeps the result within 80 characters.
    return `${slug.slice(0, 72)}-${hashOfRawId().slice(0, 7)}`;
  }
  return slug;
}
 
// ─── Data coercion helpers ────────────────────────────────────────────────────
 
/**
 * All feed array keys that contain individually-analysable documents.
 *
 * Declared `as const`, so the value is a readonly tuple of string literals
 * and the key names can be used as a literal union type.
 */
export const DOCUMENT_FEED_KEYS = [
  'adoptedTexts',
  'procedures',
  'documents',
  'plenaryDocuments',
  'committeeDocuments',
  'plenarySessionDocuments',
  'externalDocuments',
  'events',
] as const;
 
/**
 * Derive a best-effort identifier for a raw feed item.
 *
 * Resolution order:
 * 1. The first of `docId`, `procedureId`, `id`, `eventId` that holds a
 *    non-empty string is returned verbatim.
 * 2. Otherwise, if `title` is a non-empty string, the first 50 characters of
 *    the title are combined with up to 8 characters of a hash of the item's
 *    JSON representation.
 * 3. Otherwise `anonymous-<hash>` is returned.
 *
 * The hash is a deterministic 32-bit rolling hash rendered in base 36, so the
 * same item content always produces the same identifier.
 *
 * @param item - Raw feed item object
 * @returns Best-effort identifier string
 */
export function extractDocumentId(item: Record<string, unknown>): string {
  const idFields = ['docId', 'procedureId', 'id', 'eventId'];
  for (let index = 0; index < idFields.length; index += 1) {
    const fieldName = idFields[index] as string;
    const candidate = item[fieldName]; // eslint-disable-line security/detect-object-injection -- keys are string literals
    if (typeof candidate === 'string' && candidate.length > 0) {
      return candidate;
    }
  }

  // Base-36 digest of the serialized item; only evaluated when no id field matched.
  const contentDigest = (): string => {
    const serialized = JSON.stringify(item);
    let acc = 0;
    let pos = 0;
    while (pos < serialized.length) {
      acc = ((acc << 5) - acc + serialized.charCodeAt(pos)) | 0;
      pos += 1;
    }
    return Math.abs(acc).toString(36);
  };

  const title = item['title'];
  if (typeof title === 'string' && title.length > 0) {
    return `${title.slice(0, 50)}-${contentDigest().slice(0, 8)}`;
  }
  return `anonymous-${contentDigest()}`;
}
 
/**
 * Extract a human-readable title from a raw feed item.
 *
 * Checks `title`, `label`, `name` and `description` in that order and returns
 * the first one that holds a non-empty string. An empty or non-string value
 * in an earlier field does not hide a usable value in a later one.
 *
 * @param item - Raw feed item object
 * @returns The first non-empty string field, or `'Untitled document'` when none exists
 */
export function extractDocumentTitle(item: Record<string, unknown>): string {
  for (const key of ['title', 'label', 'name', 'description']) {
    const candidate = item[key]; // eslint-disable-line security/detect-object-injection -- keys are string literals
    if (typeof candidate === 'string' && candidate.length > 0) return candidate;
  }
  return 'Untitled document';
}
 
/**
 * Read `data[key]` and return it only if it is an array.
 *
 * The stored array is returned as-is (not copied); any other value, including
 * a missing key, yields a fresh empty array.
 *
 * @param data - Raw fetched data record
 * @param key - Key to extract
 * @returns The array stored under `key`, or an empty array if missing/invalid
 */
export function safeArr(data: Record<string, unknown>, key: string): readonly unknown[] {
  const candidate: unknown = data[key]; // eslint-disable-line security/detect-object-injection -- key is a literal string from caller
  if (!Array.isArray(candidate)) {
    return [];
  }
  return candidate;
}
 
/**
 * View fetchedData as a ClassificationInput for the classification functions.
 *
 * This is a compile-time cast only: the same object is returned, with no
 * copying and no runtime validation of its shape.
 *
 * @param data - Raw fetched data record
 * @returns The same `data` object, typed as ClassificationInput
 */
export function toClassificationInput(data: Record<string, unknown>): ClassificationInput {
  const classificationInput = data as ClassificationInput;
  return classificationInput;
}
 
/**
 * Build a ThreatAssessmentInput for the threat assessment functions.
 *
 * A new object is assembled from six arrays read out of fetchedData via
 * `safeArr`, so a missing or non-array entry becomes an empty array. Two
 * fields are renamed on the way: `coalitions` feeds `coalitionData` and
 * `mepUpdates` feeds `mepInfluence`.
 *
 * @param data - Raw fetched data record
 * @returns ThreatAssessmentInput-compatible object
 */
export function toThreatInput(data: Record<string, unknown>): ThreatAssessmentInput {
  const arrayAt = (sourceKey: string): readonly unknown[] => safeArr(data, sourceKey);

  return {
    votingRecords: arrayAt('votingRecords'),
    coalitionData: arrayAt('coalitions'),
    mepInfluence: arrayAt('mepUpdates'),
    procedures: arrayAt('procedures'),
    anomalies: arrayAt('anomalies'),
    questions: arrayAt('questions'),
  };
}
 
// ─── Markdown helpers ─────────────────────────────────────────────────────────
 
/**
 * Build a YAML-frontmatter header block for analysis markdown files.
 *
 * The header consists of the `---` delimiters around five fields (`method`,
 * `date`, `confidence`, `generated`, `source`) followed by one empty line.
 * `generated` is the current time as an ISO-8601 string, so two calls with the
 * same arguments generally differ in that field. `source` is always `pipeline`.
 *
 * The header is assembled from explicit lines rather than a multi-line
 * template literal so that no stray indentation or trailing whitespace from
 * the source file can leak into the generated documents.
 *
 * @param method - Analysis method identifier
 * @param date - ISO date of the analysis
 * @param confidence - Confidence level for this result
 * @returns Markdown frontmatter string, ending with a blank separator line
 */
export function buildMarkdownHeader(
  method: AnalysisMethod | string,
  date: string,
  confidence: ConfidenceLevel
): string {
  const frontmatterLines = [
    '---',
    `method: ${method}`,
    `date: ${date}`,
    `confidence: ${confidence}`,
    `generated: ${new Date().toISOString()}`,
    'source: pipeline',
    '---',
  ];
  // One newline terminates the closing delimiter, the second adds the empty separator line.
  return `${frontmatterLines.join('\n')}\n\n`;
}
 
/**
 * Write a text file to disk.
 *
 * Thin wrapper that forwards both arguments unchanged to `atomicWrite` from
 * `utils/file-utils`; any write semantics and error behaviour come from that
 * helper (NOTE(review): presumably an atomic replace, going by its name —
 * confirm in file-utils).
 *
 * Used for both analysis markdown files and the analysis `manifest.json`.
 *
 * @param filePath - Absolute file path
 * @param content - File content as a UTF-8 string
 */
export function writeTextFile(filePath: string, content: string): void {
  atomicWrite(filePath, content);
}
 
// ─── Mermaid chart helpers ────────────────────────────────────────────────────
 
/**
 * Chart weights per known impact level.
 *
 * A Map is used instead of a plain object so that lookups never resolve
 * inherited properties such as `constructor` or `toString`.
 */
const IMPACT_LEVEL_SCORES: ReadonlyMap<string, number> = new Map([
  ['none', 5],
  ['low', 20],
  ['moderate', 45],
  ['high', 70],
  ['critical', 90],
]);

/**
 * Map an impact level to a numeric value for Mermaid pie charts.
 *
 * Matching is case-insensitive. Any level that is not one of the five known
 * names maps to the neutral default of 30.
 *
 * @param level - Impact level string (e.g. 'none', 'low', 'moderate', 'high', 'critical')
 * @returns Numeric value for chart rendering
 */
export function impactToNum(level: string): number {
  return IMPACT_LEVEL_SCORES.get(level.toLowerCase()) ?? 30;
}
 
/**
 * Pick the coloured indicator emoji for an impact level.
 *
 * Matching is case-insensitive: `high` and `critical` are red, `moderate` is
 * yellow, and every other value is green.
 *
 * @param level - Impact level string
 * @returns Emoji indicator
 */
export function impactIndicator(level: string): string {
  switch (level.toLowerCase()) {
    case 'high':
    case 'critical':
      return '🔴';
    case 'moderate':
      return '🟡';
    default:
      return '🟢';
  }
}
 
/**
 * Name the dimension of an impact matrix that carries the highest impact.
 *
 * Each dimension's level is scored with `impactToNum` and the dimensions are
 * ordered by descending score. When several dimensions tie, the one listed
 * first (Legislative, Coalition, Public Opinion, Institutional, Economic)
 * wins, because the sort is stable.
 *
 * @param matrix - Impact matrix with five dimension levels
 * @param matrix.legislativeImpact - Legislative impact level
 * @param matrix.coalitionImpact - Coalition impact level
 * @param matrix.publicOpinionImpact - Public opinion impact level
 * @param matrix.institutionalImpact - Institutional impact level
 * @param matrix.economicImpact - Economic impact level
 * @returns Display name of the top-scoring dimension
 */
export function highestImpactDimension(matrix: {
  legislativeImpact: string;
  coalitionImpact: string;
  publicOpinionImpact: string;
  institutionalImpact: string;
  economicImpact: string;
}): string {
  const dimensions = [
    { name: 'Legislative', level: matrix.legislativeImpact },
    { name: 'Coalition', level: matrix.coalitionImpact },
    { name: 'Public Opinion', level: matrix.publicOpinionImpact },
    { name: 'Institutional', level: matrix.institutionalImpact },
    { name: 'Economic', level: matrix.economicImpact },
  ];

  // Sorting the freshly built local array in place never touches caller data.
  dimensions.sort((left, right) => impactToNum(right.level) - impactToNum(left.level));

  return dimensions[0]?.name ?? 'N/A';
}
 
// ─── Substantive data check ───────────────────────────────────────────────────
 
/**
 * Keys in fetchedData that count as substantive EP data.
 *
 * Module-private; `hasSubstantiveData` iterates this list and reports data as
 * present when any of these keys holds a non-empty array.
 */
const SUBSTANTIVE_DATA_KEYS = [
  'events',
  'procedures',
  'adoptedTexts',
  'documents',
  'votingRecords',
  'coalitions',
  'questions',
  'mepUpdates',
  'plenaryDocuments',
  'committeeDocuments',
  'plenarySessionDocuments',
  'externalDocuments',
  'declarations',
  'corporateBodies',
] as const;
 
/**
 * Tell whether the fetched data holds any substantive EP data.
 *
 * The answer is `true` as soon as one key from `SUBSTANTIVE_DATA_KEYS` maps to
 * a non-empty array; missing keys and non-array values count as empty.
 * Intended as a gate so that analysis does not run on empty data.
 *
 * @param data - Raw fetched data record
 * @returns true if any substantive data is present
 */
export function hasSubstantiveData(data: Record<string, unknown>): boolean {
  return SUBSTANTIVE_DATA_KEYS.some((category) => safeArr(data, category).length > 0);
}
 
/**
 * Type for analysis method markdown builder functions.
 *
 * A builder receives the raw fetched data record and a date string and
 * returns a string (by the type's name, the markdown for one analysis method).
 */
export type MarkdownBuilder = (fetchedData: Record<string, unknown>, date: string) => string;