// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
/**
* @module Generators/Pipeline/AnalysisHelpers
* @description Shared utility functions for the analysis pipeline.
*
* Provides sanitization, data coercion, markdown formatting, and file I/O
* helpers consumed by the category-specific analysis builder modules
* (`analysis-classification.ts`, `analysis-threats.ts`, `analysis-risk.ts`,
* `analysis-existing.ts`) and the orchestrator (`analysis-stage.ts`).
*/
import type { ConfidenceLevel } from '../../types/index.js';
import type { ClassificationInput } from '../../types/political-classification.js';
import type { ThreatAssessmentInput } from '../../types/political-threats.js';
import { atomicWrite } from '../../utils/file-utils.js';
import type { AnalysisMethod } from './analysis-stage.js';
// ─── Markdown constants ───────────────────────────────────────────────────────
/**
 * Placeholder Markdown table row for 6-column tables: six cells, each
 * containing a single em dash.
 */
export const EMPTY_TABLE_ROW_6 = '| — | — | — | — | — | — |';
// ─── Sanitization helpers ─────────────────────────────────────────────────────
/**
 * Sanitize untrusted text for safe use in a Markdown table cell.
 *
 * Transformations are applied in this order:
 * 1. Backslashes are doubled first, so the backslash introduced by the pipe
 *    escape in the next step is not itself re-escaped.
 * 2. Pipe characters are backslash-escaped so they cannot open a new column.
 * 3. `&`, `<` and `>` are replaced by their HTML entities. The ampersand is
 *    handled first so the entities produced for `<` and `>` are not
 *    double-escaped.
 * 4. Every run of CR/LF characters becomes a single space, keeping the cell
 *    on one line.
 * 5. Leading and trailing whitespace is trimmed.
 *
 * @param input - Untrusted cell text
 * @returns Sanitized text safe for Markdown table cells
 */
export function sanitizeCell(input: string): string {
  return input
    .replace(/\\/g, '\\\\')
    .replace(/\|/g, '\\|')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/[\r\n]+/g, ' ')
    .trim();
}
/**
 * Sanitize a document identifier for safe use as a filesystem filename.
 *
 * The id is lowercased, every run of characters outside `a-z0-9` is replaced
 * by a single hyphen, and one leading / one trailing hyphen is stripped.
 *
 * - If nothing remains, the result is `anon-` followed by a deterministic
 *   hash of the raw input.
 * - If the sanitized id is longer than 80 characters, it is cut to its first
 *   72 characters and a hyphen plus up to 7 hash characters is appended, so
 *   ids sharing a long common prefix still map to different names.
 * - Otherwise the sanitized id is returned unchanged.
 *
 * @param id - Raw document identifier (e.g. "TA-10-2026-0094", procedure reference)
 * @returns Filesystem-safe identifier string (max 80 chars)
 */
export function sanitizeDocumentId(id: string): string {
  // Deterministic 32-bit rolling hash (hash * 31 + UTF-16 code unit) of the
  // RAW id, rendered in base 36. Hashing the raw input (not the sanitized
  // text) keeps ids that differ only in stripped characters distinguishable.
  const hashOfRawId = (): string => {
    let hash = 0;
    for (let i = 0; i < id.length; i++) {
      hash = ((hash << 5) - hash + id.charCodeAt(i)) | 0;
    }
    return Math.abs(hash).toString(36);
  };

  const full = id
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-/, '')
    .replace(/-$/, '');

  if (!full) {
    // Nothing filename-safe survived sanitization: fall back to the hash alone.
    return `anon-${hashOfRawId().slice(0, 12)}`;
  }

  if (full.length > 80) {
    // 72 chars of prefix + '-' + at most 7 hash chars stays within 80 chars.
    return `${full.slice(0, 72)}-${hashOfRawId().slice(0, 7)}`;
  }

  return full;
}
// ─── Data coercion helpers ────────────────────────────────────────────────────
/**
 * All feed array keys that contain individually-analysable documents.
 *
 * Declared `as const`, so the array is a readonly tuple and each element
 * keeps its string-literal type.
 */
export const DOCUMENT_FEED_KEYS = [
'adoptedTexts',
'procedures',
'documents',
'plenaryDocuments',
'committeeDocuments',
'plenarySessionDocuments',
'externalDocuments',
'events',
] as const;
/**
 * Derive the best available identifier for a raw feed item.
 *
 * Resolution order:
 * 1. The first non-empty string found under `docId`, `procedureId`, `id`,
 *    or `eventId` (checked in that order) is returned verbatim.
 * 2. Otherwise, if the item has a non-empty string `title`, the result is the
 *    first 50 characters of the title, a hyphen, and up to 8 base-36
 *    characters of a hash of the item's JSON representation.
 * 3. Otherwise the result is `anonymous-` followed by the full base-36 hash.
 *
 * The hash depends only on `JSON.stringify(item)`, so the same serialized
 * item always yields the same identifier.
 *
 * @param item - Raw feed item object
 * @returns Best-effort identifier string
 */
export function extractDocumentId(item: Record<string, unknown>): string {
  const idKeys = ['docId', 'procedureId', 'id', 'eventId'];
  for (const idKey of idKeys) {
    const candidate = item[idKey]; // eslint-disable-line security/detect-object-injection -- keys are string literals
    if (typeof candidate === 'string' && candidate.length > 0) {
      return candidate;
    }
  }

  const title = item['title'];

  // No explicit identifier: hash the serialized item. The item is serialized
  // only on this fallback path, never when an id key was found above.
  const serialized = JSON.stringify(item);
  let digest = 0;
  for (let pos = 0; pos < serialized.length; pos += 1) {
    // 32-bit wrapping `digest * 31 + codeUnit`.
    digest = (Math.imul(digest, 31) + serialized.charCodeAt(pos)) | 0;
  }
  const digestText = Math.abs(digest).toString(36);

  if (typeof title !== 'string' || title.length === 0) {
    return `anonymous-${digestText}`;
  }
  return `${title.slice(0, 50)}-${digestText.slice(0, 8)}`;
}
/**
 * Extract a human-readable title from a raw feed item.
 *
 * Returns the first non-empty string found under `title`, `label`, `name`,
 * or `description`, checked in that order. A field that is missing, empty,
 * or not a string is skipped, so an unusable `label` does not hide a usable
 * `name` or `description`.
 *
 * @param item - Raw feed item object
 * @returns Title string, or `'Untitled document'` when no field qualifies
 */
export function extractDocumentTitle(item: Record<string, unknown>): string {
  for (const key of ['title', 'label', 'name', 'description']) {
    const candidate = item[key]; // eslint-disable-line security/detect-object-injection -- keys are string literals
    if (typeof candidate === 'string' && candidate.length > 0) return candidate;
  }
  return 'Untitled document';
}
/**
 * Read `data[key]` and return it only if it is an array.
 *
 * The stored array is returned by reference (no copy). Any other value,
 * including a missing key, yields a fresh empty array.
 *
 * @param data - Raw fetched data record
 * @param key - Key to extract
 * @returns The array stored under `key`, or an empty array if missing/invalid
 */
export function safeArr(data: Record<string, unknown>, key: string): readonly unknown[] {
  const candidate = data[key]; // eslint-disable-line security/detect-object-injection -- key is a literal string from caller
  if (!Array.isArray(candidate)) {
    return [];
  }
  return candidate;
}
/**
 * Reinterpret fetchedData as ClassificationInput for the classification functions.
 *
 * This is a compile-time type assertion only: the same object reference is
 * returned, and nothing is validated or copied at runtime.
 *
 * @param data - Raw fetched data record
 * @returns The same `data` object, typed as ClassificationInput
 */
export function toClassificationInput(data: Record<string, unknown>): ClassificationInput {
return data as ClassificationInput;
}
/**
 * Build a ThreatAssessmentInput from fetchedData for the threat assessment functions.
 *
 * A new object is assembled; each field is read through `safeArr`, so a
 * missing or non-array source value becomes an empty array. Two fields are
 * read from differently named source keys: `coalitionData` comes from
 * `coalitions` and `mepInfluence` comes from `mepUpdates`.
 *
 * @param data - Raw fetched data record
 * @returns ThreatAssessmentInput-compatible object
 */
export function toThreatInput(data: Record<string, unknown>): ThreatAssessmentInput {
  const pick = (sourceKey: string): readonly unknown[] => safeArr(data, sourceKey);

  return {
    votingRecords: pick('votingRecords'),
    coalitionData: pick('coalitions'),
    mepInfluence: pick('mepUpdates'),
    procedures: pick('procedures'),
    anomalies: pick('anomalies'),
    questions: pick('questions'),
  };
}
// ─── Markdown helpers ─────────────────────────────────────────────────────────
/**
 * Build the YAML-frontmatter header block for an analysis markdown file.
 *
 * The block is delimited by `---` lines and contains, in order: `method`,
 * `date`, `confidence`, `generated` (the current time as an ISO-8601 string,
 * taken when this function runs) and the fixed entry `source: pipeline`.
 * The returned string ends with a newline after the closing delimiter.
 *
 * @param method - Analysis method identifier
 * @param date - ISO date of the analysis
 * @param confidence - Confidence level for this result
 * @returns Markdown frontmatter string
 */
export function buildMarkdownHeader(
  method: AnalysisMethod | string,
  date: string,
  confidence: ConfidenceLevel
): string {
  const frontmatterLines = [
    '---',
    `method: ${method}`,
    `date: ${date}`,
    `confidence: ${confidence}`,
    `generated: ${new Date().toISOString()}`,
    'source: pipeline',
    '---',
  ];
  return `${frontmatterLines.join('\n')}\n`;
}
/**
 * Write a text file to disk.
 *
 * Used for both analysis markdown files and the analysis `manifest.json`.
 * Thin wrapper that forwards both arguments unchanged to `atomicWrite` from
 * `utils/file-utils`; nothing is returned and no errors are caught here.
 * NOTE(review): the helper's name suggests an atomic write — confirm against
 * its implementation.
 *
 * @param filePath - Absolute file path
 * @param content - File content as a UTF-8 string
 */
export function writeTextFile(filePath: string, content: string): void {
atomicWrite(filePath, content);
}
// ─── Mermaid chart helpers ────────────────────────────────────────────────────
/**
 * Map an impact level to a numeric value for Mermaid pie charts.
 *
 * Matching is case-insensitive. Known levels map to: none → 5, low → 20,
 * moderate → 45, high → 70, critical → 90. Any other string yields 30.
 *
 * A `switch` is used rather than a plain-object lookup so that strings naming
 * inherited object properties (e.g. `"constructor"`) fall through to the
 * default instead of returning a non-numeric inherited value.
 *
 * @param level - Impact level string (e.g. 'none', 'low', 'moderate', 'high', 'critical')
 * @returns Numeric value for chart rendering
 */
export function impactToNum(level: string): number {
  switch (level.toLowerCase()) {
    case 'none':
      return 5;
    case 'low':
      return 20;
    case 'moderate':
      return 45;
    case 'high':
      return 70;
    case 'critical':
      return 90;
    default:
      return 30;
  }
}
/**
 * Pick the coloured indicator emoji for an impact level.
 *
 * Matching is case-insensitive: `high` and `critical` give the red circle,
 * `moderate` gives the yellow circle, and every other value gives the green
 * circle.
 *
 * @param level - Impact level string
 * @returns Emoji indicator
 */
export function impactIndicator(level: string): string {
  switch (level.toLowerCase()) {
    case 'high':
    case 'critical':
      return '🔴';
    case 'moderate':
      return '🟡';
    default:
      return '🟢';
  }
}
/**
 * Name the dimension of an impact matrix that carries the highest impact.
 *
 * Each dimension's level is scored with `impactToNum` and the dimensions are
 * sorted by descending score; the display name of the first entry is
 * returned. The candidate list is a fresh local array, so the `matrix`
 * argument is never mutated.
 *
 * @param matrix - Impact matrix with five dimension levels
 * @param matrix.legislativeImpact - Legislative impact level
 * @param matrix.coalitionImpact - Coalition impact level
 * @param matrix.publicOpinionImpact - Public opinion impact level
 * @param matrix.institutionalImpact - Institutional impact level
 * @param matrix.economicImpact - Economic impact level
 * @returns Name of the dimension with the highest impact score
 */
export function highestImpactDimension(matrix: {
  legislativeImpact: string;
  coalitionImpact: string;
  publicOpinionImpact: string;
  institutionalImpact: string;
  economicImpact: string;
}): string {
  const dimensions = [
    { name: 'Legislative', level: matrix.legislativeImpact },
    { name: 'Coalition', level: matrix.coalitionImpact },
    { name: 'Public Opinion', level: matrix.publicOpinionImpact },
    { name: 'Institutional', level: matrix.institutionalImpact },
    { name: 'Economic', level: matrix.economicImpact },
  ];

  // Highest score first.
  dimensions.sort((left, right) => impactToNum(right.level) - impactToNum(left.level));

  const [top] = dimensions;
  return top?.name ?? 'N/A';
}
// ─── Substantive data check ───────────────────────────────────────────────────
/**
 * Keys in fetchedData that count as substantive EP data.
 *
 * Module-private (not exported) and declared `as const`, so the array is a
 * readonly tuple of string literals. `hasSubstantiveData` iterates these keys.
 */
const SUBSTANTIVE_DATA_KEYS = [
'events',
'procedures',
'adoptedTexts',
'documents',
'votingRecords',
'coalitions',
'questions',
'mepUpdates',
'plenaryDocuments',
'committeeDocuments',
'plenarySessionDocuments',
'externalDocuments',
'declarations',
'corporateBodies',
] as const;
/**
 * Report whether the fetched data holds any substantive EP data.
 *
 * The answer is `true` as soon as one of the `SUBSTANTIVE_DATA_KEYS` entries
 * refers to a non-empty array in `data`; values that are missing or not
 * arrays count as empty. Used to gate analysis execution — analysis should
 * not run on empty data.
 *
 * @param data - Raw fetched data record
 * @returns true if any substantive data is present
 */
export function hasSubstantiveData(data: Record<string, unknown>): boolean {
  return SUBSTANTIVE_DATA_KEYS.some((key) => safeArr(data, key).length > 0);
}
/**
 * Type for analysis method markdown builder functions.
 *
 * A builder receives the raw fetched data record and a date string, and
 * returns a string.
 */
export type MarkdownBuilder = (fetchedData: Record<string, unknown>, date: string) => string;
|