generators synthesis-summary.ts

100% Statements 97/97
92.53% Branches 62/67
100% Functions 15/15
100% Lines 88/88
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8x
 
 
 
 
 
 
 
8x
 
 
 
 
 
 
 
8x
 
 
 
 
 
 
8x
 
 
8x
 
 
 
 
 
 
 
 
 
 
 
 
 
69x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94x
94x
 
92x
94x
94x
94x
 
94x
94x
94x
94x
 
94x
 
 
 
 
 
 
 
 
 
 
 
 
 
84x
84x
84x
 
 
 
 
 
 
 
 
 
11x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92x
 
 
92x
92x
 
 
3x
3x
4x
4x
2x
 
 
 
1x
 
 
 
 
 
 
 
 
 
 
 
 
 
12x
 
10x
10x
97x
 
 
10x
7x
1x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41x
41x
 
40x
40x
137x
137x
 
31x
28x
106x
 
93x
92x
 
 
 
40x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12x
 
12x
2x
2x
 
 
 
91x
10x
7x
 
 
 
 
 
10x
6x
 
 
 
 
 
10x
10x
7x
7x
5x
 
 
 
 
 
 
10x
4x
 
 
6x
5x
 
 
 
 
10x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8x
 
8x
8x
 
8x
88x
88x
88x
 
88x
 
 
 
 
 
 
 
 
 
8x
235x
 
 
8x
8x
8x
8x
 
8x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8x
 
 
23x
 
 
 
8x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17x
 
 
  // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
 
/**
 * @module Generators/SynthesisSummary
 * @description Aggregation engine that reads per-file analysis outputs and
 * produces a synthesis summary — a single intelligence briefing consumed by
 * article generators to determine narrative direction, headline selection,
 * and publication priority.
 *
 * The synthesiser:
 * 1. Scans the analysis date directory for markdown files
 * 2. Extracts YAML frontmatter (method, confidence) from each
 * 3. Counts SWOT mentions and risk-level keywords
 * 4. Ranks findings by confidence and produces editorial recommendations
 *
 * @see analysis/templates/synthesis-summary.md
 */
 
import fs from 'fs';
import path from 'path';
import { randomUUID } from 'crypto';
import type { ConfidenceLevel } from '../types/analysis.js';
import type {
  SynthesisFinding,
  AggregatedSWOT,
  RiskOverview,
  SynthesisSummary,
} from '../types/significance.js';
 
// ─── Constants ────────────────────────────────────────────────────────────────
 
/**
 * Patterns used to count SWOT mentions in analysis text, keyed by SWOT dimension.
 *
 * Each pattern is case-insensitive (`i`) and global (`g`), so every occurrence
 * in the text is counted. Only a leading word boundary is required, which means
 * inflected forms (e.g. "strengths", "strengthen", "opportunities") match too.
 */
const SWOT_PATTERNS: Readonly<Record<keyof AggregatedSWOT, RegExp>> = {
  strengths: /\bstrength/giu,
  weaknesses: /\bweakness/giu,
  opportunities: /\bopportunit/giu,
  threats: /\bthreat/giu,
};
 
/**
 * Patterns used to count risk-level mentions in analysis text, keyed by risk level.
 *
 * All patterns are case-insensitive (`i`) and global (`g`). Note the asymmetry:
 * `critical` matches the bare whole word "critical", whereas `high`, `medium`
 * and `low` only match when followed by "-risk" or " risk" (e.g. "high-risk").
 */
const RISK_PATTERNS: Readonly<Record<keyof RiskOverview, RegExp>> = {
  critical: /\bcritical\b/giu,
  high: /\bhigh[- ]risk\b/giu,
  medium: /\bmedium[- ]risk\b/giu,
  low: /\blow[- ]risk\b/giu,
};
 
/**
 * Numeric rank for each confidence level (higher = better).
 *
 * Used as the sort key when ordering findings from most to least confident.
 */
const CONFIDENCE_RANK: Readonly<Record<ConfidenceLevel, number>> = {
  high: 3,
  medium: 2,
  low: 1,
};
 
/**
 * Filename of the synthesis output itself.
 *
 * The directory scan skips any file with this exact name so that a previously
 * generated summary is never fed back into the aggregation on a re-run.
 */
const SYNTHESIS_OUTPUT_FILENAME = 'synthesis-summary.md';
 
/**
 * Name of the subdirectory holding per-document analysis.
 *
 * The directory scan does not descend into any directory with this exact name,
 * to avoid I/O bloat and skewed aggregation.
 */
const DOCUMENTS_SUBDIR = 'documents';
 
// ─── Markdown sanitization ────────────────────────────────────────────────────
 
/**
 * Make untrusted text safe to embed in a single Markdown table cell.
 *
 * Backslashes and pipes are backslash-escaped, `&`, `<` and `>` become HTML
 * entities, every run of CR/LF characters collapses to one space, and the
 * result is trimmed — so the value can neither break the table layout nor
 * inject markup.
 *
 * @param input - Untrusted cell text
 * @returns Text that is safe to place between `|` delimiters
 */
function sanitizeMdCell(input: string): string {
  // One replacement per special character. A single pass is sufficient because
  // no replacement text introduces a character that a later rule would need to
  // rewrite differently from how it is written here.
  const replacements: Readonly<Record<string, string>> = {
    '\\': '\\\\',
    '|': '\\|',
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
  };

  const escaped = input.replace(/[\\|&<>]/gu, (ch) => replacements[ch] ?? ch);
  const singleLine = escaped.replace(/[\r\n]+/gu, ' ');
  return singleLine.trim();
}
 
// ─── Frontmatter extraction ───────────────────────────────────────────────────
 
/**
 * Parsed YAML frontmatter fields relevant to synthesis.
 *
 * This is the shape returned by `parseFrontmatter`; every field is always
 * populated, with a default substituted when the key is absent.
 */
interface ParsedFrontmatter {
  /** Value of the `method` key; `'unknown'` when the key is absent. */
  readonly method: string;
  /** Normalised `confidence` key; anything other than `high`/`medium` becomes `'low'`. */
  readonly confidence: ConfidenceLevel;
  /** Value of the `date` key as written in the file; empty string when absent. */
  readonly date: string;
}
 
/**
 * Parse YAML frontmatter from a markdown file's content.
 *
 * Extracts the `method`, `confidence`, and `date` fields from the `---`
 * delimited block at the very start of the file. Only plain single-line
 * `key: value` scalars are understood; this is not a general YAML parser.
 *
 * Field handling:
 * - The value must sit on the same line as its key. A key with no value
 *   (e.g. `method:` followed by a newline) is treated as absent rather than
 *   swallowing the following line.
 * - One pair of matching surrounding quotes (`"…"` or `'…'`) is removed, so
 *   `confidence: "high"` is recognised as `high`.
 * - `method` defaults to `'unknown'`, `date` to `''`, and `confidence` to
 *   `'low'` (also used for any unrecognised confidence value).
 *
 * @param content - Raw markdown content
 * @returns Parsed frontmatter, or null when no frontmatter block is found
 */
export function parseFrontmatter(content: string): ParsedFrontmatter | null {
  const match = /^---\r?\n([\s\S]*?)\r?\n---/u.exec(content);
  if (!match) return null;

  const yaml = match[1] ?? '';

  // Runs a per-key pattern against the YAML block and returns the cleaned
  // scalar, or undefined when the key is missing or its value is empty.
  const readScalar = (pattern: RegExp): string | undefined => {
    const raw = pattern.exec(yaml)?.[1]?.trim();
    if (raw === undefined) return undefined;
    const quoted = /^(["'])(.*)\1$/u.exec(raw);
    const value = quoted ? (quoted[2] ?? '').trim() : raw;
    return value.length > 0 ? value : undefined;
  };

  // `[^\S\r\n]+` = horizontal whitespace only, so a match can never continue
  // onto the next line (plain `\s+` would, capturing the next key as the value).
  const method = readScalar(/^method:[^\S\r\n]+(\S.*)$/mu) ?? 'unknown';
  const rawConf = readScalar(/^confidence:[^\S\r\n]+(\S.*)$/mu)?.toLowerCase() ?? 'low';
  const confidence: ConfidenceLevel = rawConf === 'high' || rawConf === 'medium' ? rawConf : 'low';
  const date = readScalar(/^date:[^\S\r\n]+(\S.*)$/mu) ?? '';

  return { method, confidence, date };
}
 
// ─── Text analysis ────────────────────────────────────────────────────────────
 
/**
 * Count how many times a pattern occurs in a body of text.
 *
 * The count is always the number of (non-overlapping) matches. A pattern
 * supplied without the global flag is promoted to a global copy first;
 * otherwise `String.prototype.match` would return a single match plus its
 * capture groups, and the array length would not be a match count.
 *
 * @param text - Source text to scan
 * @param pattern - Pattern to count; normally carries the global flag
 * @returns Number of matches (0 when there are none)
 */
function countMatches(text: string, pattern: RegExp): number {
  const globalPattern = pattern.global
    ? pattern
    : new RegExp(pattern.source, `${pattern.flags}g`);

  // Defensive: shared global regexps carry `lastIndex` between uses.
  globalPattern.lastIndex = 0;

  const matches = text.match(globalPattern);
  return matches ? matches.length : 0;
}
 
/**
 * Tally SWOT mentions in a body of text.
 *
 * Each dimension is counted independently with its entry in `SWOT_PATTERNS`.
 *
 * @param text - Combined analysis text
 * @returns Mention count per SWOT dimension
 */
export function aggregateSWOT(text: string): AggregatedSWOT {
  const tally = (dimension: keyof AggregatedSWOT): number =>
    countMatches(text, SWOT_PATTERNS[dimension]);

  const strengths = tally('strengths');
  const weaknesses = tally('weaknesses');
  const opportunities = tally('opportunities');
  const threats = tally('threats');

  return { strengths, weaknesses, opportunities, threats };
}
 
/**
 * Tally risk-level mentions in a body of text.
 *
 * Each level is counted independently with its entry in `RISK_PATTERNS`.
 *
 * @param text - Combined analysis text
 * @returns Mention count per risk level
 */
export function aggregateRisks(text: string): RiskOverview {
  const tally = (level: keyof RiskOverview): number =>
    countMatches(text, RISK_PATTERNS[level]);

  const critical = tally('critical');
  const high = tally('high');
  const medium = tally('medium');
  const low = tally('low');

  return { critical, high, medium, low };
}
 
/**
 * Extract a one-line summary from a markdown file.
 *
 * Resolution order, after removing any leading frontmatter block:
 * 1. The text of the first heading found anywhere in the body (untruncated).
 * 2. Otherwise the first non-empty line that is not a table row, a code-fence
 *    marker, or a bare run of `#` characters — truncated to 200 characters.
 * 3. Otherwise the literal `'No summary available'`.
 *
 * @param content - Raw markdown content
 * @returns One-line summary string
 */
export function extractSummaryLine(content: string): string {
  // Strip frontmatter. The closing `---` must begin a line, so a `---` that
  // merely appears inside a YAML value does not end the block early and leak
  // the remainder of the frontmatter into the summary.
  const body = content.replace(/^---[^\S\r\n]*\r?\n(?:[\s\S]*?\r?\n)??---\s*/u, '');

  // First heading: one or more `#`, at least one space/tab on the same line,
  // then the heading text.
  const headingMatch = /^#+[^\S\r\n]+(\S.*)$/mu.exec(body);
  if (headingMatch?.[1]) return headingMatch[1].trim();

  // Fall back to the first meaningful line.
  for (const line of body.split('\n')) {
    const trimmed = line.trim();
    if (trimmed.length === 0) continue;
    if (trimmed.startsWith('|') || trimmed.startsWith('```')) continue;
    // An empty heading marker ("#", "##", …) carries no summary text.
    if (/^#+$/u.test(trimmed)) continue;
    return trimmed.slice(0, 200);
  }

  return 'No summary available';
}
 
// ─── Confidence aggregation ──────────────────────────────────────────────────
 
/**
 * Derive one overall confidence level from a set of findings.
 *
 * The level held by the most findings wins. Ties resolve towards the higher
 * level (high over medium over low). An empty set yields `'low'`.
 *
 * @param findings - Findings with individual confidence levels
 * @returns Aggregated confidence level
 */
export function aggregateConfidence(findings: readonly SynthesisFinding[]): ConfidenceLevel {
  if (findings.length === 0) return 'low';

  const votesFor = (level: ConfidenceLevel): number =>
    findings.reduce((votes, finding) => (finding.confidence === level ? votes + 1 : votes), 0);

  const highVotes = votesFor('high');
  const mediumVotes = votesFor('medium');
  const lowVotes = votesFor('low');

  const highLeads = highVotes >= mediumVotes && highVotes >= lowVotes;
  if (highLeads) return 'high';

  return mediumVotes >= lowVotes ? 'medium' : 'low';
}
 
// ─── Directory scanning ──────────────────────────────────────────────────────
 
/**
 * Recursively find all `.md` analysis files under a directory.
 *
 * Excludes:
 * - The synthesis output file itself (prevents self-contamination on re-runs)
 * - The `documents/` subdirectory (per-document analysis can bloat I/O and skew aggregation)
 *
 * Entries are visited in code-unit name order at every level, so the returned
 * list is the same on every platform and run. The order in which the
 * operating system enumerates a directory is not relied upon; downstream
 * ranking keeps this order for findings of equal confidence.
 *
 * @param dir - Absolute directory path
 * @returns Array of absolute file paths (empty when `dir` does not exist)
 */
export function findMarkdownFiles(dir: string): readonly string[] {
  const results: string[] = [];
  if (!fs.existsSync(dir)) return results;

  // Sort by name with a plain code-unit comparison (locale-independent).
  const entries = fs
    .readdirSync(dir, { withFileTypes: true })
    .sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      // Skip the documents/ subdirectory to avoid per-document analysis bloat
      if (entry.name === DOCUMENTS_SUBDIR) continue;
      // Append one by one rather than spreading into push(), which is subject
      // to the engine's argument-count limit on very large result sets.
      for (const nested of findMarkdownFiles(fullPath)) {
        results.push(nested);
      }
    } else if (entry.isFile() && entry.name.endsWith('.md')) {
      // Skip the synthesis output itself to prevent self-contamination
      if (entry.name === SYNTHESIS_OUTPUT_FILENAME) continue;
      results.push(fullPath);
    }
  }

  return results;
}
 
// ─── Editorial recommendations ───────────────────────────────────────────────
 
/**
 * Turn aggregated analysis data into editorial guidance.
 *
 * Up to four independent notes are emitted, in this order:
 * 1. how many high-confidence findings exist (when at least one does),
 * 2. how many critical-risk mentions were seen (when at least one was),
 * 3. a warning when threats make up more than 40% of all SWOT mentions,
 * 4. a volume hint — multi-article for 10+ findings, single digest for 2 or fewer.
 *
 * With no findings at all, a single pipeline-verification note is returned.
 *
 * @param findings - Ranked findings
 * @param swot - Aggregated SWOT counts
 * @param risks - Risk level distribution
 * @returns Array of recommendation strings
 */
export function generateEditorialRecommendations(
  findings: readonly SynthesisFinding[],
  swot: AggregatedSWOT,
  risks: RiskOverview
): readonly string[] {
  const findingCount = findings.length;
  if (findingCount === 0) {
    return ['No analysis files found — verify pipeline execution.'];
  }

  const notes: string[] = [];

  // Lead-story candidates
  let highConfidence = 0;
  for (const finding of findings) {
    if (finding.confidence === 'high') highConfidence += 1;
  }
  if (highConfidence > 0) {
    notes.push(`${highConfidence} high-confidence finding(s) available for lead story selection.`);
  }

  // Critical risk flag
  if (risks.critical > 0) {
    notes.push(`${risks.critical} critical-risk mention(s) detected — consider priority coverage.`);
  }

  // SWOT balance: threats above 40% of all SWOT mentions
  const swotMentions = swot.strengths + swot.weaknesses + swot.opportunities + swot.threats;
  const threatHeavy = swotMentions > 0 && swot.threats / swotMentions > 0.4;
  if (threatHeavy) {
    notes.push('Threat-heavy SWOT balance — narrative may benefit from opportunity framing.');
  }

  // Output volume hint
  if (findingCount >= 10) {
    notes.push(`${findingCount} analysis files processed — consider multi-article output.`);
  } else if (findingCount <= 2) {
    notes.push('Limited analysis coverage — consider consolidating into a single digest article.');
  }

  return notes;
}
 
// ─── Main synthesis ──────────────────────────────────────────────────────────
 
/**
 * Build a synthesis summary from all analysis files in a date directory.
 *
 * Steps:
 * 1. Collect the `.md` analysis files under `dateOutputDir`.
 * 2. Read each file, turning it into one finding (method and confidence from
 *    its frontmatter, with `'unknown'` / `'low'` fallbacks, plus a one-line
 *    summary) and appending its full text to a combined corpus.
 * 3. Order the findings from highest to lowest confidence.
 * 4. Count SWOT and risk mentions over the combined corpus, derive the overall
 *    confidence, and generate editorial recommendations.
 *
 * Only the first five ranked findings are included in the result, while
 * `documentsAnalyzed` reports the total number of files read.
 *
 * @param dateOutputDir - Absolute path to the date-scoped analysis directory
 * @param date - ISO date string (YYYY-MM-DD)
 * @returns Synthesis summary object
 */
export function buildSynthesisSummary(dateOutputDir: string, date: string): SynthesisSummary {
  const analysisFiles = findMarkdownFiles(dateOutputDir);

  // Raw file contents, each followed by a newline, joined into one corpus below.
  const corpusParts: string[] = [];

  const rankedFindings = analysisFiles.map((absolutePath): SynthesisFinding => {
    const raw = fs.readFileSync(absolutePath, 'utf-8');
    const meta = parseFrontmatter(raw);
    corpusParts.push(raw, '\n');

    return {
      method: meta?.method ?? 'unknown',
      file: path.basename(absolutePath),
      confidence: meta?.confidence ?? 'low',
      summary: extractSummaryLine(raw),
    };
  });

  // Highest confidence first; the report heading "Top Findings by Confidence"
  // relies on this ordering.
  const rankOf = (finding: SynthesisFinding): number => CONFIDENCE_RANK[finding.confidence] ?? 0;
  rankedFindings.sort((left, right) => rankOf(right) - rankOf(left));

  const corpus = corpusParts.join('');
  const swot = aggregateSWOT(corpus);
  const riskOverview = aggregateRisks(corpus);
  const overallConfidence = aggregateConfidence(rankedFindings);
  const editorialRecommendations = generateEditorialRecommendations(
    rankedFindings,
    swot,
    riskOverview
  );

  const idSuffix = randomUUID().slice(0, 8).toUpperCase();

  return {
    synthesisId: `SYN-${date}-${idSuffix}`,
    date,
    documentsAnalyzed: analysisFiles.length,
    overallConfidence,
    topFindings: rankedFindings.slice(0, 5),
    swot,
    riskOverview,
    editorialRecommendations,
  };
}
 
/**
 * Generate a markdown report from a synthesis summary.
 *
 * Follows the template format defined in `analysis/templates/synthesis-summary.md`.
 *
 * Notes:
 * - File, method and summary cells are sanitised for table safety.
 * - Summary text is limited to 80 characters, counted in Unicode code points
 *   so an emoji or other astral character is never cut in half.
 * - When there are no findings, a single placeholder row is emitted.
 * - The `generated` frontmatter field is the current time, so the output is
 *   not byte-identical between calls.
 *
 * @param summary - Computed synthesis summary
 * @returns Markdown string
 */
export function formatSynthesisMarkdown(summary: SynthesisSummary): string {
  const maxSummaryChars = 80;

  // Array.from iterates a string by code point, keeping surrogate pairs intact;
  // String.prototype.slice counts UTF-16 code units and could split one.
  const truncate = (text: string): string =>
    Array.from(text).slice(0, maxSummaryChars).join('');

  const findingsRows = summary.topFindings
    .map(
      (f, i) =>
        `| ${i + 1} | ${sanitizeMdCell(f.file)} | ${sanitizeMdCell(f.method)} | ${f.confidence} | ${sanitizeMdCell(truncate(f.summary))} |`
    )
    .join('\n');

  return `---
method: synthesis-summary
date: ${summary.date}
confidence: ${summary.overallConfidence}
generated: ${new Date().toISOString()}
---

# 🧩 Synthesis Summary — ${summary.date}

## 📋 Synthesis Context

| Field | Value |
|-------|-------|
| **Synthesis ID** | \`${summary.synthesisId}\` |
| **Analysis Date** | \`${summary.date}\` |
| **Documents Analyzed** | ${summary.documentsAnalyzed} |
| **Overall Confidence** | ${summary.overallConfidence.toUpperCase()} |

---

## 🏆 Top Findings by Confidence

| Rank | File | Method | Confidence | Summary |
|:----:|------|--------|:----------:|---------|
${findingsRows || '| — | — | — | — | — |'}

---

## 💪 Aggregated SWOT Summary

| Dimension | Count |
|-----------|:-----:|
| ✅ Strengths | ${summary.swot.strengths} |
| ⚠️ Weaknesses | ${summary.swot.weaknesses} |
| 🚀 Opportunities | ${summary.swot.opportunities} |
| 🔴 Threats | ${summary.swot.threats} |

---

## ⚖️ Risk Landscape Summary

| Level | Mentions |
|-------|:--------:|
| 🔴 Critical | ${summary.riskOverview.critical} |
| 🟠 High | ${summary.riskOverview.high} |
| 🟡 Medium | ${summary.riskOverview.medium} |
| 🟢 Low | ${summary.riskOverview.low} |

---

## 🎯 Editorial Recommendations

${summary.editorialRecommendations.map((r) => `- ${r}`).join('\n')}
`;
}