aggregator prior-run-diff.js

65.81% Statements 77/117
65.47% Branches 55/84
71.42% Functions 10/14
69.81% Lines 74/106
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1x
1x
 
 
 
1x
 
1x
 
 
 
 
 
 
 
1x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12x
12x
 
1x
 
 
 
 
 
 
 
 
 
 
 
 
19x
19x
 
 
 
17x
81x
 
 
 
11x
 
 
 
16x
9x
9x
16x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29x
29x
10x
 
19x
19x
 
19x
2x
 
17x
1x
 
16x
2x
 
14x
 
 
 
 
 
 
 
 
 
 
 
12x
12x
 
12x
 
12x
 
 
 
 
 
12x
1x
 
 
 
 
 
 
 
 
 
11x
12x
4x
 
 
 
 
 
 
 
 
 
 
7x
7x
 
12x
12x
 
 
 
12x
 
12x
12x
 
12x
22x
22x
22x
12x
 
 
 
 
 
 
10x
 
 
 
 
 
 
 
 
7x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7x
 
 
 
 
 
 
 
 
 
22x
13x
13x
 
 
9x
9x
5x
 
 
4x
1x
 
 
3x
1x
 
 
3x
5x
4x
 
 
 
 
7x
7x
7x
9x
 
 
7x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1x
1x
 
1x
 
 
  #!/usr/bin/env node
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
 
/**
 * Prior-run diff helper for the re-run merge rule.
 *
 * Reads `manifest.json.history[]` from a same-day analysis folder and
 * classifies every artifact as **at-floor** (carry-forward) or **below-floor**
 * (rewrite). The result — a `priorRunDiff` plan — is written to stdout as
 * JSON and can be consumed by Stage A of the analysis workflow.
 *
 * Controlled by the `ENABLE_PRIOR_RUN_MERGE` environment variable:
 *   - `ENABLE_PRIOR_RUN_MERGE=true`  → normal operation (produce plan)
 *   - unset / any other value        → short-circuit: emit plan with
 *                                       `enabled: false` and empty arrays
 *
 * Invocation:
 *   node scripts/aggregator/prior-run-diff.js <runDir> [--thresholds <path>]
 *   npm run prior-run-diff -- analysis/daily/2026-04-26/week-in-review
 *
 * Exit codes:
 *   0  — plan emitted successfully (or feature disabled)
 *   1  — runDir missing or invalid
 *   2  — bad CLI usage
 *
 * Output (stdout, JSON):
 * ```json
 * {
 *   "enabled": true,
 *   "runDir": "analysis/daily/2026-04-26/week-in-review",
 *   "articleType": "week-in-review",
 *   "priorRunId": "week-in-review-run-1714128000",
 *   "carryForward": [
 *     {
 *       "relativePath": "intelligence/synthesis-summary.md",
 *       "lines": 250,
 *       "floor": 180,
 *       "source": "carry-forward-from:week-in-review-run-1714128000"
 *     }
 *   ],
 *   "rewrite": [
 *     {
 *       "relativePath": "intelligence/stakeholder-map.md",
 *       "lines": 120,
 *       "floor": 240,
 *       "reason": "short:120<240"
 *     }
 *   ]
 * }
 * ```
 *
 * The `source` value on each carry-forward entry follows the schema:
 *   `"carry-forward-from:<runId>"`
 * which Stage B writes into `manifest.json.artifactSources` (additive,
 * backward-compatible with the existing schema).
 */
 
import fs from 'node:fs';
import path from 'node:path';
import process from 'node:process';
import { fileURLToPath } from 'node:url';
 
// Base directory against which the CLI's <runDir> and the thresholds file are
// resolved; also the base used to relativise `runDir` in the emitted plan.
const ROOT = process.cwd();
// Lower bound applied to every artifact's line floor, even when the
// thresholds file declares a smaller value (or none at all).
const DEFAULT_MIN_LINES = 30;
 
// Artifacts that must contain at least one Mermaid fenced block.
// Mirrors the directory-based heuristic in validate-analysis-completeness.js.
const DIAGRAM_DIRS = new Set(['intelligence', 'classification', 'risk-scoring', 'threat-assessment']);
 
// Markers of unfinished content. An artifact matching any of these is
// classified as below-floor with reason `placeholders`.
const PLACEHOLDER_PATTERNS = [
  /\[AI_ANALYSIS_REQUIRED\]/,
  /AI_ANALYSIS_PENDING/,
  /\[TO BE FILLED\]/,
  /\[TBD\]/i,
  /^TODO:/m,
];
 
// Content matching this hint is exempt from the placeholder scan
// (`hasPlaceholders` returns false for it without testing the patterns above).
const META_DOC_HINT_RE = /(template-instructions|placeholder reference|TODO list of)/i;
 
// ─── helpers ────────────────────────────────────────────────────────────────
 
/**
 * Print the CLI help text to stderr and terminate the process.
 *
 * The text documents every argument `parseArgs` accepts, including the
 * optional `--thresholds <path>` override and the `-h` / `--help` flags.
 *
 * @param {number} [code=2] - Process exit code (2 = bad usage, 0 = help requested).
 */
function usage(code = 2) {
  const msg = [
    'Usage: node scripts/aggregator/prior-run-diff.js [options] <runDir>',
    '',
    '  <runDir>   Path to analysis/daily/<date>/<slug>/',
    '',
    'Options:',
    '  --thresholds <path>   Thresholds JSON to use instead of',
    '                        analysis/methodologies/reference-quality-thresholds.json',
    '  -h, --help            Show this help and exit',
    '',
    'Environment:',
    '  ENABLE_PRIOR_RUN_MERGE=true   Enable the carry-forward classifier',
    '                                (default: disabled — emits empty plan)',
    '',
    'Example:',
    '  npm run prior-run-diff -- analysis/daily/2026-04-26/week-in-review',
  ].join('\n');
  process.stderr.write(`${msg}\n`);
  process.exit(code);
}
 
/**
 * Turn `process.argv` into the options object used by `main`.
 *
 * Accepts exactly one positional `<runDir>`, an optional
 * `--thresholds <path>` pair, and `-h` / `--help`. Any malformed
 * combination is routed to `usage`, which terminates the process.
 *
 * @param {string[]} argv - Full argv array (the first two entries are skipped).
 * @returns {{ runDir: string, thresholdsPath: string|null }} Parsed options.
 */
function parseArgs(argv) {
  const tokens = argv.slice(2);
  if (tokens.length === 0) usage(2);

  let runDir = null;
  let thresholdsPath = null;
  let cursor = 0;

  while (cursor < tokens.length) {
    const token = tokens[cursor];
    cursor += 1;

    switch (token) {
      case '--help':
      case '-h':
        usage(0);
        break;
      case '--thresholds':
        // The flag consumes the following token as its value.
        thresholdsPath = tokens[cursor];
        if (!thresholdsPath) usage(2);
        cursor += 1;
        break;
      default:
        // First positional wins; a second one is a usage error.
        if (runDir) {
          usage(2);
        } else {
          runDir = token;
        }
    }
  }

  if (!runDir) usage(2);
  return { runDir, thresholdsPath };
}
 
/**
 * Best-effort JSON loader: read `filePath` as UTF-8 and parse it.
 *
 * Any failure (unreadable file, invalid JSON) is deliberately collapsed
 * into `null` so callers can treat "absent" and "broken" the same way.
 *
 * @param {string} filePath - Path of the JSON file to read.
 * @returns {*} The parsed value, or `null` when reading or parsing fails.
 */
function safeReadJson(filePath) {
  let parsed = null;
  try {
    const raw = fs.readFileSync(filePath, 'utf8');
    parsed = JSON.parse(raw);
  } catch {
    parsed = null;
  }
  return parsed;
}
 
/**
 * Load the quality-thresholds JSON, either from a caller-supplied path or
 * from the repository default, both resolved against `ROOT`.
 *
 * @param {string|null|undefined} customPath - Optional override path.
 * @returns {*} Parsed thresholds, or `null` when the file is absent or unparsable.
 */
function loadThresholds(customPath) {
  const target = customPath || 'analysis/methodologies/reference-quality-thresholds.json';
  const resolved = path.resolve(ROOT, target);
  return fs.existsSync(resolved) ? safeReadJson(resolved) : null;
}
 
/**
 * Count the lines in a text blob, splitting on LF or CRLF.
 *
 * The count is the number of segments produced by the split, so text that
 * ends with a newline contributes one extra (empty) trailing segment.
 *
 * @param {string} content - File content.
 * @returns {number} 0 for empty/falsy content, otherwise the segment count.
 */
function countLines(content) {
  if (!content) return 0;
  return content.split(/\r?\n/).length;
}
 
/**
 * Report whether the content still contains unfinished-work markers.
 *
 * Content matching `META_DOC_HINT_RE` is exempt and always yields `false`;
 * otherwise the result is whether any `PLACEHOLDER_PATTERNS` entry matches.
 *
 * @param {string} content - File content.
 * @returns {boolean} `true` when at least one placeholder pattern matches.
 */
function hasPlaceholders(content) {
  if (META_DOC_HINT_RE.test(content)) return false;
  return PLACEHOLDER_PATTERNS.some((re) => re.test(content));
}
 
/**
 * Detect an opening Mermaid code fence.
 *
 * The fence must begin at the start of the content or directly after a
 * newline, and `mermaid` (case-insensitive) must be followed by whitespace.
 *
 * @param {string} content - File content.
 * @returns {boolean} `true` when such a fence opener is present.
 */
function hasMermaid(content) {
  const fenceOpener = /(^|\n)```mermaid\s/i;
  return fenceOpener.exec(content) !== null;
}
 
/**
 * Decide whether an artifact is expected to contain a Mermaid diagram.
 *
 * An artifact qualifies when it is listed explicitly in
 * `mermaidRequiredList`, or when its first path segment (before the first
 * `/` or `\`) is one of `DIAGRAM_DIRS`.
 *
 * @param {string} relativePath - Artifact path relative to the run folder.
 * @param {string[]} [mermaidRequiredList] - Explicit list of paths needing a diagram.
 * @returns {boolean} `true` when a Mermaid block is required.
 */
function isDiagramRequired(relativePath, mermaidRequiredList) {
  const listedExplicitly = mermaidRequiredList && mermaidRequiredList.includes(relativePath);
  if (listedExplicitly) {
    return true;
  }

  // Treat backslashes as separators so Windows-style paths bucket the same way.
  const normalised = relativePath.replace(/\\/g, '/');
  const firstSlash = normalised.indexOf('/');
  const topLevelDir = firstSlash < 0 ? '' : normalised.slice(0, firstSlash);
  return DIAGRAM_DIRS.has(topLevelDir);
}
 
// ─── classifier ─────────────────────────────────────────────────────────────
 
/**
 * Classify a single artifact as at-floor (carry-forward) or below-floor
 * (rewrite).
 *
 * Checks run in this order and the first failure decides the reason:
 *   1. the path is an existing regular file  → otherwise `missing`
 *   2. line count is at least `floor`         → otherwise `short:<lines><<floor>`
 *   3. no placeholder markers                 → otherwise `placeholders`
 *   4. a Mermaid block is present if required → otherwise `mermaid:missing`
 *
 * A path that exists but is not a regular file (for example a directory
 * named by a manifest entry) is reported as `missing` rather than letting
 * the read throw and abort the whole plan.
 *
 * @param {string} runDir        - Absolute path to the analysis run folder.
 * @param {string} relativePath  - Artifact path relative to runDir.
 * @param {number} floor         - Minimum line count for this artifact.
 * @param {string[]} mermaidRequiredList - Paths that must contain a mermaid block.
 * @returns {{ atFloor: boolean, lines: number, floor: number, reason: string|null }}
 */
export function classifyArtifact(runDir, relativePath, floor, mermaidRequiredList) {
  const abs = path.join(runDir, relativePath);

  // statSync throws for absent/inaccessible paths; fold every such failure
  // into "not a readable file", matching the non-throwing existsSync contract.
  let isRegularFile = false;
  try {
    isRegularFile = fs.statSync(abs).isFile();
  } catch {
    isRegularFile = false;
  }
  if (!isRegularFile) {
    return { atFloor: false, lines: 0, floor, reason: 'missing' };
  }

  const content = fs.readFileSync(abs, 'utf8');
  const lines = countLines(content);

  if (lines < floor) {
    return { atFloor: false, lines, floor, reason: `short:${lines}<${floor}` };
  }
  if (hasPlaceholders(content)) {
    return { atFloor: false, lines, floor, reason: 'placeholders' };
  }
  if (isDiagramRequired(relativePath, mermaidRequiredList) && !hasMermaid(content)) {
    return { atFloor: false, lines, floor, reason: 'mermaid:missing' };
  }
  return { atFloor: true, lines, floor, reason: null };
}
 
/**
 * Build the `priorRunDiff` plan for a same-day analysis folder.
 *
 * Reads `<runDir>/manifest.json`, derives the article type, and — when the
 * feature is enabled and the manifest has a non-empty `history[]` — classifies
 * every candidate artifact against its line floor. The last `history[]` entry
 * is treated as the prior run.
 *
 * Each artifact's floor is the larger of `DEFAULT_MIN_LINES` and the value
 * declared for it in the thresholds file. A declared value that does not
 * convert to a finite number (including values picked up from the object
 * prototype for paths such as `constructor`) is ignored, because a NaN floor
 * would make the `lines < floor` check pass for any file length.
 *
 * @param {string} runDir              - Absolute path to the run folder.
 * @param {object|null} thresholdsJson - Parsed reference-quality-thresholds.json.
 * @param {boolean} enabled            - Whether the feature is enabled.
 * @returns {object} The diff plan (serialisable to JSON).
 */
export function buildPriorRunDiff(runDir, thresholdsJson, enabled) {
  const manifest = safeReadJson(path.join(runDir, 'manifest.json'));

  const relRunDir = path.relative(ROOT, runDir);
  const articleType =
    manifest?.articleType ||
    manifest?.article_type ||
    (Array.isArray(manifest?.articleTypes) ? manifest.articleTypes[0] : null) ||
    manifest?.runType ||
    'unknown';

  // Single place that fixes the plan's shape and key order.
  const makePlan = (isEnabled, priorRunId, carryForward, rewrite) => ({
    enabled: isEnabled,
    runDir: relRunDir,
    articleType,
    priorRunId,
    carryForward,
    rewrite,
  });

  if (!enabled) {
    return makePlan(false, null, [], []);
  }

  const history = Array.isArray(manifest?.history) ? manifest.history : [];
  if (history.length === 0) {
    return makePlan(true, null, [], []);
  }

  // Use the most recent history entry as the "prior run".
  const priorRunId = history[history.length - 1]?.runId ?? 'unknown';

  const perArtifactFloors = thresholdsJson?.thresholds?.[articleType] ?? {};
  const mermaidRequiredList = thresholdsJson?.structuralRequirements?.mermaidRequired ?? [];

  // Build the candidate artifact path set from threshold keys and
  // manifest-declared files used by this prior-run diff helper.
  const allRelPaths = collectArtifactPaths(manifest, perArtifactFloors);

  const carryForward = [];
  const rewrite = [];

  for (const relativePath of allRelPaths) {
    // Number() applies the same coercion Math.max would; only the NaN /
    // non-finite outcome is replaced, so valid numeric floors are unchanged.
    const declaredFloor = Number(perArtifactFloors[relativePath] ?? 0);
    const floor = Math.max(DEFAULT_MIN_LINES, Number.isFinite(declaredFloor) ? declaredFloor : 0);

    const result = classifyArtifact(runDir, relativePath, floor, mermaidRequiredList);
    if (result.atFloor) {
      carryForward.push({
        relativePath,
        lines: result.lines,
        floor: result.floor,
        source: `carry-forward-from:${priorRunId}`,
      });
    } else {
      rewrite.push({
        relativePath,
        lines: result.lines,
        floor: result.floor,
        reason: result.reason,
      });
    }
  }

  return makePlan(true, priorRunId, carryForward, rewrite);
}
 
/**
 * Collect all artifact relative paths from the manifest files section and
 * the per-artifact threshold keys — deduped and sorted.
 *
 * Every string found anywhere under `manifest.files` (directly, inside
 * arrays, or as a nested object value) is treated as a path. Object keys
 * themselves are never added.
 *
 * @param {object|null} manifest        - Parsed manifest.json.
 * @param {object}      perArtifactFloors - Per-artifact floor map.
 * @returns {string[]} Sorted unique relative paths.
 */
function collectArtifactPaths(manifest, perArtifactFloors) {
  const set = new Set(Object.keys(perArtifactFloors));

  /**
   * Recursively collect artifact paths from nested manifest file entries.
   * Only string values and `{ path: string }` objects are treated as paths;
   * arbitrary object keys (e.g. language codes like `en`/`sv`) are NOT added.
   *
   * @param {unknown} value - Manifest file entry or nested value.
   */
  function addArtifactPaths(value) {
    if (typeof value === 'string') {
      set.add(value);
      return;
    }

    if (Array.isArray(value)) {
      for (const entry of value) addArtifactPaths(entry);
      return;
    }

    // Numbers, booleans, null, undefined carry no paths.
    if (!value || typeof value !== 'object') {
      return;
    }

    if (typeof value.path === 'string') {
      set.add(value.path);
    }

    // `path` was handled above; descend into every other property.
    for (const [key, nestedValue] of Object.entries(value)) {
      if (key !== 'path') {
        addArtifactPaths(nestedValue);
      }
    }
  }

  const files = manifest?.files;
  if (files && typeof files === 'object') {
    for (const value of Object.values(files)) {
      addArtifactPaths(value);
    }
  }
  return Array.from(set).sort();
}
 
// ─── CLI entry point ─────────────────────────────────────────────────────────
 
/**
 * CLI entry point: parse argv, validate the run folder, build the plan and
 * print it to stdout as pretty-printed JSON.
 *
 * Exit status is communicated through `process.exitCode` followed by a
 * normal return instead of `process.exit()`. Calling `process.exit()` right
 * after a write can terminate the process before an asynchronous
 * stdout/stderr stream has flushed, which would truncate the JSON plan.
 *
 * Exit codes: 0 — plan emitted; 1 — runDir missing or not a directory.
 * (Usage errors exit with 2 from within `parseArgs` / `usage`.)
 */
function main() {
  const opts = parseArgs(process.argv);
  const runDir = path.resolve(ROOT, opts.runDir);

  if (!fs.existsSync(runDir) || !fs.statSync(runDir).isDirectory()) {
    process.stderr.write(`error: runDir does not exist or is not a directory: ${runDir}\n`);
    process.exitCode = 1;
    return;
  }

  // Only the literal string "true" enables the feature.
  const enabled = process.env['ENABLE_PRIOR_RUN_MERGE'] === 'true';
  const thresholdsJson = loadThresholds(opts.thresholdsPath);
  const plan = buildPriorRunDiff(runDir, thresholdsJson, enabled);

  process.stdout.write(`${JSON.stringify(plan, null, 2)}\n`);
  process.exitCode = 0;
}
 
// Guard: only run as CLI, not when imported as a module by tests.
// Compare real (symlink-resolved) absolute paths so that a relative argv[1]
// or an invocation through a symlink still triggers `main()`.

/**
 * Resolve a path to its canonical absolute form.
 *
 * @param {string} p - Path to canonicalise.
 * @returns {string} The symlink-resolved path, or the plain absolute path
 *   when it cannot be resolved (e.g. the file does not exist).
 */
function toRealPath(p) {
  try {
    return fs.realpathSync(p);
  } catch {
    return path.resolve(p);
  }
}

const currentModulePath = toRealPath(fileURLToPath(import.meta.url));
const invokedScriptPath = process.argv[1] ? toRealPath(process.argv[1]) : null;
 
if (invokedScriptPath === currentModulePath) {
  main();
}