pipeline output-stage.ts

100% Statements 52/52
79.48% Branches 31/39
100% Functions 6/6
100% Lines 49/49
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7x
 
 
 
 
 
 
 
 
 
 
 
 
 
18x
 
18x
4x
1x
 
3x
 
4x
 
 
14x
5x
5x
 
 
9x
9x
9x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13x
13x
 
13x
7x
7x
6x
2x
4x
4x
 
 
13x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7x
 
6x
 
 
6x
6x
6x
 
6x
4x
4x
 
 
 
 
 
 
 
 
 
 
4x
 
 
 
 
 
 
 
 
 
 
4x
4x
4x
4x
 
 
 
 
4x
4x
5x
3x
 
2x
2x
1x
 
1x
1x
 
 
4x
 
 
 
 
 
6x
 
 
 
 
 
 
 
 
 
 
6x
6x
 
  // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
 
/**
 * @module Generators/Pipeline/OutputStage
 * @description File writing and metadata update pipeline stage.
 *
 * All functions accept explicit options/path arguments rather than reading
 * module-level state, so they are trivial to test in isolation.
 */
 
import fs from 'fs';
import path from 'path';
import type { GenerationStats, GenerationResult } from '../../types/index.js';
import { formatDateForSlug, atomicWrite } from '../../utils/file-utils.js';
 
// ─── Output options ───────────────────────────────────────────────────────────
 
/**
 * Runtime flags that control how articles are written to disk.
 *
 * Passed explicitly to the write helpers in this module so that none of them
 * has to read module-level state.
 */
export interface OutputOptions {
  /** When true no files are written (dry run mode); the helpers only log what they would have done */
  dryRun: boolean;
  /** When true skip articles whose target file already exists on disk */
  skipExisting: boolean;
  /** Absolute path to the news output directory; target filenames are joined onto it */
  newsDir: string;
}
 
// ─── File-write helpers ───────────────────────────────────────────────────────
 
/** Prefix put in front of console messages that describe what a dry run would have done */
const DRY_RUN_PREFIX = '  [DRY RUN]';
 
/**
 * Persist one HTML document under the news directory.
 *
 * Nothing is written, and `false` is returned, when the target already exists
 * and `skipExisting` is set, or when `dryRun` is set. In both cases a console
 * message states what happened (or what would have happened).
 *
 * @param html - Complete HTML document to store
 * @param filename - Name of the target file, resolved against `options.newsDir`
 * @param options - Output flags and directory path
 * @returns `true` only if the file was really written to disk
 */
export function writeArticleFile(html: string, filename: string, options: OutputOptions): boolean {
  const { dryRun, skipExisting, newsDir } = options;
  const target = path.join(newsDir, filename);

  // The skip check outranks the dry-run check; the disk is only probed when
  // skipExisting is requested.
  const alreadyPresent = skipExisting && fs.existsSync(target);

  if (alreadyPresent) {
    console.log(
      dryRun
        ? `${DRY_RUN_PREFIX} Would skip (already exists): ${filename}`
        : `  ⏭️  Skipped (already exists): ${filename}`
    );
    return false;
  }

  if (!dryRun) {
    atomicWrite(target, html);
    console.log(`  ✅ Wrote: ${filename}`);
    return true;
  }

  console.log(`${DRY_RUN_PREFIX} Would write: ${filename}`);
  return false;
}
 
/**
 * Write a language-specific article file and update the generation stats.
 *
 * The target filename is `<slug>-<lang>.html`. Exactly one counter on `stats`
 * is touched per call:
 * - `generated` (plus an entry in `articles`) when the file was written,
 * - `skipped` when `skipExisting` is set and the target exists on disk,
 * - `dryRun` when neither of the above applies and `dryRun` is set.
 *
 * @param html - Full HTML content to write
 * @param slug - Article slug (e.g. `"2025-01-15-week-ahead"`)
 * @param lang - Language code suffix (e.g. `"en"`)
 * @param options - Output flags and directory path
 * @param stats - Mutable stats object to increment counters on
 * @returns `true` when the file was actually written
 */
export function writeSingleArticle(
  html: string,
  slug: string,
  lang: string,
  options: OutputOptions,
  stats: GenerationStats
): boolean {
  const filename = `${slug}-${lang}.html`;
  const written = writeArticleFile(html, filename, options);

  if (written) {
    stats.generated += 1;
    stats.articles.push(filename);
  } else if (options.skipExisting && fs.existsSync(path.join(options.newsDir, filename))) {
    // Same condition writeArticleFile tests first, so an existing file is
    // counted as skipped even during a dry run.
    stats.skipped += 1;
  } else if (options.dryRun) {
    stats.dryRun += 1;
  }

  return written;
}
 
/** Coerce a value read from a persisted metadata file into a counter; anything but a finite number counts as 0. */
function toCounter(value: unknown): number {
  return typeof value === 'number' && Number.isFinite(value) ? value : 0;
}

/** Keep only the string entries of a persisted article list; a non-array yields an empty list. */
function toArticleList(value: unknown): string[] {
  return Array.isArray(value)
    ? value.filter((item): item is string => typeof item === 'string')
    : [];
}

/** Keep only the non-null object entries of a persisted result list; a non-array yields an empty list. */
function toResultList(value: unknown): GenerationResult[] {
  if (!Array.isArray(value)) return [];
  // Only object-ness is verified here; the entries are assumed to have been
  // written by an earlier run of this function and so to match GenerationResult.
  return value.filter((item) => typeof item === 'object' && item !== null) as GenerationResult[];
}

/**
 * Persist a generation metadata JSON file to the metadata directory.
 * If a metadata file already exists for today, merges the current run's stats
 * and results with the existing ones so multiple workflow runs on the same day
 * do not overwrite each other's data.
 * Skips writing when `dryRun` is true.
 *
 * The merge is all-or-nothing: the existing file is parsed and sanitised
 * first, and the merged values replace the current run's values only after
 * every part of the merge has succeeded. When the existing file cannot be
 * read or is not a JSON object, a warning is logged and the file is rewritten
 * from the current run's data alone.
 *
 * @param stats - Final generation statistics
 * @param results - Per-article-type generation results
 * @param usedMCP - Whether the MCP client was connected during this run
 * @param metadataDir - Absolute path to the metadata output directory
 * @param dryRun - When true the file is not written
 */
export function writeGenerationMetadata(
  stats: GenerationStats,
  results: GenerationResult[],
  usedMCP: boolean,
  metadataDir: string,
  dryRun: boolean
): void {
  if (dryRun) return;

  const metadataPath = path.join(metadataDir, `generation-${formatDateForSlug()}.json`);

  // Defaults describe the current run only; they are replaced below when a
  // same-day file can be merged in.
  let mergedStats = { ...stats };
  let mergedResults = [...results];
  let mergedUsedMCP = usedMCP;

  if (fs.existsSync(metadataPath)) {
    try {
      const parsed: unknown = JSON.parse(fs.readFileSync(metadataPath, 'utf-8'));
      if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
        throw new TypeError('top-level JSON value is not an object');
      }
      const existing = parsed as Partial<
        Record<
          'generated' | 'skipped' | 'dryRun' | 'errors' | 'articles' | 'results' | 'usedMCP',
          unknown
        >
      >;

      // Accumulate counters from both runs
      const combinedStats = {
        ...stats,
        generated: toCounter(existing.generated) + stats.generated,
        skipped: toCounter(existing.skipped) + stats.skipped,
        dryRun: toCounter(existing.dryRun) + stats.dryRun,
        errors: toCounter(existing.errors) + stats.errors,
        // Merge article lists, removing any duplicates
        articles: [...new Set([...toArticleList(existing.articles), ...stats.articles])],
      };

      // Keep prior results; a prior entry is dropped when this run produced a
      // result with the same slug, so the newer entry wins.
      const newSlugs = new Set(results.map((r) => r.slug).filter(Boolean));
      const priorResults = toResultList(existing.results).filter((r) => !newSlugs.has(r.slug));

      // Additionally de-duplicate entries that do not have a slug by using a
      // stable structural key (JSON representation). This prevents repeated
      // same-day runs from accumulating duplicate slug-less error entries.
      const seenAnonymousKeys = new Set<string>();
      const combinedResults = [...priorResults, ...results].filter((result) => {
        if (result.slug) return true;
        const key = JSON.stringify(result);
        if (seenAnonymousKeys.has(key)) return false;
        seenAnonymousKeys.add(key);
        return true;
      });

      // Commit only after every step above succeeded, so a failure can never
      // leave merged counters paired with un-merged results.
      mergedStats = combinedStats;
      mergedResults = combinedResults;
      // usedMCP is true if either run connected to MCP
      mergedUsedMCP = usedMCP || existing.usedMCP === true;
    } catch (error: unknown) {
      // Best effort: an unreadable or malformed file must not abort the run,
      // but it should not go unnoticed either.
      const reason = error instanceof Error ? error.message : String(error);
      console.warn(
        `  ⚠️  Could not merge existing metadata (${metadataPath}); using current run only: ${reason}`
      );
    }
  }

  const metadata = {
    timestamp: mergedStats.timestamp,
    generated: mergedStats.generated,
    skipped: mergedStats.skipped,
    dryRun: mergedStats.dryRun,
    errors: mergedStats.errors,
    articles: mergedStats.articles,
    results: mergedResults,
    usedMCP: mergedUsedMCP,
  };

  atomicWrite(metadataPath, JSON.stringify(metadata, null, 2));
  console.log(`📝 Metadata written to: ${metadataPath}`);
}