All files / src/generators/pipeline output-stage.ts

100% Statements 52/52
79.48% Branches 31/39
100% Functions 6/6
100% Lines 49/49

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193                                                              7x                           18x   18x 4x 1x   3x   4x     14x 5x 5x     9x 9x 9x                                       13x 13x   13x 7x 7x 6x 2x 4x 4x     13x                                             7x   6x     6x 6x 6x   6x 4x 4x                     4x                     4x 4x 4x 4x         4x 4x 5x 3x   2x 2x 1x   1x 1x     4x           6x                     6x 6x    
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
 
/**
 * @module Generators/Pipeline/OutputStage
 * @description File writing and metadata update pipeline stage.
 *
 * All functions accept explicit options/path arguments rather than reading
 * module-level state, so they are trivial to test in isolation.
 */
 
import fs from 'fs';
import path from 'path';
import type { GenerationStats, GenerationResult } from '../../types/index.js';
import { formatDateForSlug, atomicWrite } from '../../utils/file-utils.js';
 
// ─── Output options ───────────────────────────────────────────────────────────
 
/**
 * Settings consumed by the file-writing helpers in this module. They are
 * passed in explicitly on every call instead of being read from shared state.
 */
export interface OutputOptions {
  /** Absolute path to the directory that article files are written into */
  newsDir: string;
  /** Dry-run switch: when true, intended writes are only logged and no file is written */
  dryRun: boolean;
  /** When true, an article whose target file already exists on disk is left untouched */
  skipExisting: boolean;
}
 
// ─── File-write helpers ───────────────────────────────────────────────────────
 
/** Prefix for console messages emitted in dry-run mode, where nothing is written to disk */
const DRY_RUN_PREFIX = '  [DRY RUN]';
 
/**
 * Write one HTML document into the news directory, honouring the output flags.
 *
 * Decision order:
 * 1. `skipExisting` is set and the target already exists: log a skip notice
 *    (worded differently in dry-run mode) and return `false`.
 * 2. `dryRun` is set: log what would have been written and return `false`.
 * 3. Otherwise the content is written via `atomicWrite` and `true` is returned.
 *
 * @param html - Full HTML content to write
 * @param filename - Target filename, resolved against `options.newsDir`
 * @param options - Output flags and directory path
 * @returns `true` only when the file was actually written to disk
 */
export function writeArticleFile(html: string, filename: string, options: OutputOptions): boolean {
  const { newsDir, skipExisting, dryRun } = options;
  const target = path.join(newsDir, filename);

  // The disk is only probed when skipping is enabled.
  const alreadyOnDisk = skipExisting && fs.existsSync(target);
  if (alreadyOnDisk) {
    const notice = dryRun
      ? `${DRY_RUN_PREFIX} Would skip (already exists): ${filename}`
      : `  ⏭️  Skipped (already exists): ${filename}`;
    console.log(notice);
    return false;
  }

  if (!dryRun) {
    atomicWrite(target, html);
    console.log(`  ✅ Wrote: ${filename}`);
    return true;
  }

  console.log(`${DRY_RUN_PREFIX} Would write: ${filename}`);
  return false;
}
 
/**
 * Write a language-specific article file and update the generation stats.
 *
 * The filename is `<slug>-<lang>.html`. Exactly one counter on `stats` is
 * incremented per call, or none when the file was not written and neither of
 * the two flags explains why:
 *
 * - written: `stats.generated` is incremented and the filename is appended to
 *   `stats.articles`;
 * - not written, `skipExisting` is set and the file exists: `stats.skipped`;
 * - not written and `dryRun` is set: `stats.dryRun`.
 *
 * The skip check takes precedence, so a dry run that encounters an existing
 * file with `skipExisting` enabled is counted as skipped, not as a dry run.
 *
 * @param html - Full HTML content to write
 * @param slug - Article slug (e.g. `"2025-01-15-week-ahead"`)
 * @param lang - Language code suffix (e.g. `"en"`)
 * @param options - Output flags and directory path
 * @param stats - Mutable stats object to increment counters on
 * @returns `true` when the file was actually written
 */
export function writeSingleArticle(
  html: string,
  slug: string,
  lang: string,
  options: OutputOptions,
  stats: GenerationStats
): boolean {
  const filename = `${slug}-${lang}.html`;
  const written = writeArticleFile(html, filename, options);

  if (written) {
    stats.generated += 1;
    stats.articles.push(filename);
  } else if (options.skipExisting && fs.existsSync(path.join(options.newsDir, filename))) {
    // writeArticleFile only reports a boolean, so the reason for not writing
    // is re-derived here from the flags and the state of the disk.
    stats.skipped += 1;
  } else if (options.dryRun) {
    stats.dryRun += 1;
  }

  return written;
}
 
/**
 * Persist a generation metadata JSON file to the metadata directory.
 *
 * The target is `generation-<date>.json`, with `<date>` supplied by
 * `formatDateForSlug()`. If that file already exists, the current run is
 * merged with it so that several workflow runs on the same day do not
 * overwrite each other's data:
 *
 * - `generated`, `skipped`, `dryRun` and `errors` are summed;
 * - `articles` becomes the de-duplicated union of both lists;
 * - `results` keeps prior entries unless this run produced an entry with the
 *   same slug, then appends this run's entries; entries without a slug are
 *   de-duplicated by their JSON representation;
 * - `usedMCP` is true if either run used MCP;
 * - `timestamp` is always taken from the current run.
 *
 * The existing file is treated as untrusted: fields of the wrong type fall
 * back to their empty default, and the merge is applied all-or-nothing. If the
 * file cannot be read, parsed or merged, a warning is logged and only the
 * current run's data is written.
 *
 * Skips writing entirely when `dryRun` is true.
 *
 * @param stats - Final generation statistics
 * @param results - Per-article-type generation results
 * @param usedMCP - Whether the MCP client was connected during this run
 * @param metadataDir - Absolute path to the metadata output directory
 * @param dryRun - When true the file is not written
 */
export function writeGenerationMetadata(
  stats: GenerationStats,
  results: GenerationResult[],
  usedMCP: boolean,
  metadataDir: string,
  dryRun: boolean
): void {
  if (dryRun) return;

  const metadataPath = path.join(metadataDir, `generation-${formatDateForSlug()}.json`);

  // Start from the current run; these are replaced together only when the
  // merge with an existing file succeeds in full.
  let mergedStats = { ...stats };
  let mergedResults = [...results];
  let mergedUsedMCP = usedMCP;

  if (fs.existsSync(metadataPath)) {
    try {
      const parsed: unknown = JSON.parse(fs.readFileSync(metadataPath, 'utf-8'));
      if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
        throw new TypeError('expected a JSON object');
      }
      // Every field is unknown until checked: the file may have been edited
      // by hand or written by a different version of this tool.
      const existing = parsed as Partial<
        Record<
          'generated' | 'skipped' | 'dryRun' | 'errors' | 'articles' | 'results' | 'usedMCP',
          unknown
        >
      >;

      // A non-numeric counter (e.g. the string "5") must not reach `+`, where
      // it would concatenate instead of add.
      const priorCount = (value: unknown): number =>
        typeof value === 'number' && Number.isFinite(value) ? value : 0;

      const priorArticles = Array.isArray(existing.articles)
        ? existing.articles.filter((entry): entry is string => typeof entry === 'string')
        : [];

      // Only "is a non-null object" is verified here; the inner shape of each
      // prior result is trusted.
      const priorResults = (Array.isArray(existing.results) ? existing.results : []).filter(
        (entry): entry is GenerationResult => typeof entry === 'object' && entry !== null
      );

      // Accumulate counters from both runs
      const nextStats = {
        ...stats,
        generated: priorCount(existing.generated) + stats.generated,
        skipped: priorCount(existing.skipped) + stats.skipped,
        dryRun: priorCount(existing.dryRun) + stats.dryRun,
        errors: priorCount(existing.errors) + stats.errors,
        // Merge article lists, removing any duplicates
        articles: [...new Set([...priorArticles, ...stats.articles])],
      };

      // Keep prior results unless this run produced an entry with the same
      // slug; the new entry wins. Falsy slugs never enter the set, so prior
      // slug-less entries are always carried forward to the next step.
      const newSlugs = new Set(results.map((r) => r.slug).filter(Boolean));
      const combinedResults = [...priorResults.filter((r) => !newSlugs.has(r.slug)), ...results];

      // De-duplicate slug-less entries by a structural key (their JSON
      // representation) so repeated same-day runs do not pile up identical
      // anonymous error entries.
      const seenAnonymousKeys = new Set<string>();
      const nextResults = combinedResults.filter((result) => {
        if (result.slug) {
          return true;
        }
        const key = JSON.stringify(result);
        if (seenAnonymousKeys.has(key)) {
          return false;
        }
        seenAnonymousKeys.add(key);
        return true;
      });

      // Commit only after every step succeeded, so a failure above can never
      // leave merged counters paired with unmerged results.
      mergedStats = nextStats;
      mergedResults = nextResults;
      // usedMCP is true if either run connected to MCP
      mergedUsedMCP = usedMCP || existing.usedMCP === true;
    } catch (error: unknown) {
      // Best effort: an unreadable or malformed file must not fail the run,
      // but dropping the earlier data should be visible in the log.
      const reason = error instanceof Error ? error.message : String(error);
      console.warn(
        `  ⚠️  Could not merge existing metadata at ${metadataPath}; using current run only: ${reason}`
      );
    }
  }

  const metadata = {
    timestamp: mergedStats.timestamp,
    generated: mergedStats.generated,
    skipped: mergedStats.skipped,
    dryRun: mergedStats.dryRun,
    errors: mergedStats.errors,
    articles: mergedStats.articles,
    results: mergedResults,
    usedMCP: mergedUsedMCP,
  };

  atomicWrite(metadataPath, JSON.stringify(metadata, null, 2));
  console.log(`📝 Metadata written to: ${metadataPath}`);
}