All files / src/aggregator/metadata editorial-highlight.ts

75.86% Statements 22/29
76.66% Branches 23/30
100% Functions 3/3
100% Lines 19/19

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105                                                                                  15x 15x 9x 9x   9x   9x 9x 9x                                   50x   49x           49x                       18x   85x         18x 18x   18x 50x 8x   10x    
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
 
/**
 * @module Aggregator/Metadata/EditorialHighlight
 * @description Primary editorial-artefact highlight resolver. Walks the
 * canonical list of editorial artefacts inside a run directory and returns
 * the best `{headline, summary}` pair — either a non-generic H1, a named
 * priority finding, or a stripped category-affix core — for use as the
 * article `<title>` and `<meta description>`.
 *
 * Depends on {@link artifact-walker} for shared discovery helpers and
 * {@link translated-sibling} for the translated-sibling filter.
 */
 
import fs from 'fs';
import {
  EDITORIAL_ARTEFACT_CANDIDATES,
  safeReaddir,
  scanCandidatesForHighlight,
} from './artifact-walker.js';
import { isTranslatedSiblingBrief } from './translated-sibling.js';
 
/**
 * Exact filenames that are written by the resolver / generator pipeline
 * itself: the resolver's own outputs (`article.md`, `article-meta.json`,
 * `article-meta.jsonl`) and the `render-log.json` build sidecar.
 */
const RESOLVER_OUTPUT_FILENAMES: ReadonlySet<string> = new Set([
  'article.md',
  'article-meta.json',
  'article-meta.jsonl',
  'render-log.json',
]);

/**
 * Per-language article renderings: `article.<lang>.md`, `article_<lang>.md`,
 * where `<lang>` is a 2–3 letter code matched case-insensitively.
 */
const LOCALIZED_ARTICLE_PATTERN = /^article[._][a-z]{2,3}\.md$/iu;

/**
 * Decide whether a filename is a resolver output that must NEVER be walked
 * as a source by the top-level fallback scan in
 * {@link extractArtifactHighlight}.
 *
 * Such files are either the resolver's own output (`article.md`,
 * `article-meta.json`, `article-meta.jsonl`), build sidecars (`*.html`,
 * `render-log.json`), or per-language renderings that contain transcluded
 * metadata-banner lines (`**Threat Level:** …`,
 * `**Key Assumptions Check**: …`) that `priority-finding-highlight.ts`
 * Pattern C would falsely accept as editorial headlines. See the regression
 * catalogue documented in `scripts/validate-article-seo.js` for the
 * smoking-gun live-site defects (2026-05-22 week-ahead
 * `<title>Threat Level</title>`, 2026-05-22 committee-reports
 * `<title>Key Assumptions Check</title>`).
 *
 * An empty filename is also reported as a resolver output so that callers
 * skip it rather than try to read it.
 *
 * @param filename - Bare filename (no path), e.g. `article.md`
 * @returns `true` when the file is a resolver output and must be skipped
 */
export function isResolverOutputArtefact(filename: string): boolean {
  // Nothing usable to scan behind an empty name — tell the caller to skip it.
  if (!filename) return true;
  if (RESOLVER_OUTPUT_FILENAMES.has(filename)) return true;
  if (LOCALIZED_ARTICLE_PATTERN.test(filename)) return true;
  // Rendered HTML pages are build sidecars emitted by the generator pipeline.
  return filename.endsWith('.html');
}
 
/**
 * Resolve the editorial `{headline, summary}` pair for a run directory.
 *
 * Resolution happens in up to three stages:
 *
 * 1. Scan the canonical {@link EDITORIAL_ARTEFACT_CANDIDATES} via
 *    {@link scanCandidatesForHighlight}; if that yields a headline, return it
 *    together with its summary.
 * 2. Otherwise scan the remaining top-level `.md` files of `runDir`,
 *    excluding translated sibling briefs and resolver outputs; if that yields
 *    a headline, return it together with its summary.
 * 3. Otherwise, if either scan produced a summary, return it with an empty
 *    headline (the canonical scan's summary takes precedence).
 *
 * Returns `null` when `runDir` is empty or does not exist, or when neither
 * scan produced a headline or a summary.
 *
 * @param runDir - Absolute run directory path
 * @param articleType - Article type slug (used by {@link isGenericHeading})
 * @param date - ISO run date (used by {@link isGenericHeading})
 * @returns `{headline, summary}` where `headline` may be empty, or `null`
 *   when nothing usable was found
 */
export function extractArtifactHighlight(
  runDir: string,
  articleType: string,
  date: string
): { readonly headline: string; readonly summary: string } | null {
  if (!runDir || !fs.existsSync(runDir)) return null;

  const direct = scanCandidatesForHighlight(
    runDir,
    EDITORIAL_ARTEFACT_CANDIDATES,
    articleType,
    date
  );
  if (direct.headline) return { headline: direct.headline, summary: direct.summary };

  // Top-level fallback scan — used only when none of the canonical
  // editorial artefacts produced a non-generic H1. We must NOT pick up
  // translated sibling briefs (`executive-brief_<lang>.md`,
  // `synthesis-summary_<lang>.md`, …) here, because their H1s are
  // legitimate localized headlines that the English-only
  // {@link isGenericHeading} detector cannot recognise as boilerplate.
  // Letting them through poisoned the English `<title>` and
  // `<meta description>` for the 2026-05-15 batch with Arabic content
  // from `executive-brief_ar.md`. See {@link isTranslatedSiblingBrief}
  // and the regression test in `test/unit/article-metadata.test.js`.
  //
  // Only Markdown files are considered, so non-Markdown entries such as
  // `manifest.json` are excluded by the extension check alone.
  const topLevel = safeReaddir(runDir).filter(
    (f) =>
      f.endsWith('.md') &&
      !isTranslatedSiblingBrief(f) &&
      !isResolverOutputArtefact(f)
  );
  const fallback = scanCandidatesForHighlight(runDir, topLevel, articleType, date);
  if (fallback.headline) return { headline: fallback.headline, summary: fallback.summary };

  // No headline anywhere: surface a summary on its own if one was found,
  // preferring the canonical artefacts over the fallback scan.
  const summaryOnly = direct.summary || fallback.summary;
  if (summaryOnly) {
    return { headline: '', summary: summaryOnly };
  }
  return null;
}