// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
/**
* @module Aggregator/Metadata/EditorialHighlight
* @description Primary editorial-artefact highlight resolver. Walks the
* canonical list of editorial artefacts inside a run directory and returns
* the best `{headline, summary}` pair — either a non-generic H1, a named
* priority finding, or a stripped category-affix core — for use as the
* article `<title>` and `<meta description>`.
*
* Depends on {@link artifact-walker} for shared discovery helpers and
* {@link translated-sibling} for the translated-sibling filter.
*/
import fs from 'fs';
import {
EDITORIAL_ARTEFACT_CANDIDATES,
safeReaddir,
scanCandidatesForHighlight,
} from './artifact-walker.js';
import { isTranslatedSiblingBrief } from './translated-sibling.js';
/**
 * Decide whether a run-directory file is resolver or generator *output*
 * and must therefore NEVER be walked as a source by the top-level
 * fallback scan in {@link extractArtifactHighlight}.
 *
 * A filename is treated as output when it is:
 * - empty (nothing to scan, so it is skipped defensively);
 * - exactly `article.md`, `article-meta.json`, `article-meta.jsonl`
 *   or `render-log.json`;
 * - a per-language article rendering, `article.<lang>.md` or
 *   `article_<lang>.md`, where `<lang>` is 2–3 ASCII letters (matched
 *   case-insensitively);
 * - any file ending in `.html`.
 *
 * Why: the per-language renderings contain transcluded metadata-banner
 * lines (`**Threat Level:** …`, `**Key Assumptions Check**: …`) that
 * `priority-finding-highlight.ts` Pattern C would falsely accept as
 * editorial headlines. See the regression catalogue documented in
 * `scripts/validate-article-seo.js` for the live-site defects
 * (2026-05-22 week-ahead `<title>Threat Level</title>`,
 * 2026-05-22 committee-reports `<title>Key Assumptions Check</title>`).
 *
 * @param filename - Bare filename (no path), e.g. `article.md`
 * @returns `true` when the file is a resolver output and must be skipped
 */
export function isResolverOutputArtefact(filename: string): boolean {
  // An empty name cannot identify a source artefact; skip it.
  if (!filename) return true;

  // The resolver's own outputs plus the generator's render log.
  if (
    filename === 'article.md' ||
    filename === 'article-meta.json' ||
    filename === 'article-meta.jsonl' ||
    filename === 'render-log.json'
  ) {
    return true;
  }

  // Per-language article renderings: `article.<lang>.md`, `article_<lang>.md`.
  if (/^article[._][a-z]{2,3}\.md$/iu.test(filename)) return true;

  // Build sidecar files emitted by the generator pipeline.
  return filename.endsWith('.html');
}
/**
 * Resolve the best `{headline, summary}` pair for a run directory.
 *
 * Resolution order:
 * 1. Scan the canonical {@link EDITORIAL_ARTEFACT_CANDIDATES}; if that
 *    yields a headline, return it with its summary.
 * 2. Otherwise scan the remaining top-level `.md` files, excluding
 *    translated sibling briefs and resolver outputs; if that yields a
 *    headline, return it with its summary.
 * 3. Otherwise, if either scan produced a summary, return it with an
 *    empty headline (the canonical scan's summary wins).
 *
 * Returns `null` when `runDir` is empty or does not exist, or when
 * neither scan produced a headline or a summary.
 *
 * @param runDir - Absolute run directory path
 * @param articleType - Article type slug, forwarded to the candidate scan
 * @param date - ISO run date, forwarded to the candidate scan
 * @returns `{headline, summary}` where either field may be empty, or `null`
 */
export function extractArtifactHighlight(
  runDir: string,
  articleType: string,
  date: string
): { readonly headline: string; readonly summary: string } | null {
  if (!runDir || !fs.existsSync(runDir)) return null;

  const direct = scanCandidatesForHighlight(
    runDir,
    EDITORIAL_ARTEFACT_CANDIDATES,
    articleType,
    date
  );
  if (direct.headline) return { headline: direct.headline, summary: direct.summary };

  // Top-level fallback scan — used only when none of the canonical
  // editorial artefacts produced a non-generic H1. We must NOT pick up
  // translated sibling briefs (`executive-brief_<lang>.md`,
  // `synthesis-summary_<lang>.md`, …) here, because their H1s are
  // legitimate localized headlines that the English-only
  // {@link isGenericHeading} detector cannot recognise as boilerplate.
  // Letting them through poisoned the English `<title>` and
  // `<meta description>` for the 2026-05-15 batch with Arabic content
  // from `executive-brief_ar.md`. See {@link isTranslatedSiblingBrief}
  // and the regression test in `test/unit/article-metadata.test.js`.
  //
  // The `.md` suffix check already excludes `manifest.json`, so no
  // separate comparison against that name is needed.
  const topLevel = safeReaddir(runDir).filter(
    (f) => f.endsWith('.md') && !isTranslatedSiblingBrief(f) && !isResolverOutputArtefact(f)
  );
  const fallback = scanCandidatesForHighlight(runDir, topLevel, articleType, date);
  if (fallback.headline) return { headline: fallback.headline, summary: fallback.summary };

  // No usable headline anywhere: fall back to a summary-only result,
  // preferring the summary found in the canonical artefacts.
  const summaryOnly = direct.summary || fallback.summary;
  return summaryOnly ? { headline: '', summary: summaryOnly } : null;
}
|