All files / src/aggregator article-metadata.ts

91.24% Statements 198/217
87.14% Branches 122/140
100% Functions 25/25
93.29% Lines 167/179

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815                                                                                                                                                                                                                                        3x     3x     3x                                             3x                                             3x                                                                     138x     95x 56x 52x 50x 47x 45x     45x 43x     567x       37x 37x 720x 720x 720x           7x         30x   29x                               58x                                             362x 2x 2x 2x 362x                     359x 2x 2x 2x 359x                       29x 35x 35x   31x       28x 28x 28x   1x                       33x 119x 119x 28x 28x 24x   9x                       77x   116x                                   61x 
61x   60x 60x                   60x 60x 237x 227x 222x         44x 44x       44x                     44x                                   14x     13x 27x 27x 13x 13x 13x 13x 12x 12x         1x 1x                     1x                       13x 13x       13x   13x 13x 16x 16x 1x 1x   15x 2x 2x   13x   13x                     1x 1x                                             35x       35x 35x 35x   35x 490x             490x             35x                                                     490x     336x   28x   28x   28x   14x   14x   14x   14x   14x                             39x 39x   38x   38x 38x 38x 38x                         37x 37x 37x                           37x 37x                     76x 75x 75x                   76x 76x 76x 76x                                     672x 644x           42x 42x 84x 84x   42x 42x                             24x     24x 11x 11x 11x               13x 13x 13x 9x             4x 1x     3x                     24x 24x 24x   24x         24x 336x 336x 336x           336x 336x     336x       336x 336x   336x                     24x                     672x 1320x        
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
 
/**
 * @module Aggregator/ArticleMetadata
 * @description Resolve per-language `{title, description}` for an article
 * rendered by the aggregator pipeline. The resolver follows a strict
 * priority ladder that prefers *real editorial highlights* over boring,
 * repeated templates — satisfying the core SEO requirement that every
 * published article carry a unique, content-reflective headline and
 * description in every language variant.
 *
 * Priority ladder (per language, highest wins):
 *
 * 1. **Manifest override** — `manifest.title` / `manifest.description` on
 *    the analysis-run manifest, either as a plain string (applied to every
 *    language) or a `LanguageMap<string>` object for explicit per-language
 *    values. Authored by Stage-B agents when they have an editorial
 *    headline for the day.
 * 2. **Artefact editorial H1** — first `# …` heading from the first
 *    substantive artefact under the run directory (e.g.
 *    `intelligence/synthesis-summary.md`, `breaking-news-analysis.md`).
 *    Accepted only when the heading is not a generic
 *    `${humanize(articleType)} — ${date}` form.
 * 3. **Aggregated-markdown H1** — the first `# …` heading in the aggregator
 *    output, accepted under the same non-generic rule. In practice this
 *    tier rarely fires because the aggregator itself writes the generic
 *    default, but it covers hand-edited or legacy aggregates.
 * 4. **First strong prose paragraph** — the first line of the aggregated
 *    Markdown that survives {@link shouldSkipDescriptionLine}. Used for
 *    `description`; also used for `title` as a last editorial-content
 *    resort when every heading-level source is generic.
 * 5. **Localized template** — the per-article-type `*_TITLES` generator
 *    from `src/constants/language-articles.ts`. Always parameterised by
 *    date (or derived values), so the title changes from run to run even
 *    when this last tier fires — but still the "boring repeated" option.
 *
 * English highlights (tiers 2–4) are reserved for the `en` language
 * variant; non-English variants skip them and drop to the localized
 * template (tier 5) unless an explicit `manifest.title.<lang>` /
 * `manifest.description.<lang>` override is present. This guarantees
 * every variant's `<title>` and `<meta description>` are in the correct
 * locale even while the article body itself is still rendered from an
 * English source (until per-language body translations ship).
 */
 
import fs from 'fs';
import path from 'path';
import { ALL_LANGUAGES, getLocalizedString } from '../constants/language-core.js';
import {
  BREAKING_NEWS_TITLES,
  COMMITTEE_REPORTS_TITLES,
  MONTH_AHEAD_TITLES,
  MONTHLY_REVIEW_TITLES,
  MOTIONS_TITLES,
  PROPOSITIONS_TITLES,
  WEEK_AHEAD_TITLES,
  WEEKLY_REVIEW_TITLES,
} from '../constants/language-articles.js';
import type { LangTitleSubtitle, LanguageCode, LanguageMap } from '../types/index.js';
 
/** One resolved `(title, description)` pair for a single language. */
export interface ResolvedMetadataEntry {
  /** Headline text; `resolveArticleMetadata` clamps it with `truncateTitle`. */
  readonly title: string;
  /** Description text; `resolveArticleMetadata` clamps it with `truncateDescription`. */
  readonly description: string;
}
 
/**
 * Fully resolved metadata — one {@link ResolvedMetadataEntry} per supported
 * language, as produced by `resolveArticleMetadata`.
 */
export type ResolvedMetadata = LanguageMap<ResolvedMetadataEntry>;
 
/**
 * Raw manifest subset consumed by the resolver. Deliberately narrower
 * than the full {@link AnalysisManifest} shape so the resolver stays
 * usable for backport (which only has the manifest in text form) and for
 * callers that don't need the full typed structure.
 */
export interface MetadataManifest {
  /**
   * Article type slug as recorded in the manifest. The resolver in this
   * module does not read it; it uses `ResolveMetadataOptions.articleType`.
   */
  readonly articleType?: string;
  /**
   * Run date as recorded in the manifest. Likewise not read by the
   * resolver, which uses `ResolveMetadataOptions.date`.
   */
  readonly date?: string;
  /** Run identifier. Not read by the resolver code visible in this module. */
  readonly runId?: string;
  /**
   * Optional editorial-title override. `string` is applied to every
   * language; an object allows explicit per-language overrides. With the
   * object form, a language without its own entry gets no override (there
   * is no fallback to another language's entry).
   */
  readonly title?: string | Partial<Record<LanguageCode, string>>;
  /**
   * Optional editorial-description override. Same shape rules as
   * {@link title}.
   */
  readonly description?: string | Partial<Record<LanguageCode, string>>;
  /**
   * Optional committee code (e.g. `ENVI`) used by
   * {@link COMMITTEE_REPORTS_TITLES} when the template fallback fires.
   * When absent or blank, `buildTemplateFallback` substitutes
   * `Main Committees`.
   */
  readonly committee?: string;
}
 
/** Inputs to {@link resolveArticleMetadata}. */
export interface ResolveMetadataOptions {
  /**
   * Article type slug (e.g. `breaking`, `motions`, `week-ahead`). Selects
   * the localized template and drives generic-heading detection.
   */
  readonly articleType: string;
  /** ISO date of the run (`YYYY-MM-DD`). */
  readonly date: string;
  /**
   * Aggregated Markdown document body (after provenance/header). Scanned
   * for its first H1 and its first qualifying prose line.
   */
  readonly markdown: string;
  /**
   * Parsed analysis manifest (may be empty for legacy/backport callers).
   * Treated as `{}` when omitted.
   */
  readonly manifest?: MetadataManifest;
  /**
   * Absolute path to the analysis run directory so the resolver can
   * peek at individual artefact files. Omit for callers that only have
   * the aggregated Markdown (the artefact-H1 tier is then skipped).
   */
  readonly runDir?: string;
}
 
/**
 * Maximum `<meta description>` length we will emit. Longer text is cut by
 * `truncateDescription`, which appends an ellipsis.
 */
const DESCRIPTION_MAX_LENGTH = 300;
 
/**
 * Maximum `<title>` length — anything longer is truncated with an ellipsis
 * by `truncateTitle`.
 */
const TITLE_MAX_LENGTH = 140;
 
/**
 * Ordered list of artefact filenames that typically carry the editorial H1.
 * Paths are relative to the run directory. `extractArtifactHighlight` tries
 * them in this order and stops at the first file whose H1 is not generic,
 * so earlier entries take precedence.
 */
const EDITORIAL_ARTEFACT_CANDIDATES: readonly string[] = [
  'intelligence/synthesis-summary.md',
  'intelligence/executive-summary.md',
  'intelligence/intelligence-briefing.md',
  'executive-summary.md',
  'intelligence-briefing.md',
  'synthesis-summary.md',
  'breaking-news-analysis.md',
  'committee-activity-report.md',
  'legislative-pipeline-analysis.md',
  'weekly-outlook.md',
  'monthly-outlook.md',
  'week-in-review.md',
  'month-in-review.md',
  'motions-analysis.md',
  'propositions-analysis.md',
];
 
/**
 * Emoji-banner prefixes that Stage-B agents use to decorate metadata rows
 * (e.g. `📋 Analysis Owner:`). Any line starting with one of these is
 * metadata, never prose.
 *
 * Typed `readonly string[]` like the other constant tables in this module
 * so the list cannot be mutated by accident.
 */
const EMOJI_BANNER_CHARS: readonly string[] = [
  '📋',
  '📅',
  '🔍',
  '🏛',
  '📰',
  '📊',
  '🏷',
  '📈',
  '📉',
  '⚠',
  '🔔',
  '🎯',
  '🗳',
  '🏢',
  '📄',
];
 
/**
 * Label prefixes that a prose description must never start with. Every
 * entry matches case-insensitively at the start of a trimmed line, followed
 * by optional space and a colon. The matching itself is done in
 * `shouldSkipDescriptionLine`, which also tolerates leading emphasis
 * markers around the label.
 */
const METADATA_LINE_PREFIXES: readonly string[] = [
  'Analysis Date',
  'Analysis Owner',
  'Article Type',
  'Assessment Date',
  'Classification',
  'Classification Date',
  'Confidence',
  'Data Sources',
  'Document Type',
  'Generated',
  'Last Updated',
  'Parliamentary Status',
  'Parliamentary Term',
  'Period',
  'Run',
  'Run ID',
  'Series',
  'Series Run',
  'SPDX-FileCopyrightText',
  'SPDX-License-Identifier',
  'Type',
  'Window',
];
 
/**
 * Return `true` when a line cannot serve as a prose description. Rejects
 * Markdown structural lines (headings, blockquotes, tables, HTML, rules,
 * code-fence markers), mermaid/chart directives, emoji-banner metadata
 * rows, and the known `Key: value` banners listed in
 * {@link METADATA_LINE_PREFIXES}.
 *
 * Only the fence *marker* line is rejected here; this function sees one
 * line at a time and cannot tell whether a line sits inside a fenced block.
 *
 * @param line - Trimmed line from the aggregated Markdown source
 * @returns `true` when the line is not prose and should be skipped
 */
export function shouldSkipDescriptionLine(line: string): boolean {
  if (line.length === 0) return true;

  // Markdown structural openers, code-fence markers, and mermaid `%%` init blocks.
  const structuralOpeners = ['#', '>', '<', '|', '---', '===', '```', '~~~', '%%'];
  if (structuralOpeners.some((opener) => line.startsWith(opener))) return true;

  // The `title <text>` directive inside mermaid / chart blocks.
  if (/^title\s/i.test(line)) return true;

  // Emoji-banner metadata rows
  if (EMOJI_BANNER_CHARS.some((char) => line.startsWith(char))) return true;

  // `Key: value` metadata banners. Leading emphasis markers are dropped so
  // plain `Key:`, bold `**Key**:` / `**Key:**`, and italic `*Key*:` /
  // `_Key_:` all reduce to a label that starts with the key. Lower-cased
  // once, outside the loop.
  const label = line.replace(/^\*+/, '').replace(/^_+/, '').trim().toLowerCase();
  // What may sit between the key and the rest of the line: a bare colon, a
  // spaced colon, or closing emphasis markers followed by the colon.
  const terminators = [':', ' :', '**:', '*:', '__:', '_:'];
  const isMetadataBanner = METADATA_LINE_PREFIXES.some((prefix) => {
    const key = prefix.toLowerCase();
    return terminators.some((terminator) => label.startsWith(`${key}${terminator}`));
  });
  if (isMetadataBanner) return true;

  // Pure punctuation / decorative separators
  return /^[-*_=~.]{3,}$/.test(line);
}
 
/**
 * Strip inline Markdown decorations so the remaining text can be used as
 * plain-text meta-tag content. Removes image and link syntax (keeping the
 * alt/link text), inline-code backticks, `**`/`__` bold, `*`/`_` italic and
 * `~~` strikethrough markers, then collapses runs of whitespace.
 *
 * Underscore emphasis is only recognised when the delimiters are not glued
 * to a letter or digit, so identifiers such as `snake_case_name` keep their
 * underscores. HTML entities are left untouched.
 *
 * @param raw - Trimmed Markdown line
 * @returns Plain-text variant
 */
export function stripInlineMarkdown(raw: string): string {
  // All inner character classes are length-bounded to eliminate the
  // polynomial-regex worst case that CodeQL flags on uncontrolled input —
  // none of these decorations are legitimately longer than 500 chars.
  return raw
    .replace(/!\[([^\]\n]{0,500})\]\(([^)\n]{0,500})\)/g, '$1') // ![alt](img) — must precede [text](url)
    .replace(/\[([^\]\n]{1,500})\]\(([^)\n]{0,500})\)/g, '$1') // [text](url) → text
    .replace(/`([^`\n]{1,500})`/g, '$1') // inline code
    .replace(/\*\*([^*\n]{1,500})\*\*/g, '$1') // **bold**
    .replace(/(?<![\p{L}\p{N}])__([^_\n]{1,500})__(?![\p{L}\p{N}])/gu, '$1') // __bold__, not intraword
    .replace(/\*([^*\n]{1,500})\*/g, '$1') // *italic*
    .replace(/(?<![\p{L}\p{N}_])_([^_\n]{1,500})_(?![\p{L}\p{N}_])/gu, '$1') // _italic_, not intraword
    .replace(/~~([^~\n]{1,500})~~/g, '$1') // ~~strike~~
    .replace(/\s+/g, ' ')
    .trim();
}
 
/**
 * Clamp a string to {@link DESCRIPTION_MAX_LENGTH} characters, appending
 * an ellipsis when truncation actually happens. Does not break words if
 * avoidable — a trailing partial word is trimmed back to the previous
 * space, provided that space lies within the last 60 characters of the
 * budget.
 *
 * Trailing whitespace and punctuation are removed before the ellipsis so
 * the result never ends in `" …"` or `"—…"`, and the cut never leaves half
 * of a surrogate pair behind.
 *
 * @param text - Raw description text
 * @returns Truncated description with trailing ellipsis when clipped
 */
export function truncateDescription(text: string): string {
  if (text.length <= DESCRIPTION_MAX_LENGTH) return text;
  let cut = text.slice(0, DESCRIPTION_MAX_LENGTH - 3);
  // `slice` counts UTF-16 code units; drop a dangling high surrogate so an
  // astral character (e.g. an emoji) is never cut in half.
  const lastUnit = cut.charCodeAt(cut.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut = cut.slice(0, -1);
  const lastSpace = cut.lastIndexOf(' ');
  const safe = lastSpace > DESCRIPTION_MAX_LENGTH - 60 ? cut.slice(0, lastSpace) : cut;
  // `\s` is part of the class so "alpha — beta…" style cuts do not leave a
  // blank between the last word and the ellipsis.
  return `${safe.replace(/[\s.,;:—-]+$/, '')}…`;
}
 
/**
 * Clamp a title to {@link TITLE_MAX_LENGTH} characters, appending an
 * ellipsis when truncation actually happens. A trailing partial word is
 * trimmed back to the previous space, provided that space lies within the
 * last 40 characters of the budget.
 *
 * Trailing whitespace and punctuation are removed before the ellipsis, and
 * the cut never leaves half of a surrogate pair behind.
 *
 * @param text - Raw title text
 * @returns Truncated title with trailing ellipsis when clipped
 */
export function truncateTitle(text: string): string {
  if (text.length <= TITLE_MAX_LENGTH) return text;
  let cut = text.slice(0, TITLE_MAX_LENGTH - 3);
  // `slice` counts UTF-16 code units; drop a dangling high surrogate so an
  // astral character is never cut in half.
  const lastUnit = cut.charCodeAt(cut.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut = cut.slice(0, -1);
  const lastSpace = cut.lastIndexOf(' ');
  const safe = lastSpace > TITLE_MAX_LENGTH - 40 ? cut.slice(0, lastSpace) : cut;
  // `\s` is part of the class so no blank is left before the ellipsis
  // after a trailing dash or colon has been stripped.
  return `${safe.replace(/[\s.,;:—-]+$/, '')}…`;
}
 
/**
 * Return the first non-empty Markdown H1 (`# …`) in the supplied text,
 * stripped of the leading `#`, any closing `#` run, and inline Markdown
 * decorations. Lines inside fenced code blocks are ignored, so a shell
 * comment such as `# install deps` is never mistaken for the title.
 * Returns an empty string when no H1 is present.
 *
 * @param markdown - Markdown source
 * @returns Plain-text H1, or empty string when none found
 */
export function extractFirstH1(markdown: string): string {
  // Marker character of the currently open fence, or '' when outside one.
  let openFence = '';
  for (const raw of markdown.split('\n')) {
    const line = raw.trim();

    const fenceMarker = line.startsWith('```') ? '`' : line.startsWith('~~~') ? '~' : '';
    if (fenceMarker !== '') {
      // A fence is closed only by the same marker that opened it.
      if (openFence === '') openFence = fenceMarker;
      else if (openFence === fenceMarker) openFence = '';
      continue;
    }
    if (openFence !== '') continue;

    // Accept `# Title` but not `## Sub-heading`.
    if (!/^#\s+/.test(line)) continue;
    // Strip the leading `# ` marker, then trim trailing `#` characters
    // without an unbounded `\s*#+\s*$` regex (CodeQL flags that form as
    // polynomial on pathological repeated-`#` input).
    let text = line.replace(/^#\s+/, '').trimEnd();
    while (text.endsWith('#')) text = text.slice(0, -1).trimEnd();
    const heading = stripInlineMarkdown(text);
    // An H1 that is empty after stripping carries no title; keep looking.
    if (heading !== '') return heading;
  }
  return '';
}
 
/**
 * Walk every line of the Markdown source and return the first line that
 * survives {@link shouldSkipDescriptionLine}, lies outside any fenced code
 * block, and is at least 40 characters long once inline Markdown has been
 * stripped. The result is truncated to fit `<meta description>`.
 *
 * @param markdown - Markdown source
 * @returns Prose description, or empty string when nothing qualifies
 */
export function extractStrongProseLine(markdown: string): string {
  // Shorter lines are treated as labels or fragments, not prose.
  const minProseLength = 40;
  // Marker character of the currently open fence, or '' when outside one.
  let openFence = '';
  for (const raw of markdown.split('\n')) {
    const line = raw.trim();

    const fenceMarker = line.startsWith('```') ? '`' : line.startsWith('~~~') ? '~' : '';
    if (fenceMarker !== '') {
      // A fence is closed only by the same marker that opened it.
      if (openFence === '') openFence = fenceMarker;
      else if (openFence === fenceMarker) openFence = '';
      continue;
    }
    // Diagram or code source inside a fence is never a description.
    if (openFence !== '') continue;

    if (shouldSkipDescriptionLine(line)) continue;
    const plain = stripInlineMarkdown(line);
    if (plain.length < minProseLength) continue;
    return truncateDescription(plain);
  }
  return '';
}
 
/**
 * Turn an `article-type` slug into a title-cased label: split on `-` and
 * `_`, upper-case the first character of every segment, and join the
 * segments with single spaces. Mirrors the aggregator's own `humanize`
 * helper (`src/aggregator/analysis-aggregator.ts`) by value rather than by
 * import.
 *
 * @param slug - Slug like `week-ahead` or `breaking_news`
 * @returns Title-cased humanised form (`Week Ahead`, `Breaking News`)
 */
export function humanizeSlug(slug: string): string {
  const words: string[] = [];
  for (const part of slug.split(/[-_]/)) {
    // Empty segments (from doubled or edge separators) pass through as-is.
    words.push(part.length > 0 ? `${part.charAt(0).toUpperCase()}${part.substring(1)}` : part);
  }
  return words.join(' ').trim();
}
 
/**
 * Return `true` when the supplied heading matches the generic
 * `${humanize(articleType)} — ${date}` form that the aggregator writes as
 * its default document title. Three subjects are recognised — the
 * humanised type, the legacy `EU Parliament <Type>` form, and the doubled
 * `<Type> <Type>` collision form — each with an em-dash, ASCII hyphen,
 * en-dash, colon, or plain-space separator before the date. A heading made
 * of the humanised type, a dash, and nothing but digits and hyphens is
 * generic too, as is an empty heading.
 *
 * @param heading - Plain-text heading (post-{@link stripInlineMarkdown})
 * @param articleType - Article type slug
 * @param date - ISO date string
 * @returns `true` when the heading carries no editorial information
 */
export function isGenericHeading(heading: string, articleType: string, date: string): boolean {
  const normalized = heading.trim().replace(/\s+/g, ' ');
  if (normalized === '') return true;

  const human = humanizeSlug(articleType);
  const subjects = [human, `EU Parliament ${human}`, `${human} ${human}`];
  const separators = [' — ', ' - ', ' – ', ': ', ' '];
  for (const subject of subjects) {
    for (const separator of separators) {
      if (normalized === `${subject}${separator}${date}`) return true;
    }
  }

  // `<Type> — <digits and hyphens>`: a date-like tail that is not exactly
  // `date` still carries no editorial information.
  // eslint-disable-next-line security/detect-non-literal-regexp -- `human` is escaped via escapeRegex
  const trailingDateOnly = new RegExp(`^${escapeRegex(human)}\\s*[—–-]\\s*[\\d-]+$`, 'u');
  return trailingDateOnly.test(normalized);
}
 
/**
 * Backslash-escape every regular-expression metacharacter in a string so
 * it can be spliced into a pattern assembled at runtime and match only
 * itself.
 *
 * @param input - Raw string
 * @returns Regex-safe form of `input`
 */
function escapeRegex(input: string): string {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return input.replace(metacharacters, (ch) => `\\${ch}`);
}
 
/**
 * Look for an editorial headline among the artefacts of a run directory.
 * The files listed in {@link EDITORIAL_ARTEFACT_CANDIDATES} are tried
 * first, in order; after that every remaining top-level `.md` file is
 * tried in sorted name order, so the outcome does not depend on the
 * directory-listing order of the host OS. The first file whose first H1 is
 * non-empty and not generic wins.
 *
 * @param runDir - Absolute run directory path
 * @param articleType - Article type slug (used by {@link isGenericHeading})
 * @param date - ISO run date (used by {@link isGenericHeading})
 * @returns `{headline, summary}` (summary may be empty), or `null` when the
 *   directory is missing or no artefact yields a non-generic H1
 */
export function extractArtifactHighlight(
  runDir: string,
  articleType: string,
  date: string
): { readonly headline: string; readonly summary: string } | null {
  if (!runDir || !fs.existsSync(runDir)) return null;

  // Shared per-file probe: first H1 must exist and must not be generic.
  const highlightFrom = (
    abs: string
  ): { readonly headline: string; readonly summary: string } | null => {
    const body = readArtefactBody(abs);
    const headline = extractFirstH1(body);
    if (!headline || isGenericHeading(headline, articleType, date)) return null;
    return { headline: truncateTitle(headline), summary: extractStrongProseLine(body) };
  };

  // Direct candidate lookup — cheap and deterministic.
  for (const rel of EDITORIAL_ARTEFACT_CANDIDATES) {
    const abs = path.join(runDir, rel);
    if (!fs.existsSync(abs)) continue;
    const hit = highlightFrom(abs);
    if (hit) return hit;
  }

  // Fallback: the remaining top-level `.md` files. Candidates were already
  // probed (and rejected) above, so they are not read a second time.
  const alreadyTried = new Set<string>(EDITORIAL_ARTEFACT_CANDIDATES);
  const topLevel = safeReaddir(runDir)
    .filter((name) => name.endsWith('.md') && !alreadyTried.has(name))
    .sort();
  for (const rel of topLevel) {
    const hit = highlightFrom(path.join(runDir, rel));
    if (hit) return hit;
  }

  return null;
}
 
/**
 * Load an artefact file and drop its preamble: the leading run of blank
 * lines and single-line `<!-- … -->` comments (the SPDX/REUSE header), so
 * first-H1 and first-prose detection start at real content. Unreadable
 * files yield an empty string.
 *
 * @param abs - Absolute file path
 * @returns File contents from the first non-preamble line onward
 */
function readArtefactBody(abs: string): string {
  let contents: string;
  try {
    contents = fs.readFileSync(abs, 'utf8');
  } catch {
    return '';
  }
  const rows = contents.split('\n');
  // Index of the first row that is neither blank nor a one-line HTML comment.
  const firstBodyRow = rows.findIndex((row) => {
    const trimmed = row.trim();
    const isBlank = trimmed === '';
    const isComment = trimmed.startsWith('<!--') && trimmed.endsWith('-->');
    return !(isBlank || isComment);
  });
  // -1 means the whole file was preamble.
  return firstBodyRow === -1 ? '' : rows.slice(firstBodyRow).join('\n');
}
 
/**
 * List a directory without ever throwing: any failure of
 * `fs.readdirSync` (missing path, not a directory, no permission) is
 * reported as an empty listing.
 *
 * @param dir - Absolute directory path
 * @returns Entry names in `dir`, or `[]` when it cannot be read
 */
function safeReaddir(dir: string): string[] {
  let entries: string[];
  try {
    entries = fs.readdirSync(dir);
  } catch {
    entries = [];
  }
  return entries;
}
 
/**
 * Produce the last-resort `{title, subtitle}` for every supported language
 * by running the article-type–specific `*_TITLES` generator from
 * `src/constants/language-articles.ts`. The generators are fed values
 * derived from the run date (week range, month label) or the committee
 * label, so the text varies between runs of the same type.
 *
 * @param articleType - Article type slug
 * @param date - ISO run date
 * @param committee - Optional committee code (used by `committee-reports`);
 *   `Main Committees` is substituted when it is missing or blank
 * @returns Per-language `LangTitleSubtitle`
 */
export function buildTemplateFallback(
  articleType: string,
  date: string,
  committee?: string
): LanguageMap<LangTitleSubtitle> {
  const { start: weekStart, end: weekEnd } = deriveWeekRange(date);
  const month = deriveMonthLabel(date);
  const committeeLabel =
    !committee || committee.trim().length === 0 ? 'Main Committees' : committee;
  // The derived inputs are the same for every language, so build them once.
  const inputs: TemplateInputs = { date, weekStart, weekEnd, month, committee: committeeLabel };

  // Prototype-less accumulator, filled one language at a time.
  const byLanguage = Object.create(null) as Record<LanguageCode, LangTitleSubtitle>;
  for (const lang of ALL_LANGUAGES) {
    Object.defineProperty(byLanguage, lang, {
      configurable: true,
      enumerable: true,
      writable: true,
      value: templateForType(lang, articleType, inputs),
    });
  }
  return byLanguage;
}
 
/**
 * Inputs for {@link templateForType}. Built by `buildTemplateFallback`
 * and shared by every article-type generator; each generator picks the
 * fields it needs.
 */
interface TemplateInputs {
  /** Run date exactly as passed to `buildTemplateFallback`. */
  readonly date: string;
  /** `start` of the range returned by `deriveWeekRange` for the run date. */
  readonly weekStart: string;
  /** `end` of the range returned by `deriveWeekRange` for the run date. */
  readonly weekEnd: string;
  /** Label returned by `deriveMonthLabel` for the run date. */
  readonly month: string;
  /** Committee code from the manifest, or `Main Committees` when absent or blank. */
  readonly committee: string;
}
 
/**
 * Pick the localized title generator that belongs to an article-type slug
 * and invoke it with the inputs that generator expects. Slugs without a
 * dedicated generator get a language-independent fallback built from
 * {@link humanizeSlug} and the run date.
 *
 * @param lang - Target language code
 * @param articleType - Article type slug
 * @param inputs - Pre-derived inputs used by the generators
 * @returns `LangTitleSubtitle` for the requested language
 */
function templateForType(
  lang: LanguageCode,
  articleType: string,
  inputs: TemplateInputs
): LangTitleSubtitle {
  const { date, weekStart, weekEnd, month, committee } = inputs;

  // `breaking-breaking` is treated exactly like `breaking`.
  if (articleType === 'breaking' || articleType === 'breaking-breaking') {
    return getLocalizedString(BREAKING_NEWS_TITLES, lang)(date);
  }
  if (articleType === 'committee-reports') {
    return getLocalizedString(COMMITTEE_REPORTS_TITLES, lang)(committee);
  }
  if (articleType === 'motions') {
    return getLocalizedString(MOTIONS_TITLES, lang)(date);
  }
  if (articleType === 'propositions') {
    return getLocalizedString(PROPOSITIONS_TITLES, lang)();
  }
  if (articleType === 'week-ahead') {
    return getLocalizedString(WEEK_AHEAD_TITLES, lang)(weekStart, weekEnd);
  }
  if (articleType === 'month-ahead') {
    return getLocalizedString(MONTH_AHEAD_TITLES, lang)(month);
  }
  if (articleType === 'week-in-review') {
    return getLocalizedString(WEEKLY_REVIEW_TITLES, lang)(weekStart, weekEnd);
  }
  if (articleType === 'month-in-review') {
    return getLocalizedString(MONTHLY_REVIEW_TITLES, lang)(month);
  }

  // Unknown type: same text for every language.
  return {
    title: `${humanizeSlug(articleType)} — ${date}`,
    subtitle: `EU Parliament analysis — ${date}`,
  };
}
 
/**
 * Compute the Monday-to-Sunday week that contains an ISO date, working
 * entirely in UTC. When the input cannot be parsed, both ends of the range
 * are the input string itself.
 *
 * @param date - ISO date string (`YYYY-MM-DD`)
 * @returns `{ start, end }` both in `YYYY-MM-DD` form
 */
export function deriveWeekRange(date: string): { readonly start: string; readonly end: string } {
  const anchor = parseIsoDate(date);
  if (anchor === null) return { start: date, end: date };

  const msPerDay = 86_400_000;
  // getUTCDay() counts from Sunday = 0; rotate so Monday = 0 … Sunday = 6.
  const daysSinceMonday = (anchor.getUTCDay() + 6) % 7;
  const monday = new Date(anchor.getTime() - daysSinceMonday * msPerDay);
  const sunday = new Date(monday.getTime() + 6 * msPerDay);
  return { start: formatIsoDate(monday), end: formatIsoDate(sunday) };
}
 
/**
 * Return a human-friendly month label for an ISO date — English month
 * name + four-digit year (e.g. `April 2026`), computed in UTC. The label
 * is passed unchanged to the template generators of every language.
 *
 * @param date - ISO date string
 * @returns Month label, or the input when parsing fails
 */
export function deriveMonthLabel(date: string): string {
  const parsed = parseIsoDate(date);
  // Malformed dates are echoed back rather than turned into a bogus label.
  if (!parsed) return date;
  const monthNames = [
    'January',
    'February',
    'March',
    'April',
    'May',
    'June',
    'July',
    'August',
    'September',
    'October',
    'November',
    'December',
  ];
  const name = monthNames[parsed.getUTCMonth()] ?? '';
  return `${name} ${parsed.getUTCFullYear()}`.trim();
}
 
/**
 * Parse a `YYYY-MM-DD` string as UTC midnight. Returns `null` for input
 * that is malformed or that names a day which does not exist (for example
 * `2026-02-31`), so callers can skip month/week derivation gracefully
 * instead of working with a silently shifted date.
 *
 * @param iso - ISO date string
 * @returns Parsed `Date` or `null`
 */
function parseIsoDate(iso: string): Date | null {
  const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(iso);
  if (!match) return null;
  const parsed = new Date(`${iso}T00:00:00Z`);
  if (Number.isNaN(parsed.getTime())) return null;
  // Some engines roll an out-of-range day over into the next month rather
  // than rejecting it; require the parsed date to round-trip exactly.
  const roundTrips =
    parsed.getUTCFullYear() === Number(match[1]) &&
    parsed.getUTCMonth() + 1 === Number(match[2]) &&
    parsed.getUTCDate() === Number(match[3]);
  return roundTrips ? parsed : null;
}
 
/**
 * Render the UTC calendar date of a `Date` as `YYYY-MM-DD`. Month and day
 * are zero-padded to two digits; the year is written as-is.
 *
 * @param d - Date object
 * @returns ISO date string
 */
function formatIsoDate(d: Date): string {
  const twoDigits = (n: number): string => String(n).padStart(2, '0');
  // getUTCMonth() is zero-based.
  return [d.getUTCFullYear(), twoDigits(d.getUTCMonth() + 1), twoDigits(d.getUTCDate())].join('-');
}
 
/**
 * Extract a manifest override value for a single language. Accepts either
 * a plain string (applied to every language) or a per-language object.
 *
 * @param value - Raw manifest value (string or per-lang object)
 * @param lang - Target language code
 * @returns Trimmed override string, or empty string when absent
 */
function manifestOverrideFor(
  value: string | Partial<Record<LanguageCode, string>> | undefined,
  lang: LanguageCode
): string {
  // A plain string is a blanket editorial override — the operator is
  // telling the resolver "use this exact text for every language". This
  // is the one path where a single string is applied cross-locale; the
  // operator takes responsibility for its language.
  if (typeof value === 'string') return value.trim();
  if (!value) return '';
  // Per-language object: respect ONLY the explicit entry for `lang`. We
  // deliberately do NOT fall back to the `en` entry for non-English
  // variants — otherwise an EN-only override would leak English into
  // every other locale's <title>. Missing languages fall through to the
  // localized template tier.
  //
  // Only own enumerable keys are considered and non-string values are
  // ignored, because the manifest comes from parsed JSON and may not match
  // the declared type.
  for (const [key, candidate] of Object.entries(value as Record<string, unknown>)) {
    if (key === lang && typeof candidate === 'string') return candidate.trim();
  }
  return '';
}
 
/**
 * Internal: best editorial `{headline, summary}` pair available from the
 * artefacts and the aggregator output, independent of language. Covers
 * tiers 2–4 of the priority ladder and returns on the first tier that
 * produces a headline.
 *
 * @param opts - Resolver inputs
 * @returns Editorial content taken from the source documents; both fields
 *   are empty strings when no tier produced anything
 */
function resolveEditorialContent(opts: ResolveMetadataOptions): {
  readonly headline: string;
  readonly summary: string;
} {
  const { articleType, date, markdown, runDir } = opts;

  // Tier 2: first non-generic H1 in the first substantive artefact.
  // Skipped entirely when the caller supplied no run directory.
  if (runDir) {
    const highlight = extractArtifactHighlight(runDir, articleType, date);
    if (highlight?.headline) {
      return {
        headline: highlight.headline,
        summary: highlight.summary,
      };
    }
  }

  // Tier 3: first non-generic H1 in the aggregated Markdown itself.
  const aggregatedH1 = extractFirstH1(markdown);
  const aggregatedSummary = extractStrongProseLine(markdown);
  if (aggregatedH1 && !isGenericHeading(aggregatedH1, articleType, date)) {
    return {
      headline: truncateTitle(aggregatedH1),
      summary: aggregatedSummary,
    };
  }

  // Tier 4: first strong prose paragraph (title = same prose clipped).
  if (aggregatedSummary) {
    return { headline: truncateTitle(aggregatedSummary), summary: aggregatedSummary };
  }

  return { headline: '', summary: '' };
}
 
/**
 * Resolve the `{title, description}` pair of one article for every
 * supported language. For each language the candidates are tried in ladder
 * order — manifest override, editorial content (English only), localized
 * template — and the first non-blank one is clamped to the title or
 * description length limit.
 *
 * @param opts - Resolver inputs ({@link ResolveMetadataOptions})
 * @returns One `{title, description}` entry per supported language
 */
export function resolveArticleMetadata(opts: ResolveMetadataOptions): ResolvedMetadata {
  const manifest: MetadataManifest = opts.manifest ?? {};
  const editorial = resolveEditorialContent(opts);
  const localized = buildTemplateFallback(opts.articleType, opts.date, manifest.committee);

  // Prototype-less accumulator, filled one language at a time.
  const resolved = Object.create(null) as Record<LanguageCode, ResolvedMetadataEntry>;

  for (const lang of ALL_LANGUAGES) {
    const template = localized[lang];
    const titleLadder = [manifestOverrideFor(manifest.title, lang)];
    const descriptionLadder = [manifestOverrideFor(manifest.description, lang)];

    // Editorial headline/summary come from the English source, so only the
    // `en` variant may use them; every other language goes from the
    // manifest override straight to its localized template.
    if (lang === 'en') {
      titleLadder.push(editorial.headline);
      descriptionLadder.push(editorial.summary);
    }
    titleLadder.push(template.title);
    descriptionLadder.push(template.subtitle);

    const entry: ResolvedMetadataEntry = {
      title: truncateTitle(pickFirstNonEmpty(titleLadder) || template.title),
      description: truncateDescription(pickFirstNonEmpty(descriptionLadder) || template.subtitle),
    };
    Object.defineProperty(resolved, lang, {
      configurable: true,
      enumerable: true,
      writable: true,
      value: entry,
    });
  }

  return resolved;
}
 
/**
 * Scan a candidate list in order and hand back the first entry that still
 * has content after trimming, in its trimmed form. Yields the empty string
 * when every entry is blank or the list is empty.
 *
 * @param candidates - Ordered list of candidate strings
 * @returns First non-empty entry, trimmed
 */
function pickFirstNonEmpty(candidates: readonly string[]): string {
  // The `typeof` guard stays because callers may pass values that came
  // from untyped JSON.
  const winner = candidates.find((c) => typeof c === 'string' && c.trim().length > 0);
  return winner === undefined ? '' : winner.trim();
}