All files / src/aggregator/metadata briefing-highlight-headline.ts

100% Statements 33/33
86.36% Branches 19/22
100% Functions 4/4
100% Lines 26/26

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120                                                      24x                                                             25567x     5581x 370329x 489x 489x     5205x                                 5581x     5581x 5581x     5205x 5205x       1112x 1112x 1112x 1112x       839x 839x 3915x 3915x 78x         761x 761x 761x 761x   1x    
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
 
/**
 * @module Aggregator/Metadata/BriefingHighlightHeadline
 * @description Headline derivation primitives extracted from
 * {@link briefing-highlight.ts} so that module stays below the 600-line
 * drift-guard cap (see `test/unit/source-file-size.test.js`). Pure leaf
 * module — depends only on {@link truncateTitle} from `text-utils.js`
 * and the {@link stripTradecraftLabels} helper from
 * {@link briefing-highlight-sections.ts}.
 *
 * Headline derivation kicks in when a brief's `## Strategic Intelligence
 * Summary` block has *no* `### Sub-section` heading underneath it — the
 * resolver falls back to the section's first prose paragraph and we
 * mine it for a journalist-quality top-line.
 */
 
import { truncateTitle } from './text-utils.js';
import { stripTradecraftLabels } from './briefing-highlight-sections.js';
 
/**
 * Case-insensitive signals that mark a sentence as a likely "news hook"
 * (what editors call the "nut graf" or "top line"). A sentence qualifies
 * as soon as any single pattern matches; {@link extractNewsHookSentence}
 * returns the first qualifying sentence of a paragraph.
 *
 * None of the expressions carries the `g` or `y` flag, so calling
 * `RegExp.prototype.test` on them repeatedly keeps no `lastIndex` state.
 */
const NEWS_HOOK_PATTERNS: ReadonlyArray<RegExp> = [
  // Superlatives and novelty claims.
  /\blandmark\b/i,
  /\bmost (?:significant|consequential|ambitious|contentious|comprehensive)\b/i,
  /\bunprecedented\b/i,
  /\bhistoric(?:ally)?\b/i,
  /\bfirst[\s-](?:ever|time)\b/i,

  // Large-scale change. "record" must be followed by whitespace or a
  // hyphen ("record high", "record-breaking"), so "recorded" is ignored.
  /\boverhaul\b/i,
  /\breshape[sd]?\b/i,
  /\brecord[\s-]/i,
  /\bsweeping\b/i,
  /\bbreakthrough\b/i,

  // Tension, conflict and inflection points. "game chang" is left
  // open-ended on purpose to cover "changer", "changing", ...
  /\bparadox\b/i,
  /\bgame[\s-]chang/i,
  /\bturning[\s-]point\b/i,
  /\bcrisis\b/i,
  /\bshowdown\b/i,
  /\bfracture[sd]?\b/i,
];
 
/**
 * Extract the most newsworthy sentence from a paragraph. Rather than
 * always taking the first sentence (typically bland context-setting),
 * this scans the sentences in order and returns the first one that
 * matches any of {@link NEWS_HOOK_PATTERNS} and for which
 * {@link truncateTitle} yields a non-empty title.
 *
 * Sentence boundaries recognised:
 * - Western `.`, `!`, `?` followed by whitespace (the whitespace is
 *   consumed by the split);
 * - ideographic full stop `。` (U+3002), full-width `！` (U+FF01) and
 *   `？` (U+FF1F), and Arabic question mark `؟` (U+061F), which are not
 *   normally followed by a space, so the split there is zero-width.
 *
 * Fragments of 20 characters or fewer are discarded before matching.
 *
 * @param paragraph - Cleaned paragraph text
 * @returns The most compelling sentence, or '' if none found
 */
function extractNewsHookSentence(paragraph: string): string {
  // The non-Western terminators are spelled as \u escapes so they cannot
  // be folded into their ASCII look-alikes by an editor or normaliser;
  // ASCII `!`/`?` in that class would split inside tokens such as
  // "?!" or a URL query string.
  const sentences = paragraph
    .split(/(?<=[.!?])\s+|(?<=[\u3002\uFF01\uFF1F\u061F])/)
    .filter((s) => s.length > 20);

  // Find the first sentence with a news hook signal. A match whose
  // truncated form is empty is skipped so a later sentence can still win.
  for (const sentence of sentences) {
    if (NEWS_HOOK_PATTERNS.some((re) => re.test(sentence))) {
      const result = truncateTitle(sentence);
      if (result) return result;
    }
  }
  return '';
}
 
/**
 * Derive a usable headline from a paragraph when no explicit `### …`
 * sub-heading is available. Tradecraft labels are stripped first, then
 * the following stages are tried in order; the first non-empty result
 * wins:
 *
 * 1. The first sentence carrying a news hook (superlatives, novelty),
 *    via {@link extractNewsHookSentence}.
 * 2. `truncateTitle` applied to the whole cleaned paragraph.
 * 3. `truncateTitle` applied to just the first sentence, where a
 *    sentence ends at `.`, `!` or `?` followed by whitespace or end of
 *    input, or at `。`, `！`, `？` or `؟`.
 * 4. The text before a clause separator (`, `, `; `, ` — `, ` – `,
 *    ` - `) found at index 30–140. Separators are tried in that listed
 *    order, so an in-range comma wins over an earlier semicolon or dash.
 * 5. A hard cut at the last space within the first 120 characters,
 *    provided that space lies beyond index 60.
 *
 * If every stage fails, the cleaned paragraph itself is returned when it
 * is at most 140 characters long.
 *
 * @param paragraph - Source paragraph (already normalized)
 * @returns Headline string, or `''` when no usable clause can be derived
 */
export function deriveHeadlineFromParagraph(paragraph: string): string {
  // Strip tradecraft labels before headline derivation.
  const cleaned = stripTradecraftLabels(paragraph);

  // Stage 1: the most newsworthy sentence (superlatives, drama).
  const newsHook = extractNewsHookSentence(cleaned);
  if (newsHook) return newsHook;

  // Stage 2: let truncateTitle work on the whole cleaned paragraph.
  const direct = truncateTitle(cleaned);
  if (direct) return direct;

  // Stage 3: isolate the first sentence and retry truncateTitle.
  // Western terminators must be followed by whitespace or end of input
  // (so "3.5" or "a?b" do not end a sentence); the CJK / Arabic
  // terminators U+3002, U+FF01, U+FF1F, U+061F need no trailing space.
  // They are written as escapes so they cannot be confused with ASCII.
  const sentenceMatch = /^(.*?(?:[.!?](?=\s|$)|[\u3002\uFF01\uFF1F\u061F]))/.exec(cleaned);
  if (sentenceMatch?.[1]) {
    const sentenceResult = truncateTitle(sentenceMatch[1]);
    if (sentenceResult) return sentenceResult;
  }

  // Stage 4: take text up to a significant clause separator. The search
  // starts at index 30 so the clause is never trivially short; indexOf
  // therefore returns either -1 or a value >= 30.
  const CLAUSE_SEPARATORS = [', ', '; ', ' — ', ' – ', ' - '] as const;
  for (const sep of CLAUSE_SEPARATORS) {
    const idx = cleaned.indexOf(sep, 30);
    if (idx > 0 && idx <= 140) {
      return cleaned.slice(0, idx).trim();
    }
  }

  // Stage 5: hard-cut at 120 chars on a word boundary, but only when the
  // boundary leaves more than 60 characters of headline.
  if (cleaned.length > 120) {
    const slice = cleaned.slice(0, 120);
    const lastSpace = slice.lastIndexOf(' ');
    if (lastSpace > 60) return slice.slice(0, lastSpace).trim();
  }

  // Nothing worked: a short paragraph is acceptable as-is, a long
  // unbreakable one is not.
  return cleaned.length <= 140 ? cleaned : '';
}