Press n or j to go to the next uncovered block, b, p or k for the previous block.
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 | 24x 25567x 5581x 370329x 489x 489x 5205x 5581x 5581x 5581x 5205x 5205x 1112x 1112x 1112x 1112x 839x 839x 3915x 3915x 78x 761x 761x 761x 761x 1x | // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
/**
* @module Aggregator/Metadata/BriefingHighlightHeadline
* @description Headline derivation primitives extracted from
* {@link briefing-highlight.ts} so that module stays below the 600-line
* drift-guard cap (see `test/unit/source-file-size.test.js`). Pure leaf
* module — depends only on {@link truncateTitle} from `text-utils.js`
* and the {@link stripTradecraftLabels} helper from
* {@link briefing-highlight-sections.ts}.
*
* Headline derivation kicks in when a brief's `## Strategic Intelligence
* Summary` block has *no* `### Sub-section` heading underneath it — the
* resolver falls back to the section's first prose paragraph and we
* mine it for a journalist-quality top-line.
*/
import { truncateTitle } from './text-utils.js';
import { stripTradecraftLabels } from './briefing-highlight-sections.js';
/**
 * Editorial "news hook" signals — wording that marks the most compelling
 * claim in a paragraph (what editors call the "nut graf" or "top line").
 *
 * The table below holds the regular-expression sources; each one is
 * compiled case-insensitively. {@link extractNewsHookSentence} returns the
 * first sentence that matches at least one of the resulting patterns.
 */
const NEWS_HOOK_PATTERNS: readonly RegExp[] = [
  String.raw`\blandmark\b`,
  String.raw`\bmost (?:significant|consequential|ambitious|contentious|comprehensive)\b`,
  String.raw`\bunprecedented\b`,
  String.raw`\bhistoric(?:ally)?\b`,
  String.raw`\bfirst[\s-](?:ever|time)\b`,
  String.raw`\boverhaul\b`,
  String.raw`\breshape[sd]?\b`,
  // No trailing \b: matches "record-breaking", "record high", ...
  String.raw`\brecord[\s-]`,
  String.raw`\bsweeping\b`,
  String.raw`\bbreakthrough\b`,
  String.raw`\bparadox\b`,
  // Open-ended stem: matches "game-changer", "game changing", ...
  String.raw`\bgame[\s-]chang`,
  String.raw`\bturning[\s-]point\b`,
  String.raw`\bcrisis\b`,
  String.raw`\bshowdown\b`,
  String.raw`\bfracture[sd]?\b`,
].map((source) => new RegExp(source, 'i'));
/**
 * Extract the most newsworthy sentence from a paragraph. Rather than
 * always taking the first sentence (typically bland context-setting), it
 * scans for the first sentence carrying a strong editorial signal from
 * {@link NEWS_HOOK_PATTERNS} (superlatives, novelty claims, dramatic verbs).
 *
 * @param paragraph - Cleaned paragraph text
 * @returns The first hook sentence for which `truncateTitle` yields a
 *   non-empty title, or `''` if none is found
 */
function extractNewsHookSentence(paragraph: string): string {
  // Sentence boundaries:
  //   - Western . ! ? followed by whitespace (the whitespace is consumed);
  //   - CJK full stop (U+3002), fullwidth ! and ? (U+FF01, U+FF1F) and the
  //     Arabic question mark (U+061F), which carry no trailing space.
  // The second class is spelled with code-point escapes so that ASCII
  // `!` / `?` cannot end up in it: that would split after every ASCII
  // `!` or `?` even mid-token ("?!", URL query strings).
  // Fragments of 20 characters or fewer are discarded as too short.
  const sentences = paragraph
    .split(/(?<=[.!?])\s+|(?<=[\u3002\uFF01\uFF1F\u061F])/)
    .filter((s) => s.length > 20);

  // First sentence with a news-hook signal wins; if truncateTitle returns
  // an empty string for it, keep scanning the remaining sentences.
  for (const sentence of sentences) {
    if (NEWS_HOOK_PATTERNS.some((re) => re.test(sentence))) {
      const result = truncateTitle(sentence);
      if (result) return result;
    }
  }
  return '';
}
/**
 * Derive a usable headline from a paragraph when no explicit `### …`
 * sub-heading is available. Tradecraft labels are stripped first, then a
 * journalist's editorial hierarchy is applied:
 *
 * 1. The sentence with the strongest news hook (superlatives, novelty)
 * 2. The whole cleaned paragraph passed through `truncateTitle`
 * 3. The first sentence (Western, CJK or Arabic terminator) passed
 *    through `truncateTitle`
 * 4. A clause cut at a natural boundary (comma, semicolon, dash)
 * 5. A hard cut at a word boundary as last resort
 *
 * @param paragraph - Source paragraph (already normalized)
 * @returns Headline string, or `''` when no usable clause can be derived
 */
export function deriveHeadlineFromParagraph(paragraph: string): string {
  // Strip tradecraft labels before headline derivation.
  const cleaned = stripTradecraftLabels(paragraph);

  // Priority 1: the most newsworthy sentence (superlatives, drama).
  const newsHook = extractNewsHookSentence(cleaned);
  if (newsHook) return newsHook;

  // Priority 2: let truncateTitle work on the whole cleaned paragraph.
  const direct = truncateTitle(cleaned);
  if (direct) return direct;

  // Priority 3: isolate the first sentence and retry truncateTitle.
  // Western . ! ? only end a sentence when followed by whitespace or end
  // of input; the CJK full stop (U+3002), fullwidth ! ? (U+FF01, U+FF1F)
  // and Arabic question mark (U+061F) end one unconditionally. Code-point
  // escapes keep ASCII `!` / `?` out of the unconditional class, which
  // would otherwise bypass the whitespace guard.
  const sentenceMatch = /^(.*?(?:[.!?](?=\s|$)|[\u3002\uFF01\uFF1F\u061F]))/.exec(cleaned);
  if (sentenceMatch?.[1]) {
    const sentenceResult = truncateTitle(sentenceMatch[1]);
    if (sentenceResult) return sentenceResult;
  }

  // Priority 4: take the text up to the first significant clause
  // separator. Separators are tried in this order (not by position); the
  // search starts at offset 30 so the clause is at least 30 characters,
  // and a cut beyond offset 140 is rejected as too long.
  const CLAUSE_SEPARATORS = [', ', '; ', ' — ', ' – ', ' - '] as const;
  for (const sep of CLAUSE_SEPARATORS) {
    const idx = cleaned.indexOf(sep, 30);
    if (idx !== -1 && idx <= 140) {
      return cleaned.slice(0, idx).trim();
    }
  }

  // Final fallback: hard-cut at 120 chars on a word boundary, provided
  // the last space leaves more than 60 characters of headline.
  if (cleaned.length > 120) {
    const slice = cleaned.slice(0, 120);
    const lastSpace = slice.lastIndexOf(' ');
    if (lastSpace > 60) return slice.slice(0, lastSpace).trim();
  }

  // Text of at most 140 characters is returned as-is; anything longer
  // that could not be cut cleanly yields no headline.
  return cleaned.length <= 140 ? cleaned : '';
}
|