// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
/**
* Title-rejection predicates shared by the metadata resolver and the
* SEO validation gate.
*
* Every English `<title>` and `<meta description>` on the site is
* resolved from the run's `executive-brief.md` via
* {@link ./resolve-helpers.ts}. Bold-prose labels inside the brief
* (`**Strategic significance:** …`, `**Threat Level:** …`,
* `**Key Assumptions Check:** …`) and trailing ellipsis fragments
* from over-budget strong-prose paragraphs have leaked into the
* `<title>` surface (216-article audit, 2026-05-24). This module
* provides the canonical denylist + structural rejection rules so
* resolver and validator stay in lock-step.
*
* NEVER inline these predicates — duplicating the denylist makes the
* validator and resolver drift, which is exactly how the bad titles
* shipped in the first place.
*/
/**
 * Labels that show up in `executive-brief.md` as bold-prose
 * `**Label:** …` lines and must never be promoted to a title.
 *
 * Entries are stored lowercase and are compared verbatim against the
 * normalised candidate (lower-cased, with surrounding emphasis markers
 * and trailing punctuation removed), so every new entry must be
 * lowercase and free of trailing punctuation. The array is frozen and
 * its order is preserved for consumers that read it as a list.
 */
const SECTION_HEADER_DENYLIST: readonly string[] = Object.freeze([
  'strategic significance', 'event description', 'key intelligence',
  'threat level', 'close to adoption',
  'convergence themes', 'convergence theme',
  'key assumptions check', 'risk assessment', 'stakeholder map',
  'intelligence summary', 'session overview', 'situation summary',
  'priority analysis',
  'priority intelligence items', 'priority intelligence item',
  'lead story', 'bluf', 'tl;dr', '60-second read',
  'classification', 'confidence summary', 'methodological notes',
  'source reliability assessment', 'corrections and caveats',
  'forward look', 'top three action items',
  'political landscape summary', 'external environment summary',
  'coalition & bloc summary', 'coalition and bloc summary',
  'week ahead', 'week in review',
  'month ahead', 'month in review',
  'year ahead', 'term outlook', 'quarter ahead', 'election cycle',
  // single-noun outputs that occasionally surface from H2/H3 walks
  'overview', 'background', 'context', 'analysis',
  'summary', 'conclusion', 'recommendations',
]);

/** Set view of the denylist used for membership checks. */
const SECTION_HEADER_SET: ReadonlySet<string> = new Set<string>(SECTION_HEADER_DENYLIST);
/**
 * Ellipsis at end of string (Unicode `…` or ASCII `...`).
 *
 * Matches either the single character U+2026 or three consecutive ASCII
 * dots, optionally followed by whitespace, anchored at the end of the
 * input. The pattern has no `g`/`y` flag, so `.test()` carries no
 * `lastIndex` state between calls.
 */
const ELLIPSIS_TAIL_RE = /(?:\u2026|\.\.\.)\s*$/u;
/**
 * Adopted-text doc-ID (`TA-10-2026-0160`) — these are procedure
 * identifiers, never editorial titles.
 *
 * Anchored at both ends, so it only matches a string that is nothing
 * but the identifier: `TA-`, one or more digits, `-`, exactly four
 * digits, `-`, then three or four digits. Matching is case-insensitive.
 */
const DOC_ID_RE = /^TA-\d+-\d{4}-\d{3,4}$/iu;
/**
 * Detect a candidate that is really a complete sentence rather than a
 * headline (e.g. `Routine inter-sessional day, no breaking signal.`,
 * `EP10 enters the second half of its mandate with a structurally
 * constrained but operational grand coalition.`).
 *
 * Gold-standard brief H1s never end with a period — they are
 * noun-phrase headlines (`EU Parliament Year Ahead (May 2026 – May
 * 2027)`, `EP Committee Reports · Week of 2026-05-14–21`). A trailing
 * single `.` (NOT `…` and NOT `...`) on a ≥4-word candidate is the
 * cleanest signal that we are looking at sentence prose leaked from a
 * BLUF / lede paragraph rather than an editorial headline.
 *
 * @param value - Title candidate
 * @returns `true` when the trimmed candidate ends with exactly one
 *   period and contains at least four whitespace-separated words.
 */
function looksLikeFullSentence(value: string): boolean {
  const trimmed = value.trim();
  // "Exactly one period": the last character is `.` and the one before
  // it is not. That alone rules out `...` (preceded by a dot) and `…`
  // (not a `.` at all), so no separate ellipsis check is needed. Other
  // terminal punctuation (`?`, `!`, `:`) is deliberately not treated as
  // a sentence signal here.
  if (!/[^.]\.$/u.test(trimmed)) return false;
  // `trimmed` has no leading or trailing whitespace, so the split
  // yields no empty tokens.
  return trimmed.split(/\s+/u).length >= 4;
}
/**
 * `true` when the candidate is a bold-prose section header that
 * leaked through the priority-finding extractor (e.g. `Strategic
 * significance`, `Threat Level`).
 *
 * The candidate is lower-cased, then leading emphasis markers (`*`,
 * `_`) and whitespace are removed, then any trailing run of emphasis
 * markers, whitespace and terminal punctuation (`…`, `.`, `:`, `!`,
 * `?`) is removed, in whatever order those characters are interleaved.
 * This means `Threat Level:`, `**Threat Level**:` and
 * `**Threat Level:**` all normalise to `threat level`.
 *
 * @param value - Title candidate
 * @returns `true` when the normalised candidate is in the
 *   section-header denylist; `false` for empty input or input that
 *   normalises to an empty string.
 */
export function looksLikeSectionHeader(value: string): boolean {
  if (!value) return false;
  const normalised = value
    .toLowerCase()
    .replace(/^[*_\s]+/u, '')
    // One combined class: stripping punctuation before the emphasis
    // markers would leave the colon behind in `**Label:**`.
    .replace(/[*_\s\u2026.:!?]+$/u, '');
  if (!normalised) return false;
  return SECTION_HEADER_SET.has(normalised);
}
/**
 * Reports whether the candidate was truncated over the title budget,
 * i.e. whether it ends with `…` or `...` (trailing whitespace after
 * the ellipsis is tolerated).
 *
 * @param value - Title candidate
 * @returns `true` when the candidate has a trailing ellipsis.
 */
export function looksLikeEllipsisCut(value: string): boolean {
  const endsWithEllipsis = ELLIPSIS_TAIL_RE.test(value);
  return endsWithEllipsis;
}
/**
 * Reports whether the candidate, once surrounding whitespace is
 * removed, is nothing but an adopted-text doc-ID.
 *
 * @param value - Title candidate
 * @returns `true` when the candidate matches the `TA-NN-YYYY-NNNN` shape.
 */
export function looksLikeDocId(value: string): boolean {
  const candidate = value.trim();
  return DOC_ID_RE.test(candidate);
}
/**
 * Master rejection predicate. Returns the reason code (one of
 * `section-header`, `ellipsis-cut`, `doc-id`, `sentence-fragment`)
 * when the candidate should be rejected, or `null` when it is
 * usable.
 *
 * Checks run in a fixed order and the first match wins: trailing
 * ellipsis, bare doc-ID, denylisted section header, full sentence. A
 * candidate that trips several rules (e.g. `Threat Level...`) is
 * therefore reported under the earliest one (`ellipsis-cut`).
 *
 * @param value - Title candidate
 * @returns Reason code, or `null` when the candidate is usable. An
 *   empty candidate also yields `null` — this function reports no
 *   reason for it, so callers must handle missing titles themselves.
 */
export function findTitleRejectionReason(
  value: string
): 'section-header' | 'ellipsis-cut' | 'doc-id' | 'sentence-fragment' | null {
  if (!value) return null;
  if (looksLikeEllipsisCut(value)) return 'ellipsis-cut';
  if (looksLikeDocId(value)) return 'doc-id';
  if (looksLikeSectionHeader(value)) return 'section-header';
  if (looksLikeFullSentence(value)) return 'sentence-fragment';
  return null;
}
/**
 * Exposed for unit tests + the SEO validator.
 *
 * This is the same frozen array instance as the internal denylist, not
 * a copy.
 */
export const TITLE_REJECTION_DENYLIST: readonly string[] = SECTION_HEADER_DENYLIST;