// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
/**
* @module Aggregator/Metadata/PriorityFindingCleaning
* @description Headline-cleaning helpers extracted from
* `priority-finding-highlight.ts` to keep that module under the
* 600-line drift-guard budget enforced by
* `test/unit/source-file-size.test.js`.
*
* Public entry point: {@link cleanPriorityHeadline} — normalises a
* raw bold-title / heading string by stripping priority decorations
* (`🔴 CRITICAL — `), editorial prefixes (`Trigger 1: `), trailing
* confidence markers (`🔴 CRITICAL`), and parenthesised tail metadata
* (`(TA-10-2026-0160, 2026-04-30)`).
*
* Bounded-context rules:
* - **Pure helpers** — no I/O, no globals.
* - **Deterministic** — same input always produces same output.
* - **Reusable** — every helper accepts a plain string and returns a
* plain string; suitable for property-tests.
*/
import { stripInlineMarkdown } from './text-utils.js';
/**
 * Priority-label words that {@link cleanPriorityHeadline} removes from the
 * front of a headline (`🔴 CRITICAL — Title` → `Title`).
 *
 * The labels live in a plain list and are tested one at a time rather than
 * being folded into a single pattern with deep alternation, which keeps the
 * unsafe-regex lint satisfied.
 */
const PRIORITY_LABEL_TOKENS: ReadonlyArray<string> = [
  // Severity labels, long form listed next to its short form.
  'CRITICAL',
  'HIGH PRIORITY', 'HIGH',
  'MEDIUM PRIORITY', 'MEDIUM',
  'LOW PRIORITY', 'LOW',
  // Generic attention words.
  'URGENT', 'ALERT', 'PRIORITY',
];
/**
 * Confidence-marker words that {@link cleanPriorityHeadline} removes from
 * the end of a headline. Held as a list for the same reason as
 * {@link PRIORITY_LABEL_TOKENS}: one small pattern per token instead of one
 * pattern with deep alternation.
 */
const PRIORITY_TRAILING_TOKENS: ReadonlyArray<string> = [
  'CRITICAL',
  'HIGH PRIORITY', 'HIGH',
  'MEDIUM PRIORITY', 'MEDIUM',
  'LOW PRIORITY', 'LOW',
];
/**
 * Editorial prefix words that {@link cleanPriorityHeadline} removes from the
 * front of a headline when they are followed by a number and a separator
 * (`Trigger 1: Title` → `Title`).
 */
const PRIORITY_LEADING_PREFIX_TOKENS: ReadonlyArray<string> = [
  'Trigger', 'Dossier', 'Priority', 'Finding',
  'Item', 'Highlight', 'Top', 'Story', 'Alert',
  // Both spellings are accepted.
  'Judgement', 'Judgment',
];
/**
 * Strip a leading priority decoration (`🔴 `, `CRITICAL — `) from a
 * candidate headline. Extracted from {@link cleanPriorityHeadline} to
 * keep cognitive complexity within budget.
 *
 * Each of the two passes first drops any leading run of characters that
 * are neither letters nor digits, then removes at most one
 * {@link PRIORITY_LABEL_TOKENS} entry (matched case-insensitively) when it
 * is followed by a separator and a non-empty title. Two passes let a
 * stacked decoration such as `URGENT - HIGH: Title` collapse to `Title`.
 *
 * A label glued to a following word by a plain hyphen (`High-speed rail`,
 * `Low-carbon steel`) is treated as an ordinary compound word and is left
 * untouched.
 *
 * @param text - Candidate headline (already trimmed)
 * @returns Headline with the leading decoration removed
 */
function stripPriorityLeadingDecoration(text: string): string {
  // Separator after the label: colon, em dash (U+2014), en dash (U+2013)
  // or ASCII hyphen, followed by the title itself. The dashes are written
  // as escapes so the pattern survives encoding round-trips.
  const separatorThenTitle = /^[:\u2014\u2013-]\s*(.+)$/u;
  // Emoji, bullets, markdown residue, whitespace ... before the first
  // letter or digit.
  const leadingNonAlphanumerics = /^[^\p{L}\p{N}]+/u;
  // Hyphen directly after the label and directly before a letter.
  const hyphenatedCompound = /^-\p{L}/u;

  let out = text;
  for (let pass = 0; pass < 2; pass++) {
    out = out.replace(leadingNonAlphanumerics, '').trim();
    const lowered = out.toLowerCase();
    for (const token of PRIORITY_LABEL_TOKENS) {
      if (!lowered.startsWith(token.toLowerCase())) continue;
      const afterToken = out.slice(token.length);
      // "High-speed ..." is a word, not a "HIGH - Title" decoration.
      if (hyphenatedCompound.test(afterToken)) continue;
      const sep = afterToken.trim().match(separatorThenTitle);
      if (sep?.[1]) {
        out = sep[1].trim();
        break;
      }
    }
  }
  return out;
}
/**
 * Strip a leading editorial prefix (`Trigger 1: `, `Dossier 2: `) and a
 * stray leading ordinal (`1. `, `2) `, `2.1 `) from a candidate headline.
 *
 * A prefix is only removed when one of the
 * {@link PRIORITY_LEADING_PREFIX_TOKENS} (matched case-insensitively) is
 * followed by whitespace, a number, a separator and a non-empty title;
 * a bare `Top story ...` is therefore left alone.
 *
 * @param text - Candidate headline
 * @returns Headline with the leading editorial decoration removed
 */
function stripPriorityLeadingPrefix(text: string): string {
  // " 1: Title" / " 2 — Title": number, then colon, em dash (U+2014),
  // en dash (U+2013) or hyphen, then the title. Dashes are written as
  // escapes so the pattern survives encoding round-trips.
  const numberedPrefixTail = /^\s+\d+\s*[:\u2014\u2013-]\s*(.+)$/u;
  // "1. ", "2) ", "3: ", "4· " (U+00B7 middle dot), "5 " and dotted
  // ordinals such as "2.1 ".
  // NOTE(review): because whitespace is an accepted terminator, a headline
  // that genuinely starts with a number ("2026 budget ...") also loses it —
  // confirm that is acceptable for the callers.
  const leadingOrdinal = /^\d+(?:\.\d+)*[.):\u00B7\s]\s*/u;

  let out = text;
  const lowered = out.toLowerCase();
  for (const token of PRIORITY_LEADING_PREFIX_TOKENS) {
    if (!lowered.startsWith(token.toLowerCase())) continue;
    const match = out.slice(token.length).match(numberedPrefixTail);
    if (match?.[1]) {
      out = match[1];
      break;
    }
  }
  return out.replace(leadingOrdinal, '');
}
/**
 * Strip a trailing confidence marker (`🔴 CRITICAL`, `🟡 MEDIUM`) from a
 * candidate headline. Single pass: at most one marker is removed per call;
 * {@link cleanPriorityHeadline} invokes this inside its fixed-point loop.
 *
 * For each {@link PRIORITY_TRAILING_TOKENS} entry the headline is tested
 * for: whitespace, an optional single symbol (one code point that is not a
 * letter, digit or whitespace), optional whitespace, the token, and then
 * the end of the string. Matching is case-insensitive.
 *
 * NOTE(review): the symbol is optional and matching ignores case, so a
 * headline ending in the ordinary word "high" or "low" ("... record high")
 * loses that word as well — confirm this is intended.
 *
 * @param text - Candidate headline
 * @returns Headline with the trailing confidence marker removed
 */
function stripPriorityTrailingMarker(text: string): string {
  for (const token of PRIORITY_TRAILING_TOKENS) {
    const pattern = new RegExp(`\\s+[^\\p{L}\\p{N}\\s]?\\s*${token}\\s*$`, 'iu');
    const stripped = text.replace(pattern, '');
    // The first token that changes the text wins.
    if (stripped !== text) {
      return stripped;
    }
  }
  return text;
}
/**
 * Remove one parenthesised annotation from the very end of the text — the
 * procedure codes, dates and committee tags that briefs append to
 * priority-finding names — and trim the result.
 *
 * Only a final `(…)` group is considered: its content must be 3 to 80
 * characters long and must not itself contain parentheses, so earlier
 * groups on the line and nested groups are left in place.
 *
 * @param text - Headline or paragraph text
 * @returns Trimmed text with the trailing `(…)` removed, if there was one
 */
export function stripPriorityTailMetadata(text: string): string {
  const tail = /\s*\([^()]{3,80}\)\s*$/u.exec(text);
  // The match always runs to the end of the string, so cutting at its
  // start removes exactly the matched annotation.
  const kept = tail === null ? text : text.slice(0, tail.index);
  return kept.trim();
}
/**
 * Normalise a priority-finding headline: drop the leading priority
 * decoration and the `Trigger N:` / `Dossier N:` / leading-numeric prefix,
 * strip trailing confidence markers and parenthesised metadata
 * (`(TA-10-2026-0160, 2026-04-30)`, `(ITRE/ENVI)`), and trim residual
 * punctuation. The result is a headline-shaped string suitable for
 * `<title>` use.
 *
 * @param raw - Raw bold-title or heading text
 * @returns Cleaned headline (may be empty after stripping)
 */
export function cleanPriorityHeadline(raw: string): string {
  // A whitespace-preceded run of symbols (emoji, stray punctuation) at the
  // very end of the text.
  const trailingSymbolRun = /\s+[^\p{L}\p{N}\s]+\s*$/u;
  // Left-over whitespace, colons, em dashes (U+2014), en dashes (U+2013)
  // and hyphens at the end. The dashes are written as escapes so the
  // pattern survives encoding round-trips.
  const trailingSeparators = /[\s:\u2014\u2013-]+$/u;

  let text = stripInlineMarkdown(raw).trim();
  text = stripPriorityLeadingDecoration(text);
  text = stripPriorityLeadingPrefix(text);

  // Trailing cleanup runs in a fixed-point loop so combined patterns
  // like "Title (Confidence, 80%): 🔴" collapse all the way down to
  // "Title": each round may expose a new tail for the next one.
  let previous = '';
  while (previous !== text) {
    previous = text;
    text = stripPriorityTrailingMarker(text);
    text = stripPriorityTailMetadata(text);
    text = text.replace(trailingSymbolRun, '');
    text = text.replace(trailingSeparators, '');
    text = text.trim();
  }
  return text;
}
|