All files / src/aggregator reader-friendly-transform.ts

96.19% Statements 101/105
76.47% Branches 52/68
100% Functions 17/17
98.86% Lines 87/88

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216                        17x   17x               17x                       17x                                                     290x 290x 290x 290x 47102x 47102x 23406x 23406x   23696x 23695x 23695x 23695x 23695x 23695x 16844x 16844x   23695x   290x       290x 290x 290x   290x 1450x 1450x   290x 1x 1x   290x 1x 1x   290x                   23406x 23406x 23406x 23406x       23695x             23695x     2x 2x 2x 2x 2x 2x 2x           23695x     2x 2x 2x     23695x 3x 3x         16844x 3x 3x         16844x 16844x 84220x 84182x 84182x 3x 3x   16844x       2x       5x 5x 5x     5x       5x 5x 5x 4x 4x       290x   289x   290x 54x   54x     290x 54x 54x 54x 54x       54x    
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
 
/**
 * @module Aggregator/ReaderFriendlyTransform
 * @description Reader-facing post-processing transform for rendered article
 * HTML. Expands first-use jargon, links EP adopted-text references, and
 * contextualizes internal pipeline markers for public output.
 */
 
import { escapeHTML } from '../utils/file-utils.js';
 
/**
 * `id` attribute of the injected reader glossary `<details>` element.
 * It is also the marker searched for to avoid injecting the glossary twice.
 */
const GLOSSARY_ID = 'reader-friendly-glossary';
 
/**
 * Acronyms that are wrapped in `<abbr title="…">` on first use.
 * Key: the acronym exactly as it is matched in text (case-sensitive).
 * Value: the wording placed in the `title` attribute.
 */
const ACRONYM_EXPANSIONS: Readonly<Record<string, string>> = {
  BLUF: 'Bottom Line Up Front',
  KIF: 'Key Intelligence Findings',
  OIR: 'Own-Initiative Resolution',
  INI: 'Own-initiative procedure',
  EPCA: 'European Parliament Committee Analysis',
};
 
/**
 * Reader-facing phrases for `WEP: <band> (<range>)` markers.
 * Key: the band name reduced to upper-case ASCII letters only (the form
 * produced by `normalizeBandKey`). Value: the phrase shown to the reader.
 */
const WEP_PHRASES: Readonly<Record<string, string>> = {
  ALMOSTCERTAINLY: 'almost certainly',
  HIGHLYLIKELY: 'highly likely',
  VERYLIKELY: 'very likely',
  LIKELY: 'likely',
  PROBABLE: 'probably',
  POSSIBLE: 'possibly',
  UNLIKELY: 'unlikely',
  HIGHLYUNLIKELY: 'highly unlikely',
  VERYUNLIKELY: 'very unlikely',
};
 
/**
 * Reader-facing labels for `Admiralty <grade>` markers.
 * Key: a grade code (`A1`) or a grade range (`B2-B3`) in upper case without
 * whitespace. Value: the label text that replaces the marker.
 * Grades or ranges without an entry here are left unchanged in the output.
 */
const ADMIRALTY_LABELS: Readonly<Record<string, string>> = {
  A1: 'Source: Official EP records (highest reliability)',
  A2: 'Source: Verified institutional reporting (very high reliability)',
  B1: 'Source: Corroborated reporting (high reliability)',
  B2: 'Source: Corroborated reporting (good reliability)',
  B3: 'Source: Multi-source reporting (moderate reliability)',
  'B2-B3': 'Source: Multi-source reporting (moderate reliability)',
  C1: 'Source: Partially corroborated reporting (medium reliability)',
  C2: 'Source: Partially corroborated reporting (medium reliability)',
  C3: 'Source: Limited corroboration (lower reliability)',
};
 
/**
 * Mutable bookkeeping shared by the per-segment rewrite steps of a single
 * transform run.
 */
interface TransformState {
  /** Acronyms that already carry an `<abbr>` expansion and must not be expanded again. */
  readonly expandedAcronyms: Set<string>;
  /** Normalized WEP band keys that have already been rewritten. */
  readonly expandedWepBands: Set<string>;
  /** Normalized Admiralty grade keys that have already been rewritten. */
  readonly expandedAdmiraltyGrades: Set<string>;
  /** True after an opening `<a …>` tag has been seen and until the next `</a>`. */
  insideAnchor: boolean;
  /** True after an opening `<abbr …>` tag has been seen and until the next `</abbr>`. */
  insideAbbr: boolean;
}
 
/**
 * Rewrite rendered article HTML so that specialist notation reads naturally.
 *
 * The reader glossary is injected first (when applicable). The markup is then
 * split into alternating tag and text segments: tags only update the
 * anchor/abbr context, text segments are rewritten. Text inside an `<abbr>`
 * is left untouched; text inside an `<a>` skips adopted-text linking and
 * acronym expansion so no nested links or abbreviations are produced there.
 *
 * @param html - Rendered HTML body fragment
 * @returns Reader-friendly HTML body fragment
 */
export function applyReaderFriendlyTransform(html: string): string {
  const state = createInitialState(html);
  // The capturing group makes `split` keep the tags as their own segments.
  const segments = injectReaderGlossary(html).split(/(<[^>]+>)/g);

  // `map` visits segments in document order, which the state tracking relies on.
  const rewritten = segments.map((segment) => {
    if (segment.startsWith('<')) {
      updateTagContext(segment, state);
      return segment;
    }
    if (state.insideAbbr) return segment;

    const withMarkers = contextualizeInternalMarkers(segment);
    const withWep = replaceFirstWepBand(withMarkers, state);
    const withGrades = replaceFirstAdmiraltyGrade(withWep, state);
    return state.insideAnchor
      ? withGrades
      : injectFirstUseAbbr(linkEpAdoptedTextRefs(withGrades), state.expandedAcronyms);
  });

  return rewritten.join('');
}
 
/**
 * Build the starting state for a transform run by scanning the incoming HTML
 * for expansions that are already present, so they are not applied again.
 *
 * @param html - Rendered HTML body fragment, before any rewriting
 * @returns State whose "already expanded" sets are pre-seeded from `html` and
 *   whose tag-context flags are both `false`
 */
function createInitialState(html: string): TransformState {
  const expandedAcronyms = new Set<string>();
  const expandedWepBands = new Set<string>();
  const expandedAdmiraltyGrades = new Set<string>();

  // An acronym counts as expanded when it is the sole content of any <abbr>.
  for (const acronym of Object.keys(ACRONYM_EXPANSIONS)) {
    const matcher = new RegExp(`<abbr[^>]*>\\s*${acronym}\\s*</abbr>`, 'iu');
    if (matcher.test(html)) expandedAcronyms.add(acronym);
  }
  // Markup produced by an earlier run carries these data attributes.
  for (const match of html.matchAll(/data-wep-band="([A-Z]+)"/g)) {
    const key = match[1];
    if (key) expandedWepBands.add(key);
  }
  for (const match of html.matchAll(/data-admiralty-grade="([A-Z0-9-]+)"/g)) {
    const key = match[1];
    if (key) expandedAdmiraltyGrades.add(key);
  }
  return {
    expandedAcronyms,
    expandedWepBands,
    expandedAdmiraltyGrades,
    insideAnchor: false,
    insideAbbr: false,
  };
}
 
/**
 * Track whether the text that follows `tag` sits inside an `<a>` or `<abbr>`
 * element. Any other tag leaves the state untouched.
 *
 * @param tag - A single tag segment, starting with `<`
 * @param state - Transform state whose `insideAnchor` / `insideAbbr` flags are updated
 */
function updateTagContext(tag: string, state: TransformState): void {
  const opensAnchor = /^<a\b/i.test(tag);
  const closesAnchor = /^<\/a\b/i.test(tag);
  const opensAbbr = /^<abbr\b/i.test(tag);
  const closesAbbr = /^<\/abbr\b/i.test(tag);

  // Opening and closing patterns are mutually exclusive, so the "opens"
  // result is exactly the new flag value whenever either of them matched.
  if (opensAnchor || closesAnchor) state.insideAnchor = opensAnchor;
  if (opensAbbr || closesAbbr) state.insideAbbr = opensAbbr;
}
 
/**
 * Replace internal pipeline vocabulary with wording meant for public output.
 *
 * The "degraded-feeds" replacements are case-insensitive; "KB-ESTIMATE" is
 * only replaced when written in upper case.
 *
 * @param input - A text segment (no tags)
 * @returns The segment with internal markers reworded
 */
function contextualizeInternalMarkers(input: string): string {
  // The longer phrase goes first so "mode" is consumed with it.
  const withoutModePhrase = input.replace(/\bdegraded-feeds mode\b/giu, 'limited-source mode');
  const withoutFeedMarker = withoutModePhrase.replace(/\bdegraded-feeds\b/giu, 'limited-source');
  return withoutFeedMarker.replace(/\bKB-ESTIMATE\b/gu, 'analysis estimate');
}
 
/**
 * Rewrite `WEP: <band> (<range>)` markers into a reader-facing phrase.
 *
 * Each band is rewritten at most once per transform run. Occurrences whose
 * band has no entry in `WEP_PHRASES`, or whose band was already rewritten,
 * are left exactly as written.
 *
 * @param input - A text segment (no tags)
 * @param state - Transform state; `expandedWepBands` gains the key of every band rewritten here
 * @returns The segment with first-use WEP markers replaced by a `<span>`
 */
function replaceFirstWepBand(input: string, state: TransformState): string {
  return input.replace(
    /\bWEP:\s*([A-Za-z][A-Za-z -]+?)\s*\(([^)]+)\)/giu,
    (match, rawBand, range) => {
      const band = String(rawBand ?? '').trim();
      const key = normalizeBandKey(band);
      const phrase = WEP_PHRASES[key];
      // Unknown band, or this band was already translated earlier: keep the original text.
      if (!phrase || state.expandedWepBands.has(key)) return match;
      state.expandedWepBands.add(key);
      const wepRange = String(range ?? '').trim();
      return `<span class="reader-friendly-wep" data-wep-band="${escapeHTML(key)}">${escapeHTML(phrase)} (WEP: ${escapeHTML(wepRange)})</span>`;
    }
  );
}
 
/**
 * Rewrite `Admiralty <grade>` and `Admiralty <grade>-<grade>` markers into
 * reader-facing source-reliability labels.
 *
 * Ranges are handled first, then single grades. The right-hand class of a
 * range may be omitted (`B2-3`), in which case the left-hand class is reused.
 * Whether a given marker is actually replaced is decided by
 * `replaceAdmiraltyWithLabel`.
 *
 * @param input - A text segment (no tags)
 * @param state - Transform state passed through to `replaceAdmiraltyWithLabel`
 * @returns The segment with eligible Admiralty markers replaced
 */
function replaceFirstAdmiraltyGrade(input: string, state: TransformState): string {
  const withRanges = input.replace(
    /\bAdmiralty\s+([A-F])\s*([1-6])\s*-\s*([A-F])?\s*([1-6])\b/giu,
    (match, leftClass, leftScore, rightClass, rightScore) => {
      const effectiveRightClass = String(rightClass ?? '').trim() || String(leftClass ?? '');
      const key = `${String(leftClass ?? '').toUpperCase()}${String(leftScore ?? '')}-${effectiveRightClass.toUpperCase()}${String(rightScore ?? '')}`;
      return replaceAdmiraltyWithLabel(match, key, state);
    }
  );
  // The negative lookahead mirrors the tail of the range pattern above. A range
  // that the first pass left alone (unknown or already expanded) must not have
  // its left half rewritten as a single grade, which would leave a dangling
  // "-B3" behind the inserted label.
  return withRanges.replace(
    /\bAdmiralty\s+([A-F])\s*([1-6])\b(?!\s*-\s*[A-F]?\s*[1-6]\b)/giu,
    (match, cls, score) => {
      const key = `${String(cls ?? '').toUpperCase()}${String(score ?? '')}`;
      return replaceAdmiraltyWithLabel(match, key, state);
    }
  );
}
 
/**
 * Turn adopted-text references of the form `TA-<term>-<year>-<serial>` into
 * links to the matching English document page on europarl.europa.eu.
 *
 * @param input - A text segment (no tags)
 * @returns The segment with every reference wrapped in an `<a>` element
 */
function linkEpAdoptedTextRefs(input: string): string {
  const referencePattern = /\b(TA-(\d+)-(\d+)-(\d+))\b/g;

  const toAnchor = (
    _whole: string,
    full: string,
    term: string,
    year: string,
    serial: string
  ): string => {
    const href = `https://www.europarl.europa.eu/doceo/document/TA-${term}-${year}-${serial}_EN.html`;
    return `<a href="${href}" rel="noopener external" target="_blank">${full}</a>`;
  };

  return input.replace(referencePattern, toAnchor);
}
 
/**
 * Wrap the first occurrence of each known acronym in an `<abbr>` element
 * whose `title` carries the full wording.
 *
 * Acronyms already present in `expandedAcronyms` are skipped; every acronym
 * expanded here is added to that set, so later segments leave it alone.
 *
 * @param input - A text segment
 * @param expandedAcronyms - Acronyms expanded so far; mutated by this call
 * @returns The segment with first-use acronyms wrapped in `<abbr>`
 */
function injectFirstUseAbbr(input: string, expandedAcronyms: Set<string>): string {
  return Object.entries(ACRONYM_EXPANSIONS).reduce((current, [acronym, full]) => {
    if (expandedAcronyms.has(acronym)) return current;

    // No `g` flag: only the first whole-word occurrence is tested and replaced.
    const firstUse = new RegExp(`\\b${acronym}\\b`, 'u');
    if (!firstUse.test(current)) return current;

    expandedAcronyms.add(acronym);
    return current.replace(
      firstUse,
      `<abbr title="${escapeHTML(full)}">${escapeHTML(acronym)}</abbr>`
    );
  }, input);
}
 
/**
 * Reduce a band name to the lookup form used by `WEP_PHRASES`: every
 * character other than an ASCII letter is dropped, then the rest is upper-cased.
 *
 * @param raw - Band name as written in the text, e.g. `Highly likely`
 * @returns Upper-case, letters-only key, e.g. `HIGHLYLIKELY`
 */
function normalizeBandKey(raw: string): string {
  // Strip before upper-casing so non-ASCII letters cannot expand into ASCII ones.
  const lettersOnly = raw.replace(/[^A-Za-z]/g, '');
  return lettersOnly.toUpperCase();
}
 
/**
 * Bring an Admiralty grade or range into the lookup form used by
 * `ADMIRALTY_LABELS`: upper case, no whitespace, and with an implied
 * right-hand class filled in (`B2-3` becomes `B2-B3`).
 *
 * @param raw - Grade or range as assembled from the matched text
 * @returns Normalized key; input that is not an implied range is returned
 *   upper-cased with whitespace removed
 */
function normalizeAdmiraltyKey(raw: string): string {
  const compact = raw.toUpperCase().replace(/\s+/g, '');
  const impliedRange = compact.match(/^([A-F])([1-6])-([1-6])$/);
  if (impliedRange) {
    // Reuse the left-hand class letter for the right-hand side of the range.
    return `${impliedRange[1]}${impliedRange[2]}-${impliedRange[1]}${impliedRange[3]}`;
  }
  return compact;
}
 
/**
 * Decide whether one matched Admiralty marker is replaced by its label.
 *
 * The marker is replaced only when its normalized key has a label in
 * `ADMIRALTY_LABELS` and that key has not been replaced before in this run;
 * otherwise the matched text is returned unchanged.
 *
 * @param match - The text matched in the article
 * @param key - Grade or range key built from the match
 * @param state - Transform state; `expandedAdmiraltyGrades` gains the key on replacement
 * @returns Replacement `<span>` markup, or `match` unchanged
 */
function replaceAdmiraltyWithLabel(match: string, key: string, state: TransformState): string {
  const gradeKey = normalizeAdmiraltyKey(key);
  const readerLabel = ADMIRALTY_LABELS[gradeKey];
  const alreadyShown = state.expandedAdmiraltyGrades.has(gradeKey);

  if (readerLabel && !alreadyShown) {
    state.expandedAdmiraltyGrades.add(gradeKey);
    return `<span class="reader-friendly-admiralty" data-admiralty-grade="${escapeHTML(gradeKey)}">${escapeHTML(readerLabel)}</span>`;
  }
  return match;
}
 
/**
 * Insert the reader glossary into articles that contain the
 * `reader-intelligence-guide` section.
 *
 * Nothing is inserted when a glossary is already present or when the guide
 * section is missing. Otherwise the glossary goes directly after the `</h2>`
 * that closes the guide heading; when that heading or its closing tag cannot
 * be located, the glossary is placed in front of the whole fragment.
 *
 * @param html - Rendered HTML body fragment
 * @returns The fragment, with the glossary inserted when applicable
 */
function injectReaderGlossary(html: string): string {
  if (html.includes(`id="${GLOSSARY_ID}"`)) return html;
  const hasGuideSection =
    html.includes('id="reader-intelligence-guide"') ||
    html.includes("id='reader-intelligence-guide'");
  if (!hasGuideSection) return html;
  const glossary = buildReaderGlossary();
  // Accept the heading id with either quote style, preferring double quotes.
  const doubleQuotedHeading = html.indexOf('id="reader-intelligence-guide-heading"');
  const headingAnchor =
    doubleQuotedHeading !== -1
      ? doubleQuotedHeading
      : html.indexOf("id='reader-intelligence-guide-heading'");
  if (headingAnchor === -1) return `${glossary}\n${html}`;
  const headingClose = html.indexOf('</h2>', headingAnchor);
  if (headingClose === -1) return `${glossary}\n${html}`;
  const insertAt = headingClose + '</h2>'.length;
  return `${html.slice(0, insertAt)}\n${glossary}${html.slice(insertAt)}`;
}
 
/**
 * Produce the markup of the collapsible "How to read this analysis" glossary.
 *
 * @returns A single-line `<details>` element whose id is `GLOSSARY_ID`
 */
function buildReaderGlossary(): string {
  // Joined without separators so the output stays one line of markup.
  return [
    `<details id="${GLOSSARY_ID}" class="reader-friendly-glossary">`,
    '<summary>How to read this analysis</summary>',
    '<p>This article uses confidence and source-quality notation. The guide below translates specialist shorthand into plain-English wording for general readers.</p>',
    '<ul>',
    '<li><strong>Source confidence:</strong> Admiralty grades are shown in reader-friendly text on first use.</li>',
    '<li><strong>Probability language:</strong> WEP bands are translated to phrases like “likely” or “almost certainly”.</li>',
    '<li><strong>Acronyms:</strong> first uses are expanded with abbreviations for accessibility.</li>',
    '</ul></details>',
  ].join('');
}