political-intelligence markdown.ts

100% Statements 24/24
100% Branches 7/7
100% Functions 5/5
100% Lines 23/23
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112x
112x
112x
112x
112x
131x
21x
21x
 
110x
 
112x
 
 
 
 
 
 
 
 
 
 
108x
132x
132x
105x
 
 
3x
 
 
 
 
 
 
 
 
 
 
 
 
 
183x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105x
 
105x
105x
 
2x
 
103x
103x
103x
 
  // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
 
/**
 * @module Generators/PoliticalIntelligence/Markdown
 * @description Pure parsing utilities for analysis Markdown files.
 * Lifted out of `political-intelligence.ts` so the parsing logic
 * (emoji-stripping, H1 extraction, stem humanization) can be
 * unit-tested in isolation and reused by future renderers (e.g.
 * sitemap entries, RSS descriptions, news-indexes meta-builders).
 *
 * **No I/O imports** other than `fs`, which is used only by the single
 * file-reading helper {@link parseMarkdownMeta}; everything else is
 * pure string manipulation.
 */
 
import fs from 'fs';
 
/**
 * Strip every leading emoji token (and the whitespace around it) from a
 * heading line, so headings like `🚀 ⚠️ Risk Scoring` become `Risk
 * Scoring`.
 *
 * The text is walked one Unicode code point at a time (via the string
 * iterator used by spread), and the leading run of code points that
 * belong to an emoji sequence is dropped. A code point is part of that
 * run when it is any of:
 *
 * - `Extended_Pictographic` — the pictographs themselves, including
 *   astral-plane ones;
 * - `Regional_Indicator` — the letter pairs that render as flags;
 * - `Emoji_Modifier` — skin-tone modifiers (U+1F3FB–U+1F3FF);
 * - VS-16 (`\uFEOF` presentation selector, U+FE0F) or ZWJ (U+200D);
 * - whitespace separating consecutive emoji.
 *
 * Flags and skin-tone modifiers are not `Extended_Pictographic`, so they
 * have to be listed explicitly; otherwise a modified emoji would leave an
 * orphaned modifier at the start of the result.
 *
 * Each code point is tested against a single flat character class, which
 * keeps the pattern free of the nested quantifiers that would trigger
 * `security/detect-unsafe-regex`.
 *
 * @param text - Heading text (without the leading `# `)
 * @returns Trimmed text with any leading emoji tokens removed; `''` when
 *   the text consists solely of emoji tokens and whitespace
 */
export function stripLeadingEmoji(text: string): string {
  // One flat class, no quantifier: tested against exactly one code point.
  const emojiTokenPart =
    /[\p{Extended_Pictographic}\p{Regional_Indicator}\p{Emoji_Modifier}\uFE0F\u200D\s]/u;
  const codePoints = [...text]; // spread iterates by Unicode code point
  const firstKept = codePoints.findIndex((cp) => !emojiTokenPart.test(cp));
  if (firstKept === -1) {
    // Nothing but emoji tokens / whitespace (or an empty string).
    return '';
  }
  return codePoints.slice(firstKept).join('').trim();
}
 
/**
 * Extract the first usable `# H1` heading from a list of lines.
 *
 * A line counts as an H1 when it starts with a single `#` followed by
 * whitespace and at least one further character. The captured heading
 * text is passed through {@link stripLeadingEmoji}. If nothing is left
 * after stripping (for example a heading made up solely of emoji), that
 * line is skipped and the search continues, so the function never returns
 * an empty title while a later H1 or the fallback is available.
 *
 * @param lines - Markdown source split on newlines
 * @param fallback - Value returned when no usable H1 is found
 * @returns Extracted heading text or the fallback
 */
export function extractH1Title(lines: readonly string[], fallback: string): string {
  // Built once rather than on every loop iteration.
  const h1Pattern = /^#\s+(.+?)\s*$/;
  for (const line of lines) {
    const rawHeading = h1Pattern.exec(line)?.[1];
    if (!rawHeading) {
      continue;
    }
    const title = stripLeadingEmoji(rawHeading);
    if (title) {
      return title;
    }
    // Heading was emoji-only: keep looking instead of returning ''.
  }
  return fallback;
}
 
/**
 * Humanize a filename stem (e.g. `per-artifact-methodologies` →
 * `Per Artifact Methodologies`).
 *
 * Runs of dashes/underscores become a single space, then the first
 * character of every word is upper-cased. Used as a fallback when a
 * Markdown file does not provide an H1.
 *
 * Word starts are detected with Unicode letter/number classes rather than
 * `\b\w`. `\w` is ASCII-only, so a stem beginning with a non-ASCII letter
 * would otherwise get its *second* letter capitalized. For pure-ASCII
 * stems the result is the same as with `\b\w`.
 *
 * @param stem - Filename stem to humanize
 * @returns Title-cased stem with dashes/underscores replaced by spaces
 */
export function humanize(stem: string): string {
  // A word character that is not preceded by another word character
  // (combining marks count as part of the preceding letter).
  const wordStart = /(?<![\p{L}\p{M}\p{N}_])[\p{L}\p{N}_]/gu;
  const spaced = stem.replace(/[-_]+/g, ' ');
  return spaced.replace(wordStart, (first) => first.toUpperCase());
}
 
/**
 * Extract a title (and an always-empty description) from the top of a
 * Markdown file. Uses the first H1 (`# …`) line as title, falling back
 * to a humanized stem when the file has no H1 or cannot be read.
 *
 * The `description` field is intentionally left **empty**: for the
 * political-intelligence index we use a curated per-file, per-language
 * description table (`getCuratedDescription` in
 * `political-intelligence-descriptions.ts`) instead of scraping the
 * first paragraph of each Markdown file. Scraping proved fragile — it
 * leaked document-metadata headers (`📋 Document Owner: CEO | 📄
 * Version…`) and template separators (`---`) into the rendered cards.
 * Leaving it empty here forces the renderer to go through the curated
 * table.
 *
 * A leading UTF-8 byte-order mark is removed before parsing, so an H1 on
 * the very first line of a BOM-prefixed file is still recognized.
 *
 * @param fullPath - Absolute path to a Markdown file
 * @param stem - Filename stem used as title fallback
 * @returns `{ title, description }` — description is always `''`
 */
export function parseMarkdownMeta(
  fullPath: string,
  stem: string
): { title: string; description: string } {
  const fallbackTitle = humanize(stem);
  let content: string;
  try {
    content = fs.readFileSync(fullPath, 'utf-8');
  } catch {
    // Best-effort by design: any read failure yields the fallback title.
    return { title: fallbackTitle, description: '' };
  }
  // A BOM survives decoding as U+FEFF and would stop `# …` on line 1 from
  // matching at the start of the line.
  const text = content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;
  const title = extractH1Title(text.split(/\r?\n/), fallbackTitle);
  return { title, description: '' };
}