Press n or j to go to the next uncovered block, b, p or k for the previous block.
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 | 112x 112x 112x 112x 112x 131x 21x 21x 110x 112x 108x 132x 132x 105x 3x 183x 105x 105x 105x 2x 103x 103x 103x | // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
/**
* @module Generators/PoliticalIntelligence/Markdown
* @description Pure parsing utilities for analysis Markdown files.
* Lifted out of `political-intelligence.ts` so the parsing logic
* (emoji-stripping, H1 extraction, stem humanization) can be
* unit-tested in isolation and reused by future renderers (e.g.
* sitemap entries, RSS descriptions, news-indexes meta-builders).
*
* **No I/O imports** other than `fs`, which is used only by the single
* file-reading helper {@link parseMarkdownMeta}; everything else is
* pure string manipulation.
*/
import fs from 'fs';
/**
 * Code points that may be peeled off the front of a heading: pictographic
 * emoji, skin-tone modifiers, regional indicators (flag halves), VS-16
 * (`\uFE0F`), ZWJ (`\u200D`) and whitespace.
 *
 * A single flat character class is used on purpose: no nested quantifiers,
 * so it does not trigger `security/detect-unsafe-regex`.
 */
const LEADING_EMOJI_CODE_POINT =
  /[\p{Extended_Pictographic}\p{Emoji_Modifier}\p{Regional_Indicator}\uFE0F\u200D\s]/u;

/** Characters that can start a keycap emoji sequence (digit, `#` or `*`). */
const KEYCAP_BASE = /^[0-9#*]$/;

/**
 * Length (in code points) of a keycap emoji sequence starting at `start`,
 * or `0` when there is none. A keycap is a base character, an optional
 * VS-16, and the combining enclosing keycap `\u20E3`.
 */
function keycapLength(codePoints: readonly string[], start: number): number {
  if (!KEYCAP_BASE.test(codePoints[start] ?? '')) {
    return 0;
  }
  const next = codePoints[start + 1];
  if (next === '\u20E3') {
    return 2;
  }
  if (next === '\uFE0F' && codePoints[start + 2] === '\u20E3') {
    return 3;
  }
  return 0;
}

/**
 * Strip leading emoji tokens (and the whitespace around them) from a
 * heading line, repeatedly, so a heading such as `⚠️ Risk Scoring`
 * becomes `Risk Scoring`.
 *
 * The string is walked code point by code point (via the string iterator)
 * so astral-plane pictographics, VS-16 and ZWJ sequences are handled
 * without surrogate-pair arithmetic. In addition to
 * `Extended_Pictographic` characters, the walk also removes:
 *
 * - skin-tone modifiers (`Emoji_Modifier`), which follow a base emoji;
 * - regional-indicator pairs, i.e. country flags;
 * - keycap sequences such as digit + VS-16 + `\u20E3`.
 *
 * None of those are `Extended_Pictographic`, so a pictographic-only check
 * would leave them at the start of the title. A plain leading digit,
 * `#` or `*` that is not part of a keycap sequence is kept.
 *
 * @param text - Heading text (without the leading `# `)
 * @returns Trimmed text with any leading emoji tokens removed
 */
export function stripLeadingEmoji(text: string): string {
  const codePoints = [...text]; // iterates by Unicode code point
  let start = 0;
  while (start < codePoints.length) {
    const current = codePoints[start] ?? '';
    if (LEADING_EMOJI_CODE_POINT.test(current)) {
      start += 1;
      continue;
    }
    // A digit/#/* only counts as emoji when the keycap mark follows it.
    const keycap = keycapLength(codePoints, start);
    if (keycap === 0) {
      break;
    }
    start += keycap;
  }
  return codePoints.slice(start).join('').trim();
}
/**
 * Extract the first usable `# H1` heading from a list of lines.
 *
 * A line counts as an H1 when it starts with a single `#` followed by
 * whitespace and at least one more character. Leading emoji are removed
 * from the heading text via {@link stripLeadingEmoji}. A heading that is
 * empty after stripping (e.g. an emoji-only `# ...` line) is skipped so
 * the function never returns an empty title while a later H1 or the
 * fallback is available.
 *
 * @param lines - Markdown source split on newlines
 * @param fallback - Value returned when no usable H1 is found
 * @returns Extracted heading text or the fallback
 */
export function extractH1Title(lines: string[], fallback: string): string {
  // Non-global pattern, so it carries no `lastIndex` state between lines.
  const h1Pattern = /^#\s+(.+?)\s*$/;
  for (const line of lines) {
    const headingText = h1Pattern.exec(line)?.[1];
    if (!headingText) {
      continue;
    }
    const title = stripLeadingEmoji(headingText);
    if (title) {
      return title;
    }
    // Heading consisted only of emoji/whitespace: keep looking.
  }
  return fallback;
}
/**
 * Humanize a filename stem (e.g. `per-artifact-methodologies` ->
 * `Per Artifact Methodologies`).
 *
 * Runs of dashes/underscores are replaced by a single space and the first
 * letter of each word is upper-cased. Used as a fallback when a Markdown
 * file does not provide an H1.
 *
 * Word starts are detected with Unicode properties rather than `\b\w`:
 * `\b`/`\w` are ASCII-only, so an accented letter would be treated as a
 * word break and the letter after it wrongly capitalized
 * (`résumé` -> `RéSumé`). A letter is capitalized only when it is not
 * preceded by another letter, combining mark or digit, which matches the
 * ASCII behavior of `\b\w` (e.g. `3d-model` -> `3d Model`).
 *
 * @param stem - Filename stem to humanize
 * @returns Title-cased stem with dashes/underscores replaced by spaces
 */
export function humanize(stem: string): string {
  return stem
    .replace(/[-_]+/g, ' ')
    .replace(/(?<![\p{L}\p{M}\p{N}])\p{L}/gu, (letter) => letter.toUpperCase());
}
/**
 * Extract a title and short description from the top of a Markdown
 * file. Uses the first H1 (`# ...`) line as title, falling back to a
 * humanized stem when the file has no H1 or cannot be read.
 *
 * The `description` field is intentionally left **empty**: for the
 * political-intelligence index we use a curated per-file, per-language
 * description table (`getCuratedDescription` in
 * `political-intelligence-descriptions.ts`) instead of scraping the
 * first paragraph of each Markdown file. Scraping proved fragile: it
 * leaked emoji-prefixed document-metadata headers (e.g.
 * `Document Owner: CEO | Version...`) and template separators (`---`)
 * into the rendered cards. Leaving it empty here forces the renderer to
 * go through the curated table.
 *
 * A leading UTF-8 byte-order mark is removed before parsing. Node does
 * not strip it when decoding, and with the BOM in place an H1 on the
 * first line would not start with `#` and would be missed.
 *
 * @param fullPath - Absolute path to a Markdown file
 * @param stem - Filename stem used as title fallback
 * @returns `{ title, description }`; description is always `''`
 */
export function parseMarkdownMeta(
  fullPath: string,
  stem: string
): { title: string; description: string } {
  const fallbackTitle = humanize(stem);
  let content: string;
  try {
    content = fs.readFileSync(fullPath, 'utf-8');
  } catch {
    // Best effort: an unreadable/missing file still gets a usable title.
    return { title: fallbackTitle, description: '' };
  }
  if (content.charCodeAt(0) === 0xfeff) {
    content = content.slice(1);
  }
  const lines = content.split(/\r?\n/);
  const title = extractH1Title(lines, fallbackTitle);
  return { title, description: '' };
}
|