// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
/**
* @module Aggregator/MarkdownRenderer
* @description Markdown-to-HTML renderer for the aggregated article.
*
* Uses `markdown-it` with a focused plugin stack:
* - `markdown-it-anchor` — slugged `id`s on every heading
* - `markdown-it-footnote` — footnote reference support for artifacts
* - `markdown-it-attrs` — `{.class #id}` suffixes for table/fence styling
* - `markdown-it-deflist` — definition lists in stakeholder artifacts
*
* A custom fence override transforms ```` ```mermaid ```` blocks into
* `<figure class="mermaid-figure" role="img" aria-label="..."><pre class="mermaid">…</pre></figure>`
* so the vendored client-side `mermaid.esm.min.mjs` (shipped under
* `js/vendor/`) can progressively enhance them. No network calls, no
* inline script, CSP `script-src 'self'` preserved.
*/
import MarkdownIt from 'markdown-it';
import anchor from 'markdown-it-anchor';
import footnote from 'markdown-it-footnote';
import attrs from 'markdown-it-attrs';
import deflist from 'markdown-it-deflist';
import type Token from 'markdown-it/lib/token.mjs';
/**
* Options controlling `renderMarkdown`. Every field is optional; passing
* no options selects the defaults.
*/
export interface RenderOptions {
/**
* Optional builder for the accessible label of a mermaid figure. It is
* called with the zero-based index of the mermaid block and the raw
* fence body; the returned string is HTML-escaped by the renderer and
* emitted as the `aria-label` of the wrapping `<figure>`. When omitted,
* the label is `"Mermaid diagram N"`, where `N` is the index plus one.
*/
readonly mermaidLabel?: (index: number, body: string) => string;
}
/** Result returned by `renderMarkdown`. */
export interface RenderedMarkdown {
/** Rendered HTML body fragment (no `<html>` / `<head>` wrapper). */
readonly html: string;
/** Table-of-contents entries harvested from the H2–H6 headings of the token stream. */
readonly toc: readonly TocEntry[];
/** Number of fence tokens whose info string is `mermaid` (trimmed, case-insensitive). */
readonly mermaidCount: number;
}
/** One entry in the generated table of contents. */
export interface TocEntry {
/** Heading level (2–6). */
readonly level: number;
/** Fragment anchor: the heading's `id` attribute when present, otherwise `slugify(text)`. */
readonly slug: string;
/**
* Heading text: the trimmed raw inline source of the heading. It is NOT
* HTML-escaped by the harvester — escape it before interpolating it
* into markup.
*/
readonly text: string;
}
/**
 * Create the markdown-it instance used to render aggregated articles.
 * Exported so callers (e.g. tests) can inspect the plugin configuration
 * without having to render a document.
 *
 * Raw HTML in the source is passed through (`html: true`); auto-linking,
 * typographic replacements and soft-break conversion are all disabled.
 *
 * @returns MarkdownIt instance with the anchor, footnote, attrs and
 *          deflist plugins registered, plus the mermaid fence override,
 *          the table scroll wrapper and image lazy-loading installed
 */
export function buildMarkdownIt(): MarkdownIt {
  const parser = new MarkdownIt({
    html: true, // artifacts already contain hand-authored HTML wrappers
    linkify: false, // avoid surprising auto-linking of plain text URLs
    typographer: false, // keep exact punctuation
    breaks: false,
  });

  // `use()` returns the instance, so the plugin stack reads as one chain.
  parser
    .use(anchor, {
      level: [2, 3, 4, 5, 6],
      permalink: anchor.permalink.headerLink({ safariReaderFix: true }),
      slugify,
    })
    .use(footnote)
    .use(attrs, { allowedAttributes: ['id', 'class'] })
    .use(deflist);

  // Renderer-rule patches, applied in the same order as before.
  installMermaidFence(parser);
  installTableWrapper(parser);
  installImageLazyLoading(parser);

  return parser;
}
/**
 * Strip a leading YAML front matter block from a Markdown document.
 * Generated `article.md` files are Jekyll-compatible, but the
 * deterministic HTML renderer must render the body, not the metadata
 * fence.
 *
 * The opening fence must be a `---` line at byte 0. The block ends at the
 * next line that is exactly `---`. Both LF and CRLF line endings are
 * accepted, the closing fence may be the last line of the file without a
 * trailing newline, and an empty block (`---` directly followed by
 * `---`) is recognised. Blank lines directly after the closing fence are
 * dropped. A document whose fence is never closed is returned unchanged.
 *
 * @param markdown - Markdown with optional `---` front matter at byte 0
 * @returns Markdown body with the front matter removed
 */
export function stripMarkdownFrontMatter(markdown: string): string {
  let lineStart: number;
  if (markdown.startsWith('---\n')) {
    lineStart = 4;
  } else if (markdown.startsWith('---\r\n')) {
    lineStart = 5;
  } else {
    return markdown;
  }

  // Scan line by line for the closing fence; plain index arithmetic keeps
  // the search linear and free of backtracking regular expressions.
  while (lineStart <= markdown.length) {
    const newline = markdown.indexOf('\n', lineStart);
    const isLastLine = newline === -1;
    const lineEnd = isLastLine ? markdown.length : newline;
    let line = markdown.slice(lineStart, lineEnd);
    if (line.endsWith('\r')) line = line.slice(0, -1);

    if (line === '---') {
      const body = isLastLine ? '' : markdown.slice(lineEnd + 1);
      return body.replace(/^(?:\r?\n)+/, '');
    }
    if (isLastLine) break;
    lineStart = lineEnd + 1;
  }

  // Unterminated fence: treat the whole document as body.
  return markdown;
}
/**
 * Slugify a heading text into a stable URL fragment.
 *
 * The text is lower-cased and NFKD-normalised, combining marks and
 * punctuation are removed, whitespace runs become single dashes, and the
 * result is capped at 80 UTF-16 code units. Truncation never leaves a
 * dangling separator or half of a surrogate pair at the end of the slug.
 *
 * @param text - Heading text (may contain unicode punctuation / marks)
 * @returns Slug of at most 80 code units made of letters, digits and
 *          single dashes, with no leading or trailing dash
 */
export function slugify(text: string): string {
  const maxLength = 80;
  const slug = text
    .toLowerCase()
    .normalize('NFKD')
    // Strip combining diacritical marks (Unicode range U+0300..U+036F)
    .replace(/[\u0300-\u036F]/g, '')
    // Strip general punctuation and supplemental punctuation
    .replace(/[\u2000-\u206F]/g, '')
    .replace(/[\u2E00-\u2E7F]/g, '')
    // Keep only letters, digits, whitespace and dashes
    .replace(/[^\p{L}\p{N}\s-]/gu, '')
    .replace(/\s+/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-|-$/g, '');

  if (slug.length <= maxLength) return slug;

  let cut = slug.slice(0, maxLength);
  // A cut in the middle of an astral character leaves a lone high
  // surrogate, which is not valid in a URL fragment — drop it.
  const lastUnit = cut.charCodeAt(cut.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut = cut.slice(0, -1);
  // Dashes were collapsed above, so at most one can trail after the cut.
  return cut.replace(/-$/, '');
}
/**
 * Decide whether a mermaid block declares a `quadrantChart`.
 *
 * Leading blank lines, single-line `%%…` comments and (possibly
 * multi-line) `%%{init: { … }}%%` directives are skipped; the first
 * remaining line decides the answer. A block consisting only of
 * skippable lines is not a quadrant chart.
 *
 * Extracted from {@link sanitizeMermaidQuadrantChart} to keep the
 * sanitizer's cognitive complexity below the project's lint cap.
 *
 * @param lines - Mermaid block split on `\n`
 * @returns `true` when the first significant line starts with the
 *          `quadrantChart` keyword
 */
function isQuadrantChartBlock(lines: readonly string[]): boolean {
  let inInitDirective = false;
  for (const line of lines) {
    const trimmed = line.trim();
    if (trimmed === '') continue;
    if (inInitDirective) {
      // Still inside a multi-line directive; `}}%%` closes it.
      if (trimmed.endsWith('}}%%')) inInitDirective = false;
      continue;
    }
    if (trimmed.startsWith('%%{')) {
      // A directive that does not close on its own line spans several lines.
      if (!trimmed.endsWith('}}%%')) inInitDirective = true;
      continue;
    }
    if (trimmed.startsWith('%%')) continue;
    // First significant line: this is the diagram-type keyword.
    return /^quadrantChart\b/.test(trimmed);
  }
  return false;
}
/**
 * Wrap an unquoted mermaid label in double quotes, escaping any
 * embedded `\` and `"` so the lexer still sees a single string token.
 *
 * - A blank label is returned exactly as received.
 * - A label that is already quoted (leading and trailing `"`, length
 *   ≥ 2 after trimming) is returned trimmed but otherwise untouched.
 *
 * Backslashes are escaped **before** double quotes so the backslashes
 * introduced by the quote-escaping pass are not escaped a second time.
 *
 * @param raw - Raw label text harvested by the per-line regex
 * @returns Quoted label suitable for emission inside a quadrantChart
 */
function quoteMermaidLabel(raw: string): string {
  const trimmed = raw.trim();
  if (trimmed === '') return raw;
  if (trimmed.length >= 2 && trimmed.startsWith('"') && trimmed.endsWith('"')) {
    return trimmed;
  }
  const inner = trimmed.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
  return `"${inner}"`;
}
/**
 * Rewrite a single non-directive line inside a quadrantChart block,
 * auto-quoting axis directives, quadrant labels, and data points. The
 * `title` line is intentionally untouched because the mermaid v11
 * lexer accepts arbitrary text after the `title` keyword.
 *
 * Shapes are tried in this order; the first match wins:
 * 1. `x-axis|y-axis <low> --> <high>` — both ends quoted
 * 2. `x-axis|y-axis <label>` — single label quoted
 * 3. `quadrant-N <label>` — label quoted
 * 4. `<label>: [x, y]` — data-point label quoted
 *
 * The regex literals use `-{2}>` instead of the literal three-byte
 * `-->` token so CodeQL's "bad HTML filtering regexp" heuristic does
 * not mistake the mermaid axis-arrow grammar for an HTML-comment-end
 * sanitiser. The two patterns are byte-equivalent.
 *
 * @param line - One line from the mermaid block
 * @returns Rewritten line, or the original when no shape matched
 */
function rewriteQuadrantChartLine(line: string): string {
  let m = line.match(/^(\s*(?:x-axis|y-axis)\s+)(.+?)\s*-{2}>\s*(.+?)\s*$/);
  if (m) {
    // If the line already has a quoted label followed by a numeric
    // axis-start (e.g. `x-axis "Probability" 0 --> 100`), leave it
    // alone — re-quoting would swallow the numeric token into the
    // label string and produce a broken `x-axis "\"…\" 0" --> "100"`.
    const lhs = (m[2] ?? '').trim();
    const rhs = (m[3] ?? '').trim();
    const lhsHasQuotedLabel = /^"[^"]*"\s+\S/.test(lhs);
    const rhsIsBareNumber = /^-?\d+(?:\.\d+)?$/.test(rhs);
    if (lhsHasQuotedLabel && rhsIsBareNumber) {
      return line;
    }
    return `${m[1]}${quoteMermaidLabel(m[2] ?? '')} --> ${quoteMermaidLabel(m[3] ?? '')}`;
  }

  // Axis with a single label. A line that still contains an arrow here
  // is malformed (e.g. missing left-hand label) and is passed through.
  m = line.match(/^(\s*(?:x-axis|y-axis)\s+)(.+?)\s*$/);
  if (m && !/-{2}>/.test(m[2] ?? '')) {
    return `${m[1]}${quoteMermaidLabel(m[2] ?? '')}`;
  }

  m = line.match(/^(\s*quadrant-[1-4]\s+)(.+?)\s*$/);
  if (m) {
    return `${m[1]}${quoteMermaidLabel(m[2] ?? '')}`;
  }

  // Data point: `<label>: [x, y]`.
  m = line.match(/^(\s*)([^[\n]+?)\s*:\s*(\[\s*[\d.]+\s*,\s*[\d.]+\s*\])\s*$/);
  if (m) {
    const prefix = m[1] ?? '';
    const label = m[2] ?? '';
    const coords = m[3] ?? '';
    // Never treat a grammar keyword as a data-point label.
    if (!/^(?:x-axis|y-axis|quadrant-[1-4]|title|quadrantChart)\b/.test(label.trim())) {
      return `${prefix}${quoteMermaidLabel(label)}: ${coords}`;
    }
  }
  return line;
}
/**
 * Decode the small set of HTML entities that Markdown authors (and
 * upstream generators) occasionally pre-encode inside fenced mermaid
 * blocks — typically `&amp;` for `&` in political-group labels like
 * `S&D`. Without this decode step, the subsequent `escapeHtml` pass
 * would re-escape the `&` of `&amp;` and emit `S&amp;amp;D` into the
 * rendered HTML, which the Mermaid client library then renders as the
 * literal text `S&amp;D` instead of `S&D`.
 *
 * Uses `split`/`join` exclusively (no RegExp) to stay within CodeQL's
 * safe-regex envelope. Only the canonical entity forms `&lt;`, `&gt;`,
 * `&quot;`, `&#39;`, `&apos;` and `&amp;` are decoded — anything more
 * exotic (e.g. `&#x26;`) is left alone so we never accidentally swallow
 * a literal that the author intended to keep encoded.
 *
 * @param content - Raw fenced-block content (post-sanitizeMermaidQuadrantChart)
 * @returns Content with pre-encoded HTML entities normalised back to
 *          their literal characters, ready for a single escapeHtml.
 */
export function decodeMermaidPreEncodedEntities(content: string): string {
  // Order matters: `&amp;` is decoded LAST. Decoding it first would turn
  // a deliberately double-encoded `&amp;lt;` into `&lt;` and then into
  // `<`, i.e. decode two levels instead of one.
  const replacements: ReadonlyArray<readonly [string, string]> = [
    ['&lt;', '<'],
    ['&gt;', '>'],
    ['&quot;', '"'],
    ['&#39;', "'"],
    ['&apos;', "'"],
    ['&amp;', '&'],
  ];
  let out = content;
  for (const [entity, literal] of replacements) {
    // Plain-string split/join: linear time and trivially CodeQL-safe.
    out = out.split(entity).join(literal);
  }
  return out;
}
/**
 * Auto-quote unquoted `quadrantChart` labels so the Mermaid v11 lexer
 * accepts them. The `quadrantChart` grammar treats unquoted labels as a
 * restricted token class — em-dashes (`—`, U+2014), en-dashes (`–`,
 * U+2013), ellipsis (`…`), parentheses, colons, and non-ASCII currency
 * symbols (`€`) all trigger `Lexical error … Unrecognized text` and
 * prevent the diagram from rendering.
 *
 * Only blocks recognised as `quadrantChart` are touched; every other
 * diagram type is returned as-is. Inside a quadrant chart, `%%` comment
 * lines and `%%{ … }}%%` directives (single- or multi-line) are copied
 * through verbatim; all remaining lines go through the per-line rewriter.
 *
 * @param content - Raw mermaid fence body
 * @returns The content with `quadrantChart` labels auto-quoted; for
 *          non-quadrant diagrams the input string itself is returned
 */
export function sanitizeMermaidQuadrantChart(content: string): string {
  const sourceLines = content.split('\n');
  if (!isQuadrantChartBlock(sourceLines)) return content;

  const rewritten: string[] = [];
  let insideDirective = false;
  for (const current of sourceLines) {
    const text = current.trim();
    if (insideDirective) {
      // Continuation of a multi-line directive; `}}%%` ends the span.
      if (text.endsWith('}}%%')) insideDirective = false;
      rewritten.push(current);
    } else if (text.startsWith('%%{') && !text.endsWith('}}%%')) {
      // Directive opened but not closed on this line.
      insideDirective = true;
      rewritten.push(current);
    } else if (text.startsWith('%%')) {
      // Comment or single-line directive.
      rewritten.push(current);
    } else {
      rewritten.push(rewriteQuadrantChartLine(current));
    }
  }
  return rewritten.join('\n');
}
/**
 * Override the `fence` renderer so fenced `mermaid` blocks emit a
 * `<pre class="mermaid">` wrapped in an accessible
 * `<figure class="mermaid-figure" role="img" aria-label="…">`. Every
 * other fence falls back to the previously installed renderer.
 *
 * Mermaid `quadrantChart` blocks are run through
 * {@link sanitizeMermaidQuadrantChart} before HTML escaping so labels
 * containing em-dashes or other Unicode punctuation render instead of
 * leaking through as raw `<pre>` text. Pre-encoded entities are decoded
 * once before escaping so they are not double-escaped.
 *
 * Diagram numbering is tracked per render environment (`env`) rather
 * than per MarkdownIt instance, so an instance that renders several
 * documents restarts at index 0 for each render that uses its own env
 * object. Renders without an env object share one instance-wide counter.
 *
 * @param md - MarkdownIt instance to patch in-place
 */
function installMermaidFence(md: MarkdownIt): void {
  const defaultFence =
    md.renderer.rules.fence ??
    ((tokens, idx, opts, _env, self) => self.renderToken(tokens, idx, opts));

  // Next mermaid index per env object; WeakMap so finished renders are
  // collectable. `sharedScope` stands in when no env object is supplied.
  const nextIndexByEnv = new WeakMap<object, number>();
  const sharedScope = {};

  md.renderer.rules.fence = (tokens, idx, opts, env, self) => {
    const token = tokens[idx];
    if (!token) return '';
    const info = (token.info || '').trim().toLowerCase();
    if (info !== 'mermaid') return defaultFence(tokens, idx, opts, env, self);

    const scope: object = typeof env === 'object' && env !== null ? env : sharedScope;
    const currentIndex = nextIndexByEnv.get(scope) ?? 0;
    nextIndexByEnv.set(scope, currentIndex + 1);

    // `env` may be undefined when the renderer is driven directly.
    const env2 = env as { mermaidLabel?: RenderOptions['mermaidLabel'] } | null | undefined;
    const labelFn = env2?.mermaidLabel ?? ((n: number) => `Mermaid diagram ${n + 1}`);
    // The label builder receives the untouched fence body.
    const label = md.utils.escapeHtml(labelFn(currentIndex, token.content));

    const sanitized = sanitizeMermaidQuadrantChart(token.content);
    const decoded = decodeMermaidPreEncodedEntities(sanitized);
    const body = md.utils.escapeHtml(decoded);
    return `<figure class="mermaid-figure" role="img" aria-label="${label}">\n<pre class="mermaid">${body}</pre>\n</figure>\n`;
  };
}
/**
 * Patch the table renderer rules so every `<table>` is emitted inside
 * `<div class="table-scroll" role="region" tabindex="0">…</div>`, giving
 * wide tables a focusable, horizontally scrollable container.
 *
 * The previously installed `table_open` / `table_close` rules (or the
 * generic token renderer when none is set) still produce the table tags
 * themselves; this patch only adds the surrounding `<div>`.
 *
 * @param md - MarkdownIt instance to patch in-place
 */
function installTableWrapper(md: MarkdownIt): void {
  const { rules } = md.renderer;

  const renderOpen =
    rules.table_open ?? ((tokens, idx, opts, _env, self) => self.renderToken(tokens, idx, opts));
  const renderClose =
    rules.table_close ?? ((tokens, idx, opts, _env, self) => self.renderToken(tokens, idx, opts));

  rules.table_open = (tokens, idx, opts, env, self) => {
    const tableTag = renderOpen(tokens, idx, opts, env, self);
    return `<div class="table-scroll" role="region" tabindex="0">${tableTag}`;
  };

  rules.table_close = (tokens, idx, opts, env, self) => {
    const tableTag = renderClose(tokens, idx, opts, env, self);
    return `${tableTag}</div>`;
  };
}
/**
 * Add `loading="lazy"` and `decoding="async"` to every `<img>` rendered
 * from Markdown image syntax, so off-screen images are deferred and
 * decoding does not block the main thread.
 *
 * The attributes are set on the image token itself and the previously
 * installed `image` rule (or the generic token renderer when none is
 * set) produces the markup.
 *
 * @param md - MarkdownIt instance to patch in-place
 */
function installImageLazyLoading(md: MarkdownIt): void {
  const defaultImage =
    md.renderer.rules.image ??
    ((tokens, idx, opts, _env, self) => self.renderToken(tokens, idx, opts));
  md.renderer.rules.image = (tokens, idx, opts, env, self) => {
    const token = tokens[idx];
    // Guard only for index-access safety; a missing token is delegated as-is.
    if (token) {
      token.attrSet('loading', 'lazy');
      token.attrSet('decoding', 'async');
    }
    return defaultImage(tokens, idx, opts, env, self);
  };
}
/**
 * Render aggregated Markdown into an HTML body fragment.
 *
 * Pipeline: strip a leading front matter block, parse with a freshly
 * built markdown-it instance, harvest the table of contents from the
 * token stream, render the tokens, escape upper-case placeholder
 * pseudo-tags in the output, and count the mermaid fences.
 *
 * @param markdown - Aggregated Markdown source produced by the aggregator
 * @param options - Optional render hooks (e.g. custom mermaid aria-label)
 * @returns {@link RenderedMarkdown} with HTML, TOC, and mermaid count
 */
export function renderMarkdown(markdown: string, options: RenderOptions = {}): RenderedMarkdown {
  const parser = buildMarkdownIt();

  // The label hook reaches the fence renderer through the render env;
  // the key is only present when a hook was actually supplied.
  const env: { mermaidLabel?: RenderOptions['mermaidLabel'] } = options.mermaidLabel
    ? { mermaidLabel: options.mermaidLabel }
    : {};

  const body = stripMarkdownFrontMatter(markdown);
  const tokens = parser.parse(body, env);

  const toc = harvestToc(tokens);
  const rawHtml = parser.renderer.render(tokens, parser.options, env);

  return {
    html: escapeUppercasePlaceholders(rawHtml),
    toc,
    mermaidCount: countMermaidTokens(tokens),
  };
}
/**
 * Escape non-HTML placeholder markers like `<N>` that appear in analysis
 * prose, so the browser shows them as text instead of parsing them as
 * unknown elements. A placeholder is `<`, an upper-case ASCII letter,
 * any number of upper-case letters, digits, `_` or `-`, then `>`.
 *
 * Lower-case tags are intentionally left untouched because artifacts may
 * embed trusted HTML wrappers such as `<div>` and `<section>`.
 *
 * @param html - Rendered HTML fragment
 * @returns HTML fragment with uppercase placeholder pseudo-tags escaped
 *          to `&lt;NAME&gt;`
 */
function escapeUppercasePlaceholders(html: string): string {
  return html.replace(/<([A-Z][A-Z0-9_-]*)>/g, '&lt;$1&gt;');
}
/**
 * Walk the token stream and collect heading entries for the TOC.
 *
 * Only `heading_open` tokens for levels 2–6 that are directly followed
 * by an `inline` token are recorded. The slug is the heading's `id`
 * attribute when one is set, otherwise it is derived from the heading
 * text with {@link slugify}. The text is the trimmed raw inline content
 * and is not HTML-escaped here.
 *
 * @param tokens - Token stream produced by MarkdownIt's parser
 * @returns Flat array of {@link TocEntry} items for H2–H6 headings
 */
function harvestToc(tokens: readonly Token[]): TocEntry[] {
  const out: TocEntry[] = [];
  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i];
    if (token?.type !== 'heading_open') continue;
    // The tag is `h1`…`h6`; the digit after the `h` is the level.
    const level = Number.parseInt(token.tag.slice(1), 10);
    if (!Number.isFinite(level) || level < 2 || level > 6) continue;
    const slug = typeof token.attrGet === 'function' ? token.attrGet('id') : null;
    // The heading's content lives in the token right after `heading_open`.
    const inline = tokens[i + 1];
    if (inline?.type !== 'inline') continue;
    const text = (inline.content ?? '').trim();
    out.push({ level, slug: slug ?? slugify(text), text });
  }
  return out;
}
/**
 * Count fence tokens whose info string is exactly `mermaid` after
 * trimming and lower-casing — the same test the mermaid fence renderer
 * applies, so the count matches the number of figures emitted.
 *
 * @param tokens - Token stream produced by MarkdownIt's parser
 * @returns Number of mermaid fence tokens in the stream
 */
function countMermaidTokens(tokens: readonly Token[]): number {
  let count = 0;
  for (const token of tokens) {
    if (token.type !== 'fence') continue;
    if ((token.info ?? '').trim().toLowerCase() === 'mermaid') count++;
  }
  return count;
}