Press n or j to go to the next uncovered block, b, p or k for the previous block.
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 | 22x 22x 728x 5708x 499x 254x 254x 254x 254x 254x 728x 728x 229x 229x 229x 229x 499x 245x 245x 254x 33x 33x 254x 254x 34x | // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
/**
* @module Aggregator/Clean/StripBanners
* @description Drop ISMS/owner/classification banners (emoji rows,
* shields.io badges, `<p align="center">` blocks) from the head of an
* artifact, plus the `---` separator that typically follows them.
*/
/**
 * Line-level regex patterns identifying banner / document-owner / shields.io /
 * centered-picture lines that clutter the aggregate.
 *
 * Every pattern is anchored at the start of the line (after optional leading
 * whitespace) and is case-insensitive. None of them carries the `g` or `y`
 * flag, so calling `test` on them repeatedly is stateless.
 *
 * NOTE(review): per the original note these are meant to be applied after
 * front-matter has been stripped — the caller is not visible in this file.
 */
const BANNER_LINE_PATTERNS: readonly RegExp[] = [
// Opening `<p align="center">` tag (anything may follow on the same line).
/^\s*<p\s+align="center">/i,
// A line consisting solely of a closing `</p>` tag.
/^\s*<\/p>\s*$/i,
// An `<img …>` tag whose attributes contain `hack23.com/icon-` within the
// first 200 non-`>` characters.
// NOTE(review): the {0,200} bound presumably caps how far the match scans — confirm intent.
/^\s*<img\s+[^>]{0,200}hack23\.com\/icon-/i,
// Opening `<h1 align="center">` tag.
/^\s*<h1\s+align="center">/i,
// A line consisting solely of a closing `</h1>` tag.
/^\s*<\/h1>\s*$/i,
// A shields.io badge image wrapped in an `<a href="#">` link.
/^\s*<a\s+href="#"><img\s+src="https:\/\/img\.shields\.io\//i,
// Bold Markdown metadata rows: `**📋 Document Owner…`
/^\s*\*\*\s*📋\s*Document Owner/i,
// `**🔄 Review Cycle…`
/^\s*\*\*\s*🔄\s*Review Cycle/i,
// `**🏢 Owner…`
/^\s*\*\*\s*🏢\s*Owner/i,
// HTML variant of the rows above: `<strong>` followed by one of the three emoji.
/^\s*<strong>\s*(?:📋|🔄|🏢)/i,
];
/**
 * Line-level matcher for a standalone horizontal rule: exactly three dashes
 * (`---`) with optional surrounding whitespace and nothing else on the line.
 * Other Markdown rule spellings (`----`, `***`, `- - -`) do not match.
 *
 * Used to drop the `---` separator that usually follows the banner block.
 */
const HR_LINE = /^\s*---\s*$/;
/**
 * Decide whether a single Markdown line is banner content.
 *
 * @param line - One line of Markdown text
 * @returns `true` when at least one entry of `BANNER_LINE_PATTERNS` matches
 *   the line, `false` otherwise
 */
function isBannerLine(line: string): boolean {
  return BANNER_LINE_PATTERNS.some((pattern) => pattern.test(line));
}
/**
 * Drop banner/metadata blocks from the head of the document. Rules:
 * - A run of banner lines (contiguous, or separated only by blank lines) is
 *   removed. A `---` horizontal rule that is the first non-blank line after
 *   the banner run is also removed.
 * - Scanning stops at the first line that is neither a banner line nor blank
 *   (headings, prose, tables, fences, ...).
 * - Empty lines left at the head of the remainder are trimmed, for both LF and
 *   CRLF line endings.
 *
 * @param md - Raw Markdown source
 * @returns `{ md, lines }` — the stripped Markdown and the number of banner
 *   and horizontal-rule lines removed (blank lines dropped along with them are
 *   not counted). When no banner line is found, `md` is returned unchanged
 *   with `lines: 0`.
 */
export function stripBanners(md: string): { md: string; lines: number } {
  const lines = md.split('\n');
  // Index one past the last line to drop; 0 means "no banner seen yet".
  let bannerEnd = 0;
  let stripped = 0;
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i] ?? '';
    if (isBannerLine(line)) {
      bannerEnd = i + 1;
      stripped++;
      continue;
    }
    // Blank lines never end the scan; they are only dropped if a banner or
    // the trailing rule appears after them.
    if (line.trim() === '') continue;
    // First real line: swallow it only if it is the `---` separator directly
    // following a banner run, then stop either way.
    if (bannerEnd > 0 && HR_LINE.test(line)) {
      bannerEnd = i + 1;
      stripped++;
    }
    break;
  }
  if (bannerEnd === 0) return { md, lines: 0 };
  // Splitting on '\n' leaves a trailing '\r' on every line of a CRLF document,
  // so the leading-blank-line trim must accept `\r\n` as well as `\n`.
  const remainder = lines.slice(bannerEnd).join('\n').replace(/^(?:\r?\n)+/, '');
  return { md: remainder, lines: stripped };
}
|