// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
/**
* @module Aggregator/Clean/Pipeline
* @description Compose the per-step cleaners into the public
* {@link cleanArtifact} entry point.
*
* Applied transformations (in order):
* 1. Strip leading YAML front-matter (`---\n…\n---\n`).
* 2. Strip per-artifact SPDX tags.
* 3. Strip ISMS/owner/classification banners (emoji rows, shields.io badges,
* `<p align="center">` blocks, and the separator `---` that usually
* follows them).
* 4. Remove the artifact's own H1 and demote every other heading by one
* level so the aggregate has a single H1.
* 5. Strip the operational metadata preamble (`**Run:**`, `**Window:**`, …).
* 6. Rewrite repo-relative links/images to absolute GitHub URLs.
* 7. Deduplicate mermaid fence bodies on a per-document basis.
* 8. Collapse runs of 3+ newlines and append a final `\n`.
*/
import { dedupMermaid } from './dedupe-mermaid.js';
import { demoteHeadings } from './demote-headings.js';
import { rewriteLinks } from './rewrite-links.js';
import { stripBanners } from './strip-banners.js';
import { stripFrontMatter } from './strip-frontmatter.js';
import { stripArtifactMetadataPreamble } from './strip-preamble.js';
import { stripSpdxTags } from './strip-spdx.js';
/**
 * Options controlling artifact cleanup.
 *
 * Passed as the second argument to {@link cleanArtifact}.
 */
export interface CleanArtifactOptions {
  /**
   * Repo-relative path of the artifact being cleaned (e.g.
   * `analysis/daily/2026-01-15/breaking-run1/intelligence/synthesis-summary.md`).
   * Forwarded to the link-rewriting pass, which uses it as the base for
   * resolving relative links/images.
   */
  readonly artifactRelPath: string;
  /**
   * Shared set of mermaid-body hashes seen so far in the aggregate document.
   * Identical blocks are replaced with a cross-reference comment; the caller
   * owns the set so it persists across artifacts.
   *
   * When omitted, {@link cleanArtifact} uses a fresh empty set for that call,
   * so nothing is carried over between calls.
   */
  readonly seenMermaidHashes?: Set<string>;
}
/**
 * Result of {@link cleanArtifact}: the cleaned Markdown plus the counters
 * reported by the individual cleanup passes.
 */
export interface CleanArtifactResult {
  /**
   * Cleaned Markdown ready to be concatenated into the aggregate. Runs of
   * three or more newlines are collapsed to two and the text ends with a
   * single `\n`.
   */
  readonly markdown: string;
  /** H1 count reported by the heading-demotion pass (for debugging/telemetry). */
  readonly strippedH1s: number;
  /** Line count reported by the banner-stripping pass. */
  readonly strippedBannerLines: number;
  /** Operational metadata preamble lines removed (e.g. **Run:** / **Window:** blocks). */
  readonly strippedMetaLines: number;
  /** Mermaid blocks deduplicated as a reference to a previous occurrence. */
  readonly dedupedMermaidBlocks: number;
}
/**
 * Run every cleanup pass over one artifact, in a fixed order, and return the
 * normalised Markdown together with the counters each pass reported.
 *
 * Pass order: front-matter → SPDX tags → banners → heading demotion →
 * metadata preamble → link rewriting → mermaid dedup → newline normalisation.
 *
 * @param source - Raw Markdown contents of the artifact file
 * @param options - Cleanup options (artifact path, shared mermaid dedup set)
 * @returns {@link CleanArtifactResult} with the normalised Markdown
 */
export function cleanArtifact(source: string, options: CleanArtifactOptions): CleanArtifactResult {
  // Use the caller's shared set when provided; otherwise dedup only within this call.
  const mermaidHashes = options.seenMermaidHashes ?? new Set<string>();

  // Each pass consumes the output of the previous one.
  const withoutFrontMatter = stripFrontMatter(source);
  const { md: withoutSpdx } = stripSpdxTags(withoutFrontMatter);
  const { md: withoutBanners, lines: strippedBannerLines } = stripBanners(withoutSpdx);
  const { md: withDemotedHeadings, h1Count: strippedH1s } = demoteHeadings(withoutBanners);
  const { md: withoutPreamble, lines: strippedMetaLines } =
    stripArtifactMetadataPreamble(withDemotedHeadings);
  const withAbsoluteLinks = rewriteLinks(withoutPreamble, options.artifactRelPath);
  const { md: withDedupedMermaid, deduped: dedupedMermaidBlocks } = dedupMermaid(
    withAbsoluteLinks,
    mermaidHashes,
  );

  // Collapse 3+ consecutive newlines to one blank line, then end with exactly one "\n".
  const collapsed = withDedupedMermaid.replace(/\n{3,}/g, '\n\n');
  const markdown = `${collapsed.trimEnd()}\n`;

  return {
    markdown,
    strippedH1s,
    strippedBannerLines,
    strippedMetaLines,
    dedupedMermaidBlocks,
  };
}
|