All files / src/aggregator/clean strip-preamble.ts

95.23% Statements 20/21
80.76% Branches 21/26
100% Functions 1/1
100% Lines 19/19

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67                                22x                                         257x 257x   364x   257x 234x     23x 23x 135x 135x 112x   23x       23x 23x 23x 9x     23x 23x 23x    
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
 
/**
 * @module Aggregator/Clean/StripPreamble
 * @description Strip the operational metadata preamble (e.g.
 * `**Run:**` / `**Window:**` lines) that agent pipelines prepend to
 * artifacts before rendering.
 */
 
/**
 * Pattern matching an operational metadata line at the start of an artifact.
 * Examples: `**Run:** breaking-run-123`, `**Window:** 2026-04-24 00:00Z — 05:49Z`.
 *
 * A line matches when it opens with a bold span (`**…**`) whose first
 * character is an ASCII letter and which contains no `*`, and the closing
 * `**` is immediately followed by a colon or a whitespace character.
 *
 * NOTE(review): this is a heuristic. A paragraph that opens with bold prose
 * followed by a space (e.g. `**Note** that …`) also matches, and a label with
 * nothing at all after the closing `**` does not match because one trailing
 * character is required.
 */
const METADATA_LINE_PATTERN = /^\*\*[A-Za-z][^*\n]*\*\*[:\s]/;

/**
 * Strip the operational metadata preamble that agent pipelines prepend to
 * artifacts. These are lines of the form `**Run:** …`, `**Window:** …`,
 * `**Methodology:** …`, etc., followed optionally by a standalone `---`
 * horizontal rule. They are agent-operational metadata that should not appear
 * in the published article.
 *
 * Algorithm:
 *  1. Skip leading blank lines.
 *  2. If the first non-blank line does NOT match the metadata pattern (or the
 *     document has no non-blank line), return the input unchanged with
 *     `lines: 0`.
 *  3. Otherwise consume all metadata lines and interspersed/trailing blank
 *     lines.
 *  4. If the next line is a standalone `---`, consume it together with any
 *     blank lines that follow it.
 *  5. Return the remaining Markdown and the number of leading lines dropped.
 *
 * "Blank" always means empty after trimming, so whitespace-only lines and the
 * lone `\r` left behind by CRLF line endings are treated as blank throughout.
 * Lines are obtained by splitting on `\n`; the empty segment after a final
 * newline therefore counts as a line when the whole document is consumed.
 *
 * @param md - Markdown source (after banner/heading passes)
 * @returns `{ md, lines }` — stripped Markdown and the number of leading
 *   lines removed (skipped blank lines, metadata lines, the optional rule and
 *   the blank lines after it)
 */
export function stripArtifactMetadataPreamble(md: string): { md: string; lines: number } {
  const lines = md.split('\n');
  const isBlank = (index: number): boolean => (lines[index] ?? '').trim() === '';
  const isMetadata = (index: number): boolean => METADATA_LINE_PATTERN.test(lines[index] ?? '');

  // Steps 1–2: locate the first non-blank line; bail out unless it is metadata.
  let first = 0;
  while (first < lines.length && isBlank(first)) first++;
  if (first >= lines.length || !isMetadata(first)) {
    return { md, lines: 0 };
  }

  // Step 3: extend past every following metadata or blank line. When this loop
  // stops, `lines[end]` is either past the end or a non-blank, non-metadata line.
  let end = first + 1;
  while (end < lines.length && (isMetadata(end) || isBlank(end))) end++;

  // Step 4: a standalone horizontal rule directly after the preamble belongs to
  // it. Blank lines after the rule are consumed here (and therefore counted) so
  // the result never starts with a stray blank line.
  if (end < lines.length && /^\s*---\s*$/.test(lines[end] ?? '')) {
    end++;
    while (end < lines.length && isBlank(end)) end++;
  }

  return { md: lines.slice(end).join('\n'), lines: end };
}