All files / src/generators/shared html-escape.ts

100% Statements 21/21
100% Branches 18/18
100% Functions 5/5
100% Lines 20/20

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117                                                        4x                     3x                     11x 271x 271x 271x                       3x     8x                               11x 3x     8x 8x   2x   6x 2x   4x                         8x   8x 3x   5x    
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
 
/**
 * @module Generators/Shared/HtmlEscape
 * @description Centralized HTML/XML escaping utilities for the generation
 * layer. Wraps the existing `escapeHTML` from `utils/file-utils.ts` and
 * `escapeXML` from `sitemap/xml-utils.ts` with branded-type producers so
 * downstream code can prove at compile-time that a string has been
 * sanitized before interpolation.
 *
 * This module is the **single gateway** for creating branded
 * {@link SafeHtmlString} and {@link SafeXmlString} values. Template code
 * should accept only these types — never raw `string` — to prevent XSS.
 */
 
import { escapeHTML } from '../../utils/file-utils.js';
import { escapeXML } from '../sitemap/xml-utils.js';
import type { AbsoluteUrl, RelativeFilePath, SafeHtmlString, SafeXmlString } from './types.js';
 
/**
 * Produce a {@link SafeHtmlString} from an arbitrary string.
 *
 * The escaping itself is delegated to `escapeHTML` from
 * `utils/file-utils.ts` (expected to neutralize `&`, `<`, `>`, `"`, and
 * `'` — confirm against that helper); this function only adds the brand so
 * that template code can require already-escaped input at the type level.
 *
 * @param raw - Untrusted text such as user input or file content
 * @returns The escaped text, branded as {@link SafeHtmlString}
 */
export function toSafeHtml(raw: string): SafeHtmlString {
  const escaped = escapeHTML(raw);
  // The brand is compile-time only; the runtime value is the escaped string.
  return escaped as SafeHtmlString;
}
 
/**
 * Produce a {@link SafeXmlString} from an arbitrary string.
 *
 * The escaping itself is delegated to `escapeXML` from
 * `sitemap/xml-utils.ts` (expected to neutralize `&`, `<`, `>`, `"`, and
 * `'` — confirm against that helper); this function only adds the brand so
 * that XML template code can require already-escaped input at the type level.
 *
 * @param raw - Untrusted text such as user input or file content
 * @returns The escaped text, branded as {@link SafeXmlString}
 */
export function toSafeXml(raw: string): SafeXmlString {
  const escaped = escapeXML(raw);
  // The brand is compile-time only; the runtime value is the escaped string.
  return escaped as SafeXmlString;
}
 
/**
 * Report whether a string contains any character that must not appear raw
 * inside an HTML attribute value.
 *
 * The rejected set is: double quote, single quote, `<`, `>`, every code
 * unit from U+0000 through U+0020 (C0 controls, including tab, LF and CR,
 * plus the ASCII space) and U+007F (DEL). Characters above U+007F are not
 * considered unsafe by this check.
 *
 * @param value - The string to inspect
 * @returns `true` when at least one unsafe character is present
 */
function hasUnsafeAttrChars(value: string): boolean {
  // No `u` flag: matching is per UTF-16 code unit, and every member of the
  // class is a single code unit below U+0080.
  return /["'<>\u0000-\u0020\u007f]/.test(value);
}
 
/**
 * Validate a candidate string and brand it as an absolute HTTPS URL.
 *
 * Checks are applied in this order, and the first failure throws:
 * 1. the string must not contain characters rejected by
 *    `hasUnsafeAttrChars` (quotes, angle brackets, whitespace, control
 *    characters), which guards against attribute breakout when the URL is
 *    interpolated into HTML;
 * 2. `new URL()` must be able to parse it;
 * 3. the parsed protocol must be exactly `https:`.
 *
 * Error messages include at most the first 60 characters of the input.
 *
 * NOTE(review): the value returned is the original input string, not the
 * normalized `href` produced by the parser.
 *
 * @param url - Candidate URL string
 * @returns The input string, branded as {@link AbsoluteUrl}
 * @throws {Error} when `url` contains unsafe characters, cannot be parsed,
 *   or does not use the `https:` protocol
 */
export function toAbsoluteUrl(url: string): AbsoluteUrl {
  // Truncated copy of the input used in every error message.
  const preview = url.slice(0, 60);

  if (hasUnsafeAttrChars(url)) {
    throw new Error(`URL contains characters unsafe for HTML attributes: ${preview}`);
  }

  let protocol: string;
  try {
    ({ protocol } = new URL(url));
  } catch {
    throw new Error(`Invalid URL: ${preview}`);
  }

  if (protocol !== 'https:') {
    throw new Error(`Expected absolute HTTPS URL, got: ${preview}`);
  }

  return url as AbsoluteUrl;
}
 
/**
 * Normalize a file path to a relative, forward-slash form and brand it.
 *
 * Every backslash is first converted to `/`, then all leading slashes are
 * removed. The result is rejected if any of its `/`-separated segments is
 * exactly `..`, so the path cannot climb out of its base directory.
 * Segments that merely contain dots (for example `..b` or `...`) are
 * accepted.
 *
 * @param filePath - Raw file path
 * @returns The normalized path, branded as {@link RelativeFilePath}
 * @throws {Error} when a segment of the normalized path is `..`; the
 *   message includes at most the first 60 characters of the raw input
 */
export function toRelativeFilePath(filePath: string): RelativeFilePath {
  const posixStyle = filePath.split('\\').join('/');
  const relative = posixStyle.replace(/^\/+/, '');

  // A traversal is any path segment that is exactly "..".
  const climbsOut = relative.split('/').includes('..');
  if (climbsOut) {
    throw new Error(`Path traversal not allowed in relative file path: ${filePath.slice(0, 60)}`);
  }

  return relative as RelativeFilePath;
}