]> git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
fix: add remark plugin to render raw HTML as literal text (#16505)
authorPascal <redacted>
Mon, 13 Oct 2025 08:55:32 +0000 (10:55 +0200)
committerGitHub <redacted>
Mon, 13 Oct 2025 08:55:32 +0000 (10:55 +0200)
* fix: add remark plugin to render raw HTML as literal text

Implemented a missing MDAST stage that neutralizes raw HTML, as major LLM WebUIs
do, ensuring consistent and safe Markdown rendering

Introduced 'remarkLiteralHtml', a plugin that converts raw HTML nodes in the
Markdown AST into plain-text equivalents while preserving indentation and
line breaks. This ensures consistent rendering and prevents unintended HTML
execution, without altering valid Markdown structure

Kept 'remarkRehype' in the pipeline since it performs the required conversion
from MDAST to HAST for KaTeX, syntax highlighting, and HTML serialization

Refined the link-enhancement logic to skip unnecessary DOM rewrites,
fixing a subtle bug where extra paragraphs were injected after the first
line due to full innerHTML reconstruction, and ensuring links open in new
tabs only when required

Final pipeline: remarkGfm -> remarkMath -> remarkBreaks -> remarkLiteralHtml
-> remarkRehype -> rehypeKatex -> rehypeHighlight -> rehypeStringify

* fix: address review feedback from allozaur

* chore: update webui build output

tools/server/public/index.html.gz
tools/server/webui/package-lock.json
tools/server/webui/package.json
tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte
tools/server/webui/src/lib/constants/literal-html.ts [new file with mode: 0644]
tools/server/webui/src/lib/markdown/literal-html.ts [new file with mode: 0644]

index f1a7568d7c674a4c1abd3ae29ebc59f80c2f6d9c..c026f36c4844d18a06e10908c6be7696eb5aa28b 100644 (file)
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
index c300ecaa77562445f5b9f18d25c2def3580c8585..9cd6ef9138c95d254dd8600bbcfbe4f48536b0e8 100644 (file)
@@ -50,6 +50,7 @@
                                "eslint-plugin-svelte": "^3.0.0",
                                "fflate": "^0.8.2",
                                "globals": "^16.0.0",
+                               "mdast": "^3.0.0",
                                "mdsvex": "^0.12.3",
                                "playwright": "^1.53.0",
                                "prettier": "^3.4.2",
@@ -66,6 +67,7 @@
                                "tw-animate-css": "^1.3.5",
                                "typescript": "^5.0.0",
                                "typescript-eslint": "^8.20.0",
+                               "unified": "^11.0.5",
                                "uuid": "^13.0.0",
                                "vite": "^7.0.4",
                                "vite-plugin-devtools-json": "^0.2.0",
                                "node": ">=14.0.0"
                        }
                },
+               "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/core": {
+                       "version": "1.4.3",
+                       "dev": true,
+                       "inBundle": true,
+                       "license": "MIT",
+                       "optional": true,
+                       "dependencies": {
+                               "@emnapi/wasi-threads": "1.0.2",
+                               "tslib": "^2.4.0"
+                       }
+               },
+               "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/runtime": {
+                       "version": "1.4.3",
+                       "dev": true,
+                       "inBundle": true,
+                       "license": "MIT",
+                       "optional": true,
+                       "dependencies": {
+                               "tslib": "^2.4.0"
+                       }
+               },
+               "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/wasi-threads": {
+                       "version": "1.0.2",
+                       "dev": true,
+                       "inBundle": true,
+                       "license": "MIT",
+                       "optional": true,
+                       "dependencies": {
+                               "tslib": "^2.4.0"
+                       }
+               },
+               "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@napi-rs/wasm-runtime": {
+                       "version": "0.2.11",
+                       "dev": true,
+                       "inBundle": true,
+                       "license": "MIT",
+                       "optional": true,
+                       "dependencies": {
+                               "@emnapi/core": "^1.4.3",
+                               "@emnapi/runtime": "^1.4.3",
+                               "@tybys/wasm-util": "^0.9.0"
+                       }
+               },
+               "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@tybys/wasm-util": {
+                       "version": "0.9.0",
+                       "dev": true,
+                       "inBundle": true,
+                       "license": "MIT",
+                       "optional": true,
+                       "dependencies": {
+                               "tslib": "^2.4.0"
+                       }
+               },
+               "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/tslib": {
+                       "version": "2.8.0",
+                       "dev": true,
+                       "inBundle": true,
+                       "license": "0BSD",
+                       "optional": true
+               },
                "node_modules/@tailwindcss/oxide-win32-arm64-msvc": {
                        "version": "4.1.11",
                        "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.11.tgz",
                                "url": "https://github.com/sponsors/wooorm"
                        }
                },
+               "node_modules/mdast": {
+                       "version": "3.0.0",
+                       "resolved": "https://registry.npmjs.org/mdast/-/mdast-3.0.0.tgz",
+                       "integrity": "sha512-xySmf8g4fPKMeC07jXGz971EkLbWAJ83s4US2Tj9lEdnZ142UP5grN73H1Xd3HzrdbU5o9GYYP/y8F9ZSwLE9g==",
+                       "dev": true,
+                       "license": "MIT"
+               },
                "node_modules/mdast-util-find-and-replace": {
                        "version": "3.0.2",
                        "resolved": "https://registry.npmjs.org/mdast-util-find-and-replace/-/mdast-util-find-and-replace-3.0.2.tgz",
index 7bf21bf57cb612e4b77ace15eb44213143ea6159..e073cd32f07e1fc13b85c699f434569a64b08571 100644 (file)
@@ -52,6 +52,7 @@
                "eslint-plugin-svelte": "^3.0.0",
                "fflate": "^0.8.2",
                "globals": "^16.0.0",
+               "mdast": "^3.0.0",
                "mdsvex": "^0.12.3",
                "playwright": "^1.53.0",
                "prettier": "^3.4.2",
@@ -68,6 +69,7 @@
                "tw-animate-css": "^1.3.5",
                "typescript": "^5.0.0",
                "typescript-eslint": "^8.20.0",
+               "unified": "^11.0.5",
                "uuid": "^13.0.0",
                "vite": "^7.0.4",
                "vite-plugin-devtools-json": "^0.2.0",
index 45b9c6debbbd5673396ed6fc3520cc6341be450f..24d29c2b3e51eca0a53bfb46c6737353e1e0af8e 100644 (file)
@@ -14,6 +14,7 @@
        import githubDarkCss from 'highlight.js/styles/github-dark.css?inline';
        import githubLightCss from 'highlight.js/styles/github.css?inline';
        import { mode } from 'mode-watcher';
+       import { remarkLiteralHtml } from '$lib/markdown/literal-html';
 
        interface Props {
                content: string;
                        .use(remarkGfm) // GitHub Flavored Markdown
                        .use(remarkMath) // Parse $inline$ and $$block$$ math
                        .use(remarkBreaks) // Convert line breaks to <br>
-                       .use(remarkRehype) // Convert to rehype (HTML AST)
+                       .use(remarkLiteralHtml) // Treat raw HTML as literal text with preserved indentation
+                       .use(remarkRehype) // Convert Markdown AST to rehype
                        .use(rehypeKatex) // Render math using KaTeX
                        .use(rehypeHighlight) // Add syntax highlighting
                        .use(rehypeStringify); // Convert to HTML string
        });
 
        function enhanceLinks(html: string): string {
+               if (!html.includes('<a')) {
+                       return html;
+               }
+
                const tempDiv = document.createElement('div');
                tempDiv.innerHTML = html;
 
                // Make all links open in new tabs
                const linkElements = tempDiv.querySelectorAll('a[href]');
+               let mutated = false;
+
                for (const link of linkElements) {
+                       const target = link.getAttribute('target');
+                       const rel = link.getAttribute('rel');
+
+                       if (target !== '_blank' || rel !== 'noopener noreferrer') {
+                               mutated = true;
+                       }
+
                        link.setAttribute('target', '_blank');
                        link.setAttribute('rel', 'noopener noreferrer');
                }
 
-               return tempDiv.innerHTML;
+               return mutated ? tempDiv.innerHTML : html;
        }
 
        function enhanceCodeBlocks(html: string): string {
+               if (!html.includes('<pre')) {
+                       return html;
+               }
+
                const tempDiv = document.createElement('div');
                tempDiv.innerHTML = html;
 
                const preElements = tempDiv.querySelectorAll('pre');
+               let mutated = false;
 
                for (const [index, pre] of Array.from(preElements).entries()) {
                        const codeElement = pre.querySelector('code');
 
-                       if (!codeElement) continue;
+                       if (!codeElement) {
+                               continue;
+                       }
+
+                       mutated = true;
 
                        let language = 'text';
                        const classList = Array.from(codeElement.classList);
                        pre.parentNode?.replaceChild(wrapper, pre);
                }
 
-               return tempDiv.innerHTML;
+               return mutated ? tempDiv.innerHTML : html;
        }
 
        async function processMarkdown(text: string): Promise<string> {
diff --git a/tools/server/webui/src/lib/constants/literal-html.ts b/tools/server/webui/src/lib/constants/literal-html.ts
new file mode 100644 (file)
index 0000000..ed1b0cf
--- /dev/null
@@ -0,0 +1,15 @@
+export const LINE_BREAK = /\r?\n/;
+
+export const PHRASE_PARENTS = new Set([
+       'paragraph',
+       'heading',
+       'emphasis',
+       'strong',
+       'delete',
+       'link',
+       'linkReference',
+       'tableCell'
+]);
+
+export const NBSP = '\u00a0';
+export const TAB_AS_SPACES = NBSP.repeat(4);
diff --git a/tools/server/webui/src/lib/markdown/literal-html.ts b/tools/server/webui/src/lib/markdown/literal-html.ts
new file mode 100644 (file)
index 0000000..d4ace01
--- /dev/null
@@ -0,0 +1,121 @@
+import type { Plugin } from 'unified';
+import { visit } from 'unist-util-visit';
+import type { Break, Content, Paragraph, PhrasingContent, Root, Text } from 'mdast';
+import { LINE_BREAK, NBSP, PHRASE_PARENTS, TAB_AS_SPACES } from '$lib/constants/literal-html';
+
+/**
+ * remark plugin that rewrites raw HTML nodes into plain-text equivalents.
+ *
+ * remark parses inline HTML into `html` nodes even when we do not want to render
+ * them. We turn each of those nodes into regular text (plus `<br>` break markers)
+ * so the downstream rehype pipeline escapes the characters instead of executing
+ * them. Leading spaces and tab characters are converted to non‑breaking spaces to
+ * keep indentation identical to the original author input.
+ */
+
+function preserveIndent(line: string): string { // Rewrites only the leading run of spaces/tabs of `line`; the remainder is returned unchanged.
+       let index = 0; // position of the first character that is not part of the leading indent
+       let output = ''; // replacement text accumulated for the leading indent
+
+       while (index < line.length) {
+               const char = line[index];
+
+               if (char === ' ') {
+                       output += NBSP; // one leading space becomes one NBSP constant
+                       index += 1;
+                       continue;
+               }
+
+               if (char === '\t') {
+                       output += TAB_AS_SPACES; // one leading tab becomes the TAB_AS_SPACES constant
+                       index += 1;
+                       continue;
+               }
+
+               break; // first character that is neither space nor tab ends the indent run
+       }
+
+       return output + line.slice(index); // converted indent followed by the untouched rest of the line
+}
+
+function createLiteralChildren(value: string): PhrasingContent[] { // Builds text nodes (one per line of `value`) separated by break nodes.
+       const lines = value.split(LINE_BREAK); // LINE_BREAK is the imported line-separator pattern
+       const nodes: PhrasingContent[] = [];
+
+       for (const [lineIndex, rawLine] of lines.entries()) {
+               if (lineIndex > 0) {
+                       nodes.push({ type: 'break' } as Break as unknown as PhrasingContent); // break goes between lines, never before the first
+               }
+
+               nodes.push({
+                       type: 'text',
+                       value: preserveIndent(rawLine) // leading indentation converted by preserveIndent
+               } as Text as unknown as PhrasingContent);
+       }
+
+       if (!nodes.length) { // NOTE(review): split() yields at least one element, so this guard looks unreachable; confirm before removing
+               nodes.push({ type: 'text', value: '' } as Text as unknown as PhrasingContent);
+       }
+
+       return nodes;
+}
+
+export const remarkLiteralHtml: Plugin<[], Root> = () => { // Plugin factory; the returned transformer replaces every `html` node with text/break nodes.
+       return (tree) => {
+               visit(tree, 'html', (node, index, parent) => {
+                       if (!parent || typeof index !== 'number') { // nothing to splice into without a parent and a numeric position
+                               return;
+                       }
+
+                       const replacement = createLiteralChildren(node.value);
+
+                       if (!PHRASE_PARENTS.has(parent.type as string)) { // parent type is not in PHRASE_PARENTS: wrap the literal nodes in a paragraph
+                               const paragraph: Paragraph = {
+                                       type: 'paragraph',
+                                       children: replacement as Paragraph['children'],
+                                       data: { literalHtml: true } // marker read below to detect adjacent literal paragraphs
+                               };
+
+                               const siblings = parent.children as unknown as Content[];
+                               siblings.splice(index, 1, paragraph as unknown as Content); // swap the html node for the new paragraph
+
+                               if (index > 0) {
+                                       const previous = siblings[index - 1] as Paragraph | undefined;
+
+                                       if (
+                                               previous?.type === 'paragraph' &&
+                                               (previous.data as { literalHtml?: boolean } | undefined)?.literalHtml // previous sibling was also produced by this plugin
+                                       ) {
+                                               const prevChildren = previous.children as unknown as PhrasingContent[];
+
+                                               if (prevChildren.length) {
+                                                       const lastChild = prevChildren[prevChildren.length - 1];
+
+                                                       if (lastChild.type !== 'break') { // avoid stacking two breaks at the join
+                                                               prevChildren.push({
+                                                                       type: 'break'
+                                                               } as Break as unknown as PhrasingContent);
+                                                       }
+                                               }
+
+                                               prevChildren.push(...(paragraph.children as unknown as PhrasingContent[])); // merge this paragraph's nodes into the previous one
+
+                                               siblings.splice(index, 1); // remove the now-merged paragraph
+
+                                               return index; // NOTE(review): presumably tells visit() to continue at the sibling that shifted into this slot; confirm against unist-util-visit docs
+                                       }
+                               }
+
+                               return index + 1; // continue with the sibling after the inserted paragraph
+                       }
+
+                       (parent.children as unknown as PhrasingContent[]).splice( // parent type is in PHRASE_PARENTS: insert the literal nodes inline
+                               index,
+                               1,
+                               ...(replacement as unknown as PhrasingContent[])
+                       );
+
+                       return index + replacement.length; // continue after the nodes that were just inserted
+               });
+       };
+};