git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
webui: Improve model parsing logic + add unit tests (#20749)
author: Aleksander Grygier <redacted>
Thu, 19 Mar 2026 11:25:50 +0000 (12:25 +0100)
committer: GitHub <redacted>
Thu, 19 Mar 2026 11:25:50 +0000 (12:25 +0100)
* add tests for model id parser

* add test case having activated params

* add structured tests for model id parser

* add ToDo

* feat: Improve model parsing logic + tests

* chore: update webui build output

---------

Co-authored-by: bluemoehre <redacted>
tools/server/public/index.html.gz
tools/server/webui/src/lib/components/app/models/ModelId.svelte
tools/server/webui/src/lib/constants/model-id.ts
tools/server/webui/src/lib/services/models.service.ts
tools/server/webui/src/lib/types/models.d.ts
tools/server/webui/tests/unit/model-id-parser.test.ts [new file with mode: 0644]

index 07f7b7e422b66f4c4ce130f0f1178c7b23d37e7a..20523afa3394d6d0727cdb4768985f3ebf4fbe16 100644 (file)
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
index 9b25d05c13c22d26c25ed5faa05df3020e655cb8..5fda493429df45dc31d2987a508642d99315000d 100644 (file)
 
        let parsed = $derived(ModelsService.parseModelId(modelId));
        let resolvedShowRaw = $derived(showRaw ?? (config().showRawModelNames as boolean) ?? false);
+       let displayName = $derived(
+               aliases && aliases.length > 0 ? aliases[0] : (parsed.modelName ?? modelId)
+       );
+       let remainingAliases = $derived(aliases && aliases.length > 1 ? aliases.slice(1) : []);
+       let allTags = $derived([...(parsed.tags ?? []), ...(tags ?? [])]);
 </script>
 
 {#if resolvedShowRaw}
@@ -35,7 +40,7 @@
 {:else}
        <span class="flex min-w-0 flex-wrap items-center gap-1 {className}">
                <span class="min-w-0 truncate font-medium">
-                       {#if showOrgName && parsed.orgName}{parsed.orgName}/{/if}{parsed.modelName ?? modelId}
+                       {#if showOrgName && parsed.orgName && !(aliases && aliases.length > 0)}{parsed.orgName}/{/if}{displayName}
                </span>
 
                {#if parsed.params}
                        </span>
                {/if}
 
-               {#if aliases && aliases.length > 0}
-                       {#each aliases as alias (alias)}
+               {#if remainingAliases.length > 0}
+                       {#each remainingAliases as alias (alias)}
                                <span class={badgeClass}>{alias}</span>
                        {/each}
                {/if}
 
-               {#if tags && tags.length > 0}
-                       {#each tags as tag (tag)}
+               {#if allTags.length > 0}
+                       {#each allTags as tag (tag)}
                                <span class={tagBadgeClass}>{tag}</span>
                        {/each}
                {/if}
index eb6662a02daa1b7afc7a745ce2d221c5613dd1d6..ee314d16744e0e4396eab4c54ea60ae2375e337e 100644 (file)
@@ -11,10 +11,16 @@ export const MODEL_ID_SEGMENT_SEPARATOR = '-';
 export const MODEL_ID_QUANTIZATION_SEPARATOR = ':';
 
 /**
- * Matches a trailing ALL-CAPS format segment, e.g. `GGUF`, `BF16`, `Q4_K_M`.
- * Must be at least 2 uppercase letters, optionally followed by uppercase letters or digits.
+ * Matches a quantization/precision segment, e.g. `Q4_K_M`, `IQ4_XS`, `F16`, `BF16`, `MXFP4`.
+ * Case-insensitive to handle both uppercase and lowercase inputs.
  */
-export const MODEL_FORMAT_SEGMENT_RE = /^[A-Z]{2,}[A-Z0-9]*$/;
+export const MODEL_QUANTIZATION_SEGMENT_RE =
+       /^(I?Q\d+(_[A-Z0-9]+)*|F\d+|BF\d+|MXFP\d+(_[A-Z0-9]+)*)$/i;
+
+/**
+ * Matches prefix for custom quantization types, e.g. `UD-Q8_K_XL`.
+ */
+export const MODEL_CUSTOM_QUANTIZATION_PREFIX_RE = /^UD$/i;
 
 /**
  * Matches a parameter-count segment, e.g. `7B`, `1.5b`, `120M`.
@@ -22,7 +28,12 @@ export const MODEL_FORMAT_SEGMENT_RE = /^[A-Z]{2,}[A-Z0-9]*$/;
 export const MODEL_PARAMS_RE = /^\d+(\.\d+)?[BbMmKkTt]$/;
 
 /**
- * Matches an activated-parameter-count segment, e.g. `A10B`, `A2.4b`.
- * The leading `A` distinguishes it from a regular params segment.
+ * Matches an activated-parameter-count segment, e.g. `A10B`, `a2.4b`.
+ * The leading `A`/`a` distinguishes it from a regular params segment.
+ */
+export const MODEL_ACTIVATED_PARAMS_RE = /^[Aa]\d+(\.\d+)?[BbMmKkTt]$/;
+
+/**
+ * Container format segments to exclude from tags (every model uses these).
  */
-export const MODEL_ACTIVATED_PARAMS_RE = /^A\d+(\.\d+)?[BbMmKkTt]$/;
+export const MODEL_IGNORED_SEGMENTS = new Set(['GGUF', 'GGML']);
index de90c48cf00f72e0a35bd6364ba18069908ce518..209bd7caba5239d7565d3cfe92945787c979f35a 100644 (file)
@@ -2,9 +2,11 @@ import { ServerModelStatus } from '$lib/enums';
 import { apiFetch, apiPost } from '$lib/utils';
 import type { ParsedModelId } from '$lib/types/models';
 import {
-       MODEL_FORMAT_SEGMENT_RE,
+       MODEL_QUANTIZATION_SEGMENT_RE,
+       MODEL_CUSTOM_QUANTIZATION_PREFIX_RE,
        MODEL_PARAMS_RE,
        MODEL_ACTIVATED_PARAMS_RE,
+       MODEL_IGNORED_SEGMENTS,
        MODEL_ID_NOT_FOUND,
        MODEL_ID_ORG_SEPARATOR,
        MODEL_ID_SEGMENT_SEPARATOR,
@@ -119,8 +121,9 @@ export class ModelsService {
        /**
         * Parse a model ID string into its structured components.
         *
-        * Handles the convention:
-        *   `<org>/<ModelName>-<Parameters>(-<ActivatedParameters>)-<Format>:<QuantizationType>`
+        * Handles conventions like:
+        *   `<org>/<ModelName>-<Parameters>(-<ActivatedParameters>)(-<Tags>)(-<Quantization>)(:<Quantization>)`
+        *   `<ModelName>.<Quantization>` (dot-separated quantization, e.g. `model.Q4_K_M`)
         *
         * @param modelId - Raw model identifier string
         * @returns Structured {@link ParsedModelId} with all detected fields
@@ -132,11 +135,11 @@ export class ModelsService {
                        modelName: null,
                        params: null,
                        activatedParams: null,
-                       format: null,
                        quantization: null,
                        tags: []
                };
 
+               // 1. Extract colon-separated quantization (e.g. `model:Q4_K_M`)
                const colonIdx = modelId.indexOf(MODEL_ID_QUANTIZATION_SEPARATOR);
                let modelPath: string;
 
@@ -147,6 +150,7 @@ export class ModelsService {
                        modelPath = modelId;
                }
 
+               // 2. Extract org name (e.g. `org/model` -> org = "org")
                const slashIdx = modelPath.indexOf(MODEL_ID_ORG_SEPARATOR);
                let modelStr: string;
 
@@ -157,37 +161,66 @@ export class ModelsService {
                        modelStr = modelPath;
                }
 
-               const segments = modelStr.split(MODEL_ID_SEGMENT_SEPARATOR);
+               // 3. Handle dot-separated quantization (e.g. `model-name.Q4_K_M`)
+               const dotIdx = modelStr.lastIndexOf('.');
+
+               if (dotIdx !== MODEL_ID_NOT_FOUND && !result.quantization) {
+                       const afterDot = modelStr.slice(dotIdx + 1);
 
-               if (segments.length > 0 && MODEL_FORMAT_SEGMENT_RE.test(segments[segments.length - 1])) {
-                       result.format = segments.pop()!;
+                       if (MODEL_QUANTIZATION_SEGMENT_RE.test(afterDot)) {
+                               result.quantization = afterDot;
+                               modelStr = modelStr.slice(0, dotIdx);
+                       }
                }
 
-               const paramsRe = MODEL_PARAMS_RE;
-               const activatedParamsRe = MODEL_ACTIVATED_PARAMS_RE;
+               const segments = modelStr.split(MODEL_ID_SEGMENT_SEPARATOR);
 
+               // 4. Detect trailing quantization from dash-separated segments
+               //    Handle UD-prefixed quantization (e.g. `UD-Q8_K_XL`) and
+               //    standalone quantization (e.g. `Q4_K_M`, `BF16`, `F16`, `MXFP4`)
+               if (!result.quantization && segments.length > 1) {
+                       const last = segments[segments.length - 1];
+                       const secondLast = segments.length > 2 ? segments[segments.length - 2] : null;
+
+                       if (MODEL_QUANTIZATION_SEGMENT_RE.test(last)) {
+                               if (secondLast && MODEL_CUSTOM_QUANTIZATION_PREFIX_RE.test(secondLast)) {
+                                       result.quantization = `${secondLast}-${last}`;
+                                       segments.splice(segments.length - 2, 2);
+                               } else {
+                                       result.quantization = last;
+                                       segments.pop();
+                               }
+                       }
+               }
+
+               // 5. Find params and activated params
                let paramsIdx = MODEL_ID_NOT_FOUND;
                let activatedParamsIdx = MODEL_ID_NOT_FOUND;
 
                for (let i = 0; i < segments.length; i++) {
                        const seg = segments[i];
-                       if (paramsIdx === -1 && paramsRe.test(seg)) {
+
+                       if (paramsIdx === MODEL_ID_NOT_FOUND && MODEL_PARAMS_RE.test(seg)) {
                                paramsIdx = i;
                                result.params = seg.toUpperCase();
-                       } else if (activatedParamsRe.test(seg)) {
+                       } else if (paramsIdx !== MODEL_ID_NOT_FOUND && MODEL_ACTIVATED_PARAMS_RE.test(seg)) {
                                activatedParamsIdx = i;
                                result.activatedParams = seg.toUpperCase();
                        }
                }
 
+               // 6. Model name = segments before params; tags = remaining segments after params
                const pivotIdx = paramsIdx !== MODEL_ID_NOT_FOUND ? paramsIdx : segments.length;
 
                result.modelName = segments.slice(0, pivotIdx).join(MODEL_ID_SEGMENT_SEPARATOR) || null;
 
                if (paramsIdx !== MODEL_ID_NOT_FOUND) {
-                       result.tags = segments
-                               .slice(paramsIdx + 1)
-                               .filter((_, relIdx) => paramsIdx + 1 + relIdx !== activatedParamsIdx);
+                       result.tags = segments.slice(paramsIdx + 1).filter((_, relIdx) => {
+                               const absIdx = paramsIdx + 1 + relIdx;
+                               if (absIdx === activatedParamsIdx) return false;
+
+                               return !MODEL_IGNORED_SEGMENTS.has(segments[absIdx].toUpperCase());
+                       });
                }
 
                return result;
index dc8e86485ceb32d78ae8cdf3569b7c31c056b87e..b4d5f11f576b2099416e6ce06ff320bb038f4f39 100644 (file)
@@ -25,7 +25,6 @@ export interface ParsedModelId {
        modelName: string | null;
        params: string | null;
        activatedParams: string | null;
-       format: string | null;
        quantization: string | null;
        tags: string[];
 }
diff --git a/tools/server/webui/tests/unit/model-id-parser.test.ts b/tools/server/webui/tests/unit/model-id-parser.test.ts
new file mode 100644 (file)
index 0000000..3c2937d
--- /dev/null
@@ -0,0 +1,270 @@
+import { describe, expect, it } from 'vitest';
+import { ModelsService } from '$lib/services/models.service';
+
+const { parseModelId } = ModelsService;
+
+describe('parseModelId', () => {
+       it('handles unknown patterns correctly', () => {
+               expect(parseModelId('model-name-1')).toStrictEqual({
+                       activatedParams: null,
+                       modelName: 'model-name-1',
+                       orgName: null,
+                       params: null,
+                       quantization: null,
+                       raw: 'model-name-1',
+                       tags: []
+               });
+
+               expect(parseModelId('org/model-name-2')).toStrictEqual({
+                       activatedParams: null,
+                       modelName: 'model-name-2',
+                       orgName: 'org',
+                       params: null,
+                       quantization: null,
+                       raw: 'org/model-name-2',
+                       tags: []
+               });
+       });
+
+       it('extracts model parameters correctly', () => {
+               expect(parseModelId('model-100B-BF16')).toMatchObject({ params: '100B' });
+               expect(parseModelId('model-100B:Q4_K_M')).toMatchObject({ params: '100B' });
+       });
+
+       it('extracts model parameters correctly in lowercase', () => {
+               expect(parseModelId('model-100b-bf16')).toMatchObject({ params: '100B' });
+               expect(parseModelId('model-100b:q4_k_m')).toMatchObject({ params: '100B' });
+       });
+
+       it('extracts activated parameters correctly', () => {
+               expect(parseModelId('model-100B-A10B-BF16')).toMatchObject({ activatedParams: 'A10B' });
+               expect(parseModelId('model-100B-A10B:Q4_K_M')).toMatchObject({ activatedParams: 'A10B' });
+       });
+
+       it('extracts activated parameters correctly in lowercase', () => {
+               expect(parseModelId('model-100b-a10b-bf16')).toMatchObject({ activatedParams: 'A10B' });
+               expect(parseModelId('model-100b-a10b:q4_k_m')).toMatchObject({ activatedParams: 'A10B' });
+       });
+
+       it('extracts quantization correctly', () => {
+               // Dash-separated quantization
+               expect(parseModelId('model-100B-UD-IQ1_S')).toMatchObject({ quantization: 'UD-IQ1_S' });
+               expect(parseModelId('model-100B-IQ4_XS')).toMatchObject({ quantization: 'IQ4_XS' });
+               expect(parseModelId('model-100B-Q4_K_M')).toMatchObject({ quantization: 'Q4_K_M' });
+               expect(parseModelId('model-100B-Q8_0')).toMatchObject({ quantization: 'Q8_0' });
+               expect(parseModelId('model-100B-UD-Q8_K_XL')).toMatchObject({ quantization: 'UD-Q8_K_XL' });
+               expect(parseModelId('model-100B-F16')).toMatchObject({ quantization: 'F16' });
+               expect(parseModelId('model-100B-BF16')).toMatchObject({ quantization: 'BF16' });
+               expect(parseModelId('model-100B-MXFP4')).toMatchObject({ quantization: 'MXFP4' });
+
+               // Colon-separated quantization
+               expect(parseModelId('model-100B:UD-IQ1_S')).toMatchObject({ quantization: 'UD-IQ1_S' });
+               expect(parseModelId('model-100B:IQ4_XS')).toMatchObject({ quantization: 'IQ4_XS' });
+               expect(parseModelId('model-100B:Q4_K_M')).toMatchObject({ quantization: 'Q4_K_M' });
+               expect(parseModelId('model-100B:Q8_0')).toMatchObject({ quantization: 'Q8_0' });
+               expect(parseModelId('model-100B:UD-Q8_K_XL')).toMatchObject({ quantization: 'UD-Q8_K_XL' });
+               expect(parseModelId('model-100B:F16')).toMatchObject({ quantization: 'F16' });
+               expect(parseModelId('model-100B:BF16')).toMatchObject({ quantization: 'BF16' });
+               expect(parseModelId('model-100B:MXFP4')).toMatchObject({ quantization: 'MXFP4' });
+
+               // Dot-separated quantization
+               expect(parseModelId('nomic-embed-text-v2-moe.Q4_K_M')).toMatchObject({
+                       quantization: 'Q4_K_M'
+               });
+       });
+
+       it('extracts additional tags correctly', () => {
+               expect(parseModelId('model-100B-foobar-Q4_K_M')).toMatchObject({ tags: ['foobar'] });
+               expect(parseModelId('model-100B-A10B-foobar-1M-BF16')).toMatchObject({
+                       tags: ['foobar', '1M']
+               });
+               expect(parseModelId('model-100B-1M-foobar:UD-Q8_K_XL')).toMatchObject({
+                       tags: ['1M', 'foobar']
+               });
+       });
+
+       it('filters out container format segments from tags', () => {
+               expect(parseModelId('model-100B-GGUF-Instruct-BF16')).toMatchObject({
+                       tags: ['Instruct']
+               });
+               expect(parseModelId('model-100B-GGML-Instruct:Q4_K_M')).toMatchObject({
+                       tags: ['Instruct']
+               });
+       });
+
+       it('handles real-world examples correctly', () => {
+               expect(parseModelId('meta-llama/Llama-3.1-8B')).toStrictEqual({
+                       activatedParams: null,
+                       modelName: 'Llama-3.1',
+                       orgName: 'meta-llama',
+                       params: '8B',
+                       quantization: null,
+                       raw: 'meta-llama/Llama-3.1-8B',
+                       tags: []
+               });
+
+               expect(parseModelId('openai/gpt-oss-120b-MXFP4')).toStrictEqual({
+                       activatedParams: null,
+                       modelName: 'gpt-oss',
+                       orgName: 'openai',
+                       params: '120B',
+                       quantization: 'MXFP4',
+                       raw: 'openai/gpt-oss-120b-MXFP4',
+                       tags: []
+               });
+
+               expect(parseModelId('openai/gpt-oss-20b:Q4_K_M')).toStrictEqual({
+                       activatedParams: null,
+                       modelName: 'gpt-oss',
+                       orgName: 'openai',
+                       params: '20B',
+                       quantization: 'Q4_K_M',
+                       raw: 'openai/gpt-oss-20b:Q4_K_M',
+                       tags: []
+               });
+
+               expect(parseModelId('Qwen/Qwen3-Coder-30B-A3B-Instruct-1M-BF16')).toStrictEqual({
+                       activatedParams: 'A3B',
+                       modelName: 'Qwen3-Coder',
+                       orgName: 'Qwen',
+                       params: '30B',
+                       quantization: 'BF16',
+                       raw: 'Qwen/Qwen3-Coder-30B-A3B-Instruct-1M-BF16',
+                       tags: ['Instruct', '1M']
+               });
+       });
+
+       it('handles real-world examples with quantization in segments', () => {
+               expect(parseModelId('meta-llama/Llama-4-Scout-17B-16E-Instruct-Q4_K_M')).toStrictEqual({
+                       activatedParams: null,
+                       modelName: 'Llama-4-Scout',
+                       orgName: 'meta-llama',
+                       params: '17B',
+                       quantization: 'Q4_K_M',
+                       raw: 'meta-llama/Llama-4-Scout-17B-16E-Instruct-Q4_K_M',
+                       tags: ['16E', 'Instruct']
+               });
+
+               expect(parseModelId('MiniMaxAI/MiniMax-M2-IQ4_XS')).toStrictEqual({
+                       activatedParams: null,
+                       modelName: 'MiniMax-M2',
+                       orgName: 'MiniMaxAI',
+                       params: null,
+                       quantization: 'IQ4_XS',
+                       raw: 'MiniMaxAI/MiniMax-M2-IQ4_XS',
+                       tags: []
+               });
+
+               expect(parseModelId('MiniMaxAI/MiniMax-M2-UD-Q3_K_XL')).toStrictEqual({
+                       activatedParams: null,
+                       modelName: 'MiniMax-M2',
+                       orgName: 'MiniMaxAI',
+                       params: null,
+                       quantization: 'UD-Q3_K_XL',
+                       raw: 'MiniMaxAI/MiniMax-M2-UD-Q3_K_XL',
+                       tags: []
+               });
+
+               expect(parseModelId('mistralai/Devstral-2-123B-Instruct-2512-Q4_K_M')).toStrictEqual({
+                       activatedParams: null,
+                       modelName: 'Devstral-2',
+                       orgName: 'mistralai',
+                       params: '123B',
+                       quantization: 'Q4_K_M',
+                       raw: 'mistralai/Devstral-2-123B-Instruct-2512-Q4_K_M',
+                       tags: ['Instruct', '2512']
+               });
+
+               expect(parseModelId('mistralai/Devstral-Small-2-24B-Instruct-2512-Q8_0')).toStrictEqual({
+                       activatedParams: null,
+                       modelName: 'Devstral-Small-2',
+                       orgName: 'mistralai',
+                       params: '24B',
+                       quantization: 'Q8_0',
+                       raw: 'mistralai/Devstral-Small-2-24B-Instruct-2512-Q8_0',
+                       tags: ['Instruct', '2512']
+               });
+
+               expect(parseModelId('noctrex/GLM-4.7-Flash-MXFP4_MOE')).toStrictEqual({
+                       activatedParams: null,
+                       modelName: 'GLM-4.7-Flash',
+                       orgName: 'noctrex',
+                       params: null,
+                       quantization: 'MXFP4_MOE',
+                       raw: 'noctrex/GLM-4.7-Flash-MXFP4_MOE',
+                       tags: []
+               });
+
+               expect(parseModelId('Qwen/Qwen3-Coder-Next-Q4_K_M')).toStrictEqual({
+                       activatedParams: null,
+                       modelName: 'Qwen3-Coder-Next',
+                       orgName: 'Qwen',
+                       params: null,
+                       quantization: 'Q4_K_M',
+                       raw: 'Qwen/Qwen3-Coder-Next-Q4_K_M',
+                       tags: []
+               });
+
+               expect(parseModelId('openai/gpt-oss-120b-Q4_K_M')).toStrictEqual({
+                       activatedParams: null,
+                       modelName: 'gpt-oss',
+                       orgName: 'openai',
+                       params: '120B',
+                       quantization: 'Q4_K_M',
+                       raw: 'openai/gpt-oss-120b-Q4_K_M',
+                       tags: []
+               });
+
+               expect(parseModelId('openai/gpt-oss-20b-F16')).toStrictEqual({
+                       activatedParams: null,
+                       modelName: 'gpt-oss',
+                       orgName: 'openai',
+                       params: '20B',
+                       quantization: 'F16',
+                       raw: 'openai/gpt-oss-20b-F16',
+                       tags: []
+               });
+
+               expect(parseModelId('nomic-embed-text-v2-moe.Q4_K_M')).toStrictEqual({
+                       activatedParams: null,
+                       modelName: 'nomic-embed-text-v2-moe',
+                       orgName: null,
+                       params: null,
+                       quantization: 'Q4_K_M',
+                       raw: 'nomic-embed-text-v2-moe.Q4_K_M',
+                       tags: []
+               });
+       });
+
+       it('handles ambiguous model names', () => {
+               // Qwen3.5 Instruct vs Thinking — tags should distinguish them
+               expect(parseModelId('Qwen/Qwen3.5-30B-A3B-Instruct')).toMatchObject({
+                       modelName: 'Qwen3.5',
+                       params: '30B',
+                       activatedParams: 'A3B',
+                       tags: ['Instruct']
+               });
+
+               expect(parseModelId('Qwen/Qwen3.5-30B-A3B-Thinking')).toMatchObject({
+                       modelName: 'Qwen3.5',
+                       params: '30B',
+                       activatedParams: 'A3B',
+                       tags: ['Thinking']
+               });
+
+               // Dot-separated quantization with variant suffixes
+               expect(parseModelId('gemma-3-27b-it-heretic-v2.Q8_0')).toMatchObject({
+                       modelName: 'gemma-3',
+                       params: '27B',
+                       quantization: 'Q8_0',
+                       tags: ['it', 'heretic', 'v2']
+               });
+
+               expect(parseModelId('gemma-3-27b-it.Q8_0')).toMatchObject({
+                       modelName: 'gemma-3',
+                       params: '27B',
+                       quantization: 'Q8_0',
+                       tags: ['it']
+               });
+       });
+});