server : (webui) put DeepSeek R1 CoT in a collapsible <details> element (#11364)
author stduhpf <redacted>
Fri, 24 Jan 2025 08:02:38 +0000 (09:02 +0100)
committer GitHub <redacted>
Fri, 24 Jan 2025 08:02:38 +0000 (09:02 +0100)
* webui : put DeepSeek R1 CoT in a collapsible <details> element

* webui: refactor split

* webui: don't use regex to split cot and response

* webui: format+qol

* webui: no loading icon if the model isn't generating

* ui fix, add configs

* add jsdoc types

* only filter </think> for assistant msg

* build

* update build

---------

Co-authored-by: Xuan Son Nguyen <redacted>
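
The "don't use regex to split cot and response" change is implemented by a new splitMsgContent computed property in main.js (see the diff below): the message text is scanned for the literal <think> and </think> markers, and an unterminated <think> block marks the message as still thinking. A minimal standalone sketch of that behavior follows; the helper name splitThinkTags and the sample strings are illustrative and not part of the commit.

// Sketch of the non-regex <think>/</think> split performed by the new
// splitMsgContent computed property in examples/server/webui/src/main.js.
function splitThinkTags(content) {
  let actualContent = '';
  let cot = '';
  let isThinking = false;
  let parts = content.split('<think>', 2);
  actualContent += parts[0];
  while (parts[1] !== undefined) {
    // opening <think> found: everything up to </think> is chain-of-thought
    parts = parts[1].split('</think>', 2);
    cot += parts[0];
    isThinking = true;
    if (parts[1] !== undefined) {
      // closing </think> found: the rest is regular response text again
      isThinking = false;
      parts = parts[1].split('<think>', 2);
      actualContent += parts[0];
    }
  }
  return { content: actualContent, cot, isThinking };
}

// Finished message: CoT and answer are separated, isThinking is false.
splitThinkTags('<think>step 1, step 2</think>The answer is 42.');
// -> { content: 'The answer is 42.', cot: 'step 1, step 2', isThinking: false }

// Message still streaming: no </think> yet, so isThinking stays true and the
// webui keeps the collapsible "Thinking" block expanded (if configured to).
splitThinkTags('<think>still reasoning');
// -> { content: '', cot: 'still reasoning', isThinking: true }
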
examples/server/public/index.html.gz
examples/server/webui/index.html
examples/server/webui/src/main.js

index 26f3583bdf844ccdb67fb197a56991a25d4b2ecc..582ccc0d3f8d07ce79dd9e978772f9c5ea85c426 100644 (file)
Binary files a/examples/server/public/index.html.gz and b/examples/server/public/index.html.gz differ
index 2180ef4ad93ee7fd6d073c38b63c9843fb42d65f..d3893ea4e1dfe2fb93ca25d765280a5a8fff650b 100644 (file)
               :msg="pendingMsg"
               :key="pendingMsg.id"
               :is-generating="isGenerating"
+              :show-thought-in-progress="config.showThoughtInProgress"
               :edit-user-msg-and-regenerate="() => {}"
               :regenerate-msg="() => {}"></message-bubble>
           </div>
               </template>
             </div>
           </details>
+          <!-- Section: Reasoning models -->
+          <details class="collapse collapse-arrow bg-base-200 mb-2 overflow-visible">
+            <summary class="collapse-title font-bold">Reasoning models</summary>
+            <div class="collapse-content">
+              <div class="flex flex-row items-center mb-2">
+                <input type="checkbox" class="checkbox" v-model="config.showThoughtInProgress" />
+                <span class="ml-4">Expand though process by default for generating message</span>
+              </div>
+              <div class="flex flex-row items-center mb-2">
+                <input type="checkbox" class="checkbox" v-model="config.excludeThoughtOnReq" />
+                <span class="ml-4">Exclude thought process when sending request to API (Recommended for DeepSeek-R1)</span>
+              </div>
+            </div>
+          </details>
           <!-- Section: Advanced config -->
           <details class="collapse collapse-arrow bg-base-200 mb-2 overflow-visible">
             <summary class="collapse-title font-bold">Advanced config</summary>
           <span v-if="msg.content === null" class="loading loading-dots loading-md"></span>
           <!-- render message as markdown -->
           <div v-else dir="auto">
-            <vue-markdown :source="msg.content"></vue-markdown>
+            <details v-if="msg.role === 'assistant' && splitMsgContent.cot" class="collapse bg-base-200 collapse-arrow mb-4" :open="splitMsgContent.isThinking && showThoughtInProgress">
+              <summary class="collapse-title">
+                <span v-if="splitMsgContent.isThinking">
+                  <span v-if="isGenerating" class="loading loading-spinner loading-md mr-2" style="vertical-align: middle;"></span>
+                  <b>Thinking</b>
+                </span>
+                <b v-else>Thought Process</b>
+              </summary>
+              <vue-markdown :source="splitMsgContent.cot" dir="auto" class="collapse-content"></vue-markdown>
+            </details>
+            <vue-markdown :source="splitMsgContent.content"></vue-markdown>
           </div>
           <!-- render timings if enabled -->
           <div class="dropdown dropdown-hover dropdown-top mt-2" v-if="timings && config.showTokensPerSecond">
index feb741a4ef02f82f643dc7b893b198fb5cbe574a..90f4ca36802fc10c72ef842bc4d276c5842dff49 100644 (file)
@@ -17,6 +17,11 @@ import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator';
 
 const isDev = import.meta.env.MODE === 'development';
 
+// types
+/** @typedef {{ id: number, role: 'user' | 'assistant', content: string, timings: any }} Message */
+/** @typedef {{ role: 'user' | 'assistant', content: string }} APIMessage */
+/** @typedef {{ id: string, lastModified: number, messages: Array<Message> }} Conversation */
+
 // utility functions
 const isString = (x) => !!x.toLowerCase;
 const isBoolean = (x) => x === true || x === false;
@@ -50,6 +55,8 @@ const CONFIG_DEFAULT = {
   apiKey: '',
   systemMessage: 'You are a helpful assistant.',
   showTokensPerSecond: false,
+  showThoughtInProgress: false,
+  excludeThoughtOnReq: true,
   // make sure these default values are in sync with `common.h`
   samplers: 'edkypmxt',
   temperature: 0.8,
@@ -172,6 +179,7 @@ const MessageBubble = defineComponent({
     config: Object,
     msg: Object,
     isGenerating: Boolean,
+    showThoughtInProgress: Boolean,
     editUserMsgAndRegenerate: Function,
     regenerateMsg: Function,
   },
@@ -188,7 +196,31 @@ const MessageBubble = defineComponent({
         prompt_per_second: this.msg.timings.prompt_n / (this.msg.timings.prompt_ms / 1000),
         predicted_per_second: this.msg.timings.predicted_n / (this.msg.timings.predicted_ms / 1000),
       };
-    }
+    },
+    splitMsgContent() {
+      const content = this.msg.content;
+      if (this.msg.role !== 'assistant') {
+        return { content };
+      }
+      let actualContent = '';
+      let cot = '';
+      let isThinking = false;
+      let thinkSplit = content.split('<think>', 2);
+      actualContent += thinkSplit[0];
+      while (thinkSplit[1] !== undefined) {
+        // <think> tag found
+        thinkSplit = thinkSplit[1].split('</think>', 2);
+        cot += thinkSplit[0];
+        isThinking = true;
+        if (thinkSplit[1] !== undefined) {
+          // </think> closing tag found
+          isThinking = false;
+          thinkSplit = thinkSplit[1].split('<think>', 2);
+          actualContent += thinkSplit[0];
+        }
+      }
+      return { content: actualContent, cot, isThinking };
+    },
   },
   methods: {
     copyMsg() {
@@ -208,7 +240,10 @@ const MessageBubble = defineComponent({
 // format: { [convId]: { id: string, lastModified: number, messages: [...] } }
 // convId is a string prefixed with 'conv-'
 const StorageUtils = {
-  // manage conversations
+  /**
+   * manage conversations
+   * @returns {Array<Conversation>}
+   */
   getAllConversations() {
     const res = [];
     for (const key in localStorage) {
@@ -219,11 +254,19 @@ const StorageUtils = {
     res.sort((a, b) => b.lastModified - a.lastModified);
     return res;
   },
-  // can return null if convId does not exist
+  /**
+   * can return null if convId does not exist
+   * @param {string} convId
+   * @returns {Conversation | null}
+   */
   getOneConversation(convId) {
     return JSON.parse(localStorage.getItem(convId) || 'null');
   },
-  // if convId does not exist, create one
+  /**
+   * if convId does not exist, create one
+   * @param {string} convId
+   * @param {Message} msg
+   */
   appendMsg(convId, msg) {
     if (msg.content === null) return;
     const conv = StorageUtils.getOneConversation(convId) || {
@@ -235,12 +278,24 @@ const StorageUtils = {
     conv.lastModified = Date.now();
     localStorage.setItem(convId, JSON.stringify(conv));
   },
+  /**
+   * Get new conversation id
+   * @returns {string}
+   */
   getNewConvId() {
     return `conv-${Date.now()}`;
   },
+  /**
+   * remove conversation by id
+   * @param {string} convId
+   */
   remove(convId) {
     localStorage.removeItem(convId);
   },
+  /**
+   * keep only the messages for which the predicate returns true
+   * @param {string} convId
+   * @param {(msg: Message) => boolean} predicate
+   */
   filterAndKeepMsgs(convId, predicate) {
     const conv = StorageUtils.getOneConversation(convId);
     if (!conv) return;
@@ -248,6 +303,11 @@ const StorageUtils = {
     conv.lastModified = Date.now();
     localStorage.setItem(convId, JSON.stringify(conv));
   },
+  /**
+   * remove last message from conversation
+   * @param {string} convId
+   * @returns {Message | undefined}
+   */
   popMsg(convId) {
     const conv = StorageUtils.getOneConversation(convId);
     if (!conv) return;
@@ -322,10 +382,12 @@ const mainApp = createApp({
   data() {
     return {
       conversations: StorageUtils.getAllConversations(),
-      messages: [], // { id: number, role: 'user' | 'assistant', content: string }
+      /** @type {Array<Message>} */
+      messages: [],
       viewingConvId: StorageUtils.getNewConvId(),
       inputMsg: '',
       isGenerating: false,
+      /** @type {Message | null} */
       pendingMsg: null, // the on-going message from assistant
       stopGeneration: () => {},
       selectedTheme: StorageUtils.getTheme(),
@@ -333,6 +395,7 @@ const mainApp = createApp({
       showConfigDialog: false,
       // const
       themes: THEMES,
+      /** @type {typeof CONFIG_DEFAULT} */
       configDefault: {...CONFIG_DEFAULT},
       configInfo: {...CONFIG_INFO},
       isDev,
@@ -425,42 +488,50 @@ const mainApp = createApp({
       this.isGenerating = true;
 
       try {
+        /** @type {typeof CONFIG_DEFAULT} */
+        const config = this.config;
         const abortController = new AbortController();
         this.stopGeneration = () => abortController.abort();
+        /** @type {Array<APIMessage>} */
+        let messages = [
+          { role: 'system', content: config.systemMessage },
+          ...normalizeMsgsForAPI(this.messages),
+        ];
+        if (config.excludeThoughtOnReq) {
+          messages = filterThoughtFromMsgs(messages);
+        }
+        if (isDev) console.log({messages});
         const params = {
-          messages: [
-            { role: 'system', content: this.config.systemMessage },
-            ...this.messages,
-          ],
+          messages,
           stream: true,
           cache_prompt: true,
-          samplers: this.config.samplers,
-          temperature: this.config.temperature,
-          dynatemp_range: this.config.dynatemp_range,
-          dynatemp_exponent: this.config.dynatemp_exponent,
-          top_k: this.config.top_k,
-          top_p: this.config.top_p,
-          min_p: this.config.min_p,
-          typical_p: this.config.typical_p,
-          xtc_probability: this.config.xtc_probability,
-          xtc_threshold: this.config.xtc_threshold,
-          repeat_last_n: this.config.repeat_last_n,
-          repeat_penalty: this.config.repeat_penalty,
-          presence_penalty: this.config.presence_penalty,
-          frequency_penalty: this.config.frequency_penalty,
-          dry_multiplier: this.config.dry_multiplier,
-          dry_base: this.config.dry_base,
-          dry_allowed_length: this.config.dry_allowed_length,
-          dry_penalty_last_n: this.config.dry_penalty_last_n,
-          max_tokens: this.config.max_tokens,
-          timings_per_token: !!this.config.showTokensPerSecond,
-          ...(this.config.custom.length ? JSON.parse(this.config.custom) : {}),
+          samplers: config.samplers,
+          temperature: config.temperature,
+          dynatemp_range: config.dynatemp_range,
+          dynatemp_exponent: config.dynatemp_exponent,
+          top_k: config.top_k,
+          top_p: config.top_p,
+          min_p: config.min_p,
+          typical_p: config.typical_p,
+          xtc_probability: config.xtc_probability,
+          xtc_threshold: config.xtc_threshold,
+          repeat_last_n: config.repeat_last_n,
+          repeat_penalty: config.repeat_penalty,
+          presence_penalty: config.presence_penalty,
+          frequency_penalty: config.frequency_penalty,
+          dry_multiplier: config.dry_multiplier,
+          dry_base: config.dry_base,
+          dry_allowed_length: config.dry_allowed_length,
+          dry_penalty_last_n: config.dry_penalty_last_n,
+          max_tokens: config.max_tokens,
+          timings_per_token: !!config.showTokensPerSecond,
+          ...(config.custom.length ? JSON.parse(config.custom) : {}),
         };
         const chunks = sendSSEPostRequest(`${BASE_URL}/v1/chat/completions`, {
           method: 'POST',
           headers: {
             'Content-Type': 'application/json',
-            ...(this.config.apiKey ? {'Authorization': `Bearer ${this.config.apiKey}`} : {})
+            ...(config.apiKey ? {'Authorization': `Bearer ${config.apiKey}`} : {})
           },
           body: JSON.stringify(params),
           signal: abortController.signal,
@@ -477,7 +548,7 @@ const mainApp = createApp({
             };
           }
           const timings = chunk.timings;
-          if (timings && this.config.showTokensPerSecond) {
+          if (timings && config.showTokensPerSecond) {
             // only extract what's really needed, to save some space
             this.pendingMsg.timings = {
               prompt_n: timings.prompt_n,
@@ -598,3 +669,33 @@ try {
     <button class="btn" onClick="localStorage.clear(); window.location.reload();">Clear localStorage</button>
   </div>`;
 }
+
+/**
+ * filter out redundant fields before sending to the API
+ * @param {Array<APIMessage>} messages
+ * @returns {Array<APIMessage>}
+ */
+function normalizeMsgsForAPI(messages) {
+  return messages.map((msg) => {
+    return {
+      role: msg.role,
+      content: msg.content,
+    };
+  });
+}
+
+/**
+ * recommended for DeepSeek-R1: filter out content between <think> and </think> tags
+ * @param {Array<APIMessage>} messages
+ * @returns {Array<APIMessage>}
+ */
+function filterThoughtFromMsgs(messages) {
+  return messages.map((msg) => {
+    return {
+      role: msg.role,
+      content: msg.role === 'assistant'
+        ? msg.content.split('</think>').at(-1).trim()
+        : msg.content,
+    };
+  });
+}
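
For illustration, a usage sketch of the two helpers above, assuming it runs in the same module scope as main.js; the sample conversation is made up.

// Assistant history as stored by the webui: the chain-of-thought stays client-side.
const history = [
  { role: 'user', content: 'What is 6 * 7?' },
  { role: 'assistant', content: '<think>6 * 7 = 42</think>It is 42.' },
];

// With config.excludeThoughtOnReq enabled (the default), only the text after
// the last </think> tag of each assistant message is sent back to the API.
const outgoing = filterThoughtFromMsgs(normalizeMsgsForAPI(history));
// -> [
//      { role: 'user', content: 'What is 6 * 7?' },
//      { role: 'assistant', content: 'It is 42.' },
//    ]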