SimpleChat v3.1: Boolean chat request options in Settings UI, cache_prompt (#7950)

author HanishKVC <redacted>

Tue, 25 Jun 2024 11:27:35 +0000 (16:57 +0530)

committer GitHub <redacted>

Tue, 25 Jun 2024 11:27:35 +0000 (21:27 +1000)
author HanishKVC <redacted>
Tue, 25 Jun 2024 11:27:35 +0000 (16:57 +0530)
committer GitHub <redacted>
Tue, 25 Jun 2024 11:27:35 +0000 (21:27 +1000)
diff --git a/examples/server/public_simplechat/readme.md b/examples/server/public_simplechat/readme.md

index 2dc17782552569736238d27093e5cfe6d28363a7..21410199f60169b5f8c696e246424c4f82cfd1b3 100644 (file)
--- a/examples/server/public_simplechat/readme.md
+++ b/examples/server/public_simplechat/readme.md
@@ -3,6 +3,13 @@
  
  by Humans for All.
  
+## quickstart
+
+To run from the build dir
+
+bin/llama-server -m path/model.gguf --path ../examples/server/public_simplechat
+
+Continue reading for the details.
  
  ## overview
  
@@ -14,6 +21,8 @@ own system prompts.
  This allows seeing the generated text / ai-model response in oneshot at the end, after it is fully generated,
  or potentially as it is being generated, in a streamed manner from the server/ai-model.
  
+![Chat and Settings screens](./simplechat_screens.webp "Chat and Settings screens")
+
  Auto saves the chat session locally as and when the chat is progressing and inturn at a later time when you
  open SimpleChat, option is provided to restore the old chat session, if a matching one exists.
  
@@ -170,17 +179,23 @@ It is attached to the document object. Some of these can also be updated using t
      The histogram/freq based trimming logic is currently tuned for english language wrt its
      is-it-a-alpabetic|numeral-char regex match logic.
  
-  chatRequestOptions - maintains the list of options/fields to send along with chat request,
+  apiRequestOptions - maintains the list of options/fields to send along with api request,
    irrespective of whether /chat/completions or /completions endpoint.
  
      If you want to add additional options/fields to send to the server/ai-model, and or
      modify the existing options value or remove them, for now you can update this global var
      using browser's development-tools/console.
  
-    For string and numeric fields in chatRequestOptions, including even those added by a user
-    at runtime by directly modifying gMe.chatRequestOptions, setting ui entries will be auto
+    For string, numeric and boolean fields in apiRequestOptions, including even those added by a
+    user at runtime by directly modifying gMe.apiRequestOptions, settings ui entries will be auto
      created.
  
+    The cache_prompt option supported by examples/server can be controlled by the user, so that
+    any caching supported wrt the system-prompt and chat history can get used where possible. When
+    the chat history sliding window is enabled, cache_prompt logic may or may not kick in at the
+    backend wrt the same, based on aspects related to the model, positional encoding, attention
+    mechanism, et al. However, the system prompt should ideally get the benefit of caching.
+
    headers - maintains the list of http headers sent when request is made to the server. By default
    Content-Type is set to application/json. Additionally Authorization entry is provided, which can
    be set if needed using the settings ui.
@@ -197,10 +212,10 @@ It is attached to the document object. Some of these can also be updated using t
      >0 : Send the latest chat history from the latest system prompt, limited to specified cnt.
  
  
-By using gMe's iRecentUserMsgCnt and chatRequestOptions.max_tokens one can try to control the
-implications of loading of the ai-model's context window by chat history, wrt chat response to
-some extent in a simple crude way. You may also want to control the context size enabled when
-the server loads ai-model, on the server end.
+By using gMe's iRecentUserMsgCnt and apiRequestOptions.max_tokens/n_predict one can try to control
+the implications of loading of the ai-model's context window by chat history, wrt chat response to
+some extent in a simple crude way. You may also want to control the context size enabled when the
+server loads ai-model, on the server end.
  
  
  Sometimes the browser may be stuborn with caching of the file, so your updates to html/css/js
@@ -237,12 +252,12 @@ also be started with a model context size of 1k or more, to be on safe side.
    internal n_predict, for now add the same here on the client side, maybe later add max_tokens
    to /completions endpoint handling code on server side.
  
-NOTE: One may want to experiment with frequency/presence penalty fields in chatRequestOptions
-wrt the set of fields sent to server along with the user query. To check how the model behaves
+NOTE: One may want to experiment with frequency/presence penalty fields in apiRequestOptions
+wrt the set of fields sent to server along with the user query, to check how the model behaves
  wrt repeatations in general in the generated text response.
  
  A end-user can change these behaviour by editing gMe from browser's devel-tool/console or by
-using the providing settings ui.
+using the provided settings ui (for settings exposed through the ui).
  
  
  ### OpenAi / Equivalent API WebService
@@ -253,7 +268,7 @@ for a minimal chatting experimentation by setting the below.
  * the baseUrl in settings ui
    * https://api.openai.com/v1 or similar
  
-* Wrt request body - gMe.chatRequestOptions
+* Wrt request body - gMe.apiRequestOptions
    * model (settings ui)
    * any additional fields if required in future
  
diff --git a/examples/server/public_simplechat/simplechat.js b/examples/server/public_simplechat/simplechat.js

index 25afb2564913916a83245b51258f5a4fcc8495cd..8e0df3b61df2b1b279b6419583d2f3dfd6542b0c 100644 (file)
--- a/examples/server/public_simplechat/simplechat.js
+++ b/examples/server/public_simplechat/simplechat.js
@@ -222,8 +222,8 @@ class SimpleChat {
       * @param {Object} obj
       */
      request_jsonstr_extend(obj) {
-        for(let k in gMe.chatRequestOptions) {
-            obj[k] = gMe.chatRequestOptions[k];
+        for(let k in gMe.apiRequestOptions) {
+            obj[k] = gMe.apiRequestOptions[k];
          }
          if (gMe.bStream) {
              obj["stream"] = true;
@@ -740,11 +740,12 @@ class Me {
              "Authorization": "", // Authorization: Bearer OPENAI_API_KEY
          }
          // Add needed fields wrt json object to be sent wrt LLM web services completions endpoint.
-        this.chatRequestOptions = {
+        this.apiRequestOptions = {
              "model": "gpt-3.5-turbo",
              "temperature": 0.7,
              "max_tokens": 1024,
              "n_predict": 1024,
+            "cache_prompt": false,
              //"frequency_penalty": 1.2,
              //"presence_penalty": 1.2,
          };
@@ -800,51 +801,55 @@ class Me {
  
              ui.el_create_append_p(`bStream:${this.bStream}`, elDiv);
  
-            ui.el_create_append_p(`bCompletionFreshChatAlways:${this.bCompletionFreshChatAlways}`, elDiv);
-
-            ui.el_create_append_p(`bCompletionInsertStandardRolePrefix:${this.bCompletionInsertStandardRolePrefix}`, elDiv);
-
              ui.el_create_append_p(`bTrimGarbage:${this.bTrimGarbage}`, elDiv);
  
+            ui.el_create_append_p(`ApiEndPoint:${this.apiEP}`, elDiv);
+
              ui.el_create_append_p(`iRecentUserMsgCnt:${this.iRecentUserMsgCnt}`, elDiv);
  
-            ui.el_create_append_p(`ApiEndPoint:${this.apiEP}`, elDiv);
+            ui.el_create_append_p(`bCompletionFreshChatAlways:${this.bCompletionFreshChatAlways}`, elDiv);
+
+            ui.el_create_append_p(`bCompletionInsertStandardRolePrefix:${this.bCompletionInsertStandardRolePrefix}`, elDiv);
  
          }
  
-        ui.el_create_append_p(`chatRequestOptions:${JSON.stringify(this.chatRequestOptions, null, " - ")}`, elDiv);
+        ui.el_create_append_p(`apiRequestOptions:${JSON.stringify(this.apiRequestOptions, null, " - ")}`, elDiv);
          ui.el_create_append_p(`headers:${JSON.stringify(this.headers, null, " - ")}`, elDiv);
  
      }
  
      /**
-     * Auto create ui input elements for fields in ChatRequestOptions
+     * Auto create ui input elements for fields in apiRequestOptions
       * Currently supports text and number field types.
       * @param {HTMLDivElement} elDiv
       */
-    show_settings_chatrequestoptions(elDiv) {
+    show_settings_apirequestoptions(elDiv) {
          let typeDict = {
              "string": "text",
              "number": "number",
          };
          let fs = document.createElement("fieldset");
          let legend = document.createElement("legend");
-        legend.innerText = "ChatRequestOptions";
+        legend.innerText = "ApiRequestOptions";
          fs.appendChild(legend);
          elDiv.appendChild(fs);
-        for(const k in this.chatRequestOptions) {
-            let val = this.chatRequestOptions[k];
+        for(const k in this.apiRequestOptions) {
+            let val = this.apiRequestOptions[k];
              let type = typeof(val);
-            if (!((type == "string") || (type == "number"))) {
-                continue;
+            if (((type == "string") || (type == "number"))) {
+                let inp = ui.el_creatediv_input(`Set${k}`, k, typeDict[type], this.apiRequestOptions[k], (val)=>{
+                    if (type == "number") {
+                        val = Number(val);
+                    }
+                    this.apiRequestOptions[k] = val;
+                });
+                fs.appendChild(inp.div);
+            } else if (type == "boolean") {
+                let bbtn = ui.el_creatediv_boolbutton(`Set{k}`, k, {true: "true", false: "false"}, val, (userVal)=>{
+                    this.apiRequestOptions[k] = userVal;
+                });
+                fs.appendChild(bbtn.div);
              }
-            let inp = ui.el_creatediv_input(`Set${k}`, k, typeDict[type], this.chatRequestOptions[k], (val)=>{
-                if (type == "number") {
-                    val = Number(val);
-                }
-                this.chatRequestOptions[k] = val;
-            });
-            fs.appendChild(inp.div);
          }
      }
  
@@ -870,32 +875,32 @@ class Me {
          });
          elDiv.appendChild(bb.div);
  
-        bb = ui.el_creatediv_boolbutton("SetCompletionFreshChatAlways", "CompletionFreshChatAlways", {true: "[+] yes fresh", false: "[-] no, with history"}, this.bCompletionFreshChatAlways, (val)=>{
-            this.bCompletionFreshChatAlways = val;
+        bb = ui.el_creatediv_boolbutton("SetTrimGarbage", "TrimGarbage", {true: "[+] yes trim", false: "[-] dont trim"}, this.bTrimGarbage, (val)=>{
+            this.bTrimGarbage = val;
          });
          elDiv.appendChild(bb.div);
  
-        bb = ui.el_creatediv_boolbutton("SetCompletionInsertStandardRolePrefix", "CompletionInsertStandardRolePrefix", {true: "[+] yes insert", false: "[-] dont insert"}, this.bCompletionInsertStandardRolePrefix, (val)=>{
-            this.bCompletionInsertStandardRolePrefix = val;
-        });
-        elDiv.appendChild(bb.div);
+        this.show_settings_apirequestoptions(elDiv);
  
-        bb = ui.el_creatediv_boolbutton("SetTrimGarbage", "TrimGarbage", {true: "[+] yes trim", false: "[-] dont trim"}, this.bTrimGarbage, (val)=>{
-            this.bTrimGarbage = val;
+        let sel = ui.el_creatediv_select("SetApiEP", "ApiEndPoint", ApiEP.Type, this.apiEP, (val)=>{
+            this.apiEP = ApiEP.Type[val];
          });
-        elDiv.appendChild(bb.div);
+        elDiv.appendChild(sel.div);
  
-        let sel = ui.el_creatediv_select("SetChatHistoryInCtxt", "ChatHistoryInCtxt", this.sRecentUserMsgCnt, this.iRecentUserMsgCnt, (val)=>{
+        sel = ui.el_creatediv_select("SetChatHistoryInCtxt", "ChatHistoryInCtxt", this.sRecentUserMsgCnt, this.iRecentUserMsgCnt, (val)=>{
              this.iRecentUserMsgCnt = this.sRecentUserMsgCnt[val];
          });
          elDiv.appendChild(sel.div);
  
-        sel = ui.el_creatediv_select("SetApiEP", "ApiEndPoint", ApiEP.Type, this.apiEP, (val)=>{
-            this.apiEP = ApiEP.Type[val];
+        bb = ui.el_creatediv_boolbutton("SetCompletionFreshChatAlways", "CompletionFreshChatAlways", {true: "[+] yes fresh", false: "[-] no, with history"}, this.bCompletionFreshChatAlways, (val)=>{
+            this.bCompletionFreshChatAlways = val;
          });
-        elDiv.appendChild(sel.div);
+        elDiv.appendChild(bb.div);
  
-        this.show_settings_chatrequestoptions(elDiv);
+        bb = ui.el_creatediv_boolbutton("SetCompletionInsertStandardRolePrefix", "CompletionInsertStandardRolePrefix", {true: "[+] yes insert", false: "[-] dont insert"}, this.bCompletionInsertStandardRolePrefix, (val)=>{
+            this.bCompletionInsertStandardRolePrefix = val;
+        });
+        elDiv.appendChild(bb.div);
  
      }
  
diff --git a/examples/server/public_simplechat/simplechat_screens.webp b/examples/server/public_simplechat/simplechat_screens.webp

new file mode 100644 (file)

index 0000000..ccea443

Binary files /dev/null and b/examples/server/public_simplechat/simplechat_screens.webp differ
author	HanishKVC <redacted>
	Tue, 25 Jun 2024 11:27:35 +0000 (16:57 +0530)
committer	GitHub <redacted>
	Tue, 25 Jun 2024 11:27:35 +0000 (21:27 +1000)
examples/server/public_simplechat/readme.md		patch \| blob \| history
examples/server/public_simplechat/simplechat.js		patch \| blob \| history
examples/server/public_simplechat/simplechat_screens.webp	[new file with mode: 0644]	patch \| blob