english : use `typos` to fix comments and logs (#4354)

author Richard Kiss <redacted>

Tue, 12 Dec 2023 09:53:36 +0000 (01:53 -0800)

committer GitHub <redacted>

Tue, 12 Dec 2023 09:53:36 +0000 (11:53 +0200)
author Richard Kiss <redacted>
Tue, 12 Dec 2023 09:53:36 +0000 (01:53 -0800)
committer GitHub <redacted>
Tue, 12 Dec 2023 09:53:36 +0000 (11:53 +0200)
diff --git a/common/log.h b/common/log.h

index c0e814861e0c6944e3cce91552f3096d9546bc31..e4e1b9f4f01aa8a23b7f6593be204bb379a37f08 100644 (file)
--- a/common/log.h
+++ b/common/log.h
@@ -61,13 +61,13 @@
  //  #define LOG_TARGET stderr
  //  #include "log.h"
  //
-//  The log target can also be redirected to a diffrent function
+//  The log target can also be redirected to a different function
  //  like so:
  //
-//  #define LOG_TARGET log_handler_diffrent()
+//  #define LOG_TARGET log_handler_different()
  //  #include "log.h"
  //
-//  FILE* log_handler_diffrent()
+//  FILE* log_handler_different()
  //  {
  //      return stderr;
  //  }
@@ -421,7 +421,7 @@ inline FILE *log_handler2_impl(bool change = false, LogTriState append = LogTriS
  
  // Disables logs entirely at runtime.
  //  Makes LOG() and LOG_TEE() produce no output,
-//  untill enabled back.
+//  until enabled back.
  #define log_disable() log_disable_impl()
  
  // INTERNAL, DO NOT USE
diff --git a/convert.py b/convert.py

index 6e95d6cb37e7958a8801dedf427e75734cd2cd85..a6fc6b8ea893309b421d97ed3ecb2bcd79000ac5 100755 (executable)
--- a/convert.py
+++ b/convert.py
@@ -585,7 +585,7 @@ def merge_multifile_models(models_plus: list[ModelPlus]) -> ModelPlus:
  
      if any("model.embed_tokens.weight" in mp.model for mp in models_plus):
          # Transformers models put different tensors in different files, but
-        # don't split indivdual tensors between files.
+        # don't split individual tensors between files.
          model: LazyModel = {}
          for mp in models_plus:
              model.update(mp.model)
@@ -678,7 +678,7 @@ class LazyUnpickler(pickle.Unpickler):
          return func(*args)
  
      CLASSES: dict[tuple[str, str], Any] = {
-        # getattr used here as a workaround for mypy not being smart enough to detrmine
+        # getattr used here as a workaround for mypy not being smart enough to determine
          # the staticmethods have a __func__ attribute.
          ('torch._tensor', '_rebuild_from_type_v2'): getattr(rebuild_from_type_v2, '__func__'),
          ('torch._utils', '_rebuild_tensor_v2'): getattr(lazy_rebuild_tensor_v2, '__func__'),
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp

index fc0656c231a0cf1e6a0f94bd60a49181e423da75..4bb7b93b63440c176772e7a32f97e622b208a982 100644 (file)
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -739,7 +739,7 @@ bool clip_image_preprocess(const clip_ctx * ctx, const clip_image_u8 * img, clip
          temp->ny = longer_side;
          temp->size = 3 * longer_side * longer_side;
          temp->data = new uint8_t[temp->size]();
-        uint8_t bc[3] = {122, 116, 104}; // bakground color in RGB from LLaVA
+        uint8_t bc[3] = {122, 116, 104}; // background color in RGB from LLaVA
  
          // fill with background color
          for (size_t i = 0; i < temp->size; i++) {
diff --git a/examples/llava/convert-image-encoder-to-gguf.py b/examples/llava/convert-image-encoder-to-gguf.py

index 729aaef8f0fd21690583eae756981d6e19dadd2a..03688e0ea1889added422e3d3ea75de406bc86f2 100644 (file)
--- a/examples/llava/convert-image-encoder-to-gguf.py
+++ b/examples/llava/convert-image-encoder-to-gguf.py
@@ -51,7 +51,7 @@ def bytes_to_unicode():
      The reversible bpe codes work on unicode strings.
      This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
      When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
-    This is a signficant percentage of your normal, say, 32K bpe vocab.
+    This is a significant percentage of your normal, say, 32K bpe vocab.
      To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
      And avoids mapping to whitespace/control characters the bpe code barfs on.
      """
diff --git a/examples/lookahead/README.md b/examples/lookahead/README.md

index 252a6689ef52857370ff424ae7b5e95916618f77..a69a471b47d397c2318787df77081066903a87d6 100644 (file)
--- a/examples/lookahead/README.md
+++ b/examples/lookahead/README.md
@@ -1,6 +1,6 @@
  # llama.cpp/examples/lookahead
  
-Demonstartion of lookahead decoding technique:
+Demonstration of lookahead decoding technique:
  
  https://lmsys.org/blog/2023-11-21-lookahead-decoding/
  
diff --git a/examples/server/json.hpp b/examples/server/json.hpp

index 4d1a37ad7cb874769d911fc3362d567da3aca291..ea945f346d67b57acfc4cd9758a76565e3947f46 100644 (file)
--- a/examples/server/json.hpp
+++ b/examples/server/json.hpp
@@ -11227,7 +11227,7 @@ class binary_reader
                  }
                  if (is_ndarray) // ndarray dimensional vector can only contain integers, and can not embed another array
                  {
-                    return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, exception_message(input_format, "ndarray dimentional vector is not allowed", "size"), nullptr));
+                    return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, exception_message(input_format, "ndarray dimensional vector is not allowed", "size"), nullptr));
                  }
                  std::vector<size_t> dim;
                  if (JSON_HEDLEY_UNLIKELY(!get_ubjson_ndarray_size(dim)))
diff --git a/examples/server/public/completion.js b/examples/server/public/completion.js

index b9c442509a2fa2d7d230f5443287569adc81bf28..c281f0fbd55350b74ba62e7587ae58f19c8848fb 100644 (file)
--- a/examples/server/public/completion.js
+++ b/examples/server/public/completion.js
@@ -114,7 +114,7 @@ export async function* llama(prompt, params = {}, config = {}) {
    return content;
  }
  
-// Call llama, return an event target that you can subcribe to
+// Call llama, return an event target that you can subscribe to
  //
  // Example:
  //
diff --git a/examples/server/public/index.html b/examples/server/public/index.html

index 175c52478918a9faa2480700f7017dd3cfd333da..18a6ccf0f38473edad61c3dbf72d893ef2529e0e 100644 (file)
--- a/examples/server/public/index.html
+++ b/examples/server/public/index.html
@@ -238,7 +238,7 @@
        cache_prompt: true
      })
  
-    /* START: Support for storing prompt templates and parameters in borwser LocalStorage */
+    /* START: Support for storing prompt templates and parameters in browser's LocalStorage */
  
      const local_storage_storageKey = "llamacpp_server_local_storage";
  
@@ -282,7 +282,7 @@
      let importedTemplates = local_storage_getDataAsObject('user_templates')
  
      if (importedTemplates) {
-      // saved templates were successfuly imported.
+      // saved templates were successfully imported.
  
        console.log('Processing saved templates and updating default template')
        params.value = { ...params.value, image_data: [] };
@@ -303,7 +303,7 @@
      }
  
      function userTemplateResetToDefault() {
-      console.log('Reseting themplate to default')
+      console.log('Resetting template to default')
        selectedUserTemplate.value.name = 'default';
        selectedUserTemplate.value.data = savedUserTemplates.value['default'];
      }
diff --git a/examples/speculative/README.md b/examples/speculative/README.md

index d88fd37901443fb8867d05534e64421274001fbc..814efa592d94fabca1b184d8f699c557558b5b07 100644 (file)
--- a/examples/speculative/README.md
+++ b/examples/speculative/README.md
@@ -1,6 +1,6 @@
  # llama.cpp/examples/speculative
  
-Demonstartion of speculative decoding and tree-based speculative decoding techniques
+Demonstration of speculative decoding and tree-based speculative decoding techniques
  
  More info:
  
diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp

index dca3f84a5756273071819c965c6eed8cbf978473..20f1fb5bfcd99392efa2a8855825053ccdd071a1 100644 (file)
--- a/examples/speculative/speculative.cpp
+++ b/examples/speculative/speculative.cpp
@@ -428,7 +428,7 @@ int main(int argc, char ** argv) {
              ++n_past_tgt;
          }
  
-        // the first token is always proposed by the traget model before the speculation loop so we erase it here
+        // the first token is always proposed by the target model before the speculation loop so we erase it here
          for (int s = 0; s < n_seq_dft; ++s) {
              if (!drafts[s].active) {
                  continue;
diff --git a/ggml-alloc.h b/ggml-alloc.h

index ad87cebc8873f4584ad99e2c67e110a603f4a71c..64a412468915b47c58ce100bb2dfcaf4d5502b32 100644 (file)
--- a/ggml-alloc.h
+++ b/ggml-alloc.h
@@ -43,7 +43,7 @@ GGML_API size_t ggml_allocr_alloc_graph(ggml_allocr_t alloc, struct ggml_cgraph
  // ggml-backend v2 API
  //
  
-// Seperate tensor and graph allocator objects
+// Separate tensor and graph allocator objects
  // This is necessary for multi-backend allocation because the graph allocator needs to use multiple tensor allocators
  // The original API is kept as a wrapper around the new API
  
diff --git a/ggml-quants.c b/ggml-quants.c

index 7285d5f7fbcc00ce41b5d481b702ecc52c5671f6..0e8163a16b39549671363ac859cad2a7e0aaeefa 100644 (file)
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -3114,7 +3114,7 @@ void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * restri
  
      size_t vl = __riscv_vsetvl_e8m1(qk/2);
  
-    // These tempory registers are for masking and shift operations
+    // These temporary registers are for masking and shift operations
      vuint32m2_t vt_1 = __riscv_vid_v_u32m2(vl);
      vuint32m2_t vt_2 = __riscv_vsll_vv_u32m2(__riscv_vmv_v_x_u32m2(1, vl), vt_1, vl);
  
@@ -4757,7 +4757,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
  
              vl = 16;
  
-            // retreive lane to multiply with scale
+            // retrieve lane to multiply with scale
              vint32m2_t aux0_0 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a0, 0), (scale[0]), vl);
              vint32m2_t aux0_1 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a0, 1), (scale[1]), vl);
              vint32m2_t aux1_0 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a1, 0), (scale[2]), vl);
diff --git a/ggml.c b/ggml.c

index ca56f063c3a87440353e3efce428c18e003517fa..eb7989dc45cefed7f51089148c0be21911fa2422 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -1,4 +1,4 @@
-#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
+#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
  #define _USE_MATH_DEFINES // For M_PI on MSVC
  
  #include "ggml-impl.h"
@@ -33,7 +33,7 @@
  // we should just be careful :)
  #pragma warning(disable: 4244 4267)
  
-// disable POSIX deprecation warnigns
+// disable POSIX deprecation warnings
  // these functions are never going away, anyway
  #pragma warning(disable: 4996)
  #endif
@@ -1760,7 +1760,7 @@ static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size
  static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
  
  // WARN:
-// Mis-confguration can lead to problem that's hard to reason about:
+// Mis-configuration can lead to problems that are hard to reason about:
  // * At best  it crash or talks nosense.
  // * At worst it talks slightly difference but hard to perceive.
  //
@@ -7520,7 +7520,7 @@ static void ggml_compute_forward_acc_f32(
      GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
  
      // view src0 and dst with these strides and data offset inbytes during acc
-    // nb0 is implicitely element_size because src0 and dst are contiguous
+    // nb0 is implicitly element_size because src0 and dst are contiguous
      size_t nb1     = ((int32_t *) dst->op_params)[0];
      size_t nb2     = ((int32_t *) dst->op_params)[1];
      size_t nb3     = ((int32_t *) dst->op_params)[2];
@@ -10161,7 +10161,7 @@ static void ggml_compute_forward_set_f32(
      GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
  
      // view src0 and dst with these strides and data offset inbytes during set
-    // nb0 is implicitely element_size because src0 and dst are contiguous
+    // nb0 is implicitly element_size because src0 and dst are contiguous
      size_t nb1     = ((int32_t *) dst->op_params)[0];
      size_t nb2     = ((int32_t *) dst->op_params)[1];
      size_t nb3     = ((int32_t *) dst->op_params)[2];
@@ -14475,7 +14475,7 @@ void ggml_build_backward_gradient_checkpointing(
              // insert new tensors recomputing src, reusing already made replacements,
              // remember replacements: remember new tensors with mapping from corresponding gf nodes
              // recurse for input tensors,
-            // unless (i.e. terminating when) input tensors are replacments (like checkpoints)
+            // unless (i.e. terminating when) input tensors are replacements (like checkpoints)
              node->src[k] = ggml_recompute_graph_node(ctx, gf, replacements, node->src[k]);
          }
          // insert rewritten backward node with replacements made into resulting backward graph gb
diff --git a/gguf-py/README.md b/gguf-py/README.md

index 502b6a510cc70dcb2d62288b94b9b37fe1e72949..a27d2fc0e10215b805c726897f53777cee851a53 100644 (file)
--- a/gguf-py/README.md
+++ b/gguf-py/README.md
@@ -61,7 +61,7 @@ If you want to publish the package manually for any reason, you need to have `tw
  pip install build twine
  ```
  
-Then, folow these steps to release a new version:
+Then, follow these steps to release a new version:
  
  1. Bump the version in `pyproject.toml`.
  2. Build the package:
diff --git a/llama.cpp b/llama.cpp

index 93d8f3e164a26790c86cc4ba10ad23ee1effc0e4..54fa9e43eb605c10859fa141ca396717417d7105 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -2758,7 +2758,7 @@ static void llm_load_vocab(
          // The assumption is, since special tokens aren't meant to be exposed to end user, they are designed
          //  to be unmatchable by the tokenizer, therefore tokens from the vocab, which are unmatchable by the tokenizer
          //  are special tokens.
-        // From testing, this appears to corelate 1:1 with special tokens.
+        // From testing, this appears to correlate 1:1 with special tokens.
          //
  
          // Counting special tokens and verifying in only one direction
@@ -5846,7 +5846,7 @@ static int llama_decode_internal(
      const int64_t n_embd  = hparams.n_embd;
      const int64_t n_vocab = hparams.n_vocab;
  
-    // helpers for smoother batch API transistion
+    // helpers for smoother batch API transition
      // after deprecating the llama_eval calls, these will be removed
      std::vector<llama_pos> pos;
  
@@ -6625,12 +6625,12 @@ static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list<
  
                  // loop over the text
                  while (true) {
-                    // find the first occurence of a given special token in this fragment
+                    // find the first occurrence of a given special token in this fragment
                      //  passing offset argument only limit the "search area" but match coordinates
                      //  are still relative to the source full raw_text
                      auto match = raw_text->find(special_token, raw_text_base_offset);
  
-                    // no occurences found, stop processing this fragment for a given special token
+                    // no occurrences found, stop processing this fragment for a given special token
                      if (match == std::string::npos) break;
  
                      // check if match is within bounds of offset <-> length
@@ -7829,7 +7829,7 @@ struct llama_beam_search_data {
      }
  
      // Min-heaps are used to efficiently collect the top-k elements (k=n_beams).
-    // The repetative patterns below reflect the 2 stages of heaps:
+    // The repetitive patterns below reflect the 2 stages of heaps:
      //  * Gather elements until the vector is full, then call std::make_heap() on it.
      //  * If the heap is full and a new element is found that should be included, pop the
      //    least element to the back(), replace it with the new, then push it into the heap.
diff --git a/tests/test-grad0.cpp b/tests/test-grad0.cpp

index 7fe9154ddbb1663106a4845dcf8d515070873e13..81c20a89cb586b14cf5b264170f96e0d84f39d56 100644 (file)
--- a/tests/test-grad0.cpp
+++ b/tests/test-grad0.cpp
@@ -1,4 +1,4 @@
-#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
+#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
  #include "ggml.h"
  
  #include <cmath>
diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp

index 88fac0e23106bc4f11c4899da9c12479a29c352a..62d0190f9066c010d565e92c94442c628179525f 100644 (file)
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@@ -117,7 +117,7 @@ static void usage(char * argv[]) {
      printf("  --size SIZE           set test size, divisible by 32 (L1_SIZE:%d)\n", L1_SIZE);
      printf("  -3                    use size as L1, L2, L3 sizes (L1:%d L2:%d L3:%d)\n", L1_SIZE, L2_SIZE, L3_SIZE);
      printf("  -4                    use size as L1, L2, L3, MEM sizes (L1:%d L2:%d L3:%d MEM:%d)\n", L1_SIZE, L2_SIZE, L3_SIZE, MEM_SIZE);
-    printf("  --op OP               set test opration as quantize_row_q_reference, quantize_row_q, dequantize_row_q,\n");
+    printf("  --op OP               set test operation as quantize_row_q_reference, quantize_row_q, dequantize_row_q,\n");
      printf("                        quantize_row_q_dot, vec_dot_q (all)\n");
      printf("  --type TYPE           set test type as");
      for (int i = 0; i < GGML_TYPE_COUNT; i++) {
@@ -202,7 +202,7 @@ int main(int argc, char * argv[]) {
              }
              int alignment = std::stoi(argv[i]);
              if (alignment < 0 || alignment > MAX_ALIGNMENT) {
-            fprintf(stderr, "error: aligment-offset must be less than %d\n", MAX_ALIGNMENT);
+            fprintf(stderr, "error: alignment-offset must be less than %d\n", MAX_ALIGNMENT);
                  invalid_param = true;
                  break;
              }
author	Richard Kiss <redacted>
	Tue, 12 Dec 2023 09:53:36 +0000 (01:53 -0800)
committer	GitHub <redacted>
	Tue, 12 Dec 2023 09:53:36 +0000 (11:53 +0200)
common/log.h		patch \| blob \| history
convert.py		patch \| blob \| history
examples/llava/clip.cpp		patch \| blob \| history
examples/llava/convert-image-encoder-to-gguf.py		patch \| blob \| history
examples/lookahead/README.md		patch \| blob \| history
examples/server/json.hpp		patch \| blob \| history
examples/server/public/completion.js		patch \| blob \| history
examples/server/public/index.html		patch \| blob \| history
examples/speculative/README.md		patch \| blob \| history
examples/speculative/speculative.cpp		patch \| blob \| history
ggml-alloc.h		patch \| blob \| history
ggml-quants.c		patch \| blob \| history
ggml.c		patch \| blob \| history
gguf-py/README.md		patch \| blob \| history
llama.cpp		patch \| blob \| history
tests/test-grad0.cpp		patch \| blob \| history
tests/test-quantize-perf.cpp		patch \| blob \| history