ggml : fix builds, add ggml-quants-k.o (close #1712, close #1710)

author Georgi Gerganov <redacted>

Tue, 6 Jun 2023 07:18:03 +0000 (10:18 +0300)

committer Georgi Gerganov <redacted>

Tue, 6 Jun 2023 07:18:03 +0000 (10:18 +0300)
author Georgi Gerganov <redacted>
Tue, 6 Jun 2023 07:18:03 +0000 (10:18 +0300)
committer Georgi Gerganov <redacted>
Tue, 6 Jun 2023 07:18:03 +0000 (10:18 +0300)
diff --git a/.gitignore b/.gitignore

index 6cf5c45a752f619c43177f473cc184bddd599281..9b6905ed4ef0c30943123048a9d2b07cba299213 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -35,6 +35,7 @@ models/*
  /benchmark-matmult
  /vdot
  /Pipfile
+/libllama.so
  
  build-info.h
  arm_neon.h
diff --git a/Makefile b/Makefile

index 7c9e7f739aa386ff290c4dd6c1adbfa4f63a42b4..0205f1959d3860d8c947fe0b1d8a147c41d66364 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -243,7 +243,7 @@ llama.o: llama.cpp ggml.h ggml-cuda.h llama.h llama-util.h
  common.o: examples/common.cpp examples/common.h
         $(CXX) $(CXXFLAGS) -c $< -o $@
  
-libllama.so: llama.o ggml.o $(OBJS)
+libllama.so: llama.o ggml.o ggml-quants-k.o $(OBJS)
         $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
  
  clean:
@@ -253,28 +253,28 @@ clean:
  # Examples
  #
  
-main: examples/main/main.cpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS)
+main: examples/main/main.cpp                                  build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS)
         $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
         @echo
         @echo '====  Run ./main -h for help.  ===='
         @echo
  
-quantize: examples/quantize/quantize.cpp build-info.h ggml.o llama.o ggml-quants-k.o $(OBJS)
+quantize: examples/quantize/quantize.cpp                      build-info.h ggml.o ggml-quants-k.o llama.o $(OBJS)
         $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
  
-quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.h ggml.o llama.o ggml-quants-k.o $(OBJS)
+quantize-stats: examples/quantize-stats/quantize-stats.cpp    build-info.h ggml.o ggml-quants-k.o llama.o $(OBJS)
         $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
  
-perplexity: examples/perplexity/perplexity.cpp build-info.h ggml.o llama.o common.o ggml-quants-k.o $(OBJS)
+perplexity: examples/perplexity/perplexity.cpp                build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS)
         $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
  
-embedding: examples/embedding/embedding.cpp build-info.h ggml.o llama.o common.o ggml-quants-k.o $(OBJS)
+embedding: examples/embedding/embedding.cpp                   build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS)
         $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
  
-save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o ggml-quants-k.o $(OBJS)
+save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS)
         $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
  
-server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp build-info.h ggml.o llama.o common.o $(OBJS)
+server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS)
         $(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS)
  
  build-info.h: $(wildcard .git/index) scripts/build-info.sh
@@ -289,7 +289,7 @@ build-info.h: $(wildcard .git/index) scripts/build-info.sh
  # Tests
  #
  
-benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o $(OBJS)
+benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o ggml-quants-k.o $(OBJS)
         $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
         ./$@
  
diff --git a/ggml.c b/ggml.c

index 4e3e7edb98b39ad5485d151acb4590e369977896..8308dd9913d3cb27776370d6d766a63ae0177531 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -14753,7 +14753,7 @@ static void ggml_graph_export_leaf(const struct ggml_tensor * tensor, FILE * fou
      const int64_t * ne = tensor->ne;
      const size_t  * nb = tensor->nb;
  
-    fprintf(fout, "%-6s %-12s %8d %8lld %8lld %8lld %8lld %16zu %16zu %16zu %16zu %16p %32s\n",
+    fprintf(fout, "%-6s %-12s %8d %8jd %jd %jd %jd %16zu %16zu %16zu %16zu %16p %32s\n",
              ggml_type_name(tensor->type),
              ggml_op_name  (tensor->op),
              tensor->n_dims,
@@ -14767,7 +14767,7 @@ static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char
      const int64_t * ne = tensor->ne;
      const size_t  * nb = tensor->nb;
  
-    fprintf(fout, "%-6s %-6s %-12s %8d %8lld %8lld %8lld %8lld %16zu %16zu %16zu %16zu %8d %16p %32s\n",
+    fprintf(fout, "%-6s %-6s %-12s %8d %jd %jd %jd %jd %16zu %16zu %16zu %16zu %8d %16p %32s\n",
              arg,
              ggml_type_name(tensor->type),
              ggml_op_name  (tensor->op),
@@ -14796,11 +14796,11 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
          FILE * fout = stdout;
  
          fprintf(fout, "\n");
-        fprintf(fout, "%-16s %8x\n",   "magic",   GGML_FILE_MAGIC);
-        fprintf(fout, "%-16s %8d\n",   "version", GGML_FILE_VERSION);
-        fprintf(fout, "%-16s %8d\n",   "leafs",   cgraph->n_leafs);
-        fprintf(fout, "%-16s %8d\n",   "nodes",   cgraph->n_nodes);
-        fprintf(fout, "%-16s %8llu\n", "eval",    size_eval);
+        fprintf(fout, "%-16s %8x\n",  "magic",   GGML_FILE_MAGIC);
+        fprintf(fout, "%-16s %8d\n",  "version", GGML_FILE_VERSION);
+        fprintf(fout, "%-16s %8d\n",  "leafs",   cgraph->n_leafs);
+        fprintf(fout, "%-16s %8d\n",  "nodes",   cgraph->n_nodes);
+        fprintf(fout, "%-16s %8ju\n", "eval",    size_eval);
  
          // header
          fprintf(fout, "\n");
@@ -15033,7 +15033,11 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
  
          data = ggml_new_tensor_1d(*ctx_data, GGML_TYPE_I8, fsize);
  
-        fread(data->data, sizeof(char), fsize, fin);
+        const size_t ret = fread(data->data, sizeof(char), fsize, fin);
+        if (ret != fsize) {
+            fprintf(stderr, "%s: failed to read %s\n", __func__, fname);
+            return result;
+        }
  
          fclose(fin);
      }
author	Georgi Gerganov <redacted>
	Tue, 6 Jun 2023 07:18:03 +0000 (10:18 +0300)
committer	Georgi Gerganov <redacted>
	Tue, 6 Jun 2023 07:18:03 +0000 (10:18 +0300)
.gitignore		patch | blob | history
Makefile		patch | blob | history
ggml.c		patch | blob | history