build/
build-debug/
build-*/
+out/
compile_commands.json
+CMakeSettings.json
+.vs/
+.vscode/
.exrc
.cache
#define M_PI 3.14159265358979323846
#endif
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
for (int i = 1; i < argc; i++) {
std::string arg = argv[i];
}
}
- fprintf(stderr, "%s : %lu tests failed out of %lu tests.\n", __func__, n_fails, tests.size());
+ fprintf(stderr, "%s : %zu tests failed out of %zu tests.\n", __func__, n_fails, tests.size());
}
bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) {
#include <string>
#include <vector>
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
// default hparams (Dolly-V2 3B)
struct dollyv2_hparams {
int32_t n_vocab = 50254; // tokenizer.vocab_size
#include <string>
#include <vector>
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
// default hparams (GPT-2 117M)
struct gpt2_hparams {
int32_t n_vocab = 50257;
#include <string>
#include <vector>
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
+
// default hparams (GPT-J 6B)
struct gptj_hparams {
int32_t n_vocab = 50400;
#include <string>
#include <vector>
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
// default hparams (StableLM 3B)
struct gpt_neox_hparams {
int32_t n_vocab = 50257;
#include <fstream>
#include <vector>
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
// evaluate the MNIST compute graph
//
// - fname_cgraph: path to the compute graph
#include <vector>
#include <algorithm>
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
// default hparams
struct mnist_hparams {
int32_t n_input = 784;
#include <utility>
#include <vector>
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
// no defaults for now
struct mpt_hparams {
int32_t d_model = 0;
printf("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
for (size_t i = 0; i < embd_inp.size(); i++) {
- printf("%s: token[%lu] = %6d\n", __func__, i, embd_inp[i]);
+ printf("%s: token[%zu] = %6d\n", __func__, i, embd_inp[i]);
}
printf("\n");
#include <map>
#include <stdint.h>
#include <string>
-#include <unistd.h>
#include <unordered_map>
#include <utility>
#include <vector>
+// Reports whether standard input is attached to an interactive character
+// device (console/terminal) rather than a pipe or a redirected file.
+#if defined(_WIN32)
+#ifndef NOMINMAX
+#define NOMINMAX // keep <windows.h> from defining min/max macros; guarded in case the build already sets it
+#endif
+#include <windows.h> // lowercase spelling also resolves on case-sensitive hosts (e.g. MinGW cross builds)
+bool is_stdin_terminal() {
+    auto in = GetStdHandle(STD_INPUT_HANDLE);
+    // Only character devices count as a terminal; any other file type yields false.
+    return GetFileType(in) == FILE_TYPE_CHAR;
+}
+#else
+#include <unistd.h>
+bool is_stdin_terminal() {
+    // isatty() returns an int; compare explicitly instead of narrowing it to bool.
+    return isatty(STDIN_FILENO) != 0;
+}
+#endif
+
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
using piece_t = std::pair<std::size_t, float>;
using piece_map_t = std::unordered_map<std::string, piece_t>;
std::mt19937 rng(params.seed);
if (params.prompt.empty()) {
- if (!isatty(STDIN_FILENO)) {
+ if (!is_stdin_terminal()) {
std::string line;
while (std::getline(std::cin, line)) {
params.prompt = params.prompt + "\n" + line;
printf("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
for (int i = 0; i < embd_inp.size(); i++) {
- printf("%s: token[%d] = %6lu\n", __func__, i, embd_inp[i]);
+ printf("%s: token[%d] = %6zu\n", __func__, i, embd_inp[i]);
// vocab.id_to_token.at(embd_inp[i]).c_str()
}
printf("\n");
#include <string>
#include <vector>
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
// default hparams (GPT-2 117M)
// https://huggingface.co/bigcode/gpt_bigcode-santacoder/blob/main/config.json
struct starcoder_hparams {
#include <vector>
#include <cstring>
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
// Terminal color map. 10 colors grouped in ranges [0.0, 0.1, ..., 0.9]
// Lowest is red, middle is yellow, highest is green.
const std::vector<std::string> k_colors = {
#include <regex>
#include <random>
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
#if defined(GGML_BIG_ENDIAN)
#include <bit>
-// Defines CLOCK_MONOTONIC on Linux
-#define _GNU_SOURCE
+#define _GNU_SOURCE // Defines CLOCK_MONOTONIC on Linux
+#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
#include "ggml.h"
+#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
#include "ggml.h"
#include <math.h>
#include <stdlib.h>
#include <assert.h>
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
#define MAX_NARGS 3
#undef MIN
+#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
#include "ggml/ggml.h"
#include <math.h>
#include <assert.h>
#include <inttypes.h>
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
#define MAX_NARGS 2
float frand() {
#include <float.h>
#include <stdint.h>
#include <stdio.h>
+#include <inttypes.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
-#include <time.h>
#include <math.h>
-#include <sys/time.h>
-
#if defined(__ARM_NEON)
#include "arm_neon.h"
#elif defined(__AVX__) || defined(__AVX2__)
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#include <intrin.h>
+#define __builtin_popcountll __popcnt64
+#endif
+
const int M = 1280;
const int N = 1536;
const int K = 1280;
return (float) rand() / (float) RAND_MAX;
}
-uint64_t get_time_us() {
- struct timeval tv;
- gettimeofday(&tv, NULL);
- return tv.tv_sec * 1000000 + tv.tv_usec;
-}
-
#if defined(__AVX2__)
// horizontally reduce 8 32-bit integers
static inline uint32_t _mm256_hadd_epi32_gg(__m256i v) {
s1[b + 1] = d1*(1 << b);
}
- m0[0] = -1ULL;
- m1[0] = -1ULL;
+ m0[0] = 0-1ULL;
+ m1[0] = 0-1ULL;
for (int s = 0; s < QK/gq_t_bits; ++s) {
for (int b = 0; b < QB; b++) {
int main(int argc, const char ** argv) {
assert(sizeof(gq_quant_t)*8 == gq_t_bits);
+ ggml_time_init();
// needed to initialize f16 tables
{
// convert fp32 -> gq
{
- const uint64_t t_start = get_time_us();
+ const int64_t t_start = ggml_time_us();
if (method == 1) {
quantize_1(src0, src0_gq, M, K);
quantize_6(src1, src1_gq, N, K);
}
- const uint64_t t_end = get_time_us();
+ const int64_t t_end = ggml_time_us();
printf("convert time: %f ms / method = %d\n", (t_end - t_start) / 1000.0, method);
}
const int nIter = 1;
- const clock_t start = clock();
- const uint64_t start_us = get_time_us();
+ const int64_t start = ggml_cycles();
+ const int64_t start_us = ggml_time_us();
double iM = 1.0/M;
double sum = 0.0f;
}
{
- const clock_t end = clock();
- const uint64_t end_us = get_time_us();
- printf("%s: elapsed ticks: %ld\n", __func__, end - start);
+ const int64_t end = ggml_cycles();
+ const int64_t end_us = ggml_time_us();
+ // start/end are signed int64_t, so the difference is printed with the signed PRId64 conversion
+ printf("%s: elapsed ticks: %" PRId64 "\n", __func__, end - start);
printf("%s: elapsed us: %d / %f ms\n", __func__, (int)(end_us - start_us), (end_us - start_us) / 1000.0 / nIter);
}
dst[i] = sum;
}
}
-
-typedef float afloat __attribute__ ((__aligned__(32)));
+#if defined(_MSC_VER)
+typedef float __declspec(align(32)) afloat; // MSVC spelling of the 32-byte alignment request
+#else
+typedef float afloat __attribute__((__aligned__(32))); // afloat: float declared with 32-byte alignment
+#endif
void mul_mat_vec_f32_1(
const afloat *restrict src0,
const afloat *restrict src1,
for (unsigned i = 0; i < nrows; i++) {
float sum = 0.0f;
- const void * row = src0 + i*ncols*sizeof(float);
- const void * col = src1;
+ const char * row = (const char*)src0 + i*ncols*sizeof(float);
+ const char * col = (const char*)src1;
for (unsigned j = 0; j < ncols; j++) {
sum += (*(float *)row) * (*(float *)col);
row += sizeof(float);
col += sizeof(float);
}
*(float *)d = sum;
- d += sizeof(float);
+ d = (char*)d + sizeof(float);
}
}
+#if defined(_MSC_VER)
+void* aligned_alloc(size_t alignment, size_t size) { // MSVC-only stand-in that forwards to _aligned_malloc
+ return _aligned_malloc(size, alignment); // note: _aligned_malloc takes (size, alignment), the reverse of this signature
+}
+#endif // NOTE(review): memory from _aligned_malloc is presumably expected to be released with _aligned_free, not free() - confirm callers
+
int main(int argc, const char ** argv) {
//float * src0 = malloc(sizeof(float)*N*M);
//float * src1 = malloc(sizeof(float)*M);
afloat * src1 = (float *)(aligned_alloc(32, sizeof(float)*M));
afloat * dst = (float *)(aligned_alloc(32, sizeof(float)*N));
- for (unsigned i = 0; i < N*M; i++) {
- src0[i] = i;
+ for (int i = 0; i < N*M; i++) {
+ src0[i] = (afloat)i;
}
- for (unsigned i = 0; i < M; i++) {
- src1[i] = i;
+ for (int i = 0; i < M; i++) {
+ src1[i] = (afloat)i;
}
const int nIter = 10;
//mul_mat_vec_f32_0(src0, src1, dst, N, M);
mul_mat_vec_f32_1(src0, src1, dst, N, M);
//mul_mat_vec_f32_2(src0, src1, dst, N, M);
- for (unsigned i = 0; i < N; i++) {
+ for (int i = 0; i < N; i++) {
sum += dst[i];
}
}
+#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
#include "ggml/ggml.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
bool is_close(float a, float b, float epsilon) {
return fabs(a - b) < epsilon;
}
l)
)
),
- ggml_new_f32(ctx0, NP)
+ ggml_new_f32(ctx0, (float)NP)
),
ggml_mul(ctx0,
ggml_sum(ctx0, ggml_sqr(ctx0, x)),