-CC_SDL=`sdl2-config --cflags --libs`
+# Host probes, each evaluated once at parse time (simple `:=` assignment):
+#   UNAME_S - kernel name           (uname -s)
+#   UNAME_P - processor type        (uname -p)
+#   UNAME_M - machine hardware name (uname -m)
+# They drive the OS- and architecture-specific flag selection below.
+UNAME_S := $(shell uname -s)
+UNAME_P := $(shell uname -p)
+UNAME_M := $(shell uname -m)
+
+#
+# Compile flags
+#
+
+# Baseline: -O3 for both languages; C sources build as C11, C++ sources as C++11.
+CFLAGS = -O3 -std=c11
+CXXFLAGS = -O3 -std=c++11
+
+# Same warning set for both languages: -Wall and -Wextra are enabled, while
+# unused-parameter and unused-function reports are switched off.
+CFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function
+CXXFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function
+
+# OS specific
+# TODO: support Windows
+# -pthread must reach BOTH flag sets: ggml.o is built with $(CC) $(CFLAGS),
+# but whisper.o, main and stream are compiled and linked with
+# $(CXX) $(CXXFLAGS). The recipes this replaces passed -pthread on every
+# compiler invocation, so leaving it out of CXXFLAGS would drop it from the
+# C++ compile and link steps.
+ifeq ($(UNAME_S),Linux)
+    CFLAGS   += -pthread
+    CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),Darwin)
+    CFLAGS   += -pthread
+    CXXFLAGS += -pthread
+endif
+
+# Architecture specific
+# x86_64: `uname -p` is not a reliable probe (it can print "unknown" or
+# "i386" on x86_64 hosts), so a match from either `uname -p` or `uname -m`
+# enables the SIMD flags that the previous recipe hard-coded.
+ifneq ($(filter x86_64,$(UNAME_P) $(UNAME_M)),)
+    CFLAGS += -mavx -mavx2 -mfma -mf16c
+endif
+# Hosts whose `uname -p` starts with "arm".
+ifneq ($(filter arm%,$(UNAME_P)),)
+    CFLAGS += -mfpu=neon
+endif
+ifneq ($(filter aarch64%,$(UNAME_M)),)
+    # Intentionally no -mfpu here: AArch64 GCC has no -mfpu option and rejects
+    # it as unrecognized. Advanced SIMD (NEON) is on by default for AArch64.
+endif
+ifneq ($(filter armv%,$(UNAME_M)),)
+    # Raspberry Pi 4
+    CFLAGS += -mcpu=cortex-a72 -mfloat-abi=hard -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
+endif
+
+#
+# Build library + main
+#
-main: ggml.o whisper.o main.o
-	g++ -pthread -o main ggml.o whisper.o main.o
+# main.cpp is compiled and linked against the two objects in a single step;
+# the freshly built binary is then run with -h.
+main: main.cpp ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) main.cpp whisper.o ggml.o -o $@
	./main -h
# C object: rebuilt whenever ggml.c or ggml.h changes.
ggml.o: ggml.c ggml.h
-	gcc -pthread -O3 -mavx -mavx2 -mfma -mf16c -c ggml.c
+	$(CC) $(CFLAGS) -c $< -o $@
# C++ object: rebuilt whenever whisper.cpp or whisper.h changes.
whisper.o: whisper.cpp whisper.h
-	gcc -pthread -O3 -std=c++11 -c whisper.cpp
+	$(CXX) $(CXXFLAGS) -c $< -o $@
-main.o: main.cpp ggml.h
- g++ -pthread -O3 -std=c++11 -c main.cpp
-
-stream: stream.cpp
- g++ -pthread -O3 -std=c++11 -o stream stream.cpp ggml.o whisper.o $(CC_SDL)
-
-# clean up the directory
+# Remove every artefact this Makefile builds: the objects plus the `main`
+# and `stream` binaries. Declared phony so a stray file named "clean" cannot
+# make the target look up to date.
+.PHONY: clean
clean:
-	rm -f *.o main
+	rm -f *.o main stream
+#
+# Examples
+#
+
+# SDL2 compile and link flags. The backticks are left for the recipe's shell,
+# so sdl2-config only runs when a recipe that uses CC_SDL executes.
+CC_SDL=`sdl2-config --cflags --libs`
+
+# Streaming example: stream.cpp plus both objects, linked against SDL2.
+stream: stream.cpp ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) $^ -o $@ $(CC_SDL)
+
+#
+# Audio samples
+#
+
# download a few audio samples into folder "./samples":
.PHONY: samples
samples:
@ffmpeg -loglevel -0 -y -i samples/mm1.wav -ar 16000 -ac 1 -c:a pcm_s16le samples/mm0.wav
@rm samples/mm1.wav
+#
+# Models
+#
# if not already downloaded, the following targets download the specified model and
# run it on all samples in the folder "./samples":
// convert to mono, float
pcmf32.resize(n);
if (wav.channels == 1) {
- for (size_t i = 0; i < n; i++) {
+ for (int i = 0; i < n; i++) {
pcmf32[i] = float(pcm16[i])/32768.0f;
}
} else {
- for (size_t i = 0; i < n; i++) {
+ for (int i = 0; i < n; i++) {
pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
}
}
const auto & mel_inp = wctx.mel;
const auto & hparams = model.hparams;
- const int n_vocab = hparams.n_vocab;
-
const int n_ctx = hparams.n_audio_ctx;
const int n_state = hparams.n_audio_state;
const int n_head = hparams.n_audio_head;
bool done = false;
int seek_delta = 100*WHISPER_CHUNK_SIZE;
- whisper_token last_id = 0;
// print the prompt
//printf("\n\n");
// feel free to experiment!
//
{
- const int n_vocab = whisper_n_vocab(ctx);
-
whisper_token id = 0;
whisper_token tid = whisper_token_beg(ctx);
seek_delta = 2*(id - whisper_token_beg(ctx));
result_len = i + 1;
}
- last_id = id;
// add it to the context
prompt.push_back(id);
std::string text = "";
- for (int i = 0; i < result_cur.size(); i++) {
+ for (int i = 0; i < (int) result_cur.size(); i++) {
if (params.print_special_tokens == false && result_cur[i].id >= whisper_token_eot(ctx)) {
} else {
text += whisper_token_to_str(ctx, result_cur[i].id);
result_all.push_back({ t0, t1, text });
}
text = "";
- while (result_cur[i].id > whisper_token_beg(ctx) && i < result_cur.size()) {
+ while (result_cur[i].id > whisper_token_beg(ctx) && i < (int) result_cur.size()) {
i++;
}
i--;