-Wstrict-prototypes \
-Wpointer-arith \
")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
+ -Wall \
+ -Wextra \
+ -Wpedantic \
+ -Wcast-qual \
+ ")
else()
# todo : msvc
endif()
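
The -Wall/-Wextra set enabled here turns on -Wunused-parameter, which is what most of the C++ hunks below are silencing. A minimal standalone illustration of that recurring pattern (the function here is hypothetical, not from the patch): the unused parameter keeps its type, but its name is commented out.

#include <cstdio>

// With -Wall -Wextra, an unused named parameter triggers -Wunused-parameter;
// commenting out the name keeps the signature and silences the warning.
static void print_usage(int /*argc*/, char ** argv) {
    fprintf(stderr, "usage: %s [options]\n", argv[0]);
}

int main(int argc, char ** argv) {
    print_usage(argc, argv);
    return 0;
}
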
return true;
}
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
fprintf(stderr, "\n");
fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n");
return true;
}
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
fprintf(stderr, "\n");
fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n");
float energy_all = 0.0f;
float energy_last = 0.0f;
- for (size_t i = 0; i < n_samples; i++) {
+ for (int i = 0; i < n_samples; i++) {
energy_all += fabsf(pcmf32[i]);
if (i >= n_samples - n_samples_last) {
whisper_token tokens[1024];
allowed_tokens.emplace_back();
- for (int l = 0; l < cmd.size(); ++l) {
+ for (int l = 0; l < (int) cmd.size(); ++l) {
// NOTE: very important to add the whitespace !
// the reason is that the first decoded token starts with a whitespace too!
std::string ss = std::string(" ") + cmd.substr(0, l + 1);
// best command
{
+ const auto t_end = std::chrono::high_resolution_clock::now();
+
fprintf(stdout, "\n");
fprintf(stdout, "%s: detected command: %s%s%s | p = %f | t = %d ms\n", __func__,
"\033[1m", allowed_commands[probs_id[0].second].c_str(), "\033[0m", probs_id[0].first,
- (int) std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - t_start).count());
+ (int) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count());
fprintf(stdout, "\n");
}
- const auto t_end = std::chrono::high_resolution_clock::now();
-
audio.clear();
}
}
return true;
}
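
The t_end reordering above simply captures the end timestamp once and reuses it in the printout, instead of calling the clock inside the fprintf and declaring an unused t_end afterwards. The pattern as a standalone sketch (illustrative only):

#include <chrono>
#include <cstdio>

int main() {
    const auto t_start = std::chrono::high_resolution_clock::now();

    // ... work being timed ...

    const auto t_end = std::chrono::high_resolution_clock::now();

    // cast to int for printf, matching the style of the hunk above
    printf("t = %d ms\n",
            (int) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count());

    return 0;
}
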
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
fprintf(stderr, "\n");
fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
fprintf(stderr, "\n");
// karaoke video generation
// outputs a bash script that uses ffmpeg to generate a video with the subtitles
// TODO: font parameter adjustments
-bool output_wts(struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & params, float t_sec) {
+bool output_wts(struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & /*params*/, float t_sec) {
std::ofstream fout(fname);
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
txt_ul = "\\ \\ ";
{
- int ncnt = 0;
for (int k = 0; k < n; ++k) {
const auto & token2 = tokens[k];
txt_ul += "\\ ";
}
}
-
- ncnt += txt.size();
}
::replace_all(txt_bg, "'", "\u2019");
{
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
- wparams.encoder_begin_callback = [](struct whisper_context * ctx, void * user_data) {
+ wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, void * user_data) {
bool is_aborted = *(bool*)user_data;
return !is_aborted;
};
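
The NOTE above says is_aborted should be atomic to avoid a data race. A hedged sketch of that variant (not part of this patch), reusing the callback signature shown in the hunk and assuming the usual encoder_begin_callback_user_data field next to it:

#include <atomic>

#include "whisper.h"

// Illustrative only: std::atomic<bool> removes the race between the thread that
// requests the abort and the worker thread polling the flag from whisper_full().
static std::atomic<bool> g_is_aborted{false};

static void setup_abort_callback(struct whisper_full_params & wparams) {
    wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, void * user_data) {
        // returning false asks whisper to stop before running the encoder
        return !((std::atomic<bool> *) user_data)->load();
    };
    wparams.encoder_begin_callback_user_data = &g_is_aborted;
}
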
return true;
}
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
fprintf(stderr, "\n");
fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n");
float energy_all = 0.0f;
float energy_last = 0.0f;
- for (size_t i = 0; i < n_samples; i++) {
+ for (int i = 0; i < n_samples; i++) {
energy_all += fabsf(pcmf32[i]);
if (i >= n_samples - n_samples_last) {
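
The loop this hunk touches (here and in the other examples) is the simple energy-based voice-activity check. A self-contained sketch of the idea, with hypothetical names and parameters since the full helper is not shown in the hunk:

#include <cmath>
#include <vector>

// Hedged sketch of an energy-based VAD: speech is assumed to have ended when the
// mean |amplitude| of the most recent window drops well below the mean |amplitude|
// of the whole buffer.
static bool voice_ended(const std::vector<float> & pcmf32, int sample_rate, int last_ms, float vad_thold) {
    const int n_samples      = (int) pcmf32.size();
    const int n_samples_last = (sample_rate*last_ms)/1000;

    if (n_samples_last >= n_samples) {
        return false; // not enough audio yet
    }

    float energy_all  = 0.0f;
    float energy_last = 0.0f;

    for (int i = 0; i < n_samples; i++) {
        energy_all += fabsf(pcmf32[i]);
        if (i >= n_samples - n_samples_last) {
            energy_last += fabsf(pcmf32[i]);
        }
    }

    energy_all  /= n_samples;
    energy_last /= n_samples_last;

    return energy_last <= vad_thold*energy_all;
}
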
const float * logits,
int top_k,
double top_p,
- double temp,
+ double /*temp*/,
std::mt19937 & rng) {
int n_logits = vocab.id_to_token.size();
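
For context on the signature above: top-k keeps the k most likely tokens, and top-p (nucleus) then trims that set to the smallest prefix whose probability mass reaches p. An illustrative, self-contained version of the technique (not the repo's gpt_sample_top_k_top_p, which also handles temperature and its own vocab type):

#include <algorithm>
#include <cmath>
#include <random>
#include <utility>
#include <vector>

static int sample_top_k_top_p(const std::vector<float> & logits, int top_k, double top_p, std::mt19937 & rng) {
    const int n = (int) logits.size();

    // keep only the top_k highest logits, remembering their token ids
    std::vector<std::pair<float, int>> cand(n);
    for (int i = 0; i < n; ++i) {
        cand[i] = { logits[i], i };
    }
    top_k = std::min(top_k, n);
    std::partial_sort(cand.begin(), cand.begin() + top_k, cand.end(),
            [](const std::pair<float, int> & a, const std::pair<float, int> & b) { return a.first > b.first; });
    cand.resize(top_k);

    // softmax over the survivors (max-subtracted for numerical stability)
    std::vector<double> probs(top_k);
    double sum = 0.0;
    for (int i = 0; i < top_k; ++i) {
        probs[i] = std::exp(cand[i].first - cand[0].first);
        sum += probs[i];
    }
    for (auto & p : probs) {
        p /= sum;
    }

    // nucleus cut: smallest prefix whose cumulative probability reaches top_p
    double cum = 0.0;
    size_t n_keep = probs.size();
    for (size_t i = 0; i < probs.size(); ++i) {
        cum += probs[i];
        if (cum >= top_p) {
            n_keep = i + 1;
            break;
        }
    }
    probs.resize(n_keep);

    // std::discrete_distribution renormalizes the remaining weights
    std::discrete_distribution<int> dist(probs.begin(), probs.end());
    return cand[dist(rng)].second;
}
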
fin.read((char *) &len, sizeof(len));
word.resize(len);
- fin.read((char *) word.data(), len);
+ fin.read((char *) &word[0], len);
vocab.token_to_id[word] = i;
vocab.id_to_token[i] = word;
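
A likely motivation for the &word[0] change above (hedged, since the hunk header is not shown): with the newly added -Wcast-qual, casting the const char * returned by std::string::data() (pre-C++17) to char * warns, while &word[0] already yields a mutable pointer. A minimal illustration:

#include <cstdint>
#include <fstream>
#include <string>

// Reads one length-prefixed string, mirroring the vocab-loading pattern above.
// Illustrative helper, not from the patch.
static std::string read_word(std::ifstream & fin) {
    uint32_t len = 0;
    fin.read((char *) &len, sizeof(len));

    std::string word;
    word.resize(len);
    // &word[0] is 'char *', so no const is cast away
    // (word.data() is 'const char *' before C++17 and would trip -Wcast-qual)
    fin.read(&word[0], len);

    return word;
}
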
std::string result;
- for (int i = embd.size(); i < embd_inp.size() + n_predict; i++) {
+ for (int i = embd.size(); i < (int) embd_inp.size() + n_predict; i++) {
// predict
if (embd.size() > 0) {
if (!gpt2_eval(ctx->model, ctx->n_threads, n_past, embd, embd_w, mem_per_token)) {
return true;
}
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
fprintf(stderr, "\n");
fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n");
float energy_all = 0.0f;
float energy_last = 0.0f;
- for (size_t i = 0; i < n_samples; i++) {
+ for (int i = 0; i < n_samples; i++) {
energy_all += fabsf(pcmf32[i]);
if (i >= n_samples - n_samples_last) {
bool force_speak = false;
float prob0 = 0.0f;
- float prob = 0.0f;
std::vector<float> pcmf32_cur;
std::vector<float> pcmf32_prompt;
const ggml_type wtype = model.hparams.f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
size_t ctx_size = 0;
- size_t ctx_mem_size = 0;
{
const auto & hparams = model.hparams;
ctx_size += n_text_layer*( n_text_state*ggml_type_size(GGML_TYPE_F32)); // cross_attn_ln_1_b
}
- ctx_mem_size += n_text_layer*n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_k
- ctx_mem_size += n_text_layer*n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_v
-
- ctx_mem_size += n_text_layer*n_audio_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_cross_k
- ctx_mem_size += n_text_layer*n_audio_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_cross_v
-
ctx_size += (15 + 15*n_audio_layer + 24*n_text_layer)*256; // object overhead
fprintf(stderr, "%s: ggml ctx size = %7.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
static bool log_mel_spectrogram(
const float * samples,
const int n_samples,
- const int sample_rate,
+ const int /*sample_rate*/,
const int fft_size,
const int fft_step,
const int n_mel,