-mc N, --max-context N [-1 ] maximum number of text context tokens to store
-ml N, --max-len N [0 ] maximum segment length in characters
-sow, --split-on-word [false ] split on word rather than on token
- -bo N, --best-of N [2 ] number of best candidates to keep
- -bs N, --beam-size N [-1 ] beam size for beam search
+ -bo N, --best-of N [5 ] number of best candidates to keep
+ -bs N, --beam-size N [5 ] beam size for beam search
-wt N, --word-thold N [0.01 ] word timestamp probability threshold
-et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
-lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
-fp, --font-path [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video
-ocsv, --output-csv [false ] output result in a CSV file
-oj, --output-json [false ] output result in a JSON file
+ -ojf, --output-json-full [false ] include more information in the JSON file
-of FNAME, --output-file FNAME [ ] output file path (without file extension)
-ps, --print-special [false ] print special tokens
-pc, --print-colors [false ] print colors
-m FNAME, --model FNAME [models/ggml-base.en.bin] model path
-f FNAME, --file FNAME [ ] input WAV file path
-oved DNAME, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
- -ls, --log-score [false ] log best decoder scores of token
+ -ls, --log-score [false ] log best decoder scores of tokens
+ -ng, --no-gpu [false ] disable GPU
bash ./models/download-ggml-model.sh base.en
-d N, --duration N [0 ] duration of audio to process in milliseconds\r
-mc N, --max-context N [-1 ] maximum number of text context tokens to store\r
-ml N, --max-len N [0 ] maximum segment length in characters\r
+ -sow, --split-on-word [false ] split on word rather than on token\r
-bo N, --best-of N [5 ] number of best candidates to keep\r
- -bs N, --beam-size N [-1 ] beam size for beam search\r
+ -bs N, --beam-size N [5 ] beam size for beam search\r
-wt N, --word-thold N [0.01 ] word timestamp probability threshold\r
-et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail\r
-lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail\r
- -su, --speed-up [false ] speed up audio by x2 (reduced accuracy)\r
+ -debug, --debug-mode [false ] enable debug mode (e.g. dump log_mel)\r
-tr, --translate [false ] translate from source language to english\r
-di, --diarize [false ] stereo audio diarization\r
+ -tdrz, --tinydiarize [false ] enable tinydiarize (requires a tdrz model)\r
-nf, --no-fallback [false ] do not use temperature fallback while decoding\r
-otxt, --output-txt [false ] output result in a text file\r
-ovtt, --output-vtt [false ] output result in a vtt file\r
-osrt, --output-srt [false ] output result in a srt file\r
+ -olrc, --output-lrc [false ] output result in a lrc file\r
-owts, --output-words [false ] output script for generating karaoke video\r
+ -fp, --font-path [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video\r
-ocsv, --output-csv [false ] output result in a CSV file\r
-oj, --output-json [false ] output result in a JSON file\r
+ -ojf, --output-json-full [false ] include more information in the JSON file\r
-of FNAME, --output-file FNAME [ ] output file path (without file extension)\r
-ps, --print-special [false ] print special tokens\r
-pc, --print-colors [false ] print colors\r
-pp, --print-progress [false ] print progress\r
- -nt, --no-timestamps [true ] do not print timestamps\r
+ -nt, --no-timestamps [false ] do not print timestamps\r
-l LANG, --language LANG [en ] spoken language ('auto' for auto-detect)\r
+ -dl, --detect-language [false ] exit after automatically detecting language\r
--prompt PROMPT [ ] initial prompt\r
-m FNAME, --model FNAME [models/ggml-base.en.bin] model path\r
-f FNAME, --file FNAME [ ] input WAV file path\r
+ -oved DNAME, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference\r
+ -ls, --log-score [false ] log best decoder scores of tokens\r
+ -ng, --no-gpu [false ] disable GPU\r
```\r