diff --git a/README.md b/README.md index c790af36..a01d53a0 100644 --- a/README.md +++ b/README.md @@ -110,8 +110,8 @@ options: -mc N, --max-context N [-1 ] maximum number of text context tokens to store -ml N, --max-len N [0 ] maximum segment length in characters -sow, --split-on-word [false ] split on word rather than on token - -bo N, --best-of N [2 ] number of best candidates to keep - -bs N, --beam-size N [-1 ] beam size for beam search + -bo N, --best-of N [5 ] number of best candidates to keep + -bs N, --beam-size N [5 ] beam size for beam search -wt N, --word-thold N [0.01 ] word timestamp probability threshold -et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail -lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail @@ -128,6 +128,7 @@ options: -fp, --font-path [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video -ocsv, --output-csv [false ] output result in a CSV file -oj, --output-json [false ] output result in a JSON file + -ojf, --output-json-full [false ] include more information in the JSON file -of FNAME, --output-file FNAME [ ] output file path (without file extension) -ps, --print-special [false ] print special tokens -pc, --print-colors [false ] print colors @@ -139,7 +140,8 @@ options: -m FNAME, --model FNAME [models/ggml-base.en.bin] model path -f FNAME, --file FNAME [ ] input WAV file path -oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference - -ls, --log-score [false ] log best decoder scores of token + -ls, --log-score [false ] log best decoder scores of tokens + -ng, --no-gpu [false ] disable GPU bash ./models/download-ggml-model.sh base.en diff --git a/examples/main/README.md b/examples/main/README.md index 68a3e3b5..2d868810 100644 --- a/examples/main/README.md +++ b/examples/main/README.md @@ -17,28 +17,37 @@ options: -d N, --duration N [0 ] duration of audio to process in milliseconds -mc N, --max-context N [-1 ] maximum number of text context tokens to store -ml N, --max-len N [0 ] maximum segment length in characters + -sow, --split-on-word [false ] split on word rather than on token -bo N, --best-of N [5 ] number of best candidates to keep - -bs N, --beam-size N [-1 ] beam size for beam search + -bs N, --beam-size N [5 ] beam size for beam search -wt N, --word-thold N [0.01 ] word timestamp probability threshold -et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail -lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail - -su, --speed-up [false ] speed up audio by x2 (reduced accuracy) + -debug, --debug-mode [false ] enable debug mode (eg. dump log_mel) -tr, --translate [false ] translate from source language to english -di, --diarize [false ] stereo audio diarization + -tdrz, --tinydiarize [false ] enable tinydiarize (requires a tdrz model) -nf, --no-fallback [false ] do not use temperature fallback while decoding -otxt, --output-txt [false ] output result in a text file -ovtt, --output-vtt [false ] output result in a vtt file -osrt, --output-srt [false ] output result in a srt file + -olrc, --output-lrc [false ] output result in a lrc file -owts, --output-words [false ] output script for generating karaoke video + -fp, --font-path [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video -ocsv, --output-csv [false ] output result in a CSV file -oj, --output-json [false ] output result in a JSON file + -ojf, --output-json-full [false ] include more information in the JSON file -of FNAME, --output-file FNAME [ ] output file path (without file extension) -ps, --print-special [false ] print special tokens -pc, --print-colors [false ] print colors -pp, --print-progress [false ] print progress - -nt, --no-timestamps [true ] do not print timestamps + -nt, --no-timestamps [false ] do not print timestamps -l LANG, --language LANG [en ] spoken language ('auto' for auto-detect) + -dl, --detect-language [false ] exit after automatically detecting language --prompt PROMPT [ ] initial prompt -m FNAME, --model FNAME [models/ggml-base.en.bin] model path -f FNAME, --file FNAME [ ] input WAV file path + -oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference + -ls, --log-score [false ] log best decoder scores of tokens + -ng, --no-gpu [false ] disable GPU ```