]> git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commitdiff
yt-wsp.sh : script to easily transcribe VODs
authorGeorgi Gerganov <redacted>
Sat, 26 Nov 2022 10:53:23 +0000 (12:53 +0200)
committerGeorgi Gerganov <redacted>
Sat, 26 Nov 2022 10:54:42 +0000 (12:54 +0200)
Thanks to @DaniruKun
ref: https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818

Usage:

  cd whisper.cpp
  make

  ./examples/yt-wsp.sh <video-url>

README.md
examples/yt-wsp.sh [new file with mode: 0755]

index 5f0270637b910058f62c215481a88a66ade86e06..fd141b88f46f2b60087b4d91a887895c62cc94b0 100644 (file)
--- a/README.md
+++ b/README.md
@@ -466,5 +466,6 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch
 | [examples/whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
 | [examples/generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
 | [examples/livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
+| [examples/yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
 
 ## [Frequently asked questions (#126)](https://github.com/ggerganov/whisper.cpp/discussions/126)
diff --git a/examples/yt-wsp.sh b/examples/yt-wsp.sh
new file mode 100755 (executable)
index 0000000..0e41b1c
--- /dev/null
@@ -0,0 +1,132 @@
+#!/usr/bin/env bash
+
+# Small shell script that makes it easy to download and transcribe live stream VODs automatically.
+# This uses YT-DLP, ffmpeg and the CPP version of Whisper: https://github.com/ggerganov/whisper.cpp
+# Use `./examples/yt-wsp.sh help` to print help info.
+
+# MIT License
+
+# Copyright (c) 2022 Daniils Petrovs
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+set -Eeuo pipefail
+
+# You can find how to download models in the OG repo: https://github.com/ggerganov/whisper.cpp/#usage
+MODEL_PATH="${MODEL_PATH:-models/ggml-base.en.bin}" # Set to a multilingual model if you want to translate from foreign lang to en
+WHISPER_EXECUTABLE="${WHISPER_EXECUTABLE:-whisper}" # Where to find the whisper.cpp executable
+WHISPER_LANG="${WHISPER_LANG:-en}" # Set to desired lang to translate from
+
+msg() {
+    echo >&2 -e "${1-}"
+}
+
+cleanup() {
+    msg "Cleaning up..."
+    rm -rf "${temp_dir}" "vod-resampled.wav" "vod-resampled.wav.srt"
+}
+
+print_help() {
+    echo "Usage: ./transcribe-vod <video_url>"
+    echo "See configurable env variables in the script"
+    echo "This will produce an MP4 muxed file called res.mp4 in the working directory"
+    echo "Requirements: ffmpeg yt-dlp whisper"
+    echo "Whisper needs to be built into the main binary with make, then you can rename it to something like 'whisper' and add it to your PATH for convenience."
+    echo "E.g. in the root of Whisper.cpp, run: 'make && cp ./main /usr/local/bin/whisper'"
+}
+
+check_requirements() {
+    if ! command -v ffmpeg &>/dev/null; then
+        echo "ffmpeg is required (https://ffmpeg.org)."
+        exit 1
+    fi
+
+    if ! command -v yt-dlp &>/dev/null; then
+        echo "yt-dlp is required (https://github.com/yt-dlp/yt-dlp)."
+        exit 1
+    fi
+
+    if ! command -v "$WHISPER_EXECUTABLE" &>/dev/null; then
+        WHISPER_EXECUTABLE="./main"
+        if ! command -v "$WHISPER_EXECUTABLE" &>/dev/null; then
+            echo "Whisper is required (https://github.com/ggerganov/whisper.cpp)."
+            exit 1
+        fi
+    fi
+}
+
+if [[ $# -lt 1 ]]; then
+    print_help
+    exit 1
+fi
+
+if [[ "$1" == "help" ]]; then
+    print_help
+    exit 0
+fi
+
+temp_dir="tmp"
+source_url="$1"
+
+check_requirements
+
+msg "Downloading VOD..."
+
+# Optionally add --cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER] for members only VODs
+yt-dlp \
+    -f "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best" \
+    --embed-thumbnail \
+    --embed-chapters \
+    --xattrs \
+    "${source_url}" -o "${temp_dir}/vod.mp4"
+
+msg "Extracting audio and resampling..."
+
+ffmpeg -i "${temp_dir}/vod.mp4" \
+    -hide_banner \
+    -loglevel error \
+    -ar 16000 \
+    -ac 1 \
+    -c:a \
+    pcm_s16le -y "vod-resampled.wav"
+
+msg "Transcribing to subtitle file..."
+msg "Whisper specified at: ${WHISPER_EXECUTABLE}"
+
+$WHISPER_EXECUTABLE \
+    -m "${MODEL_PATH}" \
+    -l "${WHISPER_LANG}" \
+    -f "vod-resampled.wav" \
+    -t 8 \
+    -osrt \
+    --translate
+
+msg "Embedding subtitle track..."
+
+ffmpeg -i "${temp_dir}/vod.mp4" \
+    -hide_banner \
+    -loglevel error \
+    -i "vod-resampled.wav.srt" \
+    -c copy \
+    -c:s mov_text \
+    -y res.mp4
+
+cleanup
+
+msg "Done! Your finished file is ready: res.mp4"