#!/bin/bash
#
# Transcribe a WAV file with VOSK, producing JSON output that includes the
# timing of each individual detected word.
#
# Usage: run with an optional BASENAME argument.
#   BASENAME  audio file name without the ".wav" extension
#             (default: w25mia60).
#             Reads  BASENAME.wav
#             Writes BASENAME.json
#
# NOTE: the original author hit an error at this step and needed to patch
# some Python code in VOSK; see VOSKPATCH.TXT.

# Abort on the first failing command, on unset variables, and on failures
# inside pipelines, so a broken transcription is not silently ignored.
set -euo pipefail

base="${1:-w25mia60}"
wav_in="${base}.wav"
json_out="${base}.json"

# Fail early with a clear message instead of a bare "command not found".
if ! command -v vosk-transcriber >/dev/null; then
  printf 'error: vosk-transcriber not found in PATH\n' >&2
  exit 127
fi

if [[ ! -r "$wav_in" ]]; then
  printf 'error: cannot read input file: %s\n' "$wav_in" >&2
  exit 1
fi

# -l: language model, -i: input audio, -t: output type, -o: output file.
vosk-transcriber -l en-us -i "$wav_in" -t json -o "$json_out"

# Next step: use a Python script to translate the VOSK JSON output into a
# VTT file with JSON objects for each caption. The command is left commented
# out, as it was in the original script; uncomment it to run the conversion.
#python3 scripts/voskjson2vtt.py "$json_out" "${base}_words.vtt"

# Now check out vtt_words.html