File indexing completed on 2024-05-05 04:52:35

0001 #!/usr/bin/env python3
0002 
0003 # SPDX-FileCopyrightText: 2021 Jean-Baptiste Mardelle <jb@kdenlive.org>
0004 # SPDX-License-Identifier: GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
0005 
0006 #pip3 install vosk
0007 #pip3 install srt
0008 
0009 from vosk import Model, KaldiRecognizer, SetLogLevel
0010 import sys
0011 import os
0012 import wave
0013 import subprocess
0014 import srt
0015 import json
0016 import datetime
0017 
# Disable Vosk log messages.
SetLogLevel(-1)

# Expected command-line arguments:
#   1: directory to switch into before any file is accessed
#   2: path of the Vosk model directory
#   3: input media file handed to ffmpeg
#   4: path of the subtitle file to write
if len(sys.argv) < 5:
    print("Usage: " + sys.argv[0] + " <working dir> <model> <input file> <output srt>",
          file=sys.stderr)
    sys.exit(1)

# Locate ffmpeg. This is done before the chdir below so that a relative
# __file__ is still interpreted against the original working directory.
if sys.platform == 'darwin':
    path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '../../MacOS/ffmpeg'))
else:
    # Elsewhere, rely on ffmpeg being found through PATH.
    path = 'ffmpeg'

os.chdir(sys.argv[1])

if not os.path.exists(sys.argv[2]):
    print("Please download the model from https://alphacephei.com/vosk/models and unpack as ", sys.argv[2], " in the current folder.")
    # sys.exit instead of the site-provided exit(), which is not guaranteed
    # to exist in every interpreter setup.
    sys.exit(1)

# Sample rate (Hz) requested from ffmpeg and announced to the recognizer;
# both sides must use the same value.
sample_rate = 16000
model = Model(sys.argv[2])
rec = KaldiRecognizer(model, sample_rate)
# Ask for per-word entries; transcribe() reads them from the 'result' key.
rec.SetWords(True)

# Decode the input to mono, signed 16-bit little-endian raw audio on stdout.
process = subprocess.Popen([path, '-loglevel', 'quiet', '-i',
                            sys.argv[3],
                            '-ar', str(sample_rate), '-ac', '1', '-f', 's16le', '-'],
                           stdout=subprocess.PIPE)

# Maximum number of recognized words placed in a single subtitle.
WORDS_PER_LINE = 7
0042 
def transcribe():
    """Recognize speech from the ffmpeg pipe and build subtitle entries.

    Reads the module-level ``process`` stdout in 4000-byte chunks, feeds
    each chunk to the module-level recognizer ``rec`` and prints one
    ``progress:<n>`` line to stdout per read.  The recognized words are then
    grouped into subtitles of at most ``WORDS_PER_LINE`` words each.

    Returns:
        A list of ``srt.Subtitle`` objects in recognition order.  Each one
        starts at the ``start`` value of its first word and ends at the
        ``end`` value of its last word, both interpreted as seconds.
    """
    results = []
    progress = 0
    while True:
        data = process.stdout.read(4000)
        # Reported before the end-of-stream check, so the final empty read
        # produces a progress line as well.
        print("progress:" + str(progress), file=sys.stdout, flush=True)
        progress += 1
        if len(data) == 0:
            break
        # A true return value means a complete result is ready to collect.
        if rec.AcceptWaveform(data):
            results.append(rec.Result())
    # Collect whatever the recognizer still holds after the last chunk.
    results.append(rec.FinalResult())
    # The pipe is at end-of-stream; reap the ffmpeg child instead of leaving
    # it unwaited.
    process.wait()

    subs = []
    for res in results:
        jres = json.loads(res)
        # Results without a 'result' key carry no word list to subtitle.
        if 'result' not in jres:
            continue
        words = jres['result']
        for j in range(0, len(words), WORDS_PER_LINE):
            line = words[j:j + WORDS_PER_LINE]
            subs.append(srt.Subtitle(
                index=len(subs),
                content=" ".join(word['word'] for word in line),
                start=datetime.timedelta(seconds=line[0]['start']),
                end=datetime.timedelta(seconds=line[-1]['end'])))
    return subs
0070 
# Run the recognition, render the subtitles as SRT text and store them in
# the output file given as the fourth argument.
subtitle = srt.compose(transcribe())
with open(sys.argv[4], 'w', encoding='utf8') as f:
    # compose() returns one string, so a single write() is the right call;
    # the with-block closes the file on exit.
    f.write(subtitle)