File indexing completed on 2024-04-28 08:44:23
0001 /* 0002 SPDX-FileCopyrightText: 2021 Jean-Baptiste Mardelle <jb@kdenlive.org> 0003 SPDX-FileCopyrightText: 2022 Julius Künzel <jk.kdedev@smartlab.uber.space> 0004 0005 SPDX-License-Identifier: GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL 0006 */ 0007 0008 #pragma once 0009 0010 #include "abstractpythoninterface.h" 0011 0012 #include <QObject> 0013 #include <QProcess> 0014 0015 class SpeechToText: public AbstractPythonInterface 0016 { 0017 Q_OBJECT 0018 public: 0019 enum class EngineType { EngineVosk = 0, EngineWhisper = 1 }; 0020 SpeechToText(EngineType engineType = EngineType::EngineVosk, QObject *parent = nullptr); 0021 QString runSubtitleScript(QString modelDirectory, QString language, QString audio, QString speech); 0022 QString subtitleScript(); 0023 QString speechScript(); 0024 QString voskModelPath(); 0025 QStringList parseVoskDictionaries(); 0026 static QList<std::pair<QString, QString>> whisperModels(); 0027 static QMap<QString, QString> whisperLanguages(); 0028 0029 protected: 0030 QString featureName() override; 0031 0032 private: 0033 EngineType m_engineType; 0034 0035 public Q_SLOTS: 0036 0037 Q_SIGNALS: 0038 void subtitleProgressUpdate(int); 0039 void subtitleFinished(int exitCode, QProcess::ExitStatus exitStatus); 0040 0041 };