File indexing completed on 2024-04-28 08:44:23

0001 /*
0002     SPDX-FileCopyrightText: 2021 Jean-Baptiste Mardelle <jb@kdenlive.org>
0003     SPDX-FileCopyrightText: 2022 Julius Künzel <jk.kdedev@smartlab.uber.space>
0004 
0005     SPDX-License-Identifier: GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
0006 */
0007 
0008 #pragma once
0009 
0010 #include "abstractpythoninterface.h"
0011 
0012 #include <QObject>
0013 #include <QProcess>
0014 
0015 class SpeechToText: public AbstractPythonInterface // Speech-to-text interface derived from AbstractPythonInterface; the recognition engine is selected per instance through EngineType.
0016 {
0017     Q_OBJECT // Required by Qt's meta-object compiler for the signals declared under Q_SIGNALS below.
0018 public:
0019     enum class EngineType { EngineVosk = 0, EngineWhisper = 1 }; ///< Recognition back ends this class can be constructed for; the numeric values are pinned explicitly (presumably persisted or exchanged elsewhere — TODO confirm).
0020     SpeechToText(EngineType engineType = EngineType::EngineVosk, QObject *parent = nullptr); ///< Both arguments are optional: the engine defaults to Vosk and the parent to nullptr. NOTE(review): not `explicit`, so an EngineType value converts implicitly.
0021     QString runSubtitleScript(QString modelDirectory, QString language, QString audio, QString speech); ///< Takes four strings by value and returns a QString; presumably runs the subtitle script on `audio` and writes to `speech` — TODO confirm meaning of arguments and return value against the .cpp.
0022     QString subtitleScript(); ///< Returns a QString; presumably the name or path of the subtitle-generation script — TODO confirm.
0023     QString speechScript(); ///< Returns a QString; presumably the name or path of the speech-recognition script — TODO confirm.
0024     QString voskModelPath(); ///< Returns a QString; presumably the directory holding Vosk models — TODO confirm.
0025     QStringList parseVoskDictionaries(); ///< Returns a list of strings; presumably the Vosk dictionaries that were found — TODO confirm what each entry contains.
0026     static QList<std::pair<QString, QString>> whisperModels(); ///< Static: returns an ordered list of string pairs describing Whisper models; which element is the display name and which the identifier is not visible here — verify in the .cpp.
0027     static QMap<QString, QString> whisperLanguages(); ///< Static: returns a string-to-string map of Whisper languages; key/value roles (code vs. name) are not visible here — verify in the .cpp.
0028 
0029 protected:
0030     QString featureName() override; ///< Overrides a virtual declared in a base class; returns a QString naming this feature.
0031 
0032 private:
0033     EngineType m_engineType; ///< Engine type for this instance; presumably set from the constructor argument — TODO confirm.
0034 
0035 public Q_SLOTS: // NOTE(review): this section declares no slots.
0036 
0037 Q_SIGNALS:
0038     void subtitleProgressUpdate(int); ///< Carries a single unnamed int; presumably a progress value — TODO confirm unit/range at the emit site.
0039     void subtitleFinished(int exitCode, QProcess::ExitStatus exitStatus); ///< Same parameter list as QProcess::finished(int, QProcess::ExitStatus).
0040 
0041 };