File indexing completed on 2024-12-22 04:28:23
0001 /* 0002 SPDX-FileCopyrightText: 2023-2024 Laurent Montel <montel.org> 0003 0004 SPDX-License-Identifier: GPL-2.0-or-later 0005 based on VoiceAssistant plugin code 0006 */ 0007 0008 #include "voskspeechtotextdevice.h" 0009 #include "libvoskspeechtotext_debug.h" 0010 #if HAVE_VOSK_API_SUPPORT 0011 #include "vosk_api.h" 0012 #endif 0013 #include <QJsonDocument> 0014 0015 VoskSpeechToTextDevice::VoskSpeechToTextDevice(QObject *parent) 0016 : QIODevice{parent} 0017 { 0018 if (!open(QIODevice::ReadWrite)) { 0019 qCWarning(LIBVOSKSPEECHTOTEXT_LOG) << "Impossible to open VoskSpeechToTextDevice"; 0020 #if HAVE_VOSK_API_SUPPORT 0021 vosk_set_log_level(-1); 0022 #endif 0023 } 0024 } 0025 0026 VoskSpeechToTextDevice::~VoskSpeechToTextDevice() 0027 { 0028 #if HAVE_VOSK_API_SUPPORT 0029 vosk_recognizer_free(mRecognizer); 0030 vosk_model_free(mModel); 0031 #endif 0032 } 0033 0034 bool VoskSpeechToTextDevice::available() const 0035 { 0036 #if HAVE_VOSK_API_SUPPORT 0037 return true; 0038 #else 0039 return false; 0040 #endif 0041 } 0042 0043 bool VoskSpeechToTextDevice::isAsking() const 0044 { 0045 return mIsAsking; 0046 } 0047 0048 void VoskSpeechToTextDevice::setAsking(bool asking) 0049 { 0050 if (mIsAsking != asking) { 0051 mIsAsking = asking; 0052 Q_EMIT askingChanged(); 0053 } 0054 } 0055 0056 bool VoskSpeechToTextDevice::initialize(VoskSpeechToTextDeviceInfo &&info) 0057 { 0058 #if HAVE_VOSK_API_SUPPORT 0059 mModel = vosk_model_new(QString(info.modelDir + info.formattedLang).toUtf8().constData()); 0060 if (mModel) { 0061 mRecognizer = vosk_recognizer_new(mModel, info.sampleRate); 0062 } 0063 0064 if (!mModel || !mRecognizer) { 0065 return false; 0066 } 0067 #endif 0068 return true; 0069 } 0070 0071 void VoskSpeechToTextDevice::clear() 0072 { 0073 #if HAVE_VOSK_API_SUPPORT 0074 if (mRecognizer) { 0075 vosk_recognizer_reset(mRecognizer); 0076 } 0077 #endif 0078 } 0079 0080 qint64 VoskSpeechToTextDevice::readData(char *data, qint64 maxlen) 0081 { 0082 Q_UNUSED(data); 0083 return maxlen; 0084 } 0085 0086 qint64 VoskSpeechToTextDevice::writeData(const char *data, qint64 len) 0087 { 0088 #if HAVE_VOSK_API_SUPPORT 0089 if (vosk_recognizer_accept_waveform(mRecognizer, data, (int)len)) { 0090 parseText(vosk_recognizer_result(mRecognizer)); 0091 } else { 0092 parsePartial(vosk_recognizer_partial_result(mRecognizer)); 0093 } 0094 #else 0095 Q_UNUSED(data); 0096 #endif 0097 return len; 0098 } 0099 0100 void VoskSpeechToTextDevice::parseText(const char *json) 0101 { 0102 const QJsonDocument obj = QJsonDocument::fromJson(json); 0103 QString text = obj[QStringLiteral("text")].toString(); 0104 0105 if (text.isEmpty()) 0106 return; 0107 else if (mIsAsking) { 0108 Q_EMIT result(text); 0109 return; 0110 } 0111 0112 text.append(u' '); 0113 0114 if (!text.contains(mWakeWord)) { 0115 if (!mIsListiningBecauseOfWakeWord) 0116 return; 0117 0118 Q_EMIT falsePositiveWakeWord(); 0119 mIsListiningBecauseOfWakeWord = false; 0120 return; 0121 } 0122 0123 text = text.mid(text.indexOf(mWakeWord) + mWakeWord.size()); 0124 text = text.trimmed(); 0125 0126 Q_EMIT result(text); 0127 qDebug() << "[debug] Text:" << text; 0128 Q_EMIT doneListening(); 0129 } 0130 0131 void VoskSpeechToTextDevice::parsePartial(const char *json) 0132 { 0133 const QJsonDocument obj = QJsonDocument::fromJson(json); 0134 QString text = obj[QStringLiteral("partial")].toString(); 0135 if (text.isEmpty()) 0136 return; 0137 text.append(u' '); 0138 0139 if (text.contains(mWakeWord)) { 0140 Q_EMIT wakeWordDetected(); 0141 text = text.mid(text.indexOf(mWakeWord) + mWakeWord.size()); 0142 mIsListiningBecauseOfWakeWord = true; 0143 } else if (mIsListiningBecauseOfWakeWord) { 0144 Q_EMIT falsePositiveWakeWord(); 0145 mIsListiningBecauseOfWakeWord = false; 0146 return; 0147 } else if (!mIsAsking) 0148 return; 0149 0150 Q_EMIT result(text); 0151 } 0152 0153 QDebug operator<<(QDebug d, const VoskSpeechToTextDevice::VoskSpeechToTextDeviceInfo &t) 0154 { 0155 d.space() << "sampleRate" << t.sampleRate; 0156 d.space() << "modelDir" << t.modelDir; 0157 d.space() << "formattedLang" << t.formattedLang; 0158 return d; 0159 } 0160 0161 #include "moc_voskspeechtotextdevice.cpp"