plugins/vosk/voskspeechtotextdevice.cpp

0001 /*
0002   SPDX-FileCopyrightText: 2023-2024 Laurent Montel <montel.org>
0003
0004   SPDX-License-Identifier: GPL-2.0-or-later
0005   based on VoiceAssistant plugin code
0006 */
0007
0008 #include "voskspeechtotextdevice.h"
0009 #include "libvoskspeechtotext_debug.h"
0010 #if HAVE_VOSK_API_SUPPORT
0011 #include "vosk_api.h"
0012 #endif
0013 #include <QJsonDocument>
0014
0015 VoskSpeechToTextDevice::VoskSpeechToTextDevice(QObject *parent)
0016     : QIODevice{parent}
0017 {
0018     if (!open(QIODevice::ReadWrite)) {
0019         qCWarning(LIBVOSKSPEECHTOTEXT_LOG) << "Impossible to open VoskSpeechToTextDevice";
0020 #if HAVE_VOSK_API_SUPPORT
0021         vosk_set_log_level(-1);
0022 #endif
0023     }
0024 }
0025
0026 VoskSpeechToTextDevice::~VoskSpeechToTextDevice()
0027 {
0028 #if HAVE_VOSK_API_SUPPORT
0029     vosk_recognizer_free(mRecognizer);
0030     vosk_model_free(mModel);
0031 #endif
0032 }
0033
0034 bool VoskSpeechToTextDevice::available() const
0035 {
0036 #if HAVE_VOSK_API_SUPPORT
0037     return true;
0038 #else
0039     return false;
0040 #endif
0041 }
0042
0043 bool VoskSpeechToTextDevice::isAsking() const
0044 {
0045     return mIsAsking;
0046 }
0047
0048 void VoskSpeechToTextDevice::setAsking(bool asking)
0049 {
0050     if (mIsAsking != asking) {
0051         mIsAsking = asking;
0052         Q_EMIT askingChanged();
0053     }
0054 }
0055
0056 bool VoskSpeechToTextDevice::initialize(VoskSpeechToTextDeviceInfo &&info)
0057 {
0058 #if HAVE_VOSK_API_SUPPORT
0059     mModel = vosk_model_new(QString(info.modelDir + info.formattedLang).toUtf8().constData());
0060     if (mModel) {
0061         mRecognizer = vosk_recognizer_new(mModel, info.sampleRate);
0062     }
0063
0064     if (!mModel || !mRecognizer) {
0065         return false;
0066     }
0067 #endif
0068     return true;
0069 }
0070
0071 void VoskSpeechToTextDevice::clear()
0072 {
0073 #if HAVE_VOSK_API_SUPPORT
0074     if (mRecognizer) {
0075         vosk_recognizer_reset(mRecognizer);
0076     }
0077 #endif
0078 }
0079
0080 qint64 VoskSpeechToTextDevice::readData(char *data, qint64 maxlen)
0081 {
0082     Q_UNUSED(data);
0083     return maxlen;
0084 }
0085
0086 qint64 VoskSpeechToTextDevice::writeData(const char *data, qint64 len)
0087 {
0088 #if HAVE_VOSK_API_SUPPORT
0089     if (vosk_recognizer_accept_waveform(mRecognizer, data, (int)len)) {
0090         parseText(vosk_recognizer_result(mRecognizer));
0091     } else {
0092         parsePartial(vosk_recognizer_partial_result(mRecognizer));
0093     }
0094 #else
0095     Q_UNUSED(data);
0096 #endif
0097     return len;
0098 }
0099
0100 void VoskSpeechToTextDevice::parseText(const char *json)
0101 {
0102     const QJsonDocument obj = QJsonDocument::fromJson(json);
0103     QString text = obj[QStringLiteral("text")].toString();
0104
0105     if (text.isEmpty())
0106         return;
0107     else if (mIsAsking) {
0108         Q_EMIT result(text);
0109         return;
0110     }
0111
0112     text.append(u' ');
0113
0114     if (!text.contains(mWakeWord)) {
0115         if (!mIsListiningBecauseOfWakeWord)
0116             return;
0117
0118         Q_EMIT falsePositiveWakeWord();
0119         mIsListiningBecauseOfWakeWord = false;
0120         return;
0121     }
0122
0123     text = text.mid(text.indexOf(mWakeWord) + mWakeWord.size());
0124     text = text.trimmed();
0125
0126     Q_EMIT result(text);
0127     qDebug() << "[debug] Text:" << text;
0128     Q_EMIT doneListening();
0129 }
0130
0131 void VoskSpeechToTextDevice::parsePartial(const char *json)
0132 {
0133     const QJsonDocument obj = QJsonDocument::fromJson(json);
0134     QString text = obj[QStringLiteral("partial")].toString();
0135     if (text.isEmpty())
0136         return;
0137     text.append(u' ');
0138
0139     if (text.contains(mWakeWord)) {
0140         Q_EMIT wakeWordDetected();
0141         text = text.mid(text.indexOf(mWakeWord) + mWakeWord.size());
0142         mIsListiningBecauseOfWakeWord = true;
0143     } else if (mIsListiningBecauseOfWakeWord) {
0144         Q_EMIT falsePositiveWakeWord();
0145         mIsListiningBecauseOfWakeWord = false;
0146         return;
0147     } else if (!mIsAsking)
0148         return;
0149
0150     Q_EMIT result(text);
0151 }
0152
0153 QDebug operator<<(QDebug d, const VoskSpeechToTextDevice::VoskSpeechToTextDeviceInfo &t)
0154 {
0155     d.space() << "sampleRate" << t.sampleRate;
0156     d.space() << "modelDir" << t.modelDir;
0157     d.space() << "formattedLang" << t.formattedLang;
0158     return d;
0159 }
0160
0161 #include "moc_voskspeechtotextdevice.cpp"