File indexing completed on 2024-12-22 04:40:09

0001 /*
0002     SPDX-FileCopyrightText: 2021-2022 Mladen Milinkovic <max@smoothware.net>
0003 
0004     SPDX-License-Identifier: GPL-2.0-or-later
0005 */
0006 
0007 #include "webvttinputformat.h"
0008 
0009 #include "core/richtext/richcss.h"
0010 #include "core/richtext/richdocument.h"
0011 #include "core/subtitle.h"
0012 #include "core/subtitleline.h"
0013 #include "helpers/common.h"
0014 
0015 #include <QMap>
0016 #include <QRegularExpression>
0017 #include <QVector>
0018 
0019 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
0020 // QStringView use is unoptimized in Qt5, and some methods are missing pre 5.15
0021 #include <QStringRef>
0022 #define QStringView(x) QStringRef(&(x))
0023 #define QStringView_ QStringRef
0024 #define capturedView capturedRef
0025 #else
0026 #include <QStringView>
0027 #define QStringView_ QStringView
0028 #endif
0029 
0030 
0031 using namespace SubtitleComposer;
0032 
0033 WebVTTInputFormat::WebVTTInputFormat()
0034     : InputFormat($("WebVTT"), QStringList($("vtt")))
0035 {
0036 }
0037 
0038 static int
0039 skipTextLine(const QString &str, int off)
0040 {
0041     for(; off < str.length(); off++) {
0042         if(str.at(off) == QChar::LineFeed)
0043             return off + 1;
0044     }
0045     return str.length();
0046 }
0047 
0048 static int
0049 skipTextBlock(const QString &str, int off)
0050 {
0051     for(bool prevLF = false; off < str.length(); off++) {
0052         const bool curLF = str.at(off) == QChar::LineFeed;
0053         if(prevLF && curLF)
0054             return off + 1;
0055         prevLF = curLF;
0056     }
0057     return str.length();
0058 }
0059 
0060 typedef bool (*charCompare)(QChar ch);
0061 
0062 inline static int
0063 skipChar(QStringView_ text, int off, const charCompare &cf)
0064 {
0065     auto it = text.cbegin() + off;
0066     const auto end = text.cend();
0067     while(it != end && cf(*it))
0068         it++;
0069     return it - text.cbegin();
0070 }
0071 
0072 void
0073 parseCueSettings(SubtitleLine *line, QStringView_ css)
0074 {
0075     // https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API#cue_settings
0076     QMap<QByteArray, QStringView_> settings;
0077     int off = 0;
0078     while(off < css.size()) {
0079         off = skipChar(css, off, [](QChar c){ return c == QChar::Space || c == QChar::Tabulation; });
0080         int end = skipChar(css, off, [](QChar c){ return c != QChar(':'); });
0081         const QStringView_ key = css.mid(off, end - off);
0082         off = end + 1;
0083         end = skipChar(css, off, [](QChar c){ return c != QChar::Space && c != QChar::Tabulation; });
0084         const QStringView_ val = css.mid(off, end - off);
0085         if(!key.isEmpty() && !val.isEmpty())
0086             settings.insert(key.toLatin1(), val);
0087         off = end + 1;
0088     }
0089 
0090     SubtitleRect p;
0091 
0092     // vertical:rl|lr
0093     const QStringView_ csVert = settings.value("vertical");
0094     p.vertical = !csVert.isEmpty();
0095 #if 0
0096     bool rl = p.vertical && csVert.compare(QByteArray("rl")) == 0; // vertical growing left/right (true/false)
0097 #endif
0098 
0099     // align:<start|center|end|left|right>
0100     const QStringView_ csAlign = settings.value("align");
0101     if(csAlign.isEmpty())
0102         p.hAlign = SubtitleRect::CENTER;
0103     else if(csAlign.compare($("start")) == 0 || csAlign.compare($("left")) == 0) // FIXME: start should consider RTL?
0104         p.hAlign = SubtitleRect::START;
0105     else if(csAlign.compare($("end")) == 0 || csAlign.compare($("right")) == 0) // FIXME: end should consider RTL?
0106         p.hAlign = SubtitleRect::END;
0107     else
0108         p.hAlign = SubtitleRect::CENTER;
0109 
0110     // size:<n>%
0111     QStringView_ csSize = settings.value("size");
0112     if(csSize.back() == QChar('%'))
0113         csSize.chop(1);
0114     else
0115         qWarning() << "size css is missing '%'";
0116     const float posSize = csSize.isEmpty() ? 100 : csSize.toFloat();
0117 
0118     // position:<nFloat>%[,line-left|center|line-right]
0119     const QStringView_ csPos = settings.value("position");
0120     float pos = 0;
0121     int posAnchor = p.hAlign; // FIXME: should consider RTL
0122     {
0123         int n = skipChar(csPos, 0, [](QChar c){ return c >= QChar('0') && c <= QChar('9'); });
0124         if(n) {
0125             pos = csPos.mid(0, n).toFloat();
0126             if(n < csPos.size() && csPos.at(n) == QChar('%')) {
0127                 qWarning() << "position css is missing '%'";
0128                 n++;
0129             }
0130             if(n < csPos.size() && csPos.at(n) == QChar(',')) {
0131                 n++;
0132                 if(csPos.mid(n).compare($("line-left")) == 0)
0133                     posAnchor = SubtitleRect::START;
0134                 else if(csPos.mid(n).compare($("center")) == 0)
0135                     posAnchor = SubtitleRect::CENTER;
0136                 else if(csPos.mid(n).compare($("line-right")) == 0)
0137                     posAnchor = SubtitleRect::END;
0138             }
0139         }
0140     }
0141 
0142     // line:<n>[%][,start|center|end]
0143 #if 0
0144     const QStringView_ csLine = settings.value("line");
0145     float lineOff = 0.f;
0146     bool lineSnap = true;
0147     int lineAlign = SubtitleRect::START;
0148     {
0149         int n = skipChar(csLine, 0, [](QChar c){ return c >= QChar('0') && c <= QChar('9'); });
0150         if(n) {
0151             lineOff = csLine.mid(0, n).toFloat();
0152             if(n < csLine.size() && csLine.at(n) == QChar('%')) {
0153                 lineSnap = false;
0154                 n++;
0155             }
0156             if(n < csLine.size() && csLine.at(n) == QChar(',')) {
0157                 n++;
0158                 if(csLine.mid(n).compare($("center")) == 0)
0159                     lineAlign = SubtitleRect::CENTER;
0160                 else if(csLine.mid(n).compare($("end")) == 0)
0161                     lineAlign = SubtitleRect::END;
0162             }
0163             p.vAlign = lineOff >= 0.f ? SubtitleRect::TOP : SubtitleRect::BOTTOM;
0164         } else {
0165             p.vAlign = SubtitleRect::BOTTOM;
0166         }
0167     }
0168 #else
0169     // FIXME: line aligment is bad
0170     float lineOff = 0.f;
0171     p.vAlign = SubtitleRect::BOTTOM;
0172 #endif
0173 
0174     if(p.vertical) {
0175         if(posAnchor == SubtitleRect::START) {
0176             p.top = pos;
0177             p.bottom = p.top + posSize;
0178         } else if(posAnchor == SubtitleRect::END) {
0179             p.bottom = 100.f - pos;
0180             p.top = p.bottom - posSize;
0181         } else { // posAnchor == SubtitleRect::CENTER
0182             p.top = pos - posSize / 2;
0183             p.bottom = p.top + posSize;
0184         }
0185         if(lineOff >= 0.f) {
0186             p.left = lineOff;
0187             p.right = 100.f;
0188         } else {
0189             p.left = 0.f;
0190             p.right = -lineOff;
0191         }
0192     } else {
0193         if(posAnchor == SubtitleRect::START) {
0194             p.left = pos;
0195             p.right = p.left + posSize;
0196         } else if(posAnchor == SubtitleRect::END) {
0197             p.right = 100.f - pos;
0198             p.left = p.right - posSize;
0199         } else { // posAnchor == SubtitleRect::CENTER
0200             p.left = pos - posSize / 2;
0201             p.right = p.left + posSize;
0202         }
0203         if(lineOff >= 0.f) {
0204             p.top = lineOff;
0205             p.bottom = 100.f;
0206         } else {
0207             p.top = 0.f;
0208             p.bottom = -lineOff;
0209         }
0210     }
0211 
0212     line->setPosition(p);
0213 }
0214 
0215 bool
0216 WebVTTInputFormat::parseSubtitles(Subtitle &subtitle, const QString &data) const
0217 {
0218     if(!data.startsWith($("WEBVTT")))
0219         return false;
0220 
0221     int off = skipTextBlock(data, 6);
0222     int end;
0223     const QStringView_ hdr = QStringView(data).mid(6, off - 6).trimmed();
0224     if(!hdr.isEmpty())
0225         subtitle.meta("comment.intro.0", hdr.toString());
0226 
0227     QVector<QStringView_> notes;
0228     staticRE$(reTime, "(?:([0-9]{2,}):)?([0-5][0-9]):([0-5][0-9])\\.([0-9]{3}) --> (?:([0-9]{2,}):)?([0-5][0-9]):([0-5][0-9])\\.([0-9]{3})\\b([^\\n]*)", REu);
0229 
0230     subtitle.stylesheetClear();
0231 
0232     // https://w3c.github.io/webvtt/
0233     while(off < data.length()) {
0234         if(QStringView(data).mid(off, 5) == $("STYLE")) {
0235             if(!notes.isEmpty()) { // store note before style
0236                 int noteId = 0;
0237                 for(const QStringView_ &note: notes)
0238                     subtitle.meta(QByteArray("comment.top.") + QByteArray::number(noteId++), note.toString());
0239                 notes.clear();
0240             }
0241             // NOTE: styles can't appear after first cue/line, even if we're not forbidding it
0242             end = skipTextBlock(data, off += 5);
0243             subtitle.stylesheetAppend(QStringView(data).mid(off, end - off).trimmed().toString());
0244             off = end;
0245             continue;
0246         }
0247         if(QStringView(data).mid(off, 4) == $("NOTE")) {
0248             end = skipTextBlock(data, off += 4);
0249             notes.push_back(QStringView(data).mid(off, end - off).trimmed());
0250             off = end;
0251             continue;
0252         }
0253         end = skipTextLine(data, off);
0254         QStringView_ cueId = QStringView(data).mid(off, end - off).trimmed();
0255         QStringView_ cueTime;
0256         off = end;
0257         if(cueId.contains($("-->"))) {
0258             cueTime = cueId;
0259             cueId = QStringView_();
0260         } else {
0261             end = skipTextLine(data, off);
0262             cueTime = QStringView(data).mid(off, end - off).trimmed();
0263             off = end;
0264         }
0265         QRegularExpressionMatch m = reTime.match(cueTime);
0266         if(!m.isValid()) {
0267             qWarning() << "Invalid WEBVTT subtitle";
0268             return false;
0269         }
0270 
0271         const Time showTime(m.capturedView(1).toInt(), m.capturedView(2).toInt(), m.capturedView(3).toInt(), m.capturedView(4).toInt());
0272         const Time hideTime(m.capturedView(5).toInt(), m.capturedView(6).toInt(), m.capturedView(7).toInt(), m.capturedView(8).toInt());
0273         QStringView_ cueSettings = m.capturedView(9);
0274 
0275         end = skipTextBlock(data, off);
0276         const QStringView_ cueText = QStringView(data).mid(off, end - off).trimmed();
0277         off = end;
0278 
0279         SubtitleLine *line = new SubtitleLine(showTime, hideTime);
0280         RichString stext;
0281         // TODO: handle voice/class tags
0282         // https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API#cue_payload_text_tags
0283         // TODO: handle pseudo classes
0284         // https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API#css_pseudo-classes
0285         stext.setRichString(cueText.toString());
0286         line->primaryDoc()->setRichText(stext, true);
0287 
0288         if(!notes.isEmpty()) {
0289             QString comment;
0290             for(const QStringView_ &note: notes) {
0291                 if(!comment.isEmpty())
0292                     comment.append(QChar::LineFeed);
0293                 comment.append(note);
0294             }
0295             notes.clear();
0296             line->meta("comment", comment);
0297         }
0298         if(!cueSettings.isEmpty())
0299             parseCueSettings(line, cueSettings);
0300         if(!cueId.isEmpty())
0301             line->meta("id", cueId.toString());
0302         subtitle.insertLine(line);
0303     }
0304 
0305     if(!notes.isEmpty()) {
0306         int noteId = 0;
0307         for(const QStringView_ &note: notes)
0308             subtitle.meta(QByteArray("comment.bottom.") + QByteArray::number(noteId++), note.toString());
0309         notes.clear();
0310     }
0311 
0312     return true;
0313 }