File indexing completed on 2024-12-22 04:40:08

0001 /*
0002     SPDX-FileCopyrightText: 2017-2022 Mladen Milinkovic <max@smoothware.net>
0003 
0004     SPDX-License-Identifier: GPL-2.0-or-later
0005 */
0006 
0007 #ifndef VOBSUBINPUTFORMAT_H
0008 #define VOBSUBINPUTFORMAT_H
0009 
0010 #include"appglobal.h"
0011 #include "application.h"
0012 #include "core/richtext/richdocument.h"
0013 #include "formats/inputformat.h"
0014 #include "vobsubinputinitdialog.h"
0015 #include "vobsubinputprocessdialog.h"
0016 #include "streamprocessor/streamprocessor.h"
0017 
0018 #include <QUrl>
0019 #include <QFile>
0020 #include <QStringView>
0021 
0022 namespace SubtitleComposer {
0023 class VobSubInputFormat : public InputFormat
0024 {
0025     friend class FormatManager;
0026 
0027 public:
0028     bool isBinary() const override { return true; }
0029 
0030     FormatManager::Status readBinary(Subtitle &subtitle, const QUrl &url) override
0031     {
0032         QString filename = url.toLocalFile();
0033         const int extension = filename.lastIndexOf('.');
0034         const QByteArray filebase = filename.left(extension).toUtf8();
0035 
0036         if(QStringView(filename).mid(extension + 1) == QStringLiteral("sub")) {
0037             const QString filenameIdx = filebase + ".idx";
0038             if(QFile(filenameIdx).exists())
0039                 filename = filenameIdx;
0040         }
0041 
0042         // open the sub/idx subtitles
0043         StreamProcessor proc;
0044         if(!proc.open(filename))
0045             return FormatManager::ERROR;
0046 
0047         QStringList streamList = proc.listImage();
0048         if(streamList.empty())
0049             return FormatManager::ERROR;
0050 
0051         // show init dialog
0052         VobSubInputInitDialog dlgInit(app()->mainWindow());
0053         dlgInit.streamListSet(streamList);
0054         if(dlgInit.exec() == QDialog::Rejected)
0055             return FormatManager::CANCEL;
0056 
0057         if(!proc.initImage(dlgInit.streamIndex()))
0058             return FormatManager::ERROR;
0059 
0060         // subtitle updates will show in realtime
0061         LinesWidget *linesWidget = app()->linesWidget();
0062         Subtitle *oldSubtitle = linesWidget->model()->subtitle();
0063         linesWidget->setSubtitle(&subtitle);
0064 
0065         // show process dialog
0066         VobSubInputProcessDialog dlgProc(&subtitle, app()->mainWindow());
0067 
0068         dlgProc.processFrames(&proc);
0069 
0070         QByteArray symFile(filebase + ".sym");
0071 
0072         dlgProc.symFileOpen(symFile);
0073         const int dlgRes = dlgProc.exec();
0074         dlgProc.symFileSave(symFile);
0075         if(dlgRes == QDialog::Rejected) {
0076             // restore original subtitle
0077             linesWidget->setSubtitle(oldSubtitle);
0078             return FormatManager::CANCEL;
0079         }
0080 
0081         // TODO: move all these regexps into separate class that can be reused, make them static and optimize them after init
0082         quint32 ppFlags = dlgInit.postProcessingFlags();
0083         for(int i = 0, n = subtitle.count(); i < n; i++) {
0084             SubtitleLine *line = subtitle.at(i);
0085             RichString text = line->primaryDoc()->toRichText();
0086             if(ppFlags & VobSubInputInitDialog::APOSTROPHE_TO_QUOTES)
0087                 text
0088                     .replace(QRegularExpression(QStringLiteral("(?:"
0089                         "' *'" // double apostrophes ' ' => "
0090                         "|"
0091                         "\" *\"" // double quotes "" => "
0092                         ")")), QStringLiteral("\""));
0093 
0094             if(ppFlags & VobSubInputInitDialog::SPACE_PARENTHESES)
0095                 text
0096                     .replace(QRegularExpression(QStringLiteral("(?:"
0097                         " *['`]" // normalize apostrophes and remove leading space
0098                         "|"
0099                         "(?<=[A-ZÁ-Úa-zá-ú]) *['`] *(?=(ll|ve|s|m|d|t)\\b)" // remove space around apostrophe in: I'd, It's, He'll, ..
0100                         ")")), QStringLiteral("'"));
0101 
0102             if(ppFlags & VobSubInputInitDialog::SPACE_PUNCTUATION)
0103                 text
0104                     // remove space before/between, add it after punctuation
0105                     .replace(QRegularExpression(QStringLiteral(" *(?:([\\.,?!;:]) *)+")), QStringLiteral("\\1 "))
0106                     // ?. => ?, !. => !, :. => :
0107                     .replace(QRegularExpression(QStringLiteral("(?<=[?!:])\\.")), QString())
0108                     // ,, => ...; -- => ...
0109                     .replace(QRegularExpression(QStringLiteral("(?:,{2,}|-{2,})")), QStringLiteral("..."));
0110 
0111             if(ppFlags & VobSubInputInitDialog::SPACE_NUMBERS)
0112                 text
0113                     .replace(QRegularExpression(QStringLiteral("\\d[\\d,.]*\\K +(?=[\\d,.])")), QString()); // remove space between numbers
0114 
0115             if(ppFlags & VobSubInputInitDialog::CHARS_OCR)
0116                 text
0117                     .replace(QRegularExpression(QStringLiteral("\\d[,.]?\\KO")), QStringLiteral("0")) // uppercase O => zero 0
0118                     .replace(QRegularExpression(QStringLiteral("(?:[A-Z]\\K0|\\b0(?=A-Za-z))")), QStringLiteral("O")); // zero 0 => uppercase O
0119 
0120             if(ppFlags & VobSubInputInitDialog::SPACE_PARENTHESES)
0121                 text
0122                     // remove space inside parentheses
0123                     .replace(QRegularExpression(QStringLiteral("([\\(\\[\\{]\\K +| +(?=[\\]\\}\\)]))")), QStringLiteral(""))
0124                     // add space around parentheses
0125                     .replace(QRegularExpression(QStringLiteral("((?<!^|[ \n])(?=[\\(\\[\\{])|(?<=[\\]\\}\\)])(?!$|[ \\n]))")), QStringLiteral(" "))
0126                     // add space around, remove it from inside parentheses
0127                     .replace(QRegularExpression(QStringLiteral(" *\" *([^\"]+?) *\" *")), QStringLiteral(" \"\\1\" "));
0128 
0129             if(ppFlags & VobSubInputInitDialog::CHARS_OCR)
0130                 text
0131                     // fix roman numerals
0132                     .replace(QRegularExpression(QStringLiteral("\\b[VXLCDM]*\\K[lI]{3}\\b")), QStringLiteral("III"))
0133                     .replace(QRegularExpression(QStringLiteral("\\b[VXLCDM]*\\K[lI]{2}\\b")), QStringLiteral("II"))
0134                     .replace(QRegularExpression(QStringLiteral("\\b[VXLCDM]*\\Kl(?=[VXLCDM]*\\b)")), QStringLiteral("I"))
0135                     // replace II => ll
0136                     .replace(QRegularExpression(QStringLiteral("(?:[a-zá-ú]\\KII|II(?=[a-zá-ú]))")), QStringLiteral("ll"))
0137                     // replace I => l
0138                     .replace(QRegularExpression(QStringLiteral("(?:[a-zá-ú]\\KI(?=[a-zá-ú]|\\b)|\\bI(?=[oaeiuyá-ú])|\\b[A-ZÁ-Ú]\\KI(?=[a-zá-ú]))")), QStringLiteral("l"))
0139                     // replace l => I
0140                     .replace(QRegularExpression(QStringLiteral("(?:[A-ZÁ-Ú]{2,}\\Kl\\b|[A-ZÁ-Ú]\\Kl(?=[A-ZÁ-Ú])|\\bl\\b|\\bl(?=[^aeiouyàá-úl]))")), QStringLiteral("I"))
0141                     // replace 'II => 'll
0142                     .replace(QRegularExpression(QStringLiteral("[A-ZÁ-Úa-zá-ú]\\K *' *II\\b")), QStringLiteral("'ll"))
0143                     // exceptions l => I: Ian, Iowa, Ion, Iodine
0144                     .replace(QRegularExpression(QStringLiteral("\\bKl(?=(?:an|owa|ll)\\b|oni|odi|odo)")), QStringLiteral("I"))
0145                     // word Ill
0146                     .replace(QRegularExpression(QStringLiteral("(^|[.?!-\"] *)\\KIII\\b")), QStringLiteral("Ill"));
0147 
0148             // cleanup whitespace
0149             text.replace(QRegularExpression(QStringLiteral("(?: *(?=\\n)|(?<=\\n) *|^ *| *$| *(?= )|(?<= ) *)")), QStringLiteral(""));
0150             line->primaryDoc()->setRichText(text, true);
0151         }
0152 
0153         // restore original subtitle
0154         linesWidget->setSubtitle(oldSubtitle);
0155 
0156         return FormatManager::SUCCESS;
0157     }
0158 
0159 protected:
0160     bool parseSubtitles(Subtitle &, const QString &) const override
0161     {
0162         return false;
0163     }
0164 
0165     VobSubInputFormat()
0166         : InputFormat(QStringLiteral("DVD/BluRay Subpicture"), QStringList{
0167                       QStringLiteral("idx"),
0168                       QStringLiteral("sup"),
0169                       QStringLiteral("m2ts"),
0170                       QStringLiteral("vob"),
0171                       QStringLiteral("mkv"),
0172                       QStringLiteral("mp4")})
0173     {}
0174 
0175     QUrl m_url;
0176 };
0177 }
0178 
0179 #endif