File indexing completed on 2024-05-26 04:59:32

0001 /*
0002     SPDX-FileCopyrightText: 2007-2009 Sergio Pistone <sergio_pistone@yahoo.com.ar>
0003     SPDX-FileCopyrightText: 2010-2022 Mladen Milinkovic <max@smoothware.net>
0004 
0005     SPDX-License-Identifier: GPL-2.0-or-later
0006 */
0007 
0008 #include "config.h"
0009 
0010 #include "formatmanager.h"
0011 #include "inputformat.h"
0012 #include "outputformat.h"
0013 #include "gui/treeview/lineswidget.h"
0014 #include "application.h"
0015 #include "helpers/fileloadhelper.h"
0016 #include "helpers/filesavehelper.h"
0017 #include "dialogs/encodingdetectdialog.h"
0018 #include "scconfig.h"
0019 
0020 #include "microdvd/microdvdinputformat.h"
0021 #include "microdvd/microdvdoutputformat.h"
0022 #include "mplayer/mplayerinputformat.h"
0023 #include "mplayer/mplayeroutputformat.h"
0024 #include "mplayer2/mplayer2inputformat.h"
0025 #include "mplayer2/mplayer2outputformat.h"
0026 #include "subrip/subripinputformat.h"
0027 #include "subrip/subripoutputformat.h"
0028 #include "substationalpha/substationalphainputformat.h"
0029 #include "substationalpha/substationalphaoutputformat.h"
0030 #include "subviewer1/subviewer1inputformat.h"
0031 #include "subviewer1/subviewer1outputformat.h"
0032 #include "subviewer2/subviewer2inputformat.h"
0033 #include "subviewer2/subviewer2outputformat.h"
0034 #include "tmplayer/tmplayerinputformat.h"
0035 #include "tmplayer/tmplayeroutputformat.h"
0036 #include "vobsub/vobsubinputformat.h"
0037 #include "webvtt/webvttinputformat.h"
0038 #include "webvtt/webvttoutputformat.h"
0039 #include "youtubecaptions/youtubecaptionsinputformat.h"
0040 #include "youtubecaptions/youtubecaptionsoutputformat.h"
0041 
0042 #include <QFile>
0043 #include <QFileDevice>
0044 #include <QFileInfo>
0045 #include <QTextCodec>
0046 
0047 #include <QUrl>
0048 
0049 #ifdef HAVE_ICU
0050 #   include <unicode/ucsdet.h>
0051 #endif
0052 
0053 using namespace SubtitleComposer;
0054 
0055 FormatManager &
0056 FormatManager::instance()
0057 {
0058     static FormatManager instance;
0059     return instance;
0060 }
0061 
// Registration helpers. Each macro allocates the <fmt>InputFormat / <fmt>OutputFormat
// object and stores it in the matching member map under the object's own name().
// They expand to a brace-enclosed block, so invocations take no trailing semicolon.
#define INPUT_FORMAT(fmt) \
    { \
        InputFormat *created = new fmt##InputFormat(); \
        m_inputFormats[created->name()] = created; \
    }
#define OUTPUT_FORMAT(fmt) \
    { \
        OutputFormat *created = new fmt##OutputFormat(); \
        m_outputFormats[created->name()] = created; \
    }
// Registers both directions of a format: input first, then output.
#define IN_OUT_FORMAT(fmt) \
    INPUT_FORMAT(fmt) \
    OUTPUT_FORMAT(fmt)
0065 
0066 FormatManager::FormatManager()
0067 {
0068     IN_OUT_FORMAT(WebVTT)
0069     IN_OUT_FORMAT(SubRip)
0070     IN_OUT_FORMAT(MicroDVD)
0071     IN_OUT_FORMAT(MPlayer)
0072     IN_OUT_FORMAT(MPlayer2)
0073     IN_OUT_FORMAT(SubStationAlpha)
0074     IN_OUT_FORMAT(AdvancedSubStationAlpha)
0075     IN_OUT_FORMAT(SubViewer1)
0076     IN_OUT_FORMAT(SubViewer2)
0077     IN_OUT_FORMAT(TMPlayer)
0078     IN_OUT_FORMAT(TMPlayerPlus)
0079     IN_OUT_FORMAT(YouTubeCaptions)
0080     INPUT_FORMAT(VobSub)
0081 }
0082 
0083 FormatManager::~FormatManager()
0084 {
0085     for(QMap<QString, InputFormat *>::ConstIterator it = m_inputFormats.constBegin(), end = m_inputFormats.constEnd(); it != end; ++it)
0086         delete it.value();
0087 
0088     for(QMap<QString, OutputFormat *>::ConstIterator it = m_outputFormats.constBegin(), end = m_outputFormats.constEnd(); it != end; ++it)
0089         delete it.value();
0090 }
0091 
0092 bool
0093 FormatManager::hasInput(const QString &name) const
0094 {
0095     return m_inputFormats.contains(name);
0096 }
0097 
0098 const InputFormat *
0099 FormatManager::input(const QString &name) const
0100 {
0101     QMap<QString, InputFormat *>::const_iterator it = m_inputFormats.find(name);
0102     return it != m_inputFormats.end() ? it.value() : nullptr;
0103 }
0104 
0105 QStringList
0106 FormatManager::inputNames() const
0107 {
0108     return m_inputFormats.keys();
0109 }
0110 
0111 inline static QTextCodec *
0112 detectEncoding(const QByteArray &byteData)
0113 {
0114     EncodingDetectDialog dlg(byteData);
0115 
0116 #ifdef HAVE_ICU
0117     UErrorCode status = U_ZERO_ERROR;
0118     UCharsetDetector *csd = ucsdet_open(&status);
0119     ucsdet_setText(csd, byteData.data(), byteData.length(), &status);
0120     int32_t matchesFound = 0;
0121     const UCharsetMatch **ucms = ucsdet_detectAll(csd, &matchesFound, &status);
0122     for(int index = 0; index < matchesFound; ++index) {
0123         int confidence = ucsdet_getConfidence(ucms[index], &status);
0124         QTextCodec *codec = QTextCodec::codecForName(ucsdet_getName(ucms[index], &status));
0125         if(codec) {
0126             if(confidence == 100)
0127                 return codec;
0128             dlg.addEncoding(codec->name(), confidence);
0129         }
0130     }
0131     ucsdet_close(csd);
0132 #else
0133     KEncodingProber prober(KEncodingProber::Universal);
0134     prober.feed(byteData);
0135     QTextCodec *codec = QTextCodec::codecForName(prober.encoding());
0136     if(codec) {
0137         if(prober.confidence() >= 1.)
0138             return codec;
0139         dlg.addEncoding(codec->name(), prober.confidence() * 100.);
0140     }
0141 #endif
0142 
0143     if(dlg.exec() == QDialog::Accepted)
0144         return QTextCodec::codecForName(dlg.selectedEncoding().toUtf8());
0145 
0146     return nullptr;
0147 }
0148 
0149 
0150 FormatManager::Status
0151 FormatManager::readBinary(Subtitle &subtitle, const QUrl &url, bool primary,
0152                           QTextCodec **codec, QString *formatName) const
0153 {
0154     foreach(InputFormat *format, m_inputFormats) {
0155         QExplicitlySharedDataPointer<Subtitle> newSubtitle(new Subtitle());
0156         Status res = format->readBinary(*newSubtitle, url);
0157         if(res == ERROR)
0158             continue;
0159         if(res == SUCCESS) {
0160             if(formatName)
0161                 *formatName = format->name();
0162             *codec = QTextCodec::codecForName(SCConfig::defaultSubtitlesEncoding().toUtf8());
0163             if(primary)
0164                 subtitle.setPrimaryData(*newSubtitle, true);
0165             else
0166                 subtitle.setSecondaryData(*newSubtitle, true);
0167         }
0168         return res;
0169     }
0170     return ERROR;
0171 }
0172 
0173 FormatManager::Status
0174 FormatManager::readText(Subtitle &subtitle, const QUrl &url, bool primary,
0175                         QTextCodec **codec, QString *formatName) const
0176 {
0177     FileLoadHelper fileLoadHelper(url);
0178     if(!fileLoadHelper.open())
0179         return ERROR;
0180     // WARNING: only 1MB of text subtitle is being read here
0181     QByteArray byteData = fileLoadHelper.file()->read(1024 * 1024);
0182     fileLoadHelper.close();
0183 
0184     QString stringData;
0185     if(!codec) {
0186         // don't care about text nor text encoding
0187         stringData = QString::fromLatin1(byteData);
0188     } else {
0189         if(!*codec) {
0190             QTextCodec *c = detectEncoding(byteData);
0191             if(!c)
0192                 return CANCEL;
0193             *codec = c;
0194         }
0195         if(*codec)
0196             stringData = (*codec)->toUnicode(byteData);
0197     }
0198 
0199     stringData.replace(QLatin1String("\r\n"), QLatin1String("\n"));
0200     stringData.replace('\r', '\n');
0201 
0202     const QString extension = QFileInfo(url.path()).suffix();
0203 
0204     // attempt to parse subtitles based on extension information first
0205     for(QMap<QString, InputFormat *>::ConstIterator it = m_inputFormats.begin(), end = m_inputFormats.end(); it != end; ++it) {
0206         if(it.value()->knowsExtension(extension)) {
0207             if(it.value()->readSubtitle(subtitle, primary, stringData)) {
0208                 if(formatName)
0209                     *formatName = it.value()->name();
0210                 return SUCCESS;
0211             }
0212         }
0213     }
0214 
0215     // attempt to parse subtitles based on content
0216     for(QMap<QString, InputFormat *>::ConstIterator it = m_inputFormats.begin(), end = m_inputFormats.end(); it != end; ++it) {
0217         if(!it.value()->knowsExtension(extension)) {
0218             if(it.value()->readSubtitle(subtitle, primary, stringData)) {
0219                 if(formatName)
0220                     *formatName = it.value()->name();
0221                 return SUCCESS;
0222             }
0223         }
0224     }
0225 
0226     return ERROR;
0227 }
0228 
0229 FormatManager::Status
0230 FormatManager::readSubtitle(Subtitle &subtitle, bool primary, const QUrl &url,
0231                             QTextCodec **codec, QString *formatName) const
0232 {
0233     Status res = readBinary(subtitle, url, primary, codec, formatName);
0234     if(res != ERROR) // when SUCCESS or CANCEL no need to try text formats
0235         return res;
0236 
0237     return readText(subtitle, url, primary, codec, formatName);
0238 }
0239 
0240 bool
0241 FormatManager::hasOutput(const QString &name) const
0242 {
0243     return m_outputFormats.contains(name);
0244 }
0245 
0246 const OutputFormat *
0247 FormatManager::output(const QString &name) const
0248 {
0249     return m_outputFormats.contains(name) ? m_outputFormats[name] : nullptr;
0250 }
0251 
0252 const OutputFormat *
0253 FormatManager::defaultOutput() const
0254 {
0255     return output(QStringLiteral("SubRip"));
0256 }
0257 
0258 QStringList
0259 FormatManager::outputNames() const
0260 {
0261     return m_outputFormats.keys();
0262 }
0263 
0264 bool
0265 FormatManager::writeSubtitle(const Subtitle &subtitle, bool primary, const QUrl &url,
0266                              QTextCodec *codec, const QString &formatName, bool overwrite) const
0267 {
0268     const OutputFormat *format = output(formatName);
0269     if(format == nullptr) {
0270         QString extension = QFileInfo(url.path()).suffix();
0271         // attempt find format based on extension information
0272         for(QMap<QString, OutputFormat *>::ConstIterator it = m_outputFormats.begin(), end = m_outputFormats.end(); it != end; ++it)
0273             if(it.value()->knowsExtension(extension)) {
0274                 format = *it;
0275                 break;
0276             }
0277     }
0278 
0279     if(format == nullptr)
0280         return false;
0281 
0282     FileSaveHelper fileSaveHelper(url, overwrite);
0283     if(!fileSaveHelper.open())
0284         return false;
0285 
0286     QFileDevice *file = fileSaveHelper.file();
0287     QString data = format->writeSubtitle(subtitle, primary);
0288     if(codec->name().startsWith("UTF-") || codec->name().contains("UCS-"))
0289         data.prepend(QChar::ByteOrderMark);
0290     switch(SCConfig::textLineBreak()) {
0291     case 1: // CRLF
0292         file->write(codec->fromUnicode(data.replace(QChar::LineFeed, QLatin1String("\r\n"))));
0293         break;
0294     case 2: // CR
0295         file->write(codec->fromUnicode(data.replace(QChar::LineFeed, QChar::CarriageReturn)));
0296         break;
0297     default: // LF
0298         file->write(codec->fromUnicode(data));
0299         break;
0300     }
0301 
0302     return fileSaveHelper.close();
0303 }