File indexing completed on 2024-05-26 04:59:32
0001 /* 0002 SPDX-FileCopyrightText: 2007-2009 Sergio Pistone <sergio_pistone@yahoo.com.ar> 0003 SPDX-FileCopyrightText: 2010-2022 Mladen Milinkovic <max@smoothware.net> 0004 0005 SPDX-License-Identifier: GPL-2.0-or-later 0006 */ 0007 0008 #include "config.h" 0009 0010 #include "formatmanager.h" 0011 #include "inputformat.h" 0012 #include "outputformat.h" 0013 #include "gui/treeview/lineswidget.h" 0014 #include "application.h" 0015 #include "helpers/fileloadhelper.h" 0016 #include "helpers/filesavehelper.h" 0017 #include "dialogs/encodingdetectdialog.h" 0018 #include "scconfig.h" 0019 0020 #include "microdvd/microdvdinputformat.h" 0021 #include "microdvd/microdvdoutputformat.h" 0022 #include "mplayer/mplayerinputformat.h" 0023 #include "mplayer/mplayeroutputformat.h" 0024 #include "mplayer2/mplayer2inputformat.h" 0025 #include "mplayer2/mplayer2outputformat.h" 0026 #include "subrip/subripinputformat.h" 0027 #include "subrip/subripoutputformat.h" 0028 #include "substationalpha/substationalphainputformat.h" 0029 #include "substationalpha/substationalphaoutputformat.h" 0030 #include "subviewer1/subviewer1inputformat.h" 0031 #include "subviewer1/subviewer1outputformat.h" 0032 #include "subviewer2/subviewer2inputformat.h" 0033 #include "subviewer2/subviewer2outputformat.h" 0034 #include "tmplayer/tmplayerinputformat.h" 0035 #include "tmplayer/tmplayeroutputformat.h" 0036 #include "vobsub/vobsubinputformat.h" 0037 #include "webvtt/webvttinputformat.h" 0038 #include "webvtt/webvttoutputformat.h" 0039 #include "youtubecaptions/youtubecaptionsinputformat.h" 0040 #include "youtubecaptions/youtubecaptionsoutputformat.h" 0041 0042 #include <QFile> 0043 #include <QFileDevice> 0044 #include <QFileInfo> 0045 #include <QTextCodec> 0046 0047 #include <QUrl> 0048 0049 #ifdef HAVE_ICU 0050 # include <unicode/ucsdet.h> 0051 #endif 0052 0053 using namespace SubtitleComposer; 0054 0055 FormatManager & 0056 FormatManager::instance() 0057 { 0058 static FormatManager instance; 0059 return instance; 0060 } 0061 0062 #define INPUT_FORMAT(fmt) { InputFormat *f = new fmt##InputFormat(); m_inputFormats[f->name()] = f; } 0063 #define OUTPUT_FORMAT(fmt) { OutputFormat *f = new fmt##OutputFormat(); m_outputFormats[f->name()] = f; } 0064 #define IN_OUT_FORMAT(fmt) INPUT_FORMAT(fmt) OUTPUT_FORMAT(fmt) 0065 0066 FormatManager::FormatManager() 0067 { 0068 IN_OUT_FORMAT(WebVTT) 0069 IN_OUT_FORMAT(SubRip) 0070 IN_OUT_FORMAT(MicroDVD) 0071 IN_OUT_FORMAT(MPlayer) 0072 IN_OUT_FORMAT(MPlayer2) 0073 IN_OUT_FORMAT(SubStationAlpha) 0074 IN_OUT_FORMAT(AdvancedSubStationAlpha) 0075 IN_OUT_FORMAT(SubViewer1) 0076 IN_OUT_FORMAT(SubViewer2) 0077 IN_OUT_FORMAT(TMPlayer) 0078 IN_OUT_FORMAT(TMPlayerPlus) 0079 IN_OUT_FORMAT(YouTubeCaptions) 0080 INPUT_FORMAT(VobSub) 0081 } 0082 0083 FormatManager::~FormatManager() 0084 { 0085 for(QMap<QString, InputFormat *>::ConstIterator it = m_inputFormats.constBegin(), end = m_inputFormats.constEnd(); it != end; ++it) 0086 delete it.value(); 0087 0088 for(QMap<QString, OutputFormat *>::ConstIterator it = m_outputFormats.constBegin(), end = m_outputFormats.constEnd(); it != end; ++it) 0089 delete it.value(); 0090 } 0091 0092 bool 0093 FormatManager::hasInput(const QString &name) const 0094 { 0095 return m_inputFormats.contains(name); 0096 } 0097 0098 const InputFormat * 0099 FormatManager::input(const QString &name) const 0100 { 0101 QMap<QString, InputFormat *>::const_iterator it = m_inputFormats.find(name); 0102 return it != m_inputFormats.end() ? it.value() : nullptr; 0103 } 0104 0105 QStringList 0106 FormatManager::inputNames() const 0107 { 0108 return m_inputFormats.keys(); 0109 } 0110 0111 inline static QTextCodec * 0112 detectEncoding(const QByteArray &byteData) 0113 { 0114 EncodingDetectDialog dlg(byteData); 0115 0116 #ifdef HAVE_ICU 0117 UErrorCode status = U_ZERO_ERROR; 0118 UCharsetDetector *csd = ucsdet_open(&status); 0119 ucsdet_setText(csd, byteData.data(), byteData.length(), &status); 0120 int32_t matchesFound = 0; 0121 const UCharsetMatch **ucms = ucsdet_detectAll(csd, &matchesFound, &status); 0122 for(int index = 0; index < matchesFound; ++index) { 0123 int confidence = ucsdet_getConfidence(ucms[index], &status); 0124 QTextCodec *codec = QTextCodec::codecForName(ucsdet_getName(ucms[index], &status)); 0125 if(codec) { 0126 if(confidence == 100) 0127 return codec; 0128 dlg.addEncoding(codec->name(), confidence); 0129 } 0130 } 0131 ucsdet_close(csd); 0132 #else 0133 KEncodingProber prober(KEncodingProber::Universal); 0134 prober.feed(byteData); 0135 QTextCodec *codec = QTextCodec::codecForName(prober.encoding()); 0136 if(codec) { 0137 if(prober.confidence() >= 1.) 0138 return codec; 0139 dlg.addEncoding(codec->name(), prober.confidence() * 100.); 0140 } 0141 #endif 0142 0143 if(dlg.exec() == QDialog::Accepted) 0144 return QTextCodec::codecForName(dlg.selectedEncoding().toUtf8()); 0145 0146 return nullptr; 0147 } 0148 0149 0150 FormatManager::Status 0151 FormatManager::readBinary(Subtitle &subtitle, const QUrl &url, bool primary, 0152 QTextCodec **codec, QString *formatName) const 0153 { 0154 foreach(InputFormat *format, m_inputFormats) { 0155 QExplicitlySharedDataPointer<Subtitle> newSubtitle(new Subtitle()); 0156 Status res = format->readBinary(*newSubtitle, url); 0157 if(res == ERROR) 0158 continue; 0159 if(res == SUCCESS) { 0160 if(formatName) 0161 *formatName = format->name(); 0162 *codec = QTextCodec::codecForName(SCConfig::defaultSubtitlesEncoding().toUtf8()); 0163 if(primary) 0164 subtitle.setPrimaryData(*newSubtitle, true); 0165 else 0166 subtitle.setSecondaryData(*newSubtitle, true); 0167 } 0168 return res; 0169 } 0170 return ERROR; 0171 } 0172 0173 FormatManager::Status 0174 FormatManager::readText(Subtitle &subtitle, const QUrl &url, bool primary, 0175 QTextCodec **codec, QString *formatName) const 0176 { 0177 FileLoadHelper fileLoadHelper(url); 0178 if(!fileLoadHelper.open()) 0179 return ERROR; 0180 // WARNING: only 1MB of text subtitle is being read here 0181 QByteArray byteData = fileLoadHelper.file()->read(1024 * 1024); 0182 fileLoadHelper.close(); 0183 0184 QString stringData; 0185 if(!codec) { 0186 // don't care about text nor text encoding 0187 stringData = QString::fromLatin1(byteData); 0188 } else { 0189 if(!*codec) { 0190 QTextCodec *c = detectEncoding(byteData); 0191 if(!c) 0192 return CANCEL; 0193 *codec = c; 0194 } 0195 if(*codec) 0196 stringData = (*codec)->toUnicode(byteData); 0197 } 0198 0199 stringData.replace(QLatin1String("\r\n"), QLatin1String("\n")); 0200 stringData.replace('\r', '\n'); 0201 0202 const QString extension = QFileInfo(url.path()).suffix(); 0203 0204 // attempt to parse subtitles based on extension information first 0205 for(QMap<QString, InputFormat *>::ConstIterator it = m_inputFormats.begin(), end = m_inputFormats.end(); it != end; ++it) { 0206 if(it.value()->knowsExtension(extension)) { 0207 if(it.value()->readSubtitle(subtitle, primary, stringData)) { 0208 if(formatName) 0209 *formatName = it.value()->name(); 0210 return SUCCESS; 0211 } 0212 } 0213 } 0214 0215 // attempt to parse subtitles based on content 0216 for(QMap<QString, InputFormat *>::ConstIterator it = m_inputFormats.begin(), end = m_inputFormats.end(); it != end; ++it) { 0217 if(!it.value()->knowsExtension(extension)) { 0218 if(it.value()->readSubtitle(subtitle, primary, stringData)) { 0219 if(formatName) 0220 *formatName = it.value()->name(); 0221 return SUCCESS; 0222 } 0223 } 0224 } 0225 0226 return ERROR; 0227 } 0228 0229 FormatManager::Status 0230 FormatManager::readSubtitle(Subtitle &subtitle, bool primary, const QUrl &url, 0231 QTextCodec **codec, QString *formatName) const 0232 { 0233 Status res = readBinary(subtitle, url, primary, codec, formatName); 0234 if(res != ERROR) // when SUCCESS or CANCEL no need to try text formats 0235 return res; 0236 0237 return readText(subtitle, url, primary, codec, formatName); 0238 } 0239 0240 bool 0241 FormatManager::hasOutput(const QString &name) const 0242 { 0243 return m_outputFormats.contains(name); 0244 } 0245 0246 const OutputFormat * 0247 FormatManager::output(const QString &name) const 0248 { 0249 return m_outputFormats.contains(name) ? m_outputFormats[name] : nullptr; 0250 } 0251 0252 const OutputFormat * 0253 FormatManager::defaultOutput() const 0254 { 0255 return output(QStringLiteral("SubRip")); 0256 } 0257 0258 QStringList 0259 FormatManager::outputNames() const 0260 { 0261 return m_outputFormats.keys(); 0262 } 0263 0264 bool 0265 FormatManager::writeSubtitle(const Subtitle &subtitle, bool primary, const QUrl &url, 0266 QTextCodec *codec, const QString &formatName, bool overwrite) const 0267 { 0268 const OutputFormat *format = output(formatName); 0269 if(format == nullptr) { 0270 QString extension = QFileInfo(url.path()).suffix(); 0271 // attempt find format based on extension information 0272 for(QMap<QString, OutputFormat *>::ConstIterator it = m_outputFormats.begin(), end = m_outputFormats.end(); it != end; ++it) 0273 if(it.value()->knowsExtension(extension)) { 0274 format = *it; 0275 break; 0276 } 0277 } 0278 0279 if(format == nullptr) 0280 return false; 0281 0282 FileSaveHelper fileSaveHelper(url, overwrite); 0283 if(!fileSaveHelper.open()) 0284 return false; 0285 0286 QFileDevice *file = fileSaveHelper.file(); 0287 QString data = format->writeSubtitle(subtitle, primary); 0288 if(codec->name().startsWith("UTF-") || codec->name().contains("UCS-")) 0289 data.prepend(QChar::ByteOrderMark); 0290 switch(SCConfig::textLineBreak()) { 0291 case 1: // CRLF 0292 file->write(codec->fromUnicode(data.replace(QChar::LineFeed, QLatin1String("\r\n")))); 0293 break; 0294 case 2: // CR 0295 file->write(codec->fromUnicode(data.replace(QChar::LineFeed, QChar::CarriageReturn))); 0296 break; 0297 default: // LF 0298 file->write(codec->fromUnicode(data)); 0299 break; 0300 } 0301 0302 return fileSaveHelper.close(); 0303 }