File indexing completed on 2024-04-28 17:01:10

0001 // clang-format off
0002 /*
0003  * KDiff3 - Text Diff And Merge Tool
0004  *
0005  * SPDX-FileCopyrightText: 2002-2011 Joachim Eibl, joachim.eibl at gmx.de
0006  * SPDX-FileCopyrightText: 2018-2020 Michael Reeves reeves.87@gmail.com
0007  * SPDX-License-Identifier: GPL-2.0-or-later
0008  */
0009 // clang-format on
0010 
0011 /* Features of class SourceData:
0012 - Read a file (from the given URL) or accept data via a string.
0013 - Allocate and free buffers as necessary.
0014 - Run a preprocessor, when specified.
0015 - Run the line-matching preprocessor, when specified.
0016 - Run other preprocessing steps: Uppercase, ignore comments,
0017                                  remove carriage return, ignore numbers.
0018 
0019 Order of operation:
0020  1. If data was given via a string then save it to a temp file. (see setData())
0021  2. If the specified file is nonlocal (URL) copy it to a temp file. (TODO revisit this)
0022  3. If a preprocessor was specified, run the input file through it.
0023  4. Read the output of the preprocessor.
0024  5. If Uppercase was specified: Turn the read data to uppercase.
0025  6. Write the result to a temp file.
0026  7. If a line-matching preprocessor was specified, run the temp file through it.
0027  8. Read the output of the line-matching preprocessor.
0028  9. If ignore numbers was specified, strip the LMPP-output of all numbers.
0029 10. If ignore comments was specified, strip the LMPP-output of comments.
0030 
0031 Optimizations: Skip unneeded steps.
0032 */
0033 #include "SourceData.h"
0034 
0035 #include "CommentParser.h"
0036 #include "compat.h"
0037 #include "diff.h"
0038 #include "EncodedDataStream.h"
0039 #include "LineRef.h"
0040 #include "Logging.h"
0041 #include "Utils.h"
0042 
0043 #include <algorithm>         // for min
0044 #include <memory>
0045 #include <optional>
0046 #include <vector>            // for vector
0047 
0048 #include <QtGlobal>
0049 
0050 #include <QByteArray>
0051 #include <QProcess>
0052 #include <QScopedPointer>
0053 #include <QString>
0054 #include <QTemporaryFile>
0055 #include <QTextCodec>
0056 #include <QVector>
0057 
0058 extern std::unique_ptr<Options> gOptions;
0059 
0060 void SourceData::reset()
0061 {
0062     mFromClipBoard = false;
0063     mEncoding = u8"UTF-8";
0064     m_fileAccess = FileAccess();
0065     m_normalData.reset();
0066     m_lmppData.reset();
0067     if(!m_tempInputFileName.isEmpty())
0068     {
0069         m_tempFile.remove();
0070         m_tempInputFileName = "";
0071     }
0072 
0073     mErrors.clear();
0074 }
0075 
0076 void SourceData::setFilename(const QString& filename)
0077 {
0078     if(filename.isEmpty())
0079     {
0080         reset();
0081     }
0082     else
0083     {
0084         setFileAccess(FileAccess(filename));
0085     }
0086 }
0087 
0088 bool SourceData::isEmpty() const
0089 {
0090     return getFilename().isEmpty();
0091 }
0092 
0093 bool SourceData::hasData() const
0094 {
0095     return m_normalData.m_pBuf != nullptr;
0096 }
0097 
0098 bool SourceData::isValid() const
0099 {
0100     return isEmpty() || hasData();
0101 }
0102 
0103 QString SourceData::getFilename() const
0104 {
0105     return m_fileAccess.absoluteFilePath();
0106 }
0107 
0108 QString SourceData::getAliasName() const
0109 {
0110     return m_aliasName.isEmpty() ? m_fileAccess.prettyAbsPath() : m_aliasName;
0111 }
0112 
0113 void SourceData::setAliasName(const QString& name)
0114 {
0115     m_aliasName = name;
0116 }
0117 
0118 void SourceData::setFileAccess(const FileAccess& fileAccess)
0119 {
0120     mFromClipBoard = false;
0121 
0122     m_fileAccess = fileAccess;
0123     m_aliasName = QString();
0124     if(!m_tempInputFileName.isEmpty())
0125     {
0126         m_tempFile.remove();
0127         m_tempInputFileName = "";
0128     }
0129 
0130     mErrors.clear();
0131 }
0132 
0133 void SourceData::setEncoding(const char* encoding)
0134 {
0135     mEncoding = encoding;
0136 }
0137 
0138 void SourceData::setData(const QString& data)
0139 {
0140     mErrors.clear();
0141     // Create a temp file for preprocessing:
0142     if(m_tempInputFileName.isEmpty())
0143     {
0144         FileAccess::createTempFile(m_tempFile);
0145         m_tempInputFileName = m_tempFile.fileName();
0146     }
0147     m_fileAccess = FileAccess(m_tempInputFileName);
0148     QByteArray ba = QTextCodec::codecForName("UTF-8")->fromUnicode(data);
0149     bool bSuccess = m_fileAccess.writeFile(ba.constData(), ba.length());
0150     if(!bSuccess)
0151     {
0152         mErrors.append(i18n("Writing clipboard data to temp file failed."));
0153         return;
0154     }
0155     else
0156     {
0157         m_aliasName = i18n("From Clipboard");
0158         mFromClipBoard = true;
0159     }
0160 }
0161 
0162 const std::shared_ptr<LineDataVector>& SourceData::getLineDataForDiff() const
0163 {
0164     if(m_lmppData.m_pBuf == nullptr)
0165     {
0166         return m_normalData.m_v;
0167     }
0168     else
0169     {
0170         return m_lmppData.m_v;
0171     }
0172 }
0173 
0174 const std::shared_ptr<LineDataVector>& SourceData::getLineDataForDisplay() const
0175 {
0176     return m_normalData.m_v;
0177 }
0178 
0179 LineType SourceData::getSizeLines() const
0180 {
0181     return SafeInt<LineType>(m_normalData.lineCount());
0182 }
0183 
0184 qint64 SourceData::getSizeBytes() const
0185 {
0186     return m_normalData.byteCount();
0187 }
0188 
0189 const char* SourceData::getBuf() const
0190 {
0191     return m_normalData.m_pBuf.get();
0192 }
0193 
0194 const QString& SourceData::getText() const
0195 {
0196     return *m_normalData.m_unicodeBuf;
0197 }
0198 
0199 bool SourceData::isText() const
0200 {
0201     return m_normalData.isText() || m_normalData.isEmpty();
0202 }
0203 
0204 bool SourceData::isIncompleteConversion() const
0205 {
0206     return m_normalData.m_bIncompleteConversion;
0207 }
0208 
0209 bool SourceData::isFromBuffer() const
0210 {
0211     return mFromClipBoard;
0212 }
0213 
0214 bool SourceData::isBinaryEqualWith(const QSharedPointer<SourceData>& other) const
0215 {
0216     return m_fileAccess.exists() && other->m_fileAccess.exists() &&
0217            getSizeBytes() == other->getSizeBytes() &&
0218            (getSizeBytes() == 0 || memcmp(getBuf(), other->getBuf(), getSizeBytes()) == 0);
0219 }
0220 
0221 /*
0222     Warning: Do not call this function without re-running the comparison or
0223     otherwise resetting the DiffTextWindows as these store a pointer to the file
0224     data stored here.
0225 */
0226 void SourceData::FileData::reset()
0227 {
0228     m_pBuf.reset();
0229     m_v->clear();
0230     mDataSize = 0;
0231     mLineCount = 0;
0232     m_bIsText = false;
0233     m_bIncompleteConversion = false;
0234     m_eLineEndStyle = eLineEndStyleUndefined;
0235 }
0236 
0237 bool SourceData::FileData::readFile(FileAccess& file)
0238 {
0239     reset();
0240     if(file.fileName().isEmpty())
0241     {
0242         return true;
0243     }
0244 
0245     if(!file.isNormal())
0246         return true;
0247 
0248     mDataSize = file.sizeForReading();
0249     /*
0250         If the extra bytes are removed an unknown heap currption issue is triggered in the
0251         diff code. I don't have time to track this down to its true root cause.
0252     */
0253     m_pBuf = std::make_unique<char[]>(mDataSize + 100); // Alloc 100 byte extra: Safety hack, not nice but does no harm.
0254                                                         // Some extra bytes at the end of the buffer are needed by
0255                                                         // the diff algorithm. See also GnuDiff::diff_2_files().
0256     bool bSuccess = file.readFile(m_pBuf.get(), mDataSize);
0257     if(!bSuccess)
0258     {
0259         m_pBuf = nullptr;
0260         mDataSize = 0;
0261     }
0262     else
0263     {
0264         //null terminate buffer
0265         m_pBuf[mDataSize + 1] = 0;
0266         m_pBuf[mDataSize + 2] = 0;
0267         m_pBuf[mDataSize + 3] = 0;
0268         m_pBuf[mDataSize + 4] = 0;
0269     }
0270     return bSuccess;
0271 }
0272 
0273 bool SourceData::FileData::readFile(const QString& filename)
0274 {
0275     reset();
0276     if(filename.isEmpty())
0277     {
0278         return true;
0279     }
0280 
0281     FileAccess fa(filename);
0282     return readFile(fa);
0283 }
0284 
0285 bool SourceData::saveNormalDataAs(const QString& fileName)
0286 {
0287     return m_normalData.writeFile(fileName);
0288 }
0289 
0290 bool SourceData::FileData::writeFile(const QString& filename)
0291 {
0292     if(filename.isEmpty())
0293     {
0294         return true;
0295     }
0296 
0297     FileAccess fa(filename);
0298     bool bSuccess = fa.writeFile(m_pBuf.get(), mDataSize);
0299     return bSuccess;
0300 }
0301 
0302 //Deprecated
0303 void SourceData::FileData::copyBufFrom(const FileData& src) //TODO: Remove me.
0304 {
0305     reset();
0306     mDataSize = src.mDataSize;
0307     m_pBuf = std::make_unique<char[]>(mDataSize + 100);
0308     assert(src.m_pBuf != nullptr);
0309     memcpy(m_pBuf.get(), src.m_pBuf.get(), mDataSize);
0310 }
0311 
0312 std::optional<const char*> SourceData::detectEncoding(const QString& fileName)
0313 {
0314     QFile f(fileName);
0315     if(f.open(QIODevice::ReadOnly))
0316     {
0317         char buf[400];
0318 
0319         qint64 size = f.read(buf, sizeof(buf));
0320         FileOffset skipBytes = 0;
0321         return detectEncoding(buf, size, skipBytes);
0322     }
0323     return {};
0324 }
0325 
0326 void SourceData::readAndPreprocess(const char* encoding, bool bAutoDetect)
0327 {
0328     QTemporaryFile fileIn1, fileOut1;
0329     QString fileNameIn1;
0330     QString fileNameOut1;
0331     QString fileNameIn2;
0332     QString fileNameOut2;
0333 
0334     mEncoding = encoding;
0335 
0336     // Detect the input for the preprocessing operations
0337     if(!mFromClipBoard)
0338     {
0339         //Routine result of directory compare finding a file that isn't in all locations.
0340         if(!m_fileAccess.isValid()) return;
0341 
0342         assert(!m_fileAccess.exists() || !m_fileAccess.isDir());
0343         if(!m_fileAccess.isNormal())
0344         {
0345             mErrors.append(i18n("%1 is not a normal file.", m_fileAccess.prettyAbsPath()));
0346             return;
0347         }
0348 
0349         if(m_fileAccess.isLocal())
0350         {
0351             fileNameIn1 = m_fileAccess.absoluteFilePath();
0352         }
0353         else // File is not local: create a temporary local copy:
0354         {
0355             if(m_tempInputFileName.isEmpty())
0356             {
0357                 m_fileAccess.createLocalCopy();
0358                 m_tempInputFileName = m_fileAccess.getTempName();
0359             }
0360 
0361             fileNameIn1 = m_tempInputFileName;
0362         }
0363         if(bAutoDetect)
0364         {
0365             mEncoding = detectEncoding(fileNameIn1).value_or(encoding);
0366         }
0367     }
0368     else // The input was set via setData(), probably from clipboard.
0369     {
0370         /*
0371             Used to happen during early startup this is now a bug.
0372         */
0373         assert(!m_tempInputFileName.isEmpty());
0374 
0375         fileNameIn1 = m_tempInputFileName;
0376         mEncoding = "UTF-8";
0377     }
0378     const char* pEncoding1 = getEncoding();
0379     const char* pEncoding2 = getEncoding();
0380     const QString overSizedFile = i18nc("Error message. %1 = filepath", "File %1 too large to process. Skipping.", fileNameIn1);
0381 
0382     m_normalData.reset();
0383     m_lmppData.reset();
0384 
0385     FileAccess faIn(fileNameIn1);
0386     qint64 fileInSize = faIn.size();
0387 
0388     if(faIn.exists() && !faIn.isBrokenLink())
0389     {
0390         try
0391         {
0392             // Run the first preprocessor
0393             if(gOptions->m_PreProcessorCmd.isEmpty())
0394             {
0395                 // No preprocessing: Read the file directly:
0396                 if(!m_normalData.readFile(faIn))
0397                 {
0398                     mErrors.append(faIn.getStatusText());
0399                     return;
0400                 }
0401             }
0402             else
0403             {
0404                 unsigned char b;
0405                 //Don't fail the preprocessor command if the file can't be read.
0406                 if(!faIn.readFile(&b, 1))
0407                 {
0408                     mErrors.append(faIn.getStatusText());
0409                     mErrors.append(i18n("    Temp file is: %1", fileNameIn1));
0410                     return;
0411                 }
0412 
0413                 QTemporaryFile tmpInPPFile;
0414                 QString fileNameInPP = fileNameIn1;
0415 
0416                 if(pEncoding1 != gOptions->mEncodingPP)
0417                 {
0418                     // Before running the preprocessor convert to the format that the preprocessor expects.
0419                     FileAccess::createTempFile(tmpInPPFile);
0420                     fileNameInPP = tmpInPPFile.fileName();
0421                     pEncoding1 = gOptions->mEncodingPP;
0422                     convertFileEncoding(fileNameIn1, encoding, fileNameInPP, pEncoding1);
0423                 }
0424 
0425                 QString ppCmd = gOptions->m_PreProcessorCmd;
0426                 FileAccess::createTempFile(fileOut1);
0427                 fileNameOut1 = fileOut1.fileName();
0428 
0429                 QProcess ppProcess;
0430                 ppProcess.setStandardInputFile(fileNameInPP);
0431                 ppProcess.setStandardOutputFile(fileNameOut1);
0432                 QString program;
0433                 QStringList args;
0434                 QString errorReason = Utils::getArguments(ppCmd, program, args);
0435                 if(errorReason.isEmpty())
0436                 {
0437                     ppProcess.start(program, args);
0438                     ppProcess.waitForFinished(-1);
0439                 }
0440                 else
0441                     errorReason = "\n(" + errorReason + ')';
0442 
0443                 bool bSuccess = errorReason.isEmpty() && m_normalData.readFile(fileNameOut1);
0444                 if(fileInSize > 0 && (!bSuccess || m_normalData.byteCount() == 0))
0445                 {
0446                     mErrors.append(
0447                         i18n("Preprocessing possibly failed. Check this command:\n\n  %1"
0448                              "\n\nThe preprocessing command will be disabled now.",
0449                              ppCmd) +
0450                         errorReason);
0451                     gOptions->m_PreProcessorCmd = "";
0452 
0453                     pEncoding1 = getEncoding();
0454                 }
0455             }
0456         }
0457         catch(const std::bad_alloc&)
0458         {
0459             m_normalData.reset();
0460             mErrors.append(overSizedFile);
0461             return;
0462         }
0463 
0464         if(!m_normalData.preprocess(pEncoding1, false))
0465         {
0466             mErrors.append(overSizedFile);
0467             return;
0468         }
0469         //exit early for non text data further processing assumes a text file as input
0470         if(!m_normalData.isText())
0471             return;
0472 
0473         // LineMatching Preprocessor
0474         if(!gOptions->m_LineMatchingPreProcessorCmd.isEmpty())
0475         {
0476             QTemporaryFile tempOut2, fileInPP;
0477             fileNameIn2 = fileNameOut1.isEmpty() ? fileNameIn1 : fileNameOut1;
0478             QString fileNameInPP = fileNameIn2;
0479             pEncoding2 = pEncoding1;
0480             if(pEncoding2 != gOptions->mEncodingPP)
0481             {
0482                 // Before running the preprocessor convert to the format that the preprocessor expects.
0483                 FileAccess::createTempFile(fileInPP);
0484                 fileNameInPP = fileInPP.fileName();
0485                 pEncoding2 = gOptions->mEncodingPP;
0486                 convertFileEncoding(fileNameIn2, pEncoding1, fileNameInPP, pEncoding2);
0487             }
0488 
0489             QString ppCmd = gOptions->m_LineMatchingPreProcessorCmd;
0490             FileAccess::createTempFile(tempOut2);
0491             fileNameOut2 = tempOut2.fileName();
0492             QProcess ppProcess;
0493             ppProcess.setStandardInputFile(fileNameInPP);
0494             ppProcess.setStandardOutputFile(fileNameOut2);
0495             QString program;
0496             QStringList args;
0497             QString errorReason = Utils::getArguments(ppCmd, program, args);
0498             if(errorReason.isEmpty())
0499             {
0500                 ppProcess.start(program, args);
0501                 ppProcess.waitForFinished(-1);
0502             }
0503             else
0504                 errorReason = "\n(" + errorReason + ')';
0505 
0506             bool bSuccess = errorReason.isEmpty() && m_lmppData.readFile(fileNameOut2);
0507             if(FileAccess(fileNameIn2).size() > 0 && (!bSuccess || m_lmppData.byteCount() == 0))
0508             {
0509                 mErrors.append(
0510                     i18n("The line-matching-preprocessing possibly failed. Check this command:\n\n  %1"
0511                          "\n\nThe line-matching-preprocessing command will be disabled now.", ppCmd) +
0512                     errorReason);
0513                 gOptions->m_LineMatchingPreProcessorCmd = "";
0514                 if(!m_lmppData.readFile(fileNameIn2))
0515                 {
0516                     mErrors.append(i18nc("Read error message. %1 = filepath", "Failed to read file: %1", fileNameIn2));
0517                     return;
0518                 }
0519             }
0520         }
0521         else if(gOptions->ignoreComments() || gOptions->m_bIgnoreCase)
0522         {
0523             // We need a copy of the normal data.
0524             m_lmppData.copyBufFrom(m_normalData);
0525         }
0526     }
0527     else
0528     {
0529         //exit early for nonexistent files
0530         return;
0531     }
0532 
0533     if(!m_lmppData.preprocess(pEncoding2, true))
0534     {
0535         mErrors.append(overSizedFile);
0536         return;
0537     }
0538 
0539     assert(m_lmppData.isText());
0540     //TODO: Needed?
0541     if(m_lmppData.lineCount() < m_normalData.lineCount())
0542     {
0543         // Preprocessing command may result in smaller data buffer so adjust size
0544         for(qint64 i = m_lmppData.lineCount(); i < m_normalData.lineCount(); ++i)
0545         { // Set all empty lines to point to the end of the buffer.
0546             m_lmppData.m_v->push_back(LineData(m_lmppData.m_unicodeBuf, m_lmppData.m_unicodeBuf->length()));
0547         }
0548 
0549         m_lmppData.mLineCount = m_normalData.lineCount();
0550     }
0551 
0552     // Ignore comments
0553     if(gOptions->ignoreComments() && hasData())
0554     {
0555         qint64 vSize = std::min(m_normalData.lineCount(), m_lmppData.lineCount());
0556 
0557         for(qint64 i = 0; i < vSize; ++i)
0558         {
0559             //TODO: Phase this out. We should not be messing with these flags outside the parser.
0560             (*m_normalData.m_v)[i].setPureComment((*m_lmppData.m_v)[i].isPureComment());
0561             (*m_normalData.m_v)[i].setSkipable((*m_lmppData.m_v)[i].isSkipable());
0562         }
0563     }
0564 }
0565 
0566 /** Prepare the linedata vector for every input line.
      *
      * Decodes the raw buffer character by character, appends the decoded text
      * (with every line ending normalized to '\n') to m_unicodeBuf and adds one
      * LineData entry per line to m_v.
      *
      * @param encoding       Encoding used to decode the raw buffer.
      * @param removeComments When true the comment parser additionally strips
      *                       comments from each line before it is stored.
      * @return false when the data is too large to process or memory ran out
      *         (the object is reset in that case), true otherwise. True is also
      *         returned when there is no buffer and when the data turns out
      *         not to be text.
      */
0567 bool SourceData::FileData::preprocess(const QByteArray& encoding, bool removeComments)
0568 {
     // Nothing was read: nothing to do.
0569     if(m_pBuf == nullptr)
0570         return true;
0571 
0572     QString line;
0573     QChar curChar, prevChar = '\0';
0574     LineType lines = 0;
     // Offset in m_unicodeBuf at which the line currently being read starts.
0575     QtSizeType lastOffset = 0;
0576     FileOffset skipBytes = 0;
0577     QScopedPointer<CommentParser> parser(new DefaultCommentParser());
0578 
0579     // detect line end style
0580     QVector<e_LineEndStyle> vOrigDataLineEndStyle;
0581     m_eLineEndStyle = eLineEndStyleUndefined;
0582 
     // The detection result is only used to decide whether leading bytes (a BOM)
     // are skipped: they are kept when the detected encoding differs from the
     // requested one. Decoding below always uses 'encoding'.
0583     QByteArray pCodec = detectEncoding(m_pBuf.get(), mDataSize, skipBytes).value_or(encoding);
0584     if(pCodec != encoding)
0585         skipBytes = 0;
0586 
     // The remaining payload has to fit into a 32 bit size.
0587     if(mDataSize - skipBytes > limits<qint32>::max())
0588     {
0589         reset();
0590         return false;
0591     }
0592 
0593     try
0594     {
     // fromRawData() wraps the existing buffer without copying it.
0595         const QByteArray ba = QByteArray::fromRawData(m_pBuf.get() + skipBytes, (QtSizeType)(mDataSize - skipBytes));
0596         EncodedDataStream ds(ba);
0597 
0598         mHasBOM = skipBytes != 0;
0599         ds.setEncoding(encoding);
0600         ds.setGenerateByteOrderMark(skipBytes != 0);
0601 
0602         m_bIncompleteConversion = false;
0603         m_unicodeBuf->clear();
0604 
0605         assert(m_unicodeBuf->length() == 0);
0606 
0607         mHasEOLTermination = false;
     // Set when the '\r' handling below already consumed the first character of the next line.
0608         bool skipNextRead = false;
     // One iteration per input line.
0609         while(!ds.atEnd())
0610         {
0611             line.clear();
     // Give up before the line counter could overflow.
0612             if(lines >= limits<LineType>::max() - 5)
0613             {
0614                 reset();
0615                 return false;
0616             }
0617 
0618             if(!skipNextRead)
0619             {
0620                 prevChar = curChar;
0621                 ds.readChar(curChar);
0622             }
0623             else
0624                 skipNextRead = false;
0625 
0626             QtSizeType firstNonwhite = 0;
0627             bool foundNonWhite = false;
0628 
     // Collect characters up to (not including) the next line ending or the end of the data.
0629             while(curChar != '\n' && curChar != '\r')
0630             {
     // A null or non-character code point means this is not text: drop the
     // lines collected so far. m_bIsText is not set on this path.
0631                 if(curChar.isNull() || curChar.isNonCharacter())
0632                 {
0633                     m_v->clear();
0634                     return true;
0635                 }
0636 
0637                 if(curChar == QChar::ReplacementCharacter)
0638                     m_bIncompleteConversion = true;
0639 
0640                 line.append(curChar);
     // Recorded after the append, i.e. the length of the line up to and
     // including the first non-whitespace character.
0641                 if(!curChar.isSpace() && !foundNonWhite)
0642                 {
0643                     firstNonwhite = line.length();
0644                     foundNonWhite = true;
0645                 }
0646 
0647                 if(ds.atEnd())
0648                     break;
0649 
0650                 prevChar = curChar;
0651                 ds.readChar(curChar);
0652             }
0653 
     // Record the style of the line ending that terminated this line.
0654             switch(curChar.unicode())
0655             {
0656                 case '\n':
0657                     vOrigDataLineEndStyle.push_back(eLineEndStyleUnix);
0658                     break;
0659                 case '\r':
     // NOTE(review): this guard compares an offset into m_unicodeBuf with the
     // raw byte count, so it does not look like an end-of-stream check. Confirm
     // what readChar() does when the stream is already exhausted (data ending
     // in a lone '\r') - TODO verify.
0660                     if((FileOffset)lastOffset < mDataSize)
0661                     {
     // Look one character ahead to tell "\r\n" from a lone '\r'.
0662                         prevChar = curChar;
0663                         ds.readChar(curChar);
0664 
0665                         if(curChar == '\n')
0666                         {
0667                             vOrigDataLineEndStyle.push_back(eLineEndStyleDos);
0668                             break;
0669                         }
0670                         //work around for lack of seek API in QDataStream
0671                         skipNextRead = true;
0672                     }
0673 
0674                     //old mac style ending.
0675                     vOrigDataLineEndStyle.push_back(eLineEndStyleOldMac);
0676                     break;
0677             }
0678             parser->processLine(line);
0679             if(removeComments)
0680                 parser->removeComment(line);
0681             //Qt6 introduces 64bit sizes
0682             if(line.size() >= limits<LineType>::max())
0683             {
0684                 reset();
0685                 return false;
0686             }
0687 
0688             ++lines;
0689             m_v->push_back(LineData(m_unicodeBuf, lastOffset, line.length(), firstNonwhite, parser->isSkipable(), parser->isPureComment()));
0690             //The last line may not have an EOL mark. In that case don't add one to our buffer.
0691             m_unicodeBuf->append(line);
0692             if(curChar == '\n' || curChar == '\r' || prevChar == '\r')
0693             {
0694                 //kdiff3 internally uses only unix style endings for simplicity.
0695                 m_unicodeBuf->append('\n');
0696             }
0697 
0698             assert(m_unicodeBuf->length() != lastOffset);
0699             lastOffset = m_unicodeBuf->length();
0700         }
0701 
0702         /*
0703             Process trailing new line as if there were a blank non-terminated line after it.
0704             But do nothing to the data buffer since this is a phantom line needed for internal purposes.
0705         */
0706         if(curChar == '\n' || curChar == '\r')
0707         {
0708             mHasEOLTermination = true;
0709             ++lines;
0710 
0711             parser->processLine("");
0712             m_v->push_back(LineData(m_unicodeBuf, lastOffset, 0, 0, parser->isSkipable(), parser->isPureComment()));
0713         }
0714 
     // One extra entry positioned at the end of the text; it is not counted in 'lines'.
0715         m_v->push_back(LineData(m_unicodeBuf, lastOffset));
0716 
0717         m_bIsText = true;
0718 
     // The style of the first line ending is reported as the style of the file.
0719         if(!vOrigDataLineEndStyle.isEmpty())
0720             m_eLineEndStyle = vOrigDataLineEndStyle[0];
0721 
0722         mLineCount = lines;
0723         return true;
0724     }
0725     catch(const std::bad_alloc&)
0726     {
0727         reset();
0728         return false;
0729     }
0730 }
0731 
0732 // Convert the input file from input encoding to output encoding and write it to the output file.
0733 bool SourceData::convertFileEncoding(const QString& fileNameIn, const QByteArray& pCodecIn,
0734                                      const QString& fileNameOut, const QByteArray& pCodecOut)
0735 {
0736     QFile in(fileNameIn);
0737     if(!in.open(QIODevice::ReadOnly))
0738         return false;
0739     EncodedDataStream inStream(&in);
0740     inStream.setEncoding(pCodecIn);
0741 
0742     QFile out(fileNameOut);
0743     if(!out.open(QIODevice::WriteOnly))
0744         return false;
0745     EncodedDataStream outStream(&out);
0746     outStream.setEncoding(pCodecOut);
0747 
0748     QString data;
0749     while(!inStream.atEnd())
0750     {
0751         QChar c;
0752         inStream.readChar(c);
0753         data += c;
0754     }
0755     outStream << data;
0756 
0757     return true;
0758 }
0759 
0760 std::optional<const char*> SourceData::getEncodingFromTag(const QByteArray& s, const QByteArray& encodingTag)
0761 {
0762     QtSizeType encodingPos = s.indexOf(encodingTag);
0763     if(encodingPos >= 0)
0764     {
0765         QtSizeType apostrophPos = s.indexOf('"', encodingPos + encodingTag.length());
0766         QtSizeType apostroph2Pos = s.indexOf('\'', encodingPos + encodingTag.length());
0767         char apostroph = '"';
0768         if(apostroph2Pos >= 0 && (apostrophPos < 0 || apostroph2Pos < apostrophPos))
0769         {
0770             apostroph = '\'';
0771             apostrophPos = apostroph2Pos;
0772         }
0773 
0774         QtSizeType encodingEnd = s.indexOf(apostroph, apostrophPos + 1);
0775         if(encodingEnd >= 0) // e.g.: <meta charset="utf-8"> or <?xml version="1.0" encoding="ISO-8859-1"?>
0776         {
0777             QByteArray encoding = s.mid(apostrophPos + 1, encodingEnd - (apostrophPos + 1));
0778             if(QTextCodec::codecForName(encoding))
0779                 return encoding;
0780         }
0781         else // e.g.: <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
0782         {
0783             QByteArray encoding = s.mid(encodingPos + encodingTag.length(), apostrophPos - (encodingPos + encodingTag.length()));
0784             if(QTextCodec::codecForName(encoding))
0785                 return encoding;
0786         }
0787     }
0788     return {};
0789 }
0790 
0791 std::optional<const char*> SourceData::detectEncoding(const char* buf, qint64 size, FileOffset& skipBytes)
0792 {
0793     if(size >= 2)
0794     {
0795         if(buf[0] == '\xFF' && buf[1] == '\xFE')
0796         {
0797             skipBytes = 2;
0798             return "UTF-16LE";
0799         }
0800 
0801         if(buf[0] == '\xFE' && buf[1] == '\xFF')
0802         {
0803             skipBytes = 2;
0804             return "UTF-16BE";
0805         }
0806     }
0807     if(size >= 3)
0808     {
0809         if(buf[0] == '\xEF' && buf[1] == '\xBB' && buf[2] == '\xBF')
0810         {
0811             skipBytes = 3;
0812             return "UTF-8-BOM";
0813         }
0814     }
0815     skipBytes = 0;
0816     QByteArray s;
0817     /*
0818         We don't need the whole file here just the header.
0819     */
0820     if(size <= 5000)
0821         s = QByteArray(buf, (QtSizeType)size);
0822     else
0823         s = QByteArray(buf, 5000);
0824 
0825     QtSizeType xmlHeaderPos = s.indexOf("<?xml");
0826     if(xmlHeaderPos >= 0)
0827     {
0828         QtSizeType xmlHeaderEnd = s.indexOf("?>", xmlHeaderPos);
0829         if(xmlHeaderEnd >= 0)
0830         {
0831             std::optional<const char*> encoding = getEncodingFromTag(s.mid(xmlHeaderPos, xmlHeaderEnd - xmlHeaderPos), "encoding=");
0832             if(encoding.has_value())
0833                 return encoding;
0834         }
0835     }
0836     else // HTML
0837     {
0838         QtSizeType metaHeaderPos = s.indexOf("<meta");
0839         while(metaHeaderPos >= 0)
0840         {
0841             QtSizeType metaHeaderEnd = s.indexOf(">", metaHeaderPos);
0842             if(metaHeaderEnd >= 0)
0843             {
0844                 std::optional<const char*> encoding = getEncodingFromTag(s.mid(metaHeaderPos, metaHeaderEnd - metaHeaderPos), "charset=");
0845                 if(encoding.has_value())
0846                     return encoding;
0847 
0848                 metaHeaderPos = s.indexOf("<meta", metaHeaderEnd);
0849             }
0850             else
0851                 break;
0852         }
0853     }
0854     //Attempt to detect non-bom UTF8. This is a very common encoding.
0855     return detectUTF8(s);
0856 }
0857 
0858 std::optional<const char*> SourceData::detectUTF8(const QByteArray& data)
0859 {
0860     QTextCodec* utf8 = QTextCodec::codecForName("UTF-8");
0861 
0862     QTextCodec::ConverterState state;
0863     utf8->toUnicode(data.constData(), SafeInt<qint32>(data.size()), &state);
0864 
0865     if(state.invalidChars == 0)
0866         for (qint32 i = 0; i < data.size()-state.remainingChars; i++)
0867             if ((unsigned)data.at(i) > 127)
0868                 return "UTF-8";
0869 
0870     return {};
0871 }