File indexing completed on 2024-04-21 15:55:31

0001 /*************************************************************************************
0002     begin                : Sun Feb 29 2004
0003     copyright            : (C) 2004 by Jeroen Wijnhout (Jeroen.Wijnhout@kdemail.net)
0004  *************************************************************************************/
0005 
0006 /***************************************************************************
0007  *                                                                         *
0008  *   This program is free software; you can redistribute it and/or modify  *
0009  *   it under the terms of the GNU General Public License as published by  *
0010  *   the Free Software Foundation; either version 2 of the License, or     *
0011  *   (at your option) any later version.                                   *
0012  *                                                                         *
0013  ***************************************************************************/
0014 
0015 #include "convert.h"
0016 
0017 #include <QFile>
0018 #include <QRegExp>
0019 #include <QTextCodec>
0020 #include <QTextStream>
0021 
0022 #include <KMessageBox>
0023 #include <KTextEditor/Document>
0024 
0025 #include "kiledebug.h"
0026 #include "utilities.h"
0027 
0028 QMap<QString, ConvertMap*> ConvertMap::g_maps;
0029 
0030 bool ConvertMap::create(const QString & encoding)
0031 {
0032     KILE_DEBUG_MAIN << "\tlooking for map for " << encoding;
0033     ConvertMap * map = g_maps[encoding];
0034 
0035     if(!map) {
0036         KILE_DEBUG_MAIN << "\tcreating a map for " << encoding;
0037         map = new ConvertMap(encoding); // FIXME This will never be deleted if load() succeeds...
0038         if(map->load()) {
0039             g_maps[encoding] = map;
0040         }
0041         else {
0042             delete map;
0043             map = Q_NULLPTR;
0044         }
0045         map = g_maps[encoding];
0046     }
0047 
0048     return (map != Q_NULLPTR);
0049 }
0050 
0051 QString ConvertMap::encodingNameFor(const QString & name)
0052 {
0053     QString std;
0054     for(int i = 0; i < name.length(); ++i) {
0055         if(!name[i].isSpace()) {
0056             std += name[i];
0057         }
0058     }
0059 
0060     std = std.toLower();
0061 
0062     if(std.startsWith(QLatin1String("iso8859-"))) {
0063         return "latin" + std.right(1);
0064     }
0065 
0066     if(std.startsWith(QLatin1String("cp"))) {
0067         return "cp" + std.right(4);
0068     }
0069 
0070     return name;
0071 }
0072 
0073 QString ConvertMap::isoNameFor(const QString & name)
0074 {
0075     QString std;
0076     for(int i = 0; i < name.length(); ++i) {
0077         if(!name[i].isSpace()) {
0078             std += name[i];
0079         }
0080     }
0081 
0082     std = std.toLower();
0083 
0084     if(std.startsWith(QLatin1String("latin"))) {
0085         return "ISO 8859-" + std.right(1);
0086     }
0087 
0088     if(std.startsWith(QLatin1String("cp"))) {
0089         return "cp " + std.right(4);
0090     }
0091 
0092     return name;
0093 }
0094 
0095 ConvertMap::ConvertMap(const QString& enc)
0096 {
0097     m_aliases.append(encodingNameFor(enc));
0098     m_aliases.append(isoNameFor(enc));
0099 }
0100 
0101 void ConvertMap::addPair(QChar c, const QString& enc)
0102 {
0103     m_toASCII[c] = commandIsTerminated(enc) ? enc : enc + "{}" ;
0104     m_toEncoding[enc] = c;
0105 }
0106 
0107 bool ConvertMap::commandIsTerminated(const QString & command)
0108 {
0109     static QRegExp reCommandSequences("\\\\([a-zA-Z]+|\\\"|\\')$");
0110 
0111     return (reCommandSequences.indexIn(command) == -1);
0112 }
0113 
0114 bool ConvertMap::load()
0115 {
0116     static QRegExp reMap("^(.*):(.*)");
0117 
0118     //makeMap(encoding());
0119 
0120     //if map already exists, replace it
0121     QFile qf(KileUtilities::locate(QStandardPaths::AppDataLocation, "encodings/" + encoding() + ".enc"));
0122 
0123     if(qf.open(QIODevice::ReadOnly)) {
0124         QTextStream stream(&qf);
0125         QTextCodec *codec = QTextCodec::codecForName(isoName().toLatin1());
0126         if(codec) {
0127             stream.setCodec(codec);
0128         }
0129 
0130         while(!stream.atEnd()) {
0131             //parse the line
0132             if(stream.readLine().indexOf(reMap) != -1) {
0133                 addPair(reMap.cap(1)[0], reMap.cap(2));
0134             }
0135         }
0136         qf.close();
0137 
0138         return true;
0139     }
0140 
0141     return false;
0142 }
0143 
0144 //BEGIN ConvertIO classes
0145 ConvertIO::ConvertIO(KTextEditor::Document *doc) :
0146     m_doc(doc),
0147     m_text(QString()),
0148     m_line(QString()),
0149     m_nLine(0)
0150 {
0151 }
0152 
0153 QString & ConvertIO::currentLine()
0154 {
0155     return m_line;
0156 }
0157 
0158 void ConvertIO::nextLine()
0159 {
0160     m_line = m_doc->line(m_nLine++);
0161 }
0162 
0163 void ConvertIO::writeText()
0164 {
0165     m_doc->setText(m_text);
0166 }
0167 
0168 int ConvertIO::current()
0169 {
0170     return m_nLine;
0171 }
0172 
0173 bool ConvertIO::done()
0174 {
0175     return current() == m_doc->lines();
0176 }
0177 
0178 ConvertIOFile::ConvertIOFile(KTextEditor::Document *doc, const QUrl &url) : ConvertIO(doc), m_url(url)
0179 {
0180 }
0181 
0182 void ConvertIOFile::writeText()
0183 {
0184     QFile qf(m_url.toLocalFile());
0185     if(qf.open(QIODevice::WriteOnly)) {
0186         //read the file
0187         QTextStream stream(&qf);
0188         stream << m_text;
0189         qf.close();
0190     }
0191     else {
0192         qWarning() << "Could not open " << m_url.toLocalFile();
0193     }
0194 }
0195 
0196 ConvertBase::ConvertBase(const QString & encoding, ConvertIO * io) :
0197     m_io(io),
0198     m_encoding(encoding),
0199     m_map(Q_NULLPTR)
0200 {
0201 }
0202 
0203 //END ConvertIO classes
0204 
0205 //BEGIN ConvertBase
0206 QString ConvertBase::mapNext(int &i)
0207 {
0208     return (QString)m_io->currentLine()[i++];
0209 }
0210 
0211 bool ConvertBase::convert()
0212 {
0213     if(!setMap()) {
0214         return false;
0215     }
0216 
0217     m_io->text().clear();
0218     do {
0219         m_io->nextLine();
0220         int i = 0;
0221         while(i < m_io->currentLine().length()) {
0222             m_io->text() += mapNext(i);
0223         }
0224         if(!m_io->done()) {
0225             m_io->text() += '\n';
0226         }
0227     }
0228     while(!m_io->done());
0229 
0230     m_io->writeText();
0231     return true;
0232 }
0233 
0234 bool ConvertBase::setMap()
0235 {
0236     //create map (or use existing)
0237     if(ConvertMap::create(m_encoding)) {
0238         m_map = ConvertMap::mapFor(m_encoding);
0239     }
0240     else {
0241         m_map = Q_NULLPTR;
0242     }
0243 
0244     return (m_map != Q_NULLPTR);
0245 }
0246 //END ConvertBase
0247 
0248 //BEGIN ConvertEncToASCII
0249 QString ConvertEncToASCII::mapNext(int &i)
0250 {
0251     return m_map->canDecode(m_io->currentLine()[i]) ? m_map->toASCII(m_io->currentLine()[i++]) : (QString)m_io->currentLine()[i++];
0252 }
0253 //END ConvertEncToASCII
0254 
0255 //BEGIN ConvertASCIIToEnc
0256 
0257 //i is the position of the '\'
0258 QString ConvertASCIIToEnc::nextSequence(int &i)
0259 {
0260     //get first two characters
0261     QString seq = (QString)m_io->currentLine()[i++];
0262 
0263     if(m_io->currentLine()[i].isLetter()) {
0264         while(m_io->currentLine()[i].isLetter()) {
0265             seq += (QString)m_io->currentLine()[i++];
0266         }
0267     }
0268     else {
0269         return seq + (QString)m_io->currentLine()[i++];
0270     }
0271 
0272     return seq;
0273 }
0274 
0275 bool ConvertASCIIToEnc::isModifier(const QString& seq)
0276 {
0277     static QRegExp reModifier("\\\\([cHkruv]|\"|\'|\\^|`|~|=|\\.)");
0278     return reModifier.exactMatch(seq);
0279 }
0280 
0281 QString ConvertASCIIToEnc::getSequence(int &i)
0282 {
0283     QString seq = nextSequence(i);
0284     static QRegExp reBraces("\\{([a-zA-Z]?)\\}");
0285 
0286     if(isModifier(seq)) {
0287         KILE_DEBUG_MAIN << "\tisModifier true : " << seq;
0288         if(seq[seq.length() - 1].isLetter()) {
0289             seq += ' ';
0290         }
0291 
0292         while(m_io->currentLine()[i].isSpace()) {
0293             ++i;
0294         }
0295 
0296         if(m_io->currentLine().mid(i, 2) == "{}") {
0297             i = i + 2;
0298         }
0299 
0300         if(m_io->currentLine()[i] == '\\') {
0301             seq += nextSequence(i);
0302         }
0303         else {
0304             if(reBraces.exactMatch(m_io->currentLine().mid(i, 3))) {
0305                 KILE_DEBUG_MAIN << "\tbraces detected";
0306                 i = i + 3;
0307                 seq += reBraces.cap(1);
0308             }
0309             else {
0310                 QChar nextChar = m_io->currentLine()[i++];
0311                 if(!nextChar.isSpace()) {
0312                     seq += (QString)nextChar;
0313                 }
0314             }
0315         }
0316     }
0317     else if(m_map->canEncode(seq)) {
0318         if(m_io->currentLine().mid(i, 2) == "{}") {
0319             i = i + 2;
0320         }
0321         else if(m_io->currentLine()[i].isSpace()) {
0322             ++i;
0323         }
0324     }
0325 
0326     return seq;
0327 }
0328 
0329 QString ConvertASCIIToEnc::mapNext(int &i)
0330 {
0331     if(m_io->currentLine()[i] == '\\') {
0332         QString seq = getSequence(i);
0333         KILE_DEBUG_MAIN << "'\tsequence: " << seq;
0334         if(m_map->canEncode(seq)) {
0335             KILE_DEBUG_MAIN << "\tcan encode this";
0336             //if ( m_io->currentLine().mid(i, 2) == "{}" ) i = i + 2;
0337             return m_map->toEncoding(seq);
0338         }
0339         else {
0340             return seq;
0341         }
0342     }
0343 
0344     return ConvertBase::mapNext(i);
0345 }
0346 //END ConvertASCIIToEnc