File indexing completed on 2024-04-28 08:56:09
0001 /************************************************************************************* 0002 begin : Sun Feb 29 2004 0003 copyright : (C) 2004 by Jeroen Wijnhout (Jeroen.Wijnhout@kdemail.net) 0004 *************************************************************************************/ 0005 0006 /*************************************************************************** 0007 * * 0008 * This program is free software; you can redistribute it and/or modify * 0009 * it under the terms of the GNU General Public License as published by * 0010 * the Free Software Foundation; either version 2 of the License, or * 0011 * (at your option) any later version. * 0012 * * 0013 ***************************************************************************/ 0014 0015 #include "convert.h" 0016 0017 #include <QFile> 0018 #include <QRegExp> 0019 #include <QTextCodec> 0020 #include <QTextStream> 0021 0022 #include <KMessageBox> 0023 #include <KTextEditor/Document> 0024 0025 #include "kiledebug.h" 0026 #include "utilities.h" 0027 0028 QMap<QString, ConvertMap*> ConvertMap::g_maps; 0029 0030 bool ConvertMap::create(const QString & encoding) 0031 { 0032 KILE_DEBUG_MAIN << "\tlooking for map for " << encoding; 0033 ConvertMap * map = g_maps[encoding]; 0034 0035 if(!map) { 0036 KILE_DEBUG_MAIN << "\tcreating a map for " << encoding; 0037 map = new ConvertMap(encoding); // FIXME This will never be deleted if load() succeeds... 0038 if(map->load()) { 0039 g_maps[encoding] = map; 0040 } 0041 else { 0042 delete map; 0043 map = Q_NULLPTR; 0044 } 0045 map = g_maps[encoding]; 0046 } 0047 0048 return (map != Q_NULLPTR); 0049 } 0050 0051 QString ConvertMap::encodingNameFor(const QString & name) 0052 { 0053 QString std; 0054 for(int i = 0; i < name.length(); ++i) { 0055 if(!name[i].isSpace()) { 0056 std += name[i]; 0057 } 0058 } 0059 0060 std = std.toLower(); 0061 0062 if(std.startsWith(QLatin1String("iso8859-"))) { 0063 return "latin" + std.right(1); 0064 } 0065 0066 if(std.startsWith(QLatin1String("cp"))) { 0067 return "cp" + std.right(4); 0068 } 0069 0070 return name; 0071 } 0072 0073 QString ConvertMap::isoNameFor(const QString & name) 0074 { 0075 QString std; 0076 for(int i = 0; i < name.length(); ++i) { 0077 if(!name[i].isSpace()) { 0078 std += name[i]; 0079 } 0080 } 0081 0082 std = std.toLower(); 0083 0084 if(std.startsWith(QLatin1String("latin"))) { 0085 return "ISO 8859-" + std.right(1); 0086 } 0087 0088 if(std.startsWith(QLatin1String("cp"))) { 0089 return "cp " + std.right(4); 0090 } 0091 0092 return name; 0093 } 0094 0095 ConvertMap::ConvertMap(const QString& enc) 0096 { 0097 m_aliases.append(encodingNameFor(enc)); 0098 m_aliases.append(isoNameFor(enc)); 0099 } 0100 0101 void ConvertMap::addPair(QChar c, const QString& enc) 0102 { 0103 m_toASCII[c] = commandIsTerminated(enc) ? enc : enc + "{}" ; 0104 m_toEncoding[enc] = c; 0105 } 0106 0107 bool ConvertMap::commandIsTerminated(const QString & command) 0108 { 0109 static QRegExp reCommandSequences("\\\\([a-zA-Z]+|\\\"|\\')$"); 0110 0111 return (reCommandSequences.indexIn(command) == -1); 0112 } 0113 0114 bool ConvertMap::load() 0115 { 0116 static QRegExp reMap("^(.*):(.*)"); 0117 0118 //makeMap(encoding()); 0119 0120 //if map already exists, replace it 0121 QFile qf(KileUtilities::locate(QStandardPaths::AppDataLocation, "encodings/" + encoding() + ".enc")); 0122 0123 if(qf.open(QIODevice::ReadOnly)) { 0124 QTextStream stream(&qf); 0125 QTextCodec *codec = QTextCodec::codecForName(isoName().toLatin1()); 0126 if(codec) { 0127 stream.setCodec(codec); 0128 } 0129 0130 while(!stream.atEnd()) { 0131 //parse the line 0132 if(stream.readLine().indexOf(reMap) != -1) { 0133 addPair(reMap.cap(1)[0], reMap.cap(2)); 0134 } 0135 } 0136 qf.close(); 0137 0138 return true; 0139 } 0140 0141 return false; 0142 } 0143 0144 //BEGIN ConvertIO classes 0145 ConvertIO::ConvertIO(KTextEditor::Document *doc) : 0146 m_doc(doc), 0147 m_text(QString()), 0148 m_line(QString()), 0149 m_nLine(0) 0150 { 0151 } 0152 0153 QString & ConvertIO::currentLine() 0154 { 0155 return m_line; 0156 } 0157 0158 void ConvertIO::nextLine() 0159 { 0160 m_line = m_doc->line(m_nLine++); 0161 } 0162 0163 void ConvertIO::writeText() 0164 { 0165 m_doc->setText(m_text); 0166 } 0167 0168 int ConvertIO::current() 0169 { 0170 return m_nLine; 0171 } 0172 0173 bool ConvertIO::done() 0174 { 0175 return current() == m_doc->lines(); 0176 } 0177 0178 ConvertIOFile::ConvertIOFile(KTextEditor::Document *doc, const QUrl &url) : ConvertIO(doc), m_url(url) 0179 { 0180 } 0181 0182 void ConvertIOFile::writeText() 0183 { 0184 QFile qf(m_url.toLocalFile()); 0185 if(qf.open(QIODevice::WriteOnly)) { 0186 //read the file 0187 QTextStream stream(&qf); 0188 stream << m_text; 0189 qf.close(); 0190 } 0191 else { 0192 qWarning() << "Could not open " << m_url.toLocalFile(); 0193 } 0194 } 0195 0196 ConvertBase::ConvertBase(const QString & encoding, ConvertIO * io) : 0197 m_io(io), 0198 m_encoding(encoding), 0199 m_map(Q_NULLPTR) 0200 { 0201 } 0202 0203 //END ConvertIO classes 0204 0205 //BEGIN ConvertBase 0206 QString ConvertBase::mapNext(int &i) 0207 { 0208 return (QString)m_io->currentLine()[i++]; 0209 } 0210 0211 bool ConvertBase::convert() 0212 { 0213 if(!setMap()) { 0214 return false; 0215 } 0216 0217 m_io->text().clear(); 0218 do { 0219 m_io->nextLine(); 0220 int i = 0; 0221 while(i < m_io->currentLine().length()) { 0222 m_io->text() += mapNext(i); 0223 } 0224 if(!m_io->done()) { 0225 m_io->text() += '\n'; 0226 } 0227 } 0228 while(!m_io->done()); 0229 0230 m_io->writeText(); 0231 return true; 0232 } 0233 0234 bool ConvertBase::setMap() 0235 { 0236 //create map (or use existing) 0237 if(ConvertMap::create(m_encoding)) { 0238 m_map = ConvertMap::mapFor(m_encoding); 0239 } 0240 else { 0241 m_map = Q_NULLPTR; 0242 } 0243 0244 return (m_map != Q_NULLPTR); 0245 } 0246 //END ConvertBase 0247 0248 //BEGIN ConvertEncToASCII 0249 QString ConvertEncToASCII::mapNext(int &i) 0250 { 0251 return m_map->canDecode(m_io->currentLine()[i]) ? m_map->toASCII(m_io->currentLine()[i++]) : (QString)m_io->currentLine()[i++]; 0252 } 0253 //END ConvertEncToASCII 0254 0255 //BEGIN ConvertASCIIToEnc 0256 0257 //i is the position of the '\' 0258 QString ConvertASCIIToEnc::nextSequence(int &i) 0259 { 0260 //get first two characters 0261 QString seq = (QString)m_io->currentLine()[i++]; 0262 0263 if(m_io->currentLine()[i].isLetter()) { 0264 while(m_io->currentLine()[i].isLetter()) { 0265 seq += (QString)m_io->currentLine()[i++]; 0266 } 0267 } 0268 else { 0269 return seq + (QString)m_io->currentLine()[i++]; 0270 } 0271 0272 return seq; 0273 } 0274 0275 bool ConvertASCIIToEnc::isModifier(const QString& seq) 0276 { 0277 static QRegExp reModifier("\\\\([cHkruv]|\"|\'|\\^|`|~|=|\\.)"); 0278 return reModifier.exactMatch(seq); 0279 } 0280 0281 QString ConvertASCIIToEnc::getSequence(int &i) 0282 { 0283 QString seq = nextSequence(i); 0284 static QRegExp reBraces("\\{([a-zA-Z]?)\\}"); 0285 0286 if(isModifier(seq)) { 0287 KILE_DEBUG_MAIN << "\tisModifier true : " << seq; 0288 if(seq[seq.length() - 1].isLetter()) { 0289 seq += ' '; 0290 } 0291 0292 while(m_io->currentLine()[i].isSpace()) { 0293 ++i; 0294 } 0295 0296 if(m_io->currentLine().mid(i, 2) == "{}") { 0297 i = i + 2; 0298 } 0299 0300 if(m_io->currentLine()[i] == '\\') { 0301 seq += nextSequence(i); 0302 } 0303 else { 0304 if(reBraces.exactMatch(m_io->currentLine().mid(i, 3))) { 0305 KILE_DEBUG_MAIN << "\tbraces detected"; 0306 i = i + 3; 0307 seq += reBraces.cap(1); 0308 } 0309 else { 0310 QChar nextChar = m_io->currentLine()[i++]; 0311 if(!nextChar.isSpace()) { 0312 seq += (QString)nextChar; 0313 } 0314 } 0315 } 0316 } 0317 else if(m_map->canEncode(seq)) { 0318 if(m_io->currentLine().mid(i, 2) == "{}") { 0319 i = i + 2; 0320 } 0321 else if(m_io->currentLine()[i].isSpace()) { 0322 ++i; 0323 } 0324 } 0325 0326 return seq; 0327 } 0328 0329 QString ConvertASCIIToEnc::mapNext(int &i) 0330 { 0331 if(m_io->currentLine()[i] == '\\') { 0332 QString seq = getSequence(i); 0333 KILE_DEBUG_MAIN << "'\tsequence: " << seq; 0334 if(m_map->canEncode(seq)) { 0335 KILE_DEBUG_MAIN << "\tcan encode this"; 0336 //if ( m_io->currentLine().mid(i, 2) == "{}" ) i = i + 2; 0337 return m_map->toEncoding(seq); 0338 } 0339 else { 0340 return seq; 0341 } 0342 } 0343 0344 return ConvertBase::mapNext(i); 0345 } 0346 //END ConvertASCIIToEnc