File indexing completed on 2024-04-21 04:36:19

0001 /* This file is part of kdev-pg-qt
0002  * Copyright (C) 2011 Jonathan Schmidt-Dominé <devel@the-user.org>
0003  * 
0004  * This library is free software; you can redistribute it and/or
0005  * modify it under the terms of the GNU Library General Public
0006  * License as published by the Free Software Foundation; either
0007  * version 2 of the License, or (at your option) any later version.
0008  * 
0009  * This library is distributed in the hope that it will be useful,
0010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0012  * Library General Public License for more details.
0013  * 
0014  * You should have received a copy of the GNU Library General Public License
0015  * along with this library; see the file COPYING.LIB.  If not, write to
0016  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0017  * Boston, MA 02110-1301, USA.
0018  */
0019 
0020 #include "kdev-pg-unicode-loader.h"
0021 
0022 #include "kdev-pg.h"
0023 #include "kdev-pg-regexp.h"
0024 
0025 #include <QFile>
0026 
0027 namespace KDevPG
0028 {
0029 
/**
 * SET_CHAR(str, i, x): parse a run of hexadecimal digits.
 *
 * Resets `x` to 0, then reads characters of `str` starting at index `i` and
 * accumulates their value in `x`.  Upper- and lower-case digits are accepted.
 * On exit `i` is the index of the first character that is not a hexadecimal
 * digit; if there was no digit at all, `i` is unchanged and `x` is 0.
 *
 * The scan is terminated only by a non-hex character, there is no length
 * check: `str` must contain such a character at or after index `i`.
 *
 * `x` is multiplied only when a digit is actually consumed, so values that
 * use the full width of `x` (e.g. eight digits for a 32-bit type) are not
 * corrupted by a trailing multiply/divide pair.
 *
 * The expansion is a plain brace block, so the macro may be invoked with or
 * without a trailing semicolon.  The arguments are evaluated several times;
 * pass plain variables only.
 */
#define SET_CHAR(str, i, x) \
{ \
  (x) = 0; \
  for(; ; ++(i)) \
  { \
    const char setCharCurrent_ = (str)[(i)]; \
    int setCharDigit_; \
    if(setCharCurrent_ >= 'a' && setCharCurrent_ <= 'f') \
      setCharDigit_ = setCharCurrent_ - 'a' + 10; \
    else if(setCharCurrent_ >= 'A' && setCharCurrent_ <= 'F') \
      setCharDigit_ = setCharCurrent_ - 'A' + 10; \
    else if(setCharCurrent_ >= '0' && setCharCurrent_ <= '9') \
      setCharDigit_ = setCharCurrent_ - '0'; \
    else \
      break; \
    (x) = (x) * 16 + setCharDigit_; \
  } \
}
0047 
0048 void standardFormat(const QString fileName)
0049 {
0050   QMap<QByteArray, GNFA> res;
0051   QFile file(fileName);
0052   if(file.open(QIODevice::ReadOnly))
0053   {
0054     while(!file.atEnd())
0055     {
0056       auto line = file.readLine();
0057       if(line.size() > 0 && line[0] != '#')
0058       {
0059         if(line[0] != '#')
0060         {
0061           int idxDotDot = line.indexOf("..");
0062           if(idxDotDot != -1)
0063           {
0064             quint32 start;
0065             int i = 0;
0066             SET_CHAR(line, i, start)
0067             assert(i <= idxDotDot);
0068             i += 2;
0069             quint32 end = 0;
0070             int idxSemicolon = line.indexOf(';', idxDotDot + 2);
0071             SET_CHAR(line, i, end)
0072             assert(i <= idxSemicolon);
0073             QByteArray name = line.mid(idxSemicolon+1, (uint)(line.indexOf('#', idxSemicolon + 1)) - idxSemicolon - 1).trimmed().toLower();
0074             name.replace(' ', '_');
0075             name.replace('-', '_');
0076             auto toInsert = GNFA::range(start, end+1);
0077             if(globalSystem.regexpById[name] == nullptr)
0078               globalSystem.regexpById[name] = new GNFA(toInsert);
0079             else
0080               *globalSystem.regexpById[name] |= toInsert;
0081           }
0082           else
0083           {
0084             quint32 single;
0085             int i = 0;
0086             SET_CHAR(line, i, single);
0087             int idxSemicolon = line.indexOf(';', i);
0088             QByteArray name = line.mid(idxSemicolon+1, (uint)(line.indexOf('#', idxSemicolon + 1)) - idxSemicolon - 1).trimmed().toLower();
0089             name.replace(' ', '_');
0090             name.replace('-', '_');
0091             auto toInsert = GNFA::character(single);
0092             if(globalSystem.regexpById[name] == nullptr)
0093               globalSystem.regexpById[name] = new GNFA(toInsert);
0094             else
0095               *globalSystem.regexpById[name] |= toInsert;
0096           }
0097         }
0098       }
0099     }
0100   }
0101   else
0102     qFatal("** ERROR Failed to open unicode-data-file ``%s''", fileName.toUtf8().data());
0103 }
0104 
0105 void loadUnicodeData()
0106 {
0107   static bool loaded = false;
0108   if(!loaded)
0109   {
0110     loaded = true;
0111     standardFormat(":/unidata/Blocks.txt");
0112     standardFormat(":/unidata/PropList.txt");
0113     standardFormat(":/unidata/DerivedCoreProperties.txt");
0114     standardFormat(":/unidata/Scripts.txt");
0115     standardFormat(":/unidata/ScriptExtensions.txt");
0116     standardFormat(":/unidata/DerivedNumericType.txt");
0117     globalSystem.regexpById["num"] = new GNFA(*globalSystem.regexpById["numeric"]);
0118     *globalSystem.regexpById["num"] |= *globalSystem.regexpById["digit"];
0119     *globalSystem.regexpById["num"] |= *globalSystem.regexpById["decimal"];
0120     globalSystem.regexpById["ascii-range"] = new GNFA(GNFA::range(0, 0x80));
0121     globalSystem.regexpById["latin1-range"] = new GNFA(GNFA::range(0, 0x100));
0122     // IndicMatraCategory and IndicSyllabicCategory: same format, but should have a prefix, names like “vowel” are confusing when used for Indian vowels only
0123     // named sequences: other format
0124   }
0125 }
0126 
0127 }