File indexing completed on 2024-04-28 15:54:30
0001 /* This file is part of kdev-pg-qt 0002 * Copyright (C) 2011 Jonathan Schmidt-Dominé <devel@the-user.org> 0003 * 0004 * This library is free software; you can redistribute it and/or 0005 * modify it under the terms of the GNU Library General Public 0006 * License as published by the Free Software Foundation; either 0007 * version 2 of the License, or (at your option) any later version. 0008 * 0009 * This library is distributed in the hope that it will be useful, 0010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 0011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0012 * Library General Public License for more details. 0013 * 0014 * You should have received a copy of the GNU Library General Public License 0015 * along with this library; see the file COPYING.LIB. If not, write to 0016 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 0017 * Boston, MA 02110-1301, USA. 0018 */ 0019 0020 #include "kdev-pg-unicode-loader.h" 0021 0022 #include "kdev-pg.h" 0023 #include "kdev-pg-regexp.h" 0024 0025 #include <QFile> 0026 0027 namespace KDevPG 0028 { 0029 0030 #define SET_CHAR(str, i, x) \ 0031 { \ 0032 x = 0; \ 0033 for(; ; ++i) \ 0034 { \ 0035 x *= 16; \ 0036 if(str[i] >= 'a' && str[i] <= 'f') \ 0037 x += (str[i] - 'a' + 10); \ 0038 else if(str[i] >= 'A' && str[i] <= 'F') \ 0039 x += (str[i] - 'A' + 10); \ 0040 else if(str[i] >= '0' && str[i] <= '9') \ 0041 x += (str[i] - '0'); \ 0042 else \ 0043 break; \ 0044 } \ 0045 x /= 16; \ 0046 } 0047 0048 void standardFormat(const QString fileName) 0049 { 0050 QMap<QByteArray, GNFA> res; 0051 QFile file(fileName); 0052 if(file.open(QIODevice::ReadOnly)) 0053 { 0054 while(!file.atEnd()) 0055 { 0056 auto line = file.readLine(); 0057 if(line.size() > 0 && line[0] != '#') 0058 { 0059 if(line[0] != '#') 0060 { 0061 int idxDotDot = line.indexOf(".."); 0062 if(idxDotDot != -1) 0063 { 0064 quint32 start; 0065 int i = 0; 0066 SET_CHAR(line, i, start) 0067 assert(i <= idxDotDot); 0068 i += 2; 0069 quint32 end = 0; 0070 int idxSemicolon = line.indexOf(';', idxDotDot + 2); 0071 SET_CHAR(line, i, end) 0072 assert(i <= idxSemicolon); 0073 QByteArray name = line.mid(idxSemicolon+1, (uint)(line.indexOf('#', idxSemicolon + 1)) - idxSemicolon - 1).trimmed().toLower(); 0074 name.replace(' ', '_'); 0075 name.replace('-', '_'); 0076 auto toInsert = GNFA::range(start, end+1); 0077 if(globalSystem.regexpById[name] == nullptr) 0078 globalSystem.regexpById[name] = new GNFA(toInsert); 0079 else 0080 *globalSystem.regexpById[name] |= toInsert; 0081 } 0082 else 0083 { 0084 quint32 single; 0085 int i = 0; 0086 SET_CHAR(line, i, single); 0087 int idxSemicolon = line.indexOf(';', i); 0088 QByteArray name = line.mid(idxSemicolon+1, (uint)(line.indexOf('#', idxSemicolon + 1)) - idxSemicolon - 1).trimmed().toLower(); 0089 name.replace(' ', '_'); 0090 name.replace('-', '_'); 0091 auto toInsert = GNFA::character(single); 0092 if(globalSystem.regexpById[name] == nullptr) 0093 globalSystem.regexpById[name] = new GNFA(toInsert); 0094 else 0095 *globalSystem.regexpById[name] |= toInsert; 0096 } 0097 } 0098 } 0099 } 0100 } 0101 else 0102 qFatal("** ERROR Failed to open unicode-data-file ``%s''", fileName.toUtf8().data()); 0103 } 0104 0105 void loadUnicodeData() 0106 { 0107 static bool loaded = false; 0108 if(!loaded) 0109 { 0110 loaded = true; 0111 standardFormat(":/unidata/Blocks.txt"); 0112 standardFormat(":/unidata/PropList.txt"); 0113 standardFormat(":/unidata/DerivedCoreProperties.txt"); 0114 standardFormat(":/unidata/Scripts.txt"); 0115 standardFormat(":/unidata/ScriptExtensions.txt"); 0116 standardFormat(":/unidata/DerivedNumericType.txt"); 0117 globalSystem.regexpById["num"] = new GNFA(*globalSystem.regexpById["numeric"]); 0118 *globalSystem.regexpById["num"] |= *globalSystem.regexpById["digit"]; 0119 *globalSystem.regexpById["num"] |= *globalSystem.regexpById["decimal"]; 0120 globalSystem.regexpById["ascii-range"] = new GNFA(GNFA::range(0, 0x80)); 0121 globalSystem.regexpById["latin1-range"] = new GNFA(GNFA::range(0, 0x100)); 0122 // IndicMatraCategory and IndicSyllabicCategory: same format, but should have a prefix, names like “vowel” are confusing when used for Indian vowels only 0123 // named sequences: other format 0124 } 0125 } 0126 0127 }