Warning, file /frameworks/khtml/src/rendering/break_lines.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 #include <break_lines.h> 0002 #include <QLibrary> 0003 #include <QTextCodec> 0004 #include <stdio.h> 0005 #include <stdlib.h> 0006 0007 /* If HAVE_LIBTHAI is defined, libkhtml will link against 0008 * libthai since compile time. Otherwise it will try to 0009 * dlopen at run-time 0010 * 0011 * Ott Pattara Nov 14, 2004 0012 */ 0013 0014 #ifndef HAVE_LIBTHAI 0015 typedef int (*th_brk_def)(const unsigned char *, int[], int); 0016 static th_brk_def th_brk; 0017 #else 0018 #include <thai/thailib.h> 0019 #include <thai/thbrk.h> 0020 #endif 0021 0022 namespace khtml 0023 { 0024 struct ThaiCache { 0025 ThaiCache() 0026 { 0027 string = nullptr; 0028 allocated = 0x400; 0029 wbrpos = (int *) malloc(allocated * sizeof(int)); 0030 numwbrpos = 0; 0031 numisbreakable = 0x400; 0032 isbreakable = (int *) malloc(numisbreakable * sizeof(int)); 0033 library = nullptr; 0034 } 0035 ~ThaiCache() 0036 { 0037 free(wbrpos); 0038 free(isbreakable); 0039 if (library) { 0040 library->unload(); 0041 } 0042 delete library; 0043 } 0044 const QChar *string; 0045 int *wbrpos; 0046 int *isbreakable; 0047 int allocated; 0048 int numwbrpos, numisbreakable; 0049 QLibrary *library; 0050 }; 0051 static ThaiCache *cache = nullptr; 0052 0053 void cleanup_thaibreaks() 0054 { 0055 delete cache; 0056 cache = nullptr; 0057 #ifndef HAVE_LIBTHAI 0058 th_brk = nullptr; 0059 #endif 0060 } 0061 0062 bool isBreakableThai(const QChar *string, const int pos, const int len) 0063 { 0064 static QTextCodec *thaiCodec = QTextCodec::codecForMib(2259); 0065 //printf("Entering isBreakableThai with pos = %d\n", pos); 0066 0067 #ifndef HAVE_LIBTHAI 0068 0069 QLibrary *lib = new QLibrary(QLatin1String("libthai")); 0070 0071 /* load libthai dynamically */ 0072 if ((!th_brk) && thaiCodec) { 0073 printf("Try to load libthai dynamically...\n"); 0074 if (lib->load()) { 0075 th_brk = (th_brk_def) lib->resolve("th_brk"); 0076 } 0077 if (!th_brk) { 0078 // indication that loading failed and we shouldn't try to load again 0079 printf("Error, can't load libthai...\n"); 0080 thaiCodec = nullptr; 0081 if (lib->isLoaded()) { 0082 lib->unload(); 0083 } 0084 } 0085 } 0086 0087 if (!th_brk) { 0088 return true; 0089 } 0090 #endif 0091 0092 if (!cache) { 0093 cache = new ThaiCache; 0094 #ifndef HAVE_LIBTHAI 0095 cache->library = lib; 0096 #endif 0097 } 0098 0099 // build up string of thai chars 0100 if (string != cache->string) { 0101 //fprintf(stderr,"new string found (not in cache), calling libthai\n"); 0102 QByteArray cstr = thaiCodec->fromUnicode(QString::fromRawData(string, len)); 0103 //printf("About to call libthai::th_brk with str: %s",cstr.data()); 0104 0105 cache->numwbrpos = th_brk((const unsigned char *) cstr.data(), cache->wbrpos, cache->allocated); 0106 //fprintf(stderr,"libthai returns with value %d\n",cache->numwbrpos); 0107 if (cache->numwbrpos > cache->allocated) { 0108 cache->allocated = cache->numwbrpos; 0109 cache->wbrpos = (int *)realloc(cache->wbrpos, cache->allocated * sizeof(int)); 0110 cache->numwbrpos = th_brk((const unsigned char *) cstr.data(), cache->wbrpos, cache->allocated); 0111 } 0112 if (len > cache->numisbreakable) { 0113 cache->numisbreakable = len; 0114 cache->isbreakable = (int *)realloc(cache->isbreakable, cache->numisbreakable * sizeof(int)); 0115 } 0116 for (int i = 0; i < len; ++i) { 0117 cache->isbreakable[i] = 0; 0118 } 0119 if (cache->numwbrpos > 0) { 0120 for (int i = cache->numwbrpos - 1; i >= 0; --i) { 0121 cache->isbreakable[cache->wbrpos[i]] = 1; 0122 } 0123 } 0124 cache->string = string; 0125 } 0126 //printf("Returning %d\n", cache->isbreakable[pos]); 0127 return cache->isbreakable[pos]; 0128 } 0129 0130 /* 0131 array of unicode codes where breaking shouldn't occur. 0132 (in sorted order because of using with binary search) 0133 these are currently for Japanese, though simply adding 0134 Korean, Chinese ones should work as well 0135 */ 0136 /* 0137 dontbreakbefore[] contains characters not covered by QChar::Punctuation_Close that shouldn't be broken before. 0138 chars included in QChar::Punctuation_Close are listed below.(look at UAX #14) 0139 - 3001 ideographic comma 0140 - 3002 ideographic full stop 0141 - FE50 small comma 0142 - FF52 small full stop 0143 - FF0C fullwidth comma 0144 - FF0E fullwidth full stop 0145 - FF61 halfwidth ideographic full stop 0146 - FF64 halfwidth ideographic comma 0147 these character is commented out. 0148 */ 0149 static const ushort dontbreakbefore[] = { 0150 //0x3001, //ideographic comma 0151 //0x3002, //ideographic full stop 0152 0x3005, //ideographic iteration mark 0153 0x3009, //right angle bracket 0154 0x300b, //right double angle bracket 0155 0x300d, //right corner bracket 0156 0x300f, //right white corner bracket 0157 0x3011, //right black lenticular bracket 0158 0x3015, //right tortoise shell bracket 0159 0x3041, //small a hiragana 0160 0x3043, //small i hiragana 0161 0x3045, //small u hiragana 0162 0x3047, //small e hiragana 0163 0x3049, //small o hiragana 0164 0x3063, //small tsu hiragana 0165 0x3083, //small ya hiragana 0166 0x3085, //small yu hiragana 0167 0x3087, //small yo hiragana 0168 0x308E, //small wa hiragana 0169 0x309B, //jap voiced sound mark 0170 0x309C, //jap semi-voiced sound mark 0171 0x309D, //jap iteration mark hiragana 0172 0x309E, //jap voiced iteration mark hiragana 0173 0x30A1, //small a katakana 0174 0x30A3, //small i katakana 0175 0x30A5, //small u katakana 0176 0x30A7, //small e katakana 0177 0x30A9, //small o katakana 0178 0x30C3, //small tsu katakana 0179 0x30E3, //small ya katakana 0180 0x30E5, //small yu katakana 0181 0x30E7, //small yo katakana 0182 0x30EE, //small wa katakana 0183 0x30F5, //small ka katakana 0184 0x30F6, //small ke katakana 0185 0x30FC, //jap prolonged sound mark 0186 0x30FD, //jap iteration mark katakana 0187 0x30FE, //jap voiced iteration mark katakana 0188 //0xFE50, //small comma 0189 //0xFF52, //small full stop 0190 0xFF01, //fullwidth exclamation mark 0191 0xFF09, //fullwidth right parenthesis 0192 //0xFF0C, //fullwidth comma 0193 0xFF0D, //fullwidth hyphen-minus 0194 //0xFF0E, //fullwidth full stop 0195 0xFF1F, //fullwidth question mark 0196 0xFF3D, //fullwidth right square bracket 0197 0xFF5D, //fullwidth right curly bracket 0198 //0xFF61, //halfwidth ideographic full stop 0199 0xFF63, //halfwidth right corner bracket 0200 //0xFF64, //halfwidth ideographic comma 0201 0xFF67, //halfwidth katakana letter small a 0202 0xFF68, //halfwidth katakana letter small i 0203 0xFF69, //halfwidth katakana letter small u 0204 0xFF6a, //halfwidth katakana letter small e 0205 0xFF6b, //halfwidth katakana letter small o 0206 0xFF6c, //halfwidth katakana letter small ya 0207 0xFF6d, //halfwidth katakana letter small yu 0208 0xFF6e, //halfwidth katakana letter small yo 0209 0xFF6f, //halfwidth katakana letter small tu 0210 0xFF70 //halfwidth katakana-hiragana prolonged sound mark 0211 }; 0212 0213 // characters that aren't covered by QChar::Punctuation_Open 0214 static const ushort dontbreakafter[] = { 0215 0x3012, //postal mark 0216 0xFF03, //full width pound mark 0217 0xFF04, //full width dollar sign 0218 0xFF20, //full width @ 0219 0xFFE1, //full width british pound sign 0220 0xFFE5 //full width yen sign 0221 }; 0222 0223 static bool break_bsearch(const ushort *arr, const unsigned int count, const ushort val) 0224 { 0225 unsigned int left = 0; 0226 unsigned int right = count - 1; 0227 0228 while (left != right) { 0229 unsigned int i = (left + right) / 2; 0230 if (val == arr[i]) { 0231 return false; 0232 } 0233 if (val < arr[i]) { 0234 right = i; 0235 } else { 0236 left = i + 1; 0237 } 0238 } 0239 0240 return val != arr[left]; 0241 } 0242 0243 bool isBreakable(const QChar *str, const int pos, int len) 0244 { 0245 const QChar *c = str + pos; 0246 unsigned short ch = c->unicode(); 0247 if (ch > 0xff) { 0248 // not latin1, need to do more sophisticated checks for asian fonts 0249 unsigned char row = c->row(); 0250 if (row == 0x0e) { 0251 // 0e00 - 0e7f == Thai 0252 if (c->cell() < 0x80) { 0253 // consult libthai 0254 return isBreakableThai(str, pos, len); 0255 } else { 0256 return false; 0257 } 0258 } 0259 if ((row > 0x2d && row < 0xfb) || row == 0x11) { 0260 /* asian line breaking. */ 0261 if (pos == 0) { 0262 return false; // never break before first character 0263 } 0264 0265 // check for simple punctuation cases 0266 QChar::Category cat = c->category(); 0267 if (cat == QChar::Punctuation_Close || 0268 cat == QChar::Punctuation_Other || 0269 (str + (pos - 1))->category() == QChar::Punctuation_Open) { 0270 return false; 0271 } 0272 0273 // do binary search in dontbreak[] 0274 return break_bsearch(dontbreakbefore, (sizeof(dontbreakbefore) / sizeof(*dontbreakbefore)), c->unicode()) && 0275 break_bsearch(dontbreakafter, (sizeof(dontbreakafter) / sizeof(*dontbreakafter)), (str + (pos - 1))->unicode()); 0276 } else { // no asian font 0277 return c->isSpace(); 0278 } 0279 } else { 0280 if (ch == ' ' || ch == '\n') { 0281 return true; 0282 } 0283 } 0284 return false; 0285 } 0286 0287 }