File indexing completed on 2024-05-12 16:46:38

0001 /***************************************************************************
0002     Copyright (C) 2006-2009 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 // This class is adapted from Iso6937ToUnicode from the MARC4J project, available
0026 // from https://github.com/marc4j/marc4j, with the following notice:
0027 // * Copyright (C) 2002 Bas  Peters  (mail@bpeters.com)
0028 // * Copyright (C) 2002 Yves Pratter (ypratter@club-internet.fr)
0029 //
0030 // That source was released under the terms of the GNU Lesser General Public
0031 // License, version 2.1. In accordance with Condition 3 of that license,
0032 // I am applying the terms of the GNU General Public License to the source
0033 // code, and including a large portion of it here
0034 
0035 #include "iso6937converter.h"
0036 #include "../tellico_debug.h"
0037 
0038 #include <QString>
0039 #include <QByteArray>
0040 
0041 using Tellico::Iso6937Converter;
0042 
0043 QString Iso6937Converter::toUtf8(const QByteArray& text_) {
0044   const uint len = text_.length();
0045   QString result;
0046   result.reserve(len);
0047   for(uint i = 0; i < len; ++i) {
0048     uchar c = uchar(text_.at(i));
0049     if(isAscii(c)) {
0050       result.append(QLatin1Char(c));
0051     } else if(isCombining(c) && hasNext(i, len)) {
0052       const uchar next = uchar(text_.at(i+1));
0053       QChar d = getCombiningChar((c << 8) + next);
0054       if(!d.isNull()) {
0055         result.append(d);
0056         ++i;
0057       } else {
0058         result.append(getChar(c));
0059       }
0060     } else {
0061       result.append(getChar(c));
0062     }
0063   }
0064   result.squeeze();
0065   return result;
0066 }
0067 
0068 inline
0069 bool Iso6937Converter::hasNext(uint pos, uint len) {
0070   return pos < (len - 1);
0071 }
0072 
0073 inline
0074 bool Iso6937Converter::isAscii(uchar c) {
0075   return c <= 0x7F;
0076 }
0077 
0078 inline
0079 bool Iso6937Converter::isCombining(uchar c) {
0080   return c >= 0xC0 && c <= 0xDF;
0081 }
0082 
0083 // Source : http://www.open-std.org/jtc1/sc2/wg3/docs/6937cd.pdf
0084 QChar Iso6937Converter::getChar(uchar c) {
0085   switch(c) {
0086   case 0xA0:
0087     return 0x00A0; // 10/00 NO-BREAK SPACE
0088   case 0xA1:
0089     return 0x00A1; // 10/01 INVERTED EXCLAMATION MARK
0090   case 0xA2:
0091     return 0x00A2; // 10/02 CENT SIGN
0092   case 0xA3:
0093     return 0x00A3; // 10/03 POUND SIGN
0094     // 10/04 (This position shall not be used)
0095   case 0xA5:
0096     return 0x00A5; // 10/05 YEN SIGN
0097     // 10/06 (This position shall not be used)
0098   case 0xA7:
0099     return 0x00A7; // 10/07 SECTION SIGN
0100   case 0xA8:
0101     return 0x00A4; // 10/08 CURRENCY SIGN
0102   case 0xA9:
0103     return 0x2018; // 10/09 LEFT SINGLE QUOTATION MARK
0104   case 0xAA:
0105     return 0x201C; // 10/10 LEFT DOUBLE QUOTATION MARK
0106   case 0xAB:
0107     return 0x00AB; // 10/11 LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0108   case 0xAC:
0109     return 0x2190; // 10/12 LEFTWARDS ARROW
0110   case 0xAD:
0111     return 0x2191; // 10/13 UPWARDS ARROW
0112   case 0xAE:
0113     return 0x2192; // 10/14 RIGHTWARDS ARROW
0114   case 0xAF:
0115     return 0x2193; // 10/15 DOWNWARDS ARROW
0116 
0117   case 0xB0:
0118     return 0x00B0; // 11/00 DEGREE SIGN
0119   case 0xB1:
0120     return 0x00B1; // 11/01 PLUS-MINUS SIGN
0121   case 0xB2:
0122     return 0x00B2; // 11/02 SUPERSCRIPT TWO
0123   case 0xB3:
0124     return 0x00B3; // 11/03 SUPERSCRIPT THREE
0125   case 0xB4:
0126     return 0x00D7; // 11/04 MULTIPLICATION SIGN
0127   case 0xB5:
0128     return 0x00B5; // 11/05 MICRO SIGN
0129   case 0xB6:
0130     return 0x00B6; // 11/06 PILCROW SIGN
0131   case 0xB7:
0132     return 0x00B7; // 11/07 MIDDLE DOT
0133   case 0xB8:
0134     return 0x00F7; // 11/08 DIVISION SIGN
0135   case 0xB9:
0136     return 0x2019; // 11/09 RIGHT SINGLE QUOTATION MARK
0137   case 0xBA:
0138     return 0x201D; // 11/10 RIGHT DOUBLE QUOTATION MARK
0139   case 0xBB:
0140     return 0x00BB; // 11/11 RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0141   case 0xBC:
0142     return 0x00BC; // 11/12 VULGAR FRACTION ONE QUARTER
0143   case 0xBD:
0144     return 0x00BD; // 11/13 VULGAR FRACTION ONE HALF
0145   case 0xBE:
0146     return 0x00BE; // 11/14 VULGAR FRACTION THREE QUARTERS
0147   case 0xBF:
0148     return 0x00BF; // 11/15 INVERTED QUESTION MARK
0149 
0150     // 4/0 to 5/15 diacritic characters
0151 
0152   case 0xD0:
0153     return 0x2015; // 13/00 HORIZONTAL BAR
0154   case 0xD1:
0155     return 0x00B9; // 13/01 SUPERSCRIPT ONE
0156   case 0xD2:
0157     return 0x00AE; // 13/02 REGISTERED SIGN
0158   case 0xD3:
0159     return 0x00A9; // 13/03 COPYRIGHT SIGN
0160   case 0xD4:
0161     return 0x00AE; // 13/04 TRADE MARK SIGN
0162   case 0xD5:
0163     return 0x266A; // 13/05 EIGHTH NOTE
0164   case 0xD6:
0165     return 0x00AC; // 13/06 NOT SIGN
0166   case 0xD7:
0167     return 0x00A6; // 13/07 BROKEN BAR
0168     // 13/08 (This position shall not be used)
0169     // 13/09 (This position shall not be used)
0170     // 13/10 (This position shall not be used)
0171     // 13/11 (This position shall not be used)
0172   case 0xDC:
0173     return 0x215B; // 13/12 VULGAR FRACTION ONE EIGHTH
0174   case 0xDF:
0175     return 0x215E; // 13/15 VULGAR FRACTION SEVEN EIGHTHS
0176 
0177   case 0xE0:
0178     return 0x2126; // 14/00 OHM SIGN
0179   case 0xE1:
0180     return 0x00C6; // 14/01 LATIN CAPITAL LETTER AE
0181   case 0xE2:
0182     return 0x0110; // 14/02 LATIN CAPITAL LETTER D WITH STROKE
0183   case 0xE3:
0184     return 0x00AA; // 14/03 FEMININE ORDINAL INDICATOR
0185   case 0xE4:
0186     return 0x0126; // 14/04 LATIN CAPITAL LETTER H WITH STROKE
0187     // 14/05 (This position shall not be used)
0188   case 0xE6:
0189     return 0x0132; // 14/06 LATIN CAPITAL LIGATURE IJ
0190   case 0xE7:
0191     return 0x013F; // 14/07 LATIN CAPITAL LETTER L WITH MIDDLE DOT
0192   case 0xE8:
0193     return 0x0141; // 14/08 LATIN CAPITAL LETTER L WITH STROKE
0194   case 0xE9:
0195     return 0x00D8; // 14/09 LATIN CAPITAL LETTER O WITH STROKE
0196   case 0xEA:
0197     return 0x0152; // 14/10 LATIN CAPITAL LIGATURE OE
0198   case 0xEB:
0199     return 0x00BA; // 14/11 MASCULINE ORDINAL INDICATOR
0200   case 0xEC:
0201     return 0x00DE; // 14/12 LATIN CAPITAL LETTER THORN
0202   case 0xED:
0203     return 0x0166; // 14/13 LATIN CAPITAL LETTER T WITH STROKE
0204   case 0xEE:
0205     return 0x014A; // 14/14 LATIN CAPITAL LETTER ENG
0206   case 0xEF:
0207     return 0x0149; // 14/15 LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
0208 
0209   case 0xF0:
0210     return 0x0138; // 15/00 LATIN SMALL LETTER KRA
0211   case 0xF1:
0212     return 0x00E6; // 15/01 LATIN SMALL LETTER AE
0213   case 0xF2:
0214     return 0x0111; // 15/02 LATIN SMALL LETTER D WITH STROKE
0215   case 0xF3:
0216     return 0x00F0; // 15/03 LATIN SMALL LETTER ETH
0217   case 0xF4:
0218     return 0x0127; // 15/04 LATIN SMALL LETTER H WITH STROKE
0219   case 0xF5:
0220     return 0x0131; // 15/05 LATIN SMALL LETTER DOTLESS I
0221   case 0xF6:
0222     return 0x0133; // 15/06 LATIN SMALL LIGATURE IJ
0223   case 0xF7:
0224     return 0x0140; // 15/07 LATIN SMALL LETTER L WITH MIDDLE DOT
0225   case 0xF8:
0226     return 0x0142; // 15/08 LATIN SMALL LETTER L WITH STROKE
0227   case 0xF9:
0228     return 0x00F8; // 15/09 LATIN SMALL LETTER O WITH STROKE
0229   case 0xFA:
0230     return 0x0153; // 15/10 LATIN SMALL LIGATURE OE
0231   case 0xFB:
0232     return 0x00DF; // 15/11 LATIN SMALL LETTER SHARP S
0233   case 0xFC:
0234     return 0x00FE; // 15/12 LATIN SMALL LETTER THORN
0235   case 0xFD:
0236     return 0x0167; // 15/13 LATIN SMALL LETTER T WITH STROKE
0237   case 0xFE:
0238     return 0x014B; // 15/14 LATIN SMALL LETTER ENG
0239   case 0xFF:
0240     return 0x00AD; // 15/15 SOFT HYPHEN$
0241   default:
0242     return QLatin1Char(c);
0243   }
0244 }
0245 
// Maps a two-byte ISO 6937 sequence (non-spacing diacritic followed by a base
// character) to the single precomposed Unicode character.
// The key c carries the diacritic byte in bits 8-15 and the base character in
// bits 0-7, which is how toUtf8() builds it. A diacritic followed by a space
// (0x20) yields the free-standing form of the accent where one is listed.
// Returns a null QChar, after writing a debug message, when the pair has no
// entry in the table.
QChar Iso6937Converter::getCombiningChar(uint c) {
  switch(c) {
    // 12/00 (This position shall not be used)

    // 12/01 non-spacing grave accent
  case 0xC141:
    return 0x00C0; // LATIN CAPITAL LETTER A WITH GRAVE
  case 0xC145:
    return 0x00C8; // LATIN CAPITAL LETTER E WITH GRAVE
  case 0xC149:
    return 0x00CC; // LATIN CAPITAL LETTER I WITH GRAVE
  case 0xC14F:
    return 0x00D2; // LATIN CAPITAL LETTER O WITH GRAVE
  case 0xC155:
    return 0x00D9; // LATIN CAPITAL LETTER U WITH GRAVE
  case 0xC161:
    return 0x00E0; // LATIN SMALL LETTER A WITH GRAVE
  case 0xC165:
    return 0x00E8; // LATIN SMALL LETTER E WITH GRAVE
  case 0xC169:
    return 0x00EC; // LATIN SMALL LETTER I WITH GRAVE
  case 0xC16F:
    return 0x00F2; // LATIN SMALL LETTER O WITH GRAVE
  case 0xC175:
    return 0x00F9; // LATIN SMALL LETTER U WITH GRAVE

    // 12/02 non-spacing acute accent
  case 0xC220:
    return 0x00B4; // ACUTE ACCENT
  case 0xC241:
    return 0x00C1; // LATIN CAPITAL LETTER A WITH ACUTE
  case 0xC243:
    return 0x0106; // LATIN CAPITAL LETTER C WITH ACUTE
  case 0xC245:
    return 0x00C9; // LATIN CAPITAL LETTER E WITH ACUTE
  case 0xC249:
    return 0x00CD; // LATIN CAPITAL LETTER I WITH ACUTE
  case 0xC24C:
    return 0x0139; // LATIN CAPITAL LETTER L WITH ACUTE
  case 0xC24E:
    return 0x0143; // LATIN CAPITAL LETTER N WITH ACUTE
  case 0xC24F:
    return 0x00D3; // LATIN CAPITAL LETTER O WITH ACUTE
  case 0xC252:
    return 0x0154; // LATIN CAPITAL LETTER R WITH ACUTE
  case 0xC253:
    return 0x015A; // LATIN CAPITAL LETTER S WITH ACUTE
  case 0xC255:
    return 0x00DA; // LATIN CAPITAL LETTER U WITH ACUTE
  case 0xC259:
    return 0x00DD; // LATIN CAPITAL LETTER Y WITH ACUTE
  case 0xC25A:
    return 0x0179; // LATIN CAPITAL LETTER Z WITH ACUTE
  case 0xC261:
    return 0x00E1; // LATIN SMALL LETTER A WITH ACUTE
  case 0xC263:
    return 0x0107; // LATIN SMALL LETTER C WITH ACUTE
  case 0xC265:
    return 0x00E9; // LATIN SMALL LETTER E WITH ACUTE
  case 0xC267:
    return 0x01F5; // LATIN SMALL LETTER G WITH ACUTE
  case 0xC269:
    return 0x00ED; // LATIN SMALL LETTER I WITH ACUTE
  case 0xC26C:
    return 0x013A; // LATIN SMALL LETTER L WITH ACUTE
  case 0xC26E:
    return 0x0144; // LATIN SMALL LETTER N WITH ACUTE
  case 0xC26F:
    return 0x00F3; // LATIN SMALL LETTER O WITH ACUTE
  case 0xC272:
    return 0x0155; // LATIN SMALL LETTER R WITH ACUTE
  case 0xC273:
    return 0x015B; // LATIN SMALL LETTER S WITH ACUTE
  case 0xC275:
    return 0x00FA; // LATIN SMALL LETTER U WITH ACUTE
  case 0xC279:
    return 0x00FD; // LATIN SMALL LETTER Y WITH ACUTE
  case 0xC27A:
    return 0x017A; // LATIN SMALL LETTER Z WITH ACUTE

    // 12/03 non-spacing circumflex accent
  case 0xC341:
    return 0x00C2; // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
  case 0xC343:
    return 0x0108; // LATIN CAPITAL LETTER C WITH CIRCUMFLEX
  case 0xC345:
    return 0x00CA; // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
  case 0xC347:
    return 0x011C; // LATIN CAPITAL LETTER G WITH CIRCUMFLEX
  case 0xC348:
    return 0x0124; // LATIN CAPITAL LETTER H WITH CIRCUMFLEX
  case 0xC349:
    return 0x00CE; // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
  case 0xC34A:
    return 0x0134; // LATIN CAPITAL LETTER J WITH CIRCUMFLEX
  case 0xC34F:
    return 0x00D4; // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
  case 0xC353:
    return 0x015C; // LATIN CAPITAL LETTER S WITH CIRCUMFLEX
  case 0xC355:
    return 0x00DB; // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
  case 0xC357:
    return 0x0174; // LATIN CAPITAL LETTER W WITH CIRCUMFLEX
  case 0xC359:
    return 0x0176; // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
  case 0xC361:
    return 0x00E2; // LATIN SMALL LETTER A WITH CIRCUMFLEX
  case 0xC363:
    return 0x0109; // LATIN SMALL LETTER C WITH CIRCUMFLEX
  case 0xC365:
    return 0x00EA; // LATIN SMALL LETTER E WITH CIRCUMFLEX
  case 0xC367:
    return 0x011D; // LATIN SMALL LETTER G WITH CIRCUMFLEX
  case 0xC368:
    return 0x0125; // LATIN SMALL LETTER H WITH CIRCUMFLEX
  case 0xC369:
    return 0x00EE; // LATIN SMALL LETTER I WITH CIRCUMFLEX
  case 0xC36A:
    return 0x0135; // LATIN SMALL LETTER J WITH CIRCUMFLEX
  case 0xC36F:
    return 0x00F4; // LATIN SMALL LETTER O WITH CIRCUMFLEX
  case 0xC373:
    return 0x015D; // LATIN SMALL LETTER S WITH CIRCUMFLEX
  case 0xC375:
    return 0x00FB; // LATIN SMALL LETTER U WITH CIRCUMFLEX
  case 0xC377:
    return 0x0175; // LATIN SMALL LETTER W WITH CIRCUMFLEX
  case 0xC379:
    return 0x0177; // LATIN SMALL LETTER Y WITH CIRCUMFLEX

    // 12/04 non-spacing tilde
  case 0xC441:
    return 0x00C3; // LATIN CAPITAL LETTER A WITH TILDE
  case 0xC449:
    return 0x0128; // LATIN CAPITAL LETTER I WITH TILDE
  case 0xC44E:
    return 0x00D1; // LATIN CAPITAL LETTER N WITH TILDE
  case 0xC44F:
    return 0x00D5; // LATIN CAPITAL LETTER O WITH TILDE
  case 0xC455:
    return 0x0168; // LATIN CAPITAL LETTER U WITH TILDE
  case 0xC461:
    return 0x00E3; // LATIN SMALL LETTER A WITH TILDE
  case 0xC469:
    return 0x0129; // LATIN SMALL LETTER I WITH TILDE
  case 0xC46E:
    return 0x00F1; // LATIN SMALL LETTER N WITH TILDE
  case 0xC46F:
    return 0x00F5; // LATIN SMALL LETTER O WITH TILDE
  case 0xC475:
    return 0x0169; // LATIN SMALL LETTER U WITH TILDE

    // 12/05 non-spacing macron
  case 0xC541:
    return 0x0100; // LATIN CAPITAL LETTER A WITH MACRON
  case 0xC545:
    return 0x0112; // LATIN CAPITAL LETTER E WITH MACRON
  case 0xC549:
    return 0x012A; // LATIN CAPITAL LETTER I WITH MACRON
  case 0xC54F:
    return 0x014C; // LATIN CAPITAL LETTER O WITH MACRON
  case 0xC555:
    return 0x016A; // LATIN CAPITAL LETTER U WITH MACRON
  case 0xC561:
    return 0x0101; // LATIN SMALL LETTER A WITH MACRON
  case 0xC565:
    return 0x0113; // LATIN SMALL LETTER E WITH MACRON
  case 0xC569:
    return 0x012B; // LATIN SMALL LETTER I WITH MACRON
  case 0xC56F:
    return 0x014D; // LATIN SMALL LETTER O WITH MACRON
  case 0xC575:
    return 0x016B; // LATIN SMALL LETTER U WITH MACRON

    // 12/06 non-spacing breve
  case 0xC620:
    return 0x02D8; // BREVE
  case 0xC641:
    return 0x0102; // LATIN CAPITAL LETTER A WITH BREVE
  case 0xC647:
    return 0x011E; // LATIN CAPITAL LETTER G WITH BREVE
  case 0xC655:
    return 0x016C; // LATIN CAPITAL LETTER U WITH BREVE
  case 0xC661:
    return 0x0103; // LATIN SMALL LETTER A WITH BREVE
  case 0xC667:
    return 0x011F; // LATIN SMALL LETTER G WITH BREVE
  case 0xC675:
    return 0x016D; // LATIN SMALL LETTER U WITH BREVE

    // 12/07 non-spacing dot above
  case 0xC743:
    return 0x010A; // LATIN CAPITAL LETTER C WITH DOT ABOVE
  case 0xC745:
    return 0x0116; // LATIN CAPITAL LETTER E WITH DOT ABOVE
  case 0xC747:
    return 0x0120; // LATIN CAPITAL LETTER G WITH DOT ABOVE
  case 0xC749:
    return 0x0130; // LATIN CAPITAL LETTER I WITH DOT ABOVE
  case 0xC75A:
    return 0x017B; // LATIN CAPITAL LETTER Z WITH DOT ABOVE
  case 0xC763:
    return 0x010B; // LATIN SMALL LETTER C WITH DOT ABOVE
  case 0xC765:
    return 0x0117; // LATIN SMALL LETTER E WITH DOT ABOVE
  case 0xC767:
    return 0x0121; // LATIN SMALL LETTER G WITH DOT ABOVE
  case 0xC77A:
    return 0x017C; // LATIN SMALL LETTER Z WITH DOT ABOVE

    // 12/08 non-spacing diaeresis
  case 0xC820:
    return 0x00A8; // DIAERESIS
  case 0xC841:
    return 0x00C4; // LATIN CAPITAL LETTER A WITH DIAERESIS
  case 0xC845:
    return 0x00CB; // LATIN CAPITAL LETTER E WITH DIAERESIS
  case 0xC849:
    return 0x00CF; // LATIN CAPITAL LETTER I WITH DIAERESIS
  case 0xC84F:
    return 0x00D6; // LATIN CAPITAL LETTER O WITH DIAERESIS
  case 0xC855:
    return 0x00DC; // LATIN CAPITAL LETTER U WITH DIAERESIS
  case 0xC859:
    return 0x0178; // LATIN CAPITAL LETTER Y WITH DIAERESIS
  case 0xC861:
    return 0x00E4; // LATIN SMALL LETTER A WITH DIAERESIS
  case 0xC865:
    return 0x00EB; // LATIN SMALL LETTER E WITH DIAERESIS
  case 0xC869:
    return 0x00EF; // LATIN SMALL LETTER I WITH DIAERESIS
  case 0xC86F:
    return 0x00F6; // LATIN SMALL LETTER O WITH DIAERESIS
  case 0xC875:
    return 0x00FC; // LATIN SMALL LETTER U WITH DIAERESIS
  case 0xC879:
    return 0x00FF; // LATIN SMALL LETTER Y WITH DIAERESIS

    // 12/09 (This position shall not be used)

    // 12/10 non-spacing ring above
  case 0xCA20:
    return 0x02DA; // RING ABOVE
  case 0xCA41:
    return 0x00C5; // LATIN CAPITAL LETTER A WITH RING ABOVE
  case 0xCA55:
    return 0x016E; // LATIN CAPITAL LETTER U WITH RING ABOVE
  case 0xCA61:
    return 0x00E5; // LATIN SMALL LETTER A WITH RING ABOVE
  case 0xCA75:
    return 0x016F; // LATIN SMALL LETTER U WITH RING ABOVE

    // 12/11 non-spacing cedilla
  case 0xCB20:
    return 0x00B8; // CEDILLA
  case 0xCB43:
    return 0x00C7; // LATIN CAPITAL LETTER C WITH CEDILLA
  case 0xCB47:
    return 0x0122; // LATIN CAPITAL LETTER G WITH CEDILLA
  case 0xCB4B:
    return 0x0136; // LATIN CAPITAL LETTER K WITH CEDILLA
  case 0xCB4C:
    return 0x013B; // LATIN CAPITAL LETTER L WITH CEDILLA
  case 0xCB4E:
    return 0x0145; // LATIN CAPITAL LETTER N WITH CEDILLA
  case 0xCB52:
    return 0x0156; // LATIN CAPITAL LETTER R WITH CEDILLA
  case 0xCB53:
    return 0x015E; // LATIN CAPITAL LETTER S WITH CEDILLA
  case 0xCB54:
    return 0x0162; // LATIN CAPITAL LETTER T WITH CEDILLA
  case 0xCB63:
    return 0x00E7; // LATIN SMALL LETTER C WITH CEDILLA
  case 0xCB67:
    return 0x0123; // LATIN SMALL LETTER G WITH CEDILLA
  case 0xCB6B:
    return 0x0137; // LATIN SMALL LETTER K WITH CEDILLA
  case 0xCB6C:
    return 0x013C; // LATIN SMALL LETTER L WITH CEDILLA
  case 0xCB6E:
    return 0x0146; // LATIN SMALL LETTER N WITH CEDILLA
  case 0xCB72:
    return 0x0157; // LATIN SMALL LETTER R WITH CEDILLA
  case 0xCB73:
    return 0x015F; // LATIN SMALL LETTER S WITH CEDILLA
  case 0xCB74:
    return 0x0163; // LATIN SMALL LETTER T WITH CEDILLA

    // 12/12 (This position shall not be used)

    // 12/13 non-spacing double acute accent
  case 0xCD4F:
    return 0x0150; // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
  case 0xCD55:
    return 0x0170; // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
  case 0xCD6F:
    return 0x0151; // LATIN SMALL LETTER O WITH DOUBLE ACUTE
  case 0xCD75:
    return 0x0171; // LATIN SMALL LETTER U WITH DOUBLE ACUTE

    // 12/14 non-spacing ogonek
  case 0xCE20:
    return 0x02DB; // OGONEK
  case 0xCE41:
    return 0x0104; // LATIN CAPITAL LETTER A WITH OGONEK
  case 0xCE45:
    return 0x0118; // LATIN CAPITAL LETTER E WITH OGONEK
  case 0xCE49:
    return 0x012E; // LATIN CAPITAL LETTER I WITH OGONEK
  case 0xCE55:
    return 0x0172; // LATIN CAPITAL LETTER U WITH OGONEK
  case 0xCE61:
    return 0x0105; // LATIN SMALL LETTER A WITH OGONEK
  case 0xCE65:
    return 0x0119; // LATIN SMALL LETTER E WITH OGONEK
  case 0xCE69:
    return 0x012F; // LATIN SMALL LETTER I WITH OGONEK
  case 0xCE75:
    return 0x0173; // LATIN SMALL LETTER U WITH OGONEK

    // 12/15 non-spacing caron
  case 0xCF20:
    return 0x02C7; // CARON
  case 0xCF43:
    return 0x010C; // LATIN CAPITAL LETTER C WITH CARON
  case 0xCF44:
    return 0x010E; // LATIN CAPITAL LETTER D WITH CARON
  case 0xCF45:
    return 0x011A; // LATIN CAPITAL LETTER E WITH CARON
  case 0xCF4C:
    return 0x013D; // LATIN CAPITAL LETTER L WITH CARON
  case 0xCF4E:
    return 0x0147; // LATIN CAPITAL LETTER N WITH CARON
  case 0xCF52:
    return 0x0158; // LATIN CAPITAL LETTER R WITH CARON
  case 0xCF53:
    return 0x0160; // LATIN CAPITAL LETTER S WITH CARON
  case 0xCF54:
    return 0x0164; // LATIN CAPITAL LETTER T WITH CARON
  case 0xCF5A:
    return 0x017D; // LATIN CAPITAL LETTER Z WITH CARON
  case 0xCF63:
    return 0x010D; // LATIN SMALL LETTER C WITH CARON
  case 0xCF64:
    return 0x010F; // LATIN SMALL LETTER D WITH CARON
  case 0xCF65:
    return 0x011B; // LATIN SMALL LETTER E WITH CARON
  case 0xCF6C:
    return 0x013E; // LATIN SMALL LETTER L WITH CARON
  case 0xCF6E:
    return 0x0148; // LATIN SMALL LETTER N WITH CARON
  case 0xCF72:
    return 0x0159; // LATIN SMALL LETTER R WITH CARON
  case 0xCF73:
    return 0x0161; // LATIN SMALL LETTER S WITH CARON
  case 0xCF74:
    return 0x0165; // LATIN SMALL LETTER T WITH CARON
  case 0xCF7A:
    return 0x017E; // LATIN SMALL LETTER Z WITH CARON

  default:
    // unknown diacritic/base pair: report it and signal failure with a null QChar
    myDebug() << "no match for " << c;
    return QChar();
  }
}