File indexing completed on 2024-05-26 04:37:15

0001 /*
0002     Kchmviewer - a CHM and EPUB file viewer with broad language support
0003     SPDX-FileCopyrightText: 2004-2014 George Yunaev gyunaev@ulduzsoft.com
0004 
0005     SPDX-License-Identifier: GPL-3.0-or-later
0006 */
0007 
0008 #include "ebook_chm_encoding.h"
0009 
// One row of the codec lookup table: a codec name paired with the list of
// LCIDs that select it.
struct EbookChmTextEncodingEntry {
    // Codec name as a C string; a null pointer marks the end of the table.
    const char *qtcodec;
    // Pointer to the first element of a zero-terminated array of LCID values.
    const short *lcids;
};
0014 
// LCID lists referenced by text_encoding_table. Every list ends with a 0
// terminator, which is what the lookup loop uses to detect the end of a list.
// The lists are read-only data, hence const.
//
// Do not try to embed those in the text_encoding_table, it does not work - at least with gcc.
static const short lcid_arabic[] = {
    0x1401, 0x3C01, 0x0C01, 0x0801, 0x2C01, 0x3401, 0x3001, 0x1001, 0x1801, 0x2001, 0x4001, 0x0401, 0x2801, 0x1C01, 0x3801, 0x2401, 0x0429, 0x0420, 0,
};

static const short lcid_baltic[] = {0x0425, 0x0426, 0x0427, 0};

static const short lcid_centralEuropean[] = {0x041C, 0x041A, 0x0405, 0x040E, 0x0415, 0x0418, 0x081A, 0x041B, 0x0424, 0};

// NOTE: 0x0804 is listed for both the GB18030 and the GBK list.
static const short lcid_ChineseSimplifiedGB18030[] = {0x0804, 0};

static const short lcid_ChineseSimplifiedGBK[] = {0x0804, 0};

static const short lcid_ChineseSimplifiedGB2313[] = {0x1004, 0};

static const short lcid_ChineseTraditionalBig5[] = {0x0404, 0x1404, 0};

static const short lcid_ChineseTraditionalBigHKSCS[] = {0x0C04, 0};

static const short lcid_CyrillicCP1251[] = {0x082C, 0x0423, 0x0402, 0x042F, 0x0419, 0x0C1A, 0x0444, 0x0422, 0x0843, 0};

static const short lcid_CyrillicKOI8R[] = {0x7001, // artificial LCID
                                           0};

static const short lcid_Greek[] = {0x0408, 0};

static const short lcid_Hebrew[] = {0x040D, 0};

// NOTE: the three Japanese lists all contain the same single LCID, 0x0411.
static const short lcid_Japanese_eucJP[] = {0x0411, 0};

static const short lcid_Japanese_JIS7[] = {0x0411, 0};

static const short lcid_Japanese_ShiftJIS[] = {0x0411, 0};

static const short lcid_Korean_eucKR[] = {0x0412, 0};

static const short lcid_TamilTSCII[] = {0x0449, 0};

static const short lcid_ThaiTIS[] = {0x041E, 0};

static const short lcid_UkrainianKOI[] = {0x7006, 0};

static const short lcid_Turkish[] = {0x042C, 0x041F, 0x0443, 0};

static const short lcid_Vietnamese[] = {0x042A, 0};

static const short lcid_UnicodeUTF8[] = {0x7004, // artificial LCID
                                         0};

static const short lcid_UnicodeUTF16[] = {0x7005, // artificial LCID
                                          0};

// NOTE: 0x042C and 0x0443 also appear in lcid_Turkish, and 0x081A also appears
// in lcid_centralEuropean.
static const short lcid_Western[] = {0x0436, 0x042D, 0x0403, 0x0406, 0x0813, 0x0413, 0x0C09, 0x2809, 0x1009, 0x2409, 0x1809, 0x2009, 0x1409, 0x3409, 0x1C09, 0x2C09, 0x0809, 0x0409, 0x0438, 0x040B, 0x080C, 0x0C0C, 0x040C, 0x140C, 0x100C,
                                     0x0C07, 0x0407, 0x1407, 0x1007, 0x0807, 0x040F, 0x0421, 0x0410, 0x0810, 0x083E, 0x043E, 0x0414, 0x0814, 0x0416, 0x0816, 0x0432, 0x2C0A, 0x400A, 0x340A, 0x240A, 0x140A, 0x1C0A, 0x300A, 0x440A, 0x100A,
                                     0x480A, 0x080A, 0x4C0A, 0x180A, 0x3C0A, 0x280A, 0x500A, 0x0C0A, 0x380A, 0x200A, 0x0441, 0x081D, 0x041D, 0x0434, 0x0435, 0x042B, 0x042C, 0x0439, 0x043A, 0x044E, 0x044F, 0x081A, 0x0443, 0};
0070 
0071 static const EbookChmTextEncodingEntry text_encoding_table[] = {{"CP1256", lcid_arabic},
0072 
0073                                                                 {"CP1257", lcid_baltic},
0074 
0075                                                                 {"CP1250", lcid_centralEuropean},
0076 
0077                                                                 {"GB18030", lcid_ChineseSimplifiedGB18030},
0078 
0079                                                                 {"GBK", lcid_ChineseSimplifiedGBK},
0080 
0081                                                                 {"GB2313", lcid_ChineseSimplifiedGB2313},
0082 
0083                                                                 {"Big5", lcid_ChineseTraditionalBig5},
0084 
0085                                                                 {"Big5-HKSCS", lcid_ChineseTraditionalBigHKSCS},
0086 
0087                                                                 {"CP1251", lcid_CyrillicCP1251},
0088 
0089                                                                 {"KOI8-R", lcid_CyrillicKOI8R},
0090 
0091                                                                 {"CP1253", lcid_Greek},
0092 
0093                                                                 {"CP1255", lcid_Hebrew},
0094 
0095                                                                 {"Shift-JIS", lcid_Japanese_ShiftJIS},
0096 
0097                                                                 {"eucJP", lcid_Japanese_eucJP},
0098 
0099                                                                 {"JIS7", lcid_Japanese_JIS7},
0100 
0101                                                                 {"eucKR", lcid_Korean_eucKR},
0102 
0103                                                                 {"TSCII", lcid_TamilTSCII},
0104 
0105                                                                 {"TIS-620", lcid_ThaiTIS},
0106 
0107                                                                 {"KOI8-U", lcid_UkrainianKOI},
0108 
0109                                                                 {"CP1254", lcid_Turkish},
0110 
0111                                                                 {"CP1258", lcid_Vietnamese},
0112 
0113                                                                 {"UTF-8", lcid_UnicodeUTF8},
0114 
0115                                                                 {"UTF-16", lcid_UnicodeUTF16},
0116 
0117                                                                 {"CP1252", lcid_Western},
0118 
0119                                                                 {nullptr, nullptr}};
0120 
0121 QString Ebook_CHM_Encoding::guessByLCID(unsigned short lcid)
0122 {
0123     for (const EbookChmTextEncodingEntry *t = text_encoding_table; t->qtcodec; ++t) {
0124         for (const short *lcids = t->lcids; *lcids; lcids++) {
0125             if (*lcids == lcid) {
0126                 return QString::fromLatin1(t->qtcodec);
0127             }
0128         }
0129     }
0130 
0131     return QStringLiteral("UTF-8");
0132 }