File indexing completed on 2024-04-28 03:53:03

0001 /*  -*- C++ -*-
0002     SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
0003 
0004     SPDX-License-Identifier: MIT
0005 */
0006 
0007 #include "nsCodingStateMachine.h"
0008 
0009 namespace kencodingprober
0010 {
// Byte-class table referenced by HZSMModel: 32 array elements, each built by
// PCK4BITS from eight class values, covering all 256 byte values (the trailing
// comment on every row gives the byte range that row covers).
// Reading each row in ascending byte order: 0x7b '{' -> class 4,
// 0x7d '}' -> class 5, 0x7e '~' -> class 2; 0x00, 0x1b and every byte
// >= 0x80 -> class 1; everything else -> class 0.
// NOTE(review): PCK4BITS is defined outside this file; presumably it packs its
// eight arguments as 4-bit fields with the first argument lowest -- confirm in
// nsCodingStateMachine.h.
0011 static const unsigned int HZ_cls[256 / 8] = {
0012     PCK4BITS(1, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
0013     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 08 - 0f
0014     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
0015     PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f
0016     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 20 - 27
0017     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 28 - 2f
0018     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
0019     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
0020     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 40 - 47
0021     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f
0022     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
0023     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
0024     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
0025     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
0026     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
0027     PCK4BITS(0, 0, 0, 4, 0, 5, 2, 0), // 78 - 7f
0028     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 80 - 87
0029     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 88 - 8f
0030     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 90 - 97
0031     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 98 - 9f
0032     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // a0 - a7
0033     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // a8 - af
0034     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // b0 - b7
0035     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // b8 - bf
0036     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // c0 - c7
0037     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // c8 - cf
0038     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // d0 - d7
0039     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // d8 - df
0040     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // e0 - e7
0041     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // e8 - ef
0042     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // f0 - f7
0043     PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1) // f8 - ff
0044 };
0045 
// Transition table referenced by HZSMModel. Every entry is either one of the
// eStart / eError / eItsMe constants or a numbered intermediate state (3..6).
// Six PCK4BITS rows give 48 entries; the trailing row comments show the flat
// entry index range of each row.
// NOTE(review): the lookup is presumably state * 6 + byte class (6 being the
// value passed in HZSMModel) -- confirm against nsCodingStateMachine.h.
0046 static const unsigned int HZ_st[6] = {
0047     PCK4BITS(eStart, eError, 3, eStart, eStart, eStart, eError, eError), // 00-07
0048     PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), // 08-0f
0049     PCK4BITS(eItsMe, eItsMe, eError, eError, eStart, eStart, 4, eError), // 10-17
0050     PCK4BITS(5, eError, 6, eError, 5, 5, 4, eError), // 18-1f
0051     PCK4BITS(4, eError, 4, 4, 4, eError, 4, eError), // 20-27
0052     PCK4BITS(4, eItsMe, eStart, eStart, eStart, eStart, eStart, eStart) // 28-2f
0053 };
0054 
// Six zero entries, handed to HZSMModel as its length table; the entry count
// equals the 6 passed in that model.
// NOTE(review): presumably one character-length slot per byte class -- confirm.
0055 static const unsigned int HZCharLenTable[] = {0, 0, 0, 0, 0, 0};
0056 
// Model named "HZ-GB-2312", initialised positionally with: a descriptor
// wrapping HZ_cls, the value 6, a descriptor wrapping HZ_st, HZCharLenTable and
// the name string. Not declared static, unlike the tables it refers to.
// NOTE(review): SMModel and the e*4bits constants are declared outside this
// file; 6 is presumably the number of byte classes (HZ_cls assigns classes up
// to 5) and the e*4bits values presumably describe 4-bit packing -- confirm in
// nsCodingStateMachine.h.
0057 const SMModel HZSMModel = {
0058     {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_cls},
0059     6,
0060     {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_st},
0061     HZCharLenTable,
0062     "HZ-GB-2312",
0063 };
0064 
// Byte-class table referenced by ISO2022CNSMModel: 32 array elements, each
// built by PCK4BITS from eight class values, covering all 256 byte values (the
// trailing comment on every row gives the byte range that row covers).
// Reading each row in ascending byte order: 0x1b -> class 1, 0x29 ')' ->
// class 3, 0x43 'C' -> class 4; 0x00 and every byte >= 0x80 -> class 2;
// everything else -> class 0.
// NOTE(review): PCK4BITS is defined outside this file; presumably it packs its
// eight arguments as 4-bit fields with the first argument lowest -- confirm in
// nsCodingStateMachine.h.
0065 static const unsigned int ISO2022CN_cls[256 / 8] = {
0066     PCK4BITS(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
0067     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 08 - 0f
0068     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
0069     PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f
0070     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 20 - 27
0071     PCK4BITS(0, 3, 0, 0, 0, 0, 0, 0), // 28 - 2f
0072     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
0073     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
0074     PCK4BITS(0, 0, 0, 4, 0, 0, 0, 0), // 40 - 47
0075     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f
0076     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
0077     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
0078     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
0079     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
0080     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
0081     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f
0082     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87
0083     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f
0084     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97
0085     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f
0086     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
0087     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
0088     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
0089     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
0090     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
0091     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
0092     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
0093     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
0094     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7
0095     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef
0096     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7
0097     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2) // f8 - ff
0098 };
0099 
// Transition table referenced by ISO2022CNSMModel. Every entry is either one
// of the eStart / eError / eItsMe constants or a numbered intermediate state
// (3..6). Eight PCK4BITS rows give 64 entries; the trailing row comments show
// the flat entry index range of each row.
// NOTE(review): the lookup is presumably state * 9 + byte class (9 being the
// value passed in ISO2022CNSMModel) -- confirm against nsCodingStateMachine.h.
0100 static const unsigned int ISO2022CN_st[8] = {
0101     PCK4BITS(eStart, 3, eError, eStart, eStart, eStart, eStart, eStart), // 00-07
0102     PCK4BITS(eStart, eError, eError, eError, eError, eError, eError, eError), // 08-0f
0103     PCK4BITS(eError, eError, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe), // 10-17
0104     PCK4BITS(eItsMe, eItsMe, eItsMe, eError, eError, eError, 4, eError), // 18-1f
0105     PCK4BITS(eError, eError, eError, eItsMe, eError, eError, eError, eError), // 20-27
0106     PCK4BITS(5, 6, eError, eError, eError, eError, eError, eError), // 28-2f
0107     PCK4BITS(eError, eError, eError, eItsMe, eError, eError, eError, eError), // 30-37
0108     PCK4BITS(eError, eError, eError, eError, eError, eItsMe, eError, eStart) // 38-3f
0109 };
0110 
// Nine zero entries, handed to ISO2022CNSMModel as its length table; the entry
// count equals the 9 passed in that model.
// NOTE(review): presumably one character-length slot per byte class -- confirm.
0111 static const unsigned int ISO2022CNCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
0112 
// Model named "ISO-2022-CN", initialised positionally with: a descriptor
// wrapping ISO2022CN_cls, the value 9, a descriptor wrapping ISO2022CN_st,
// ISO2022CNCharLenTable and the name string. Not declared static, unlike the
// tables it refers to.
// NOTE(review): SMModel and the e*4bits constants are declared outside this
// file; 9 is presumably the class count used to index the state table, although
// ISO2022CN_cls itself only assigns classes 0..4 -- confirm in
// nsCodingStateMachine.h.
0113 const SMModel ISO2022CNSMModel = {
0114     {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_cls},
0115     9,
0116     {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_st},
0117     ISO2022CNCharLenTable,
0118     "ISO-2022-CN",
0119 };
0120 
// Byte-class table referenced by ISO2022JPSMModel: 32 array elements, each
// built by PCK4BITS from eight class values, covering all 256 byte values (the
// trailing comment on every row gives the byte range that row covers).
// Reading each row in ascending byte order: 0x1b -> class 1, 0x28 '(' ->
// class 3, 0x42 'B' -> class 4, 0x4a 'J' -> class 5, 0x40 '@' -> class 6,
// 0x24 '$' -> class 7, 0x44 'D' -> class 8, 0x49 'I' -> class 9;
// 0x00, 0x0e, 0x0f and every byte >= 0x80 -> class 2; everything else -> class 0.
// NOTE(review): PCK4BITS is defined outside this file; presumably it packs its
// eight arguments as 4-bit fields with the first argument lowest -- confirm in
// nsCodingStateMachine.h.
0121 static const unsigned int ISO2022JP_cls[256 / 8] = {
0122     PCK4BITS(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
0123     PCK4BITS(0, 0, 0, 0, 0, 0, 2, 2), // 08 - 0f
0124     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
0125     PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f
0126     PCK4BITS(0, 0, 0, 0, 7, 0, 0, 0), // 20 - 27
0127     PCK4BITS(3, 0, 0, 0, 0, 0, 0, 0), // 28 - 2f
0128     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
0129     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
0130     PCK4BITS(6, 0, 4, 0, 8, 0, 0, 0), // 40 - 47
0131     PCK4BITS(0, 9, 5, 0, 0, 0, 0, 0), // 48 - 4f
0132     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
0133     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
0134     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
0135     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
0136     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
0137     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f
0138     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87
0139     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f
0140     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97
0141     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f
0142     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
0143     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
0144     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
0145     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
0146     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
0147     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
0148     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
0149     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
0150     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7
0151     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef
0152     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7
0153     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2) // f8 - ff
0154 };
0155 
// Transition table referenced by ISO2022JPSMModel. Every entry is either one
// of the eStart / eError / eItsMe constants or a numbered intermediate state
// (3..6). Nine PCK4BITS rows give 72 entries; the trailing row comments show
// the flat entry index range of each row.
// NOTE(review): the lookup is presumably state * 10 + byte class (10 being the
// value passed in ISO2022JPSMModel) -- confirm against nsCodingStateMachine.h.
0156 static const unsigned int ISO2022JP_st[9] = {
0157     PCK4BITS(eStart, 3, eError, eStart, eStart, eStart, eStart, eStart), // 00-07
0158     PCK4BITS(eStart, eStart, eError, eError, eError, eError, eError, eError), // 08-0f
0159     PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), // 10-17
0160     PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError, eError), // 18-1f
0161     PCK4BITS(eError, 5, eError, eError, eError, 4, eError, eError), // 20-27
0162     PCK4BITS(eError, eError, eError, 6, eItsMe, eError, eItsMe, eError), // 28-2f
0163     PCK4BITS(eError, eError, eError, eError, eError, eError, eItsMe, eItsMe), // 30-37
0164     PCK4BITS(eError, eError, eError, eItsMe, eError, eError, eError, eError), // 38-3f
0165     PCK4BITS(eError, eError, eError, eError, eItsMe, eError, eStart, eStart) // 40-47
0166 };
0167 
// Ten zero entries, handed to ISO2022JPSMModel as its length table; the entry
// count equals the 10 passed in that model.
// NOTE(review): presumably one character-length slot per byte class -- confirm.
0168 static const unsigned int ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
0169 
// Model named "ISO-2022-JP", initialised positionally with: a descriptor
// wrapping ISO2022JP_cls, the value 10, a descriptor wrapping ISO2022JP_st,
// ISO2022JPCharLenTable and the name string. Not declared static, unlike the
// tables it refers to.
// NOTE(review): SMModel and the e*4bits constants are declared outside this
// file; 10 is presumably the number of byte classes (ISO2022JP_cls assigns
// classes 0..9) -- confirm in nsCodingStateMachine.h.
0170 const SMModel ISO2022JPSMModel = {
0171     {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls},
0172     10,
0173     {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st},
0174     ISO2022JPCharLenTable,
0175     "ISO-2022-JP",
0176 };
0177 
// Byte-class table referenced by ISO2022KRSMModel: 32 array elements, each
// built by PCK4BITS from eight class values, covering all 256 byte values (the
// trailing comment on every row gives the byte range that row covers).
// Reading each row in ascending byte order: 0x1b -> class 1, 0x24 '$' ->
// class 3, 0x29 ')' -> class 4, 0x43 'C' -> class 5; 0x00 and every byte
// >= 0x80 -> class 2; everything else -> class 0.
// NOTE(review): PCK4BITS is defined outside this file; presumably it packs its
// eight arguments as 4-bit fields with the first argument lowest -- confirm in
// nsCodingStateMachine.h.
0178 static const unsigned int ISO2022KR_cls[256 / 8] = {
0179     PCK4BITS(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
0180     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 08 - 0f
0181     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
0182     PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f
0183     PCK4BITS(0, 0, 0, 0, 3, 0, 0, 0), // 20 - 27
0184     PCK4BITS(0, 4, 0, 0, 0, 0, 0, 0), // 28 - 2f
0185     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
0186     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
0187     PCK4BITS(0, 0, 0, 5, 0, 0, 0, 0), // 40 - 47
0188     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f
0189     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
0190     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
0191     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
0192     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
0193     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
0194     PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f
0195     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87
0196     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f
0197     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97
0198     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f
0199     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
0200     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
0201     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
0202     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
0203     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
0204     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
0205     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
0206     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
0207     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7
0208     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef
0209     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7
0210     PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2) // f8 - ff
0211 };
0212 
// Transition table referenced by ISO2022KRSMModel. Every entry is either one
// of the eStart / eError / eItsMe constants or a numbered intermediate state
// (3..5). Five PCK4BITS rows give 40 entries; the trailing row comments show
// the flat entry index range of each row.
// NOTE(review): the lookup is presumably state * 6 + byte class (6 being the
// value passed in ISO2022KRSMModel) -- confirm against nsCodingStateMachine.h.
0213 static const unsigned int ISO2022KR_st[5] = {
0214     PCK4BITS(eStart, 3, eError, eStart, eStart, eStart, eError, eError), // 00-07
0215     PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), // 08-0f
0216     PCK4BITS(eItsMe, eItsMe, eError, eError, eError, 4, eError, eError), // 10-17
0217     PCK4BITS(eError, eError, eError, eError, 5, eError, eError, eError), // 18-1f
0218     PCK4BITS(eError, eError, eError, eItsMe, eStart, eStart, eStart, eStart) // 20-27
0219 };
0220 
// Six zero entries, handed to ISO2022KRSMModel as its length table; the entry
// count equals the 6 passed in that model.
// NOTE(review): presumably one character-length slot per byte class -- confirm.
0221 static const unsigned int ISO2022KRCharLenTable[] = {0, 0, 0, 0, 0, 0};
0222 
// Model named "ISO-2022-KR", initialised positionally with: a descriptor
// wrapping ISO2022KR_cls, the value 6, a descriptor wrapping ISO2022KR_st,
// ISO2022KRCharLenTable and the name string. Not declared static, unlike the
// tables it refers to.
// NOTE(review): SMModel and the e*4bits constants are declared outside this
// file; 6 is presumably the number of byte classes (ISO2022KR_cls assigns
// classes 0..5) -- confirm in nsCodingStateMachine.h.
0223 const SMModel ISO2022KRSMModel = {
0224     {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_cls},
0225     6,
0226     {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_st},
0227     ISO2022KRCharLenTable,
0228     "ISO-2022-KR",
0229 };
0230 }