// File indexing completed on 2024-04-28 03:53:03
0001 /* -*- C++ -*- 0002 SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org> 0003 0004 SPDX-License-Identifier: MIT 0005 */ 0006 0007 #include "nsMBCSGroupProber.h" 0008 0009 #include <stdio.h> 0010 #include <stdlib.h> 0011 0012 namespace kencodingprober 0013 { 0014 #ifdef DEBUG_PROBE 0015 static const char *const ProberName[] = { 0016 "Unicode", 0017 "SJIS", 0018 "EUCJP", 0019 "GB18030", 0020 "EUCKR", 0021 "Big5", 0022 }; 0023 0024 #endif 0025 0026 nsMBCSGroupProber::nsMBCSGroupProber() 0027 { 0028 mProbers[0] = new UnicodeGroupProber(); 0029 mProbers[1] = new nsSJISProber(); 0030 mProbers[2] = new nsEUCJPProber(); 0031 mProbers[3] = new nsGB18030Prober(); 0032 mProbers[4] = new nsEUCKRProber(); 0033 mProbers[5] = new nsBig5Prober(); 0034 Reset(); 0035 } 0036 0037 nsMBCSGroupProber::~nsMBCSGroupProber() 0038 { 0039 for (unsigned int i = 0; i < NUM_OF_PROBERS; i++) { 0040 delete mProbers[i]; 0041 } 0042 } 0043 0044 const char *nsMBCSGroupProber::GetCharSetName() 0045 { 0046 if (mBestGuess == -1) { 0047 GetConfidence(); 0048 if (mBestGuess == -1) { 0049 mBestGuess = 0; 0050 } 0051 } 0052 return mProbers[mBestGuess]->GetCharSetName(); 0053 } 0054 0055 void nsMBCSGroupProber::Reset(void) 0056 { 0057 mActiveNum = 0; 0058 for (unsigned int i = 0; i < NUM_OF_PROBERS; i++) { 0059 if (mProbers[i]) { 0060 mProbers[i]->Reset(); 0061 mIsActive[i] = true; 0062 ++mActiveNum; 0063 } else { 0064 mIsActive[i] = false; 0065 } 0066 } 0067 mBestGuess = -1; 0068 mState = eDetecting; 0069 } 0070 0071 nsProbingState nsMBCSGroupProber::HandleData(const char *aBuf, unsigned int aLen) 0072 { 0073 nsProbingState st; 0074 unsigned int i; 0075 0076 // do filtering to reduce load to probers 0077 char *highbyteBuf; 0078 char *hptr; 0079 bool keepNext = true; // assume previous is not ascii, it will do no harm except add some noise 0080 hptr = highbyteBuf = (char *)malloc(aLen); 0081 if (!hptr) { 0082 return mState; 0083 } 0084 for (i = 0; i < aLen; ++i) { 
0085 if (aBuf[i] & 0x80) { 0086 *hptr++ = aBuf[i]; 0087 keepNext = true; 0088 } else { 0089 // if previous is highbyte, keep this even it is a ASCII 0090 if (keepNext) { 0091 *hptr++ = aBuf[i]; 0092 keepNext = false; 0093 } 0094 } 0095 } 0096 0097 for (i = 0; i < NUM_OF_PROBERS; ++i) { 0098 if (!mIsActive[i]) { 0099 continue; 0100 } 0101 st = mProbers[i]->HandleData(highbyteBuf, hptr - highbyteBuf); 0102 if (st == eFoundIt) { 0103 mBestGuess = i; 0104 mState = eFoundIt; 0105 break; 0106 } else if (st == eNotMe) { 0107 mIsActive[i] = false; 0108 mActiveNum--; 0109 if (mActiveNum == 0) { 0110 mState = eNotMe; 0111 break; 0112 } 0113 } 0114 } 0115 0116 free(highbyteBuf); 0117 0118 return mState; 0119 } 0120 0121 float nsMBCSGroupProber::GetConfidence(void) 0122 { 0123 unsigned int i; 0124 float bestConf = 0.0; 0125 float cf; 0126 0127 switch (mState) { 0128 case eFoundIt: 0129 return (float)0.99; 0130 case eNotMe: 0131 return (float)0.01; 0132 default: 0133 for (i = 0; i < NUM_OF_PROBERS; ++i) { 0134 if (!mIsActive[i]) { 0135 continue; 0136 } 0137 cf = mProbers[i]->GetConfidence(); 0138 if (bestConf < cf) { 0139 bestConf = cf; 0140 mBestGuess = i; 0141 } 0142 } 0143 } 0144 return bestConf; 0145 } 0146 0147 #ifdef DEBUG_PROBE 0148 void nsMBCSGroupProber::DumpStatus() 0149 { 0150 unsigned int i; 0151 float cf; 0152 0153 GetConfidence(); 0154 for (i = 0; i < NUM_OF_PROBERS; i++) { 0155 if (!mIsActive[i]) { 0156 printf(" MBCS inactive: [%s] (confidence is too low).\r\n", ProberName[i]); 0157 } else { 0158 cf = mProbers[i]->GetConfidence(); 0159 printf(" MBCS %1.3f: [%s]\r\n", cf, ProberName[i]); 0160 } 0161 } 0162 } 0163 #endif 0164 }