File indexing completed on 2024-04-28 03:53:03

0001 /*  -*- C++ -*-
0002     SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
0003 
0004     SPDX-License-Identifier: MIT
0005 */
0006 
0007 #include "nsMBCSGroupProber.h"
0008 
0009 #include <stdio.h>
0010 #include <stdlib.h>
0011 
0012 namespace kencodingprober
0013 {
0014 #ifdef DEBUG_PROBE
0015 static const char *const ProberName[] = {
0016     "Unicode",
0017     "SJIS",
0018     "EUCJP",
0019     "GB18030",
0020     "EUCKR",
0021     "Big5",
0022 };
0023 
0024 #endif
0025 
0026 nsMBCSGroupProber::nsMBCSGroupProber()
0027 {
0028     mProbers[0] = new UnicodeGroupProber();
0029     mProbers[1] = new nsSJISProber();
0030     mProbers[2] = new nsEUCJPProber();
0031     mProbers[3] = new nsGB18030Prober();
0032     mProbers[4] = new nsEUCKRProber();
0033     mProbers[5] = new nsBig5Prober();
0034     Reset();
0035 }
0036 
0037 nsMBCSGroupProber::~nsMBCSGroupProber()
0038 {
0039     for (unsigned int i = 0; i < NUM_OF_PROBERS; i++) {
0040         delete mProbers[i];
0041     }
0042 }
0043 
0044 const char *nsMBCSGroupProber::GetCharSetName()
0045 {
0046     if (mBestGuess == -1) {
0047         GetConfidence();
0048         if (mBestGuess == -1) {
0049             mBestGuess = 0;
0050         }
0051     }
0052     return mProbers[mBestGuess]->GetCharSetName();
0053 }
0054 
0055 void nsMBCSGroupProber::Reset(void)
0056 {
0057     mActiveNum = 0;
0058     for (unsigned int i = 0; i < NUM_OF_PROBERS; i++) {
0059         if (mProbers[i]) {
0060             mProbers[i]->Reset();
0061             mIsActive[i] = true;
0062             ++mActiveNum;
0063         } else {
0064             mIsActive[i] = false;
0065         }
0066     }
0067     mBestGuess = -1;
0068     mState = eDetecting;
0069 }
0070 
0071 nsProbingState nsMBCSGroupProber::HandleData(const char *aBuf, unsigned int aLen)
0072 {
0073     nsProbingState st;
0074     unsigned int i;
0075 
0076     // do filtering to reduce load to probers
0077     char *highbyteBuf;
0078     char *hptr;
0079     bool keepNext = true; // assume previous is not ascii, it will do no harm except add some noise
0080     hptr = highbyteBuf = (char *)malloc(aLen);
0081     if (!hptr) {
0082         return mState;
0083     }
0084     for (i = 0; i < aLen; ++i) {
0085         if (aBuf[i] & 0x80) {
0086             *hptr++ = aBuf[i];
0087             keepNext = true;
0088         } else {
0089             // if previous is highbyte, keep this even it is a ASCII
0090             if (keepNext) {
0091                 *hptr++ = aBuf[i];
0092                 keepNext = false;
0093             }
0094         }
0095     }
0096 
0097     for (i = 0; i < NUM_OF_PROBERS; ++i) {
0098         if (!mIsActive[i]) {
0099             continue;
0100         }
0101         st = mProbers[i]->HandleData(highbyteBuf, hptr - highbyteBuf);
0102         if (st == eFoundIt) {
0103             mBestGuess = i;
0104             mState = eFoundIt;
0105             break;
0106         } else if (st == eNotMe) {
0107             mIsActive[i] = false;
0108             mActiveNum--;
0109             if (mActiveNum == 0) {
0110                 mState = eNotMe;
0111                 break;
0112             }
0113         }
0114     }
0115 
0116     free(highbyteBuf);
0117 
0118     return mState;
0119 }
0120 
0121 float nsMBCSGroupProber::GetConfidence(void)
0122 {
0123     unsigned int i;
0124     float bestConf = 0.0;
0125     float cf;
0126 
0127     switch (mState) {
0128     case eFoundIt:
0129         return (float)0.99;
0130     case eNotMe:
0131         return (float)0.01;
0132     default:
0133         for (i = 0; i < NUM_OF_PROBERS; ++i) {
0134             if (!mIsActive[i]) {
0135                 continue;
0136             }
0137             cf = mProbers[i]->GetConfidence();
0138             if (bestConf < cf) {
0139                 bestConf = cf;
0140                 mBestGuess = i;
0141             }
0142         }
0143     }
0144     return bestConf;
0145 }
0146 
0147 #ifdef DEBUG_PROBE
0148 void nsMBCSGroupProber::DumpStatus()
0149 {
0150     unsigned int i;
0151     float cf;
0152 
0153     GetConfidence();
0154     for (i = 0; i < NUM_OF_PROBERS; i++) {
0155         if (!mIsActive[i]) {
0156             printf("  MBCS inactive: [%s] (confidence is too low).\r\n", ProberName[i]);
0157         } else {
0158             cf = mProbers[i]->GetConfidence();
0159             printf("  MBCS %1.3f: [%s]\r\n", cf, ProberName[i]);
0160         }
0161     }
0162 }
0163 #endif
0164 }