File indexing completed on 2024-04-28 03:53:02

0001 /*  -*- C++ -*-
0002     SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
0003 
0004     SPDX-License-Identifier: MIT
0005 */
0006 
0007 #include "JapaneseGroupProber.h"
0008 
0009 #include <stdio.h>
0010 #include <stdlib.h>
0011 
0012 namespace kencodingprober
0013 {
#ifdef DEBUG_PROBE
// Human-readable labels used by DumpStatus(), indexed by prober slot.
// The order must match the slots filled in by the constructor:
// 0 = UnicodeGroupProber, 1 = nsSJISProber, 2 = nsEUCJPProber.
static const char *const ProberName[] = {
    "Unicode",
    "Shift_JIS",
    "EUC-JP",
};

#endif
0022 
0023 JapaneseGroupProber::JapaneseGroupProber()
0024 {
0025     mProbers[0] = new UnicodeGroupProber();
0026     mProbers[1] = new nsSJISProber();
0027     mProbers[2] = new nsEUCJPProber();
0028     Reset();
0029 }
0030 
0031 JapaneseGroupProber::~JapaneseGroupProber()
0032 {
0033     for (unsigned int i = 0; i < JP_NUM_OF_PROBERS; i++) {
0034         delete mProbers[i];
0035     }
0036 }
0037 
0038 const char *JapaneseGroupProber::GetCharSetName()
0039 {
0040     if (mBestGuess == -1) {
0041         GetConfidence();
0042         if (mBestGuess == -1) {
0043             mBestGuess = 1; // assume it's GB18030
0044         }
0045     }
0046     return mProbers[mBestGuess]->GetCharSetName();
0047 }
0048 
0049 void JapaneseGroupProber::Reset(void)
0050 {
0051     mActiveNum = 0;
0052     for (unsigned int i = 0; i < JP_NUM_OF_PROBERS; i++) {
0053         if (mProbers[i]) {
0054             mProbers[i]->Reset();
0055             mIsActive[i] = true;
0056             ++mActiveNum;
0057         } else {
0058             mIsActive[i] = false;
0059         }
0060     }
0061     mBestGuess = -1;
0062     mState = eDetecting;
0063 }
0064 
0065 nsProbingState JapaneseGroupProber::HandleData(const char *aBuf, unsigned int aLen)
0066 {
0067     nsProbingState st;
0068     unsigned int i;
0069 
0070     // do filtering to reduce load to probers
0071     char *highbyteBuf;
0072     char *hptr;
0073     bool keepNext = true; // assume previous is not ascii, it will do no harm except add some noise
0074     hptr = highbyteBuf = (char *)malloc(aLen);
0075     if (!hptr) {
0076         return mState;
0077     }
0078     for (i = 0; i < aLen; ++i) {
0079         if (aBuf[i] & 0x80) {
0080             *hptr++ = aBuf[i];
0081             keepNext = true;
0082         } else {
0083             // if previous is highbyte, keep this even it is a ASCII
0084             if (keepNext) {
0085                 *hptr++ = aBuf[i];
0086                 keepNext = false;
0087             }
0088         }
0089     }
0090 
0091     for (i = 0; i < JP_NUM_OF_PROBERS; ++i) {
0092         if (!mIsActive[i]) {
0093             continue;
0094         }
0095         st = mProbers[i]->HandleData(highbyteBuf, hptr - highbyteBuf);
0096         if (st == eFoundIt) {
0097             mBestGuess = i;
0098             mState = eFoundIt;
0099             break;
0100         } else if (st == eNotMe) {
0101             mIsActive[i] = false;
0102             --mActiveNum;
0103             if (mActiveNum == 0) {
0104                 mState = eNotMe;
0105                 break;
0106             }
0107         }
0108     }
0109 
0110     free(highbyteBuf);
0111 
0112     return mState;
0113 }
0114 
0115 float JapaneseGroupProber::GetConfidence(void)
0116 {
0117     unsigned int i;
0118     float bestConf = 0.0;
0119     float cf;
0120 
0121     switch (mState) {
0122     case eFoundIt:
0123         return (float)0.99;
0124     case eNotMe:
0125         return (float)0.01;
0126     default:
0127         for (i = 0; i < JP_NUM_OF_PROBERS; ++i) {
0128             if (!mIsActive[i]) {
0129                 continue;
0130             }
0131             cf = mProbers[i]->GetConfidence();
0132             if (bestConf < cf) {
0133                 bestConf = cf;
0134                 mBestGuess = i;
0135             }
0136         }
0137     }
0138     return bestConf;
0139 }
0140 
#ifdef DEBUG_PROBE
// Debug helper: prints one line per sub-prober to stdout, showing either its
// current confidence or that it has been deactivated.
void JapaneseGroupProber::DumpStatus()
{
    // Return value is unused here; the call can update mBestGuess as a side
    // effect while the group is still detecting.
    GetConfidence();

    for (unsigned int i = 0; i < JP_NUM_OF_PROBERS; i++) {
        if (!mIsActive[i]) {
            printf("  Japanese group inactive: [%s] (confidence is too low).\r\n", ProberName[i]);
        } else {
            const float cf = mProbers[i]->GetConfidence();
            printf("  Japanese group %1.3f: [%s]\r\n", cf, ProberName[i]);
        }
    }
}
#endif
0158 }