File indexing completed on 2024-04-28 03:53:03

0001 /*  -*- C++ -*-
0002     SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
0003 
0004     SPDX-License-Identifier: MIT
0005 */
0006 
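// The observations below describe S-JIS (Shift-JIS) encoded text:
// 1. kana characters (or hankaku?) often appear with high frequency
// 2. kana characters often occur in groups
// 3. certain combinations of kana are never used in the Japanese language
// NOTE(review): the code below implements nsGB18030Prober, so this comment was
// presumably carried over from the S-JIS prober -- confirm whether it belongs here.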
0011 
0012 #include "nsGB2312Prober.h"
0013 
0014 namespace kencodingprober
0015 {
0016 void nsGB18030Prober::Reset(void)
0017 {
0018     mCodingSM->Reset();
0019     mState = eDetecting;
0020     mDistributionAnalyser.Reset();
0021     // mContextAnalyser.Reset();
0022 }
0023 
0024 nsProbingState nsGB18030Prober::HandleData(const char *aBuf, unsigned int aLen)
0025 {
0026     if (aLen == 0) {
0027         return mState;
0028     }
0029 
0030     for (unsigned int i = 0; i < aLen; i++) {
0031         const nsSMState codingState = mCodingSM->NextState(aBuf[i]);
0032         if (codingState == eError) {
0033             mState = eNotMe;
0034             break;
0035         }
0036         if (codingState == eItsMe) {
0037             mState = eFoundIt;
0038             break;
0039         }
0040         if (codingState == eStart) {
0041             unsigned int charLen = mCodingSM->GetCurrentCharLen();
0042 
0043             if (i == 0) {
0044                 mLastChar[1] = aBuf[0];
0045                 mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
0046             } else {
0047                 mDistributionAnalyser.HandleOneChar(aBuf + i - 1, charLen);
0048             }
0049         }
0050     }
0051 
0052     mLastChar[0] = aBuf[aLen - 1];
0053 
0054     if (mState == eDetecting) {
0055         if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) {
0056             mState = eFoundIt;
0057         }
0058     }
0059     //    else
0060     //      mDistributionAnalyser.HandleData(aBuf, aLen);
0061 
0062     return mState;
0063 }
0064 
0065 float nsGB18030Prober::GetConfidence(void)
0066 {
0067     float distribCf = mDistributionAnalyser.GetConfidence();
0068 
0069     return (float)distribCf;
0070 }
0071 }