File indexing completed on 2024-04-28 03:53:03

0001 /*  -*- C++ -*-
0002     SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
0003 
0004     SPDX-License-Identifier: MIT
0005 */
0006 
0007 // For Japanese encodings, we rely on the following characteristics:
0008 // 1. Kana characters (or hankaku?) often have a high frequency of appearance.
0009 // 2. Kana characters often appear in groups.
0010 // 3. Certain combinations of kana are never used in the Japanese language.
0011 
0012 #include "nsEUCJPProber.h"
0013 
0014 namespace kencodingprober
0015 {
0016 void nsEUCJPProber::Reset(void)
0017 {
0018     mCodingSM->Reset();
0019     mState = eDetecting;
0020     mContextAnalyser.Reset();
0021     mDistributionAnalyser.Reset();
0022 }
0023 
0024 nsProbingState nsEUCJPProber::HandleData(const char *aBuf, unsigned int aLen)
0025 {
0026     if (aLen == 0) {
0027         return mState;
0028     }
0029 
0030     for (unsigned int i = 0; i < aLen; i++) {
0031         const nsSMState codingState = mCodingSM->NextState(aBuf[i]);
0032         if (codingState == eError) {
0033             mState = eNotMe;
0034             break;
0035         }
0036         if (codingState == eItsMe) {
0037             mState = eFoundIt;
0038             break;
0039         }
0040         if (codingState == eStart) {
0041             unsigned int charLen = mCodingSM->GetCurrentCharLen();
0042 
0043             if (i == 0) {
0044                 mLastChar[1] = aBuf[0];
0045                 mContextAnalyser.HandleOneChar(mLastChar, charLen);
0046                 mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
0047             } else {
0048                 mContextAnalyser.HandleOneChar(aBuf + i - 1, charLen);
0049                 mDistributionAnalyser.HandleOneChar(aBuf + i - 1, charLen);
0050             }
0051         }
0052     }
0053 
0054     mLastChar[0] = aBuf[aLen - 1];
0055 
0056     if (mState == eDetecting) {
0057         if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) {
0058             mState = eFoundIt;
0059         }
0060     }
0061 
0062     return mState;
0063 }
0064 
0065 float nsEUCJPProber::GetConfidence(void)
0066 {
0067     float contxtCf = mContextAnalyser.GetConfidence();
0068     float distribCf = mDistributionAnalyser.GetConfidence();
0069 
0070     return (contxtCf > distribCf ? contxtCf : distribCf);
0071 }
0072 }