File indexing completed on 2024-04-28 03:53:04

0001 /*  -*- C++ -*-
0002     SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
0003 
0004     SPDX-License-Identifier: MIT
0005 */
0006 
0007 // For S-JIS encoding, observe these characteristics:
0008 // 1. Kana characters (or hankaku?) often have a high frequency of appearance.
0009 // 2. Kana characters often occur in groups.
0010 // 3. Certain combinations of kana are never used in the Japanese language.
0011 
0012 #include "nsSJISProber.h"
0013 
0014 namespace kencodingprober
0015 {
0016 void nsSJISProber::Reset(void)
0017 {
0018     mCodingSM->Reset();
0019     mState = eDetecting;
0020     mContextAnalyser.Reset();
0021     mDistributionAnalyser.Reset();
0022 }
0023 
0024 nsProbingState nsSJISProber::HandleData(const char *aBuf, unsigned int aLen)
0025 {
0026     if (aLen == 0) {
0027         return mState;
0028     }
0029 
0030     for (unsigned int i = 0; i < aLen; i++) {
0031         const nsSMState codingState = mCodingSM->NextState(aBuf[i]);
0032         if (codingState == eError) {
0033             mState = eNotMe;
0034             break;
0035         }
0036         if (codingState == eItsMe) {
0037             mState = eFoundIt;
0038             break;
0039         }
0040         if (codingState == eStart) {
0041             unsigned int charLen = mCodingSM->GetCurrentCharLen();
0042             if (i == 0) {
0043                 mLastChar[1] = aBuf[0];
0044                 mContextAnalyser.HandleOneChar(mLastChar + 2 - charLen, charLen);
0045                 mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
0046             } else {
0047                 mContextAnalyser.HandleOneChar(aBuf + i + 1 - charLen, charLen);
0048                 mDistributionAnalyser.HandleOneChar(aBuf + i - 1, charLen);
0049             }
0050         }
0051     }
0052 
0053     mLastChar[0] = aBuf[aLen - 1];
0054 
0055     if (mState == eDetecting) {
0056         if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) {
0057             mState = eFoundIt;
0058         }
0059     }
0060 
0061     return mState;
0062 }
0063 
0064 float nsSJISProber::GetConfidence(void)
0065 {
0066     float contxtCf = mContextAnalyser.GetConfidence();
0067     float distribCf = mDistributionAnalyser.GetConfidence();
0068 
0069     return (contxtCf > distribCf ? contxtCf : distribCf);
0070 }
0071 }