File indexing completed on 2024-04-28 03:53:04
0001 /* -*- C++ -*- 0002 SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org> 0003 0004 SPDX-License-Identifier: MIT 0005 */ 0006 0007 // for S-JIS encoding, observe characteristic: 0008 // 1, kana character (or hankaku?) often have high frequency of appearance 0009 // 2, kana character often exist in group 0010 // 3, certain combination of kana is never used in japanese language 0011 0012 #include "nsSJISProber.h" 0013 0014 namespace kencodingprober 0015 { 0016 void nsSJISProber::Reset(void) 0017 { 0018 mCodingSM->Reset(); 0019 mState = eDetecting; 0020 mContextAnalyser.Reset(); 0021 mDistributionAnalyser.Reset(); 0022 } 0023 0024 nsProbingState nsSJISProber::HandleData(const char *aBuf, unsigned int aLen) 0025 { 0026 if (aLen == 0) { 0027 return mState; 0028 } 0029 0030 for (unsigned int i = 0; i < aLen; i++) { 0031 const nsSMState codingState = mCodingSM->NextState(aBuf[i]); 0032 if (codingState == eError) { 0033 mState = eNotMe; 0034 break; 0035 } 0036 if (codingState == eItsMe) { 0037 mState = eFoundIt; 0038 break; 0039 } 0040 if (codingState == eStart) { 0041 unsigned int charLen = mCodingSM->GetCurrentCharLen(); 0042 if (i == 0) { 0043 mLastChar[1] = aBuf[0]; 0044 mContextAnalyser.HandleOneChar(mLastChar + 2 - charLen, charLen); 0045 mDistributionAnalyser.HandleOneChar(mLastChar, charLen); 0046 } else { 0047 mContextAnalyser.HandleOneChar(aBuf + i + 1 - charLen, charLen); 0048 mDistributionAnalyser.HandleOneChar(aBuf + i - 1, charLen); 0049 } 0050 } 0051 } 0052 0053 mLastChar[0] = aBuf[aLen - 1]; 0054 0055 if (mState == eDetecting) { 0056 if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) { 0057 mState = eFoundIt; 0058 } 0059 } 0060 0061 return mState; 0062 } 0063 0064 float nsSJISProber::GetConfidence(void) 0065 { 0066 float contxtCf = mContextAnalyser.GetConfidence(); 0067 float distribCf = mDistributionAnalyser.GetConfidence(); 0068 0069 return (contxtCf > distribCf ? contxtCf : distribCf); 0070 } 0071 }