File indexing completed on 2024-04-28 03:53:03
0001 /* -*- C++ -*- 0002 SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org> 0003 0004 SPDX-License-Identifier: MIT 0005 */ 0006 0007 // for S-JIS encoding, observe characteristic: 0008 // 1, kana character (or hankaku?) often have high frequency of appearance 0009 // 2, kana character often exist in group 0010 // 3, certain combination of kana is never used in japanese language 0011 0012 #ifndef nsEUCJPProber_h__ 0013 #define nsEUCJPProber_h__ 0014 0015 #include "CharDistribution.h" 0016 #include "JpCntx.h" 0017 #include "nsCharSetProber.h" 0018 #include "nsCodingStateMachine.h" 0019 namespace kencodingprober 0020 { 0021 class KCODECS_NO_EXPORT nsEUCJPProber : public nsCharSetProber 0022 { 0023 public: 0024 nsEUCJPProber(void) 0025 { 0026 mCodingSM = new nsCodingStateMachine(&EUCJPSMModel); 0027 Reset(); 0028 } 0029 ~nsEUCJPProber(void) override 0030 { 0031 delete mCodingSM; 0032 } 0033 nsProbingState HandleData(const char *aBuf, unsigned int aLen) override; 0034 const char *GetCharSetName() override 0035 { 0036 return "EUC-JP"; 0037 } 0038 nsProbingState GetState(void) override 0039 { 0040 return mState; 0041 } 0042 void Reset(void) override; 0043 float GetConfidence(void) override; 0044 void SetOpion() override 0045 { 0046 } 0047 0048 protected: 0049 nsCodingStateMachine *mCodingSM; 0050 nsProbingState mState; 0051 0052 EUCJPContextAnalysis mContextAnalyser; 0053 EUCJPDistributionAnalysis mDistributionAnalyser; 0054 0055 char mLastChar[2]; 0056 }; 0057 } 0058 0059 #endif /* nsEUCJPProber_h__ */