File indexing completed on 2024-04-28 03:53:03

0001 /*  -*- C++ -*-
0002     SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
0003 
0004     SPDX-License-Identifier: MIT
0005 */
0006 
0007 // for EUC-JP encoding, observe these characteristics:
0008 // 1, kana characters (or hankaku?) often have a high frequency of appearance
0009 // 2, kana characters often exist in groups
0010 // 3, certain combinations of kana are never used in the Japanese language
0011 
0012 #ifndef nsEUCJPProber_h__
0013 #define nsEUCJPProber_h__
0014 
0015 #include "CharDistribution.h"
0016 #include "JpCntx.h"
0017 #include "nsCharSetProber.h"
0018 #include "nsCodingStateMachine.h"
0019 namespace kencodingprober
0020 {
0021 class KCODECS_NO_EXPORT nsEUCJPProber : public nsCharSetProber
0022 {
0023 public:
0024     nsEUCJPProber(void)
0025     {
0026         mCodingSM = new nsCodingStateMachine(&EUCJPSMModel);
0027         Reset();
0028     }
0029     ~nsEUCJPProber(void) override
0030     {
0031         delete mCodingSM;
0032     }
0033     nsProbingState HandleData(const char *aBuf, unsigned int aLen) override;
0034     const char *GetCharSetName() override
0035     {
0036         return "EUC-JP";
0037     }
0038     nsProbingState GetState(void) override
0039     {
0040         return mState;
0041     }
0042     void Reset(void) override;
0043     float GetConfidence(void) override;
0044     void SetOpion() override
0045     {
0046     }
0047 
0048 protected:
0049     nsCodingStateMachine *mCodingSM;
0050     nsProbingState mState;
0051 
0052     EUCJPContextAnalysis mContextAnalyser;
0053     EUCJPDistributionAnalysis mDistributionAnalyser;
0054 
0055     char mLastChar[2];
0056 };
0057 }
0058 
0059 #endif /* nsEUCJPProber_h__ */