File indexing completed on 2024-04-28 03:53:03
0001 /* -*- C++ -*- 0002 SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org> 0003 0004 SPDX-License-Identifier: MIT 0005 */ 0006 0007 #ifndef __JPCNTX_H__ 0008 #define __JPCNTX_H__ 0009 0010 #include "kcodecs_export.h" 0011 0012 #include <qglobal.h> 0013 0014 #define NUM_OF_CATEGORY 6 0015 0016 #define ENOUGH_REL_THRESHOLD 100 0017 #define MAX_REL_THRESHOLD 1000 0018 namespace kencodingprober 0019 { 0020 // hiragana frequency category table 0021 extern const char jp2CharContext[83][83]; 0022 0023 class KCODECS_NO_EXPORT JapaneseContextAnalysis 0024 { 0025 public: 0026 JapaneseContextAnalysis() 0027 { 0028 Reset(); 0029 } 0030 virtual ~JapaneseContextAnalysis() 0031 { 0032 } 0033 0034 void HandleData(const char *aBuf, unsigned int aLen); 0035 0036 void HandleOneChar(const char *aStr, unsigned int aCharLen) 0037 { 0038 int order; 0039 0040 // if we received enough data, stop here 0041 if (mTotalRel > MAX_REL_THRESHOLD) { 0042 mDone = true; 0043 } 0044 if (mDone) { 0045 return; 0046 } 0047 0048 // Only 2-bytes characters are of our interest 0049 order = (aCharLen == 2) ? GetOrder(aStr) : -1; 0050 if (order != -1 && mLastCharOrder != -1) { 0051 mTotalRel++; 0052 // count this sequence to its category counter 0053 mRelSample[(int)jp2CharContext[mLastCharOrder][order]]++; 0054 } 0055 mLastCharOrder = order; 0056 } 0057 0058 float GetConfidence(); 0059 void Reset(void); 0060 void SetOpion() 0061 { 0062 } 0063 bool GotEnoughData() 0064 { 0065 return mTotalRel > ENOUGH_REL_THRESHOLD; 0066 } 0067 0068 protected: 0069 virtual int GetOrder(const char *str, unsigned int *charLen) = 0; 0070 virtual int GetOrder(const char *str) = 0; 0071 0072 // category counters, each integer counts sequence in its category 0073 unsigned int mRelSample[NUM_OF_CATEGORY]; 0074 0075 // total sequence received 0076 unsigned int mTotalRel; 0077 0078 // The order of previous char 0079 int mLastCharOrder; 0080 0081 // if last byte in current buffer is not the last byte of a character, we 0082 // need to know how many byte to skip in next buffer. 0083 unsigned int mNeedToSkipCharNum; 0084 0085 // If this flag is set to true, detection is done and conclusion has been made 0086 bool mDone; 0087 }; 0088 0089 class KCODECS_NO_EXPORT SJISContextAnalysis : public JapaneseContextAnalysis 0090 { 0091 // SJISContextAnalysis(){}; 0092 protected: 0093 int GetOrder(const char *str, unsigned int *charLen) override; 0094 0095 int GetOrder(const char *str) override 0096 { 0097 // We only interested in Hiragana, so first byte is '\202' 0098 if (*str == '\202' && (unsigned char)*(str + 1) >= (unsigned char)0x9f && (unsigned char)*(str + 1) <= (unsigned char)0xf1) { 0099 return (unsigned char)*(str + 1) - (unsigned char)0x9f; 0100 } 0101 return -1; 0102 } 0103 }; 0104 0105 class KCODECS_NO_EXPORT EUCJPContextAnalysis : public JapaneseContextAnalysis 0106 { 0107 protected: 0108 int GetOrder(const char *str, unsigned int *charLen) override; 0109 int GetOrder(const char *str) override 0110 // We only interested in Hiragana, so first byte is '\244' 0111 { 0112 if (*str == '\244' // 0113 && (unsigned char)*(str + 1) >= (unsigned char)0xa1 // 0114 && (unsigned char)*(str + 1) <= (unsigned char)0xf3) { 0115 return (unsigned char)*(str + 1) - (unsigned char)0xa1; 0116 } 0117 return -1; 0118 } 0119 }; 0120 } 0121 #endif /* __JPCNTX_H__ */