File indexing completed on 2024-05-19 05:47:34

0001 /*  -*- C++ -*-
0002  *  Copyright (C) 1998 <developer@mozilla.org>
0003  *
0004  *
0005  *  Permission is hereby granted, free of charge, to any person obtaining
0006  *  a copy of this software and associated documentation files (the
0007  *  "Software"), to deal in the Software without restriction, including
0008  *  without limitation the rights to use, copy, modify, merge, publish,
0009  *  distribute, sublicense, and/or sell copies of the Software, and to
0010  *  permit persons to whom the Software is furnished to do so, subject to
0011  *  the following conditions:
0012  *
0013  *  The above copyright notice and this permission notice shall be included
0014  *  in all copies or substantial portions of the Software.
0015  *
0016  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0017  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0018  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0019  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
0020  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
0021  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
0022  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
0023  */
0024 
0025 #include "CharDistribution.h"
0026 
0027 #include "tables/JISFreq.tab"
0028 #include "tables/Big5Freq.tab"
0029 #include "tables/EUCKRFreq.tab"
0030 #include "tables/GB2312Freq.tab"
0031 
0032 #define SURE_YES 0.99f // upper bound GetConfidence() reports, so the result is never 100% sure
0033 #define SURE_NO  0.01f // value GetConfidence() reports when no character has been counted
0034 
0035 namespace kencodingprober
0036 {
0037 //return confidence base on received data
0038 float CharDistributionAnalysis::GetConfidence()
0039 {
0040     //if we didn't receive any character in our consideration range, return negative answer
0041     if (mTotalChars <= 0) {
0042         return SURE_NO;
0043     }
0044 
0045     if (mTotalChars != mFreqChars) {
0046         float r = mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio);
0047 
0048         if (r < SURE_YES) {
0049             return r;
0050         }
0051     }
0052     //normalize confidence, (we don't want to be 100% sure)
0053     return SURE_YES;
0054 }
0055 
0056 EUCKRDistributionAnalysis::EUCKRDistributionAnalysis()
0057 {
0058     mCharToFreqOrder = EUCKRCharToFreqOrder;
0059     mTableSize = EUCKR_TABLE_SIZE;
0060     mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO;
0061 }
0062 
0063 GB2312DistributionAnalysis::GB2312DistributionAnalysis()
0064 {
0065     mCharToFreqOrder = GB2312CharToFreqOrder;
0066     mTableSize = GB2312_TABLE_SIZE;
0067     mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO;
0068 }
0069 
0070 Big5DistributionAnalysis::Big5DistributionAnalysis()
0071 {
0072     mCharToFreqOrder = Big5CharToFreqOrder;
0073     mTableSize = BIG5_TABLE_SIZE;
0074     mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO;
0075 }
0076 
0077 SJISDistributionAnalysis::SJISDistributionAnalysis()
0078 {
0079     mCharToFreqOrder = JISCharToFreqOrder;
0080     mTableSize = JIS_TABLE_SIZE;
0081     mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO;
0082 }
0083 
0084 EUCJPDistributionAnalysis::EUCJPDistributionAnalysis()
0085 {
0086     mCharToFreqOrder = JISCharToFreqOrder;
0087     mTableSize = JIS_TABLE_SIZE;
0088     mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO;
0089 }
0090 }
0091