File indexing completed on 2024-05-19 05:47:34
0001 /* -*- C++ -*- 0002 * Copyright (C) 1998 <developer@mozilla.org> 0003 * 0004 * 0005 * Permission is hereby granted, free of charge, to any person obtaining 0006 * a copy of this software and associated documentation files (the 0007 * "Software"), to deal in the Software without restriction, including 0008 * without limitation the rights to use, copy, modify, merge, publish, 0009 * distribute, sublicense, and/or sell copies of the Software, and to 0010 * permit persons to whom the Software is furnished to do so, subject to 0011 * the following conditions: 0012 * 0013 * The above copyright notice and this permission notice shall be included 0014 * in all copies or substantial portions of the Software. 0015 * 0016 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 0017 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 0018 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 0019 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 0020 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 0021 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 0022 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 0023 */ 0024 0025 #include "CharDistribution.h" 0026 0027 #include "tables/JISFreq.tab" 0028 #include "tables/Big5Freq.tab" 0029 #include "tables/EUCKRFreq.tab" 0030 #include "tables/GB2312Freq.tab" 0031 0032 #define SURE_YES 0.99f 0033 #define SURE_NO 0.01f 0034 0035 namespace kencodingprober 0036 { 0037 //return confidence base on received data 0038 float CharDistributionAnalysis::GetConfidence() 0039 { 0040 //if we didn't receive any character in our consideration range, return negative answer 0041 if (mTotalChars <= 0) { 0042 return SURE_NO; 0043 } 0044 0045 if (mTotalChars != mFreqChars) { 0046 float r = mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio); 0047 0048 if (r < SURE_YES) { 0049 return r; 0050 } 0051 } 0052 //normalize confidence, (we don't want to be 100% sure) 0053 return SURE_YES; 0054 } 0055 0056 EUCKRDistributionAnalysis::EUCKRDistributionAnalysis() 0057 { 0058 mCharToFreqOrder = EUCKRCharToFreqOrder; 0059 mTableSize = EUCKR_TABLE_SIZE; 0060 mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO; 0061 } 0062 0063 GB2312DistributionAnalysis::GB2312DistributionAnalysis() 0064 { 0065 mCharToFreqOrder = GB2312CharToFreqOrder; 0066 mTableSize = GB2312_TABLE_SIZE; 0067 mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO; 0068 } 0069 0070 Big5DistributionAnalysis::Big5DistributionAnalysis() 0071 { 0072 mCharToFreqOrder = Big5CharToFreqOrder; 0073 mTableSize = BIG5_TABLE_SIZE; 0074 mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO; 0075 } 0076 0077 SJISDistributionAnalysis::SJISDistributionAnalysis() 0078 { 0079 mCharToFreqOrder = JISCharToFreqOrder; 0080 mTableSize = JIS_TABLE_SIZE; 0081 mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO; 0082 } 0083 0084 EUCJPDistributionAnalysis::EUCJPDistributionAnalysis() 0085 { 0086 mCharToFreqOrder = JISCharToFreqOrder; 0087 mTableSize = JIS_TABLE_SIZE; 0088 mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO; 0089 } 0090 } 0091