/* File indexing completed on 2024-05-12 05:46:27 */
/*
 * This file is part of the KDE libraries
 *
 * Copyright (c) 2000-2003 Shiro Kawai <shirok@users.sourceforge.net>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *  3. Neither the name of the authors nor the names of its contributors
 *     may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
/*
 * original code is here.
 * http://cvs.sourceforge.net/viewcvs.py/gauche/Gauche/ext/charconv/guess.c?view=markup
 */
#ifndef GUESS_JA_H
#define GUESS_JA_H

/*
 * One transition of a guessing DFA.
 *
 * Kept as a plain aggregate (no constructors) so that tables of arcs can
 * still be brace-initialised where they are defined.
 */
class guess_arc {
public:
    unsigned int next;   /* next state */
    double score;        /* score */
};

/*
 * One DFA state: for every possible byte value, the index of the arc to
 * follow, or a negative value when the byte is not accepted in this state.
 */
typedef signed char dfa_table[256];

/* DFA tables declared in guess_ja.cpp */
extern const dfa_table guess_eucj_st[];
extern guess_arc guess_eucj_ar[7];
extern const dfa_table guess_sjis_st[];
extern guess_arc guess_sjis_ar[6];
extern const dfa_table guess_utf8_st[];
extern guess_arc guess_utf8_ar[11];

/*
 * A cursor over one DFA: the (shared, non-owned) state and arc tables plus
 * the current state and the accumulated score.
 *
 * A freshly constructed cursor is in state 0 with score 1.0.  A negative
 * state means the DFA has rejected the input (see DFA_ALIVE).
 */
class guess_dfa {
public:
    const dfa_table *states;   /* state table, not owned */
    const guess_arc *arcs;     /* arc table, not owned */
    int state;                 /* current state, < 0 once rejected */
    double score;              /* product of the scores of the arcs taken */

    guess_dfa (const dfa_table stable[], const guess_arc *atable) :
        states(stable), arcs(atable), state(0), score(1.0)
    {
    }
};

/*
 * Japanese encoding guesser.  Holds one DFA cursor per candidate encoding
 * (EUC-JP, Shift_JIS, UTF-8); guess_jp() is implemented outside this header.
 */
class JapaneseCode
{
public:
    enum Type {K_ASCII, K_JIS, K_EUC, K_SJIS, K_UNICODE, K_UTF8 };
    enum Type guess_jp(const char* buf, int buflen);

    JapaneseCode () :
        eucj(new guess_dfa(guess_eucj_st, guess_eucj_ar)),
        sjis(new guess_dfa(guess_sjis_st, guess_sjis_ar)),
        utf8(new guess_dfa(guess_utf8_st, guess_utf8_ar)),
        last_JIS_escape(false)
    {
    }

    /*
     * Deep copy: every JapaneseCode owns its own three cursors.  The
     * implicitly generated copy would share the pointers, and both
     * destructors would then delete the same objects.
     */
    JapaneseCode (const JapaneseCode &other) :
        eucj(new guess_dfa(*other.eucj)),
        sjis(new guess_dfa(*other.sjis)),
        utf8(new guess_dfa(*other.utf8)),
        last_JIS_escape(other.last_JIS_escape)
    {
    }

    /*
     * Copies the cursor contents into the objects this instance already
     * owns, so no reallocation is needed and self-assignment is harmless.
     */
    JapaneseCode &operator= (const JapaneseCode &other)
    {
        *eucj = *other.eucj;
        *sjis = *other.sjis;
        *utf8 = *other.utf8;
        last_JIS_escape = other.last_JIS_escape;
        return *this;
    }

    ~JapaneseCode () {
        delete eucj;
        delete sjis;
        delete utf8;
    }

protected:
    guess_dfa *eucj;   /* owned */
    guess_dfa *sjis;   /* owned */
    guess_dfa *utf8;   /* owned */

    /* Starts out false; maintained by code outside this header. */
    bool last_JIS_escape;
};

/*
 * Advance the DFA pointed to by `dfa` by one input byte `ch`.
 *
 * Does nothing if the DFA has already rejected.  Otherwise looks up the arc
 * for `ch` in the current state: a negative arc rejects (state becomes -1),
 * any other arc moves to its target state and multiplies its score into the
 * running score.
 *
 * `ch` is converted to unsigned char before indexing, so a plain `char`
 * holding a byte >= 0x80 cannot index before the start of the 256-entry
 * row.  `dfa` is evaluated several times; do not pass an expression with
 * side effects.
 */
#define DFA_NEXT(dfa, ch)                                                   \
    do {                                                                    \
        if ((dfa)->state >= 0) {                                            \
            const int dfa_arc_ =                                            \
                (dfa)->states[(dfa)->state][(unsigned char)(ch)];           \
            if (dfa_arc_ < 0) {                                             \
                (dfa)->state = -1;                                          \
            } else {                                                        \
                (dfa)->state = (dfa)->arcs[dfa_arc_].next;                  \
                (dfa)->score *= (dfa)->arcs[dfa_arc_].score;                \
            }                                                               \
        }                                                                   \
    } while (0)

/* True while the DFA pointed to by `dfa` has not rejected the input. */
#define DFA_ALIVE(dfa)  ((dfa)->state >= 0)

#endif  /* GUESS_JA_H */