File indexing completed on 2023-09-24 04:06:32
0001 /* 0002 * This file is part of the KDE libraries 0003 * 0004 * Copyright 2000-2003 Shiro Kawai <shiro@acm.org>, All rights reserved. 0005 * 0006 * Redistribution and use in source and binary forms, with or without 0007 * modification, are permitted provided that the following conditions 0008 * are met: 0009 * 0010 * 1. Redistributions of source code must retain the above copyright 0011 * notice, this list of conditions and the following disclaimer. 0012 * 0013 * 2. Redistributions in binary form must reproduce the above copyright 0014 * notice, this list of conditions and the following disclaimer in the 0015 * documentation and/or other materials provided with the distribution. 0016 * 0017 * 3. Neither the name of the authors nor the names of its contributors 0018 * may be used to endorse or promote products derived from this 0019 * software without specific prior written permission. 0020 * 0021 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 0022 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 0023 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 0024 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 0025 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 0026 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 0027 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 0028 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 0029 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 0030 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 0031 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 0032 * 0033 */ 0034 /* 0035 * original code is here. 0036 * http://cvs.sourceforge.net/viewcvs.py/gauche/Gauche/ext/charconv/guess.c?view=markup 0037 */ 0038 #ifndef GUESS_JA_H 0039 #define GUESS_JA_H 0040 0041 #include <qglobal.h> 0042 #ifdef Q_OS_WIN 0043 #undef UNICODE 0044 #endif 0045 #ifdef SOLARIS 0046 #undef UNICODE 0047 #endif 0048 namespace khtml 0049 { 0050 class guess_arc 0051 { 0052 public: 0053 unsigned int next; /* next state */ 0054 double score; /* score */ 0055 }; 0056 } 0057 0058 using namespace khtml; 0059 0060 typedef signed char dfa_table[256]; 0061 0062 /* DFA tables declared in guess_ja.cpp */ 0063 extern const dfa_table guess_eucj_st[]; 0064 extern guess_arc guess_eucj_ar[7]; 0065 extern const dfa_table guess_sjis_st[]; 0066 extern guess_arc guess_sjis_ar[6]; 0067 extern const dfa_table guess_utf8_st[]; 0068 extern guess_arc guess_utf8_ar[11]; 0069 0070 namespace khtml 0071 { 0072 0073 class guess_dfa 0074 { 0075 public: 0076 const dfa_table *states; 0077 const guess_arc *arcs; 0078 int state; 0079 double score; 0080 0081 guess_dfa(const dfa_table stable[], const guess_arc *atable) : 0082 states(stable), arcs(atable) 0083 { 0084 state = 0; 0085 score = 1.0; 0086 } 0087 }; 0088 0089 class JapaneseCode 0090 { 0091 public: 0092 enum Type {ASCII, JIS, EUC, SJIS, UNICODE, UTF8 }; 0093 enum Type guess_jp(const char *buf, int buflen); 0094 0095 JapaneseCode() 0096 { 0097 eucj = new guess_dfa(guess_eucj_st, guess_eucj_ar); 0098 sjis = new guess_dfa(guess_sjis_st, guess_sjis_ar); 0099 utf8 = new guess_dfa(guess_utf8_st, guess_utf8_ar); 0100 last_JIS_escape = false; 0101 } 0102 0103 ~JapaneseCode() 0104 { 0105 delete eucj; 0106 delete sjis; 0107 delete utf8; 0108 } 0109 0110 protected: 0111 guess_dfa *eucj; 0112 guess_dfa *sjis; 0113 guess_dfa *utf8; 0114 0115 bool last_JIS_escape; 0116 }; 0117 } 0118 0119 #define DFA_NEXT(dfa, ch) \ 0120 do { \ 0121 int arc__; \ 0122 if (dfa->state >= 0) { \ 0123 arc__ = dfa->states[dfa->state][ch]; \ 0124 if (arc__ < 0) { \ 0125 dfa->state = -1; \ 0126 } else { \ 0127 dfa->state = dfa->arcs[arc__].next; \ 0128 dfa->score *= dfa->arcs[arc__].score; \ 0129 } \ 0130 } \ 0131 } while (0) 0132 0133 #define DFA_ALIVE(dfa) (dfa->state >= 0) 0134 0135 #endif /* GUESS_JA_H */