File indexing completed on 2024-05-12 05:46:27

0001 /*
0002  * This file is part of the KDE libraries
0003  *
0004  * Copyright (c) 2000-2003 Shiro Kawai <shirok@users.sourceforge.net>
0005  *
0006  * Redistribution and use in source and binary forms, with or without
0007  * modification, are permitted provided that the following conditions
0008  * are met:
0009  *
0010  *  1. Redistributions of source code must retain the above copyright
0011  *     notice, this list of conditions and the following disclaimer.
0012  *
0013  *  2. Redistributions in binary form must reproduce the above copyright
0014  *     notice, this list of conditions and the following disclaimer in the
0015  *     documentation and/or other materials provided with the distribution.
0016  *
0017  *  3. Neither the name of the authors nor the names of its contributors
0018  *     may be used to endorse or promote products derived from this
0019  *     software without specific prior written permission.
0020  *
0021  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
0022  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
0023  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
0024  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
0025  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0026  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
0027  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
0028  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
0029  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
0030  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0031  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0032  *
0033  */
0034 /*
0035  * original code is here.
0036  * http://cvs.sourceforge.net/viewcvs.py/gauche/Gauche/ext/charconv/guess.c?view=markup
0037  */
0038 #ifndef GUESS_JA_H
0039 #define GUESS_JA_H
0040 
/*
 * One arc (transition) of a guessing DFA.
 *
 * DFA_NEXT reads both members when an arc is taken: the automaton moves to
 * `next`, and its running score is multiplied by `score`.
 *
 * NOTE(review): the arc tables are presumably written with brace
 * initializers in guess_ja.cpp, so keep the members public and in this
 * order -- confirm before reordering.
 */
class guess_arc {
 public:
  /* State the automaton moves to when this arc is taken. */
  unsigned int next;

  /* Factor multiplied into the automaton's running score. */
  double score;
};
0046 
0047 
0048 typedef signed char dfa_table[256];
0049 
0050 /* DFA tables declared in guess_ja.cpp */
0051 extern const dfa_table guess_eucj_st[];
0052 extern guess_arc guess_eucj_ar[7];
0053 extern const dfa_table guess_sjis_st[];
0054 extern guess_arc guess_sjis_ar[6];
0055 extern const dfa_table guess_utf8_st[];
0056 extern guess_arc guess_utf8_ar[11];
0057 
0058 class guess_dfa {
0059  public:
0060   const dfa_table *states;
0061   const guess_arc *arcs;
0062   int state;
0063   double score;
0064   
0065  guess_dfa (const dfa_table stable[], const guess_arc *atable) :
0066   states(stable), arcs(atable)
0067   {
0068     state = 0;
0069     score = 1.0;
0070   }
0071 };
0072 
0073 class JapaneseCode
0074 {
0075  public:
0076   enum Type {K_ASCII, K_JIS, K_EUC, K_SJIS, K_UNICODE, K_UTF8 };
0077   enum Type guess_jp(const char* buf, int buflen);
0078   
0079   JapaneseCode () {
0080     eucj = new guess_dfa(guess_eucj_st, guess_eucj_ar);
0081     sjis = new guess_dfa(guess_sjis_st, guess_sjis_ar);
0082     utf8 = new guess_dfa(guess_utf8_st, guess_utf8_ar);
0083     last_JIS_escape = false;
0084   }
0085   
0086   ~JapaneseCode () {
0087     delete eucj;
0088     delete sjis;
0089     delete utf8;
0090   }
0091   
0092  protected:
0093   guess_dfa *eucj;
0094   guess_dfa *sjis;
0095   guess_dfa *utf8;
0096   
0097   bool last_JIS_escape;
0098 };
0099 
/*
 * Feed one input byte to an automaton.
 *
 *   dfa  pointer expression to an object with `states`, `arcs`, `state`
 *        and `score` members (e.g. a guess_dfa *).  It is evaluated more
 *        than once, so it must not have side effects.
 *   ch   the input byte.  It is converted to unsigned char before being
 *        used as a table index, so a negative `char` value cannot index
 *        in front of the 256-entry row.
 *
 * A dead automaton (state < 0) is left untouched.  Otherwise the arc for
 * (state, ch) is looked up: a negative entry kills the automaton by
 * setting state to -1; a non-negative entry moves it to the arc's `next`
 * state and multiplies its score by the arc's `score`.
 */
#define DFA_NEXT(dfa, ch)                                               \
    do {                                                                \
        if ((dfa)->state >= 0) {                                        \
            const int dfa_arc_ =                                        \
                (dfa)->states[(dfa)->state][(unsigned char)(ch)];       \
            if (dfa_arc_ < 0) {                                         \
                (dfa)->state = -1;                                      \
            } else {                                                    \
                (dfa)->state = (dfa)->arcs[dfa_arc_].next;              \
                (dfa)->score *= (dfa)->arcs[dfa_arc_].score;            \
            }                                                           \
        }                                                               \
    } while (0)
0113 
/*
 * True while the automaton pointed to by `dfa` has not been killed, i.e.
 * its state is still non-negative (DFA_NEXT sets it to -1 on a missing
 * transition).  The argument is parenthesized so expressions such as
 * `&d` or `p + 1` can be passed safely.
 */
#define DFA_ALIVE(dfa)  ((dfa)->state >= 0)
0115 
0116 #endif  /* GUESS_JA_H */