File indexing completed on 2022-09-27 12:25:39

0001 /*
0002     This file is part of Kiten, a KDE Japanese Reference Tool
0003     SPDX-FileCopyrightText: 2006 Joseph Kerian <jkerian@gmail.com>
0004 
0005     SPDX-License-Identifier: LGPL-2.0-or-later
0006 */
0007 
0008 #include "DictKanjidic/dictfilekanjidic.h"
0009 #include "kitenmacros.h"
0010 #include "radicalfile.h"
0011 
0012 #include <QFile>
0013 #include <QRegExp>
0014 #include <QString>
0015 #include <QTextCodec>
0016 #include <QTextStream>
0017 
0018 RadicalFile::RadicalFile( QString &radkfile, const QString &kanjidic )
0019 {
0020   loadRadicalFile( radkfile );
0021   if( ! kanjidic.isEmpty() )
0022   {
0023     loadKanjidic( kanjidic );
0024   }
0025 }
0026 
0027 QSet<Kanji> RadicalFile::kanjiContainingRadicals( QSet<QString> &radicallist ) const
0028 {
0029   QSet<QString> kanjiStringSet;
0030   QSet<Kanji> result;
0031   if( m_radicals.count() < 1 || radicallist.count() < 1 )
0032   {
0033     return result;
0034   }
0035 
0036   //Start out with our first set
0037   kanjiStringSet = m_radicals[ *radicallist.begin() ].getKanji();
0038   //Make a set intersection of these m_kanji
0039   foreach( const QString &rad, radicallist )
0040   {
0041     kanjiStringSet &= m_radicals[ rad ].getKanji();
0042   }
0043 
0044   //Convert our set of QString to a set of Kanji
0045   foreach( const QString &kanji, kanjiStringSet )
0046   {
0047     result += m_kanji[ kanji ];
0048   }
0049 
0050   return result;
0051 }
0052 
0053 bool RadicalFile::loadRadicalFile( QString &radkfile )
0054 {
0055   QFile f( radkfile );
0056   if ( ! f.open( QIODevice::ReadOnly ) )
0057   {
0058     return false;
0059   }
0060 
0061   //Read our radical file through a eucJP codec (helpfully builtin to Qt)
0062   QTextStream t( &f );
0063   Radical *newestRadical = nullptr;
0064   QHash< QString, QSet<QString> > krad;
0065 
0066   t.setCodec( QTextCodec::codecForName( "eucJP" ) );
0067   while ( ! t.atEnd() )
0068   {
0069     QString line = t.readLine();
0070     if( line.length() == 0 || line.at( 0 ) == '#' )
0071     {
0072       //Skip comment characters
0073       continue;
0074     }
0075     else if( line.at( 0 ) == '$' )
0076     {
0077       //Start of a new radical
0078       if( newestRadical != nullptr )
0079       {
0080         m_radicals.insert( newestRadical->toString(), *newestRadical );
0081       }
0082       delete newestRadical;
0083       QStringList lineElements = line.split( QRegExp( QStringLiteral( "\\s+" ) ) );
0084       newestRadical = new Radical(  lineElements.at( 1 )
0085                                   , lineElements.at( 2 ).toUInt()
0086                                   , m_radicals.size() );
0087     }
0088     else if( newestRadical != nullptr )
0089     {
0090       // List of m_kanji, potentially
0091       const QList<QString> m_kanjiList = line.trimmed().split( QLatin1String(""), Qt::SkipEmptyParts );
0092       const QSet<QString> kanjiSet = QSet<QString>(m_kanjiList.begin(), m_kanjiList.end());
0093       newestRadical->addKanji( kanjiSet );
0094       foreach( const QString &kanji, m_kanjiList )
0095       {
0096         krad[ kanji ] += newestRadical->toString();
0097       }
0098     }
0099   }
0100   if( newestRadical != nullptr )
0101   {
0102     m_radicals[ newestRadical->toString() ] = *newestRadical;
0103     delete newestRadical;
0104   }
0105 
0106   //Move contents of our krad QHash into our hash of m_kanji
0107   QHash<QString,QSet<QString> >::iterator it;
0108   for( it = krad.begin(); it != krad.end(); ++it )
0109   {
0110     m_kanji.insert(   it.key()
0111                     , Kanji( it.key()
0112                     , it.value() ) );
0113   }
0114   f.close();
0115   return true;
0116 }
0117 
0118 // Mostly copied from KanjiBrowser::loadKanji()
0119 bool RadicalFile::loadKanjidic( const QString &kanjidic )
0120 {
0121   DictFileKanjidic dictFileKanjidic;
0122   dictFileKanjidic.loadSettings();
0123   dictFileKanjidic.loadDictionary( kanjidic, KANJIDIC );
0124 
0125   QRegExp strokeMatch( "^S\\d+" );
0126   foreach( const QString &line, dictFileKanjidic.dumpDictionary() )
0127   {
0128     const QString kanji = line[ 0 ];
0129 
0130     QStringList strokesSection = line.split( " ", Qt::SkipEmptyParts )
0131                                      .filter( strokeMatch );
0132 
0133     unsigned int strokes = strokesSection.first().remove( 0, 1 ).toInt();
0134 
0135     if( m_kanji.contains( kanji ) ) {
0136       m_kanji[ kanji ].setStrokes( strokes );
0137     }
0138   }
0139 
0140   return true;
0141 }
0142 
0143 QMultiMap<int,Radical>* RadicalFile::mapRadicalsByStrokes( int max_strokes ) const
0144 {
0145   QMultiMap<int, Radical> *result = new QMultiMap<int, Radical>();
0146   foreach( const Radical &rad, m_radicals )
0147   {
0148     int strokes = rad.strokes();
0149     if( ( max_strokes > 0 ) && ( strokes > max_strokes ) )
0150     {
0151       strokes = max_strokes;
0152     }
0153     result->insert( strokes, rad );
0154   }
0155   return result;
0156 }
0157 
0158 QSet<QString> RadicalFile::radicalsInKanji( QSet<Kanji> &kanjilist ) const
0159 {
0160   QSet<QString> possibleRadicals;
0161   foreach( const QString &kanji, kanjilist )
0162   {
0163     possibleRadicals |= m_kanji[ kanji ].getRadicals();
0164   }
0165 
0166   return possibleRadicals;
0167 }