File indexing completed on 2024-12-01 03:29:23

0001 /*
0002     This file is part of Kiten, a KDE Japanese Reference Tool
0003     SPDX-FileCopyrightText: 2006 Joseph Kerian <jkerian@gmail.com>
0004     SPDX-FileCopyrightText: 2011 Daniel E. Moctezuma <democtezuma@gmail.com>
0005 
0006     SPDX-License-Identifier: LGPL-2.0-or-later
0007 */
0008 
0009 #ifndef KITEN_DICTQUERY_H
0010 #define KITEN_DICTQUERY_H
0011 
0012 #include <QHash>
0013 #include <QHashIterator>
0014 #include <QString>
0015 #include <QStringList>
0016 
0017 class QChar;
0018 
0019 #include "kiten_export.h"
0020 
0021 /**
0022  * @short A class to allow users of libkiten to properly set up a database
0023  * query.
0024  *
0025  * In general, you either pass or parse in parameters from users or automated
0026  * programs to be later sent to the dictionary manager.
0027  *
0028  * This class is one of the three critical classes (along with
0029  * dictionary and EntryList) that are needed to use libkiten. Essentially...
0030  * you feed the dictionary class a DictQuery, and dictionary will return an
0031  * EntryList that matches the query.
0032  *
0033  * @code
0034  * dictionary dictManager;
0035  * //Load some dictionaries via dictionary class methods
0036  * EntryList *results;
0037  * DictQuery myQuery("kanji");
0038  * results = dictManager.doSearch(myQuery);
0039  * //Print results (if any)
0040  * @endcode
0041  *
0042  * The internal implementation understands four distinct types of data:
0043  * - Japanese Kanji
0044  * - Japanese Kana
0045  * - English Characters
0046  * - Property Pairs of the form \<i\>name\</i\>:\<i\>value\</i\>
0047  *
0048  * It is left up to the individual dictionary types to parse these values
0049  * for matching and appropriateness to each dictionary.
0050  * You can use the setDictionaries() method to narrow the range of the
0051  * dictionaries that it will apply to.
0052  *
0053  * A DictQuery object can be considered an "unordered set" of values.
0054  * When testing for equality or comparison, each property or text entry above
0055  * is seen as a unique item in a set. Order is not important for these
0056  * operations. The object will attempt to preserve the order from parsed
0057  * strings, but it does not consider this order to be important.
0058  *
0059  * In general, application level programs are expected to use the
0060  * QString based interfaces, and dictionary implementations and other
0061  * parts of libkiten are expected to use the direct accessors and mutators,
0062  * although specialized interfaces (such as kitenradselect) may use property
0063  * mutators for a limited set of properties (in this case, radicals).
0064  *
0065  * The query string input is of the following format:
0066  * &lt;QS&gt; ::= &lt;M&gt;DictQuery::mainDelimiter&lt;QS&gt;|&lt;R&gt;DictQuery::mainDelimiter&lt;QS&gt;|
0067  *       &lt;O&gt;DictQuery::mainDelimiter&lt;QS&gt;|NULL
0068  * &lt;M&gt;  ::= kana&lt;M&gt;|kana
0069  * &lt;R&gt;  ::= character&lt;R&gt;|character
0070  * &lt;O&gt;  ::= &lt;C&gt;DictQuery::propertySeperator&lt;D&gt;
0071  * &lt;C&gt;  ::= character&lt;C&gt;|character
0072  * &lt;D&gt;  ::= character&lt;D&gt;|character
0073  *
0074  * @author Joseph Kerian \<jkerian@gmail.com\>
0075  */
0076 class KITEN_EXPORT DictQuery
0077 {
0078 public:
0079     /**
0080      * This is the main delimiter that the DictQuery uses when parsing strings.
0081      * It is set to "space" at the moment.
0082      */
0083     static const QString mainDelimiter;
0084     /**
0085      * This is the delimiter that DictQuery uses when parsing property strings
0086      * of the form <i>strokes:4</i>. It is set to ":" at the moment. Note that the identifier is spelled "Seperator".
0087      */
0088     static const QString propertySeperator;
0089     /**
0090      * Default constructor.
0091      *
0092      * This will create an empty query object.
0093      */
0094     DictQuery();
0095     /**
0096      * Constructor with a given QString.
0097      *
0098      * @param str query string, parsed the same way as operator=(const QString&); the format is given in the class description
0099      */
0100     explicit DictQuery(const QString &str);
0101     /**
0102      * Copy constructor
0103      *
0104      * @param orig the original DictQuery to be copied
0105      */
0106     DictQuery(const DictQuery &orig);
0107     /**
0108      * Destructor
0109      */
0110     ~DictQuery();
0111 
0112     /**
0113      * @return true if the DictQuery is completely empty
0114      */
0115     bool isEmpty() const;
0116     /**
0117      * Removes all text/entries from the DictQuery
0118      */
0119     void clear();
0120     /**
0121      * Copy assignment operator
0122      */
0123     DictQuery &operator=(const DictQuery &old);
0124     /**
0125      * The clone method. NOTE(review): presumably returns a newly allocated copy that the caller must delete - confirm.
0126      */
0127     DictQuery *clone() const;
0128     /**
0129      * This returns a QString that represents the query. This may be the same
0130      * as the original string, but some slight changes may have occurred if you
0131      * have done any manipulations on the DictQuery.
0132      */
0133     const QString toString() const;
0134     /**
0135      * This is an implicit conversion to a QString... useful in a surprising
0136      * number of cases.
0137      */
0138     operator QString() const;
0139 
0140     /**
0141      * Use this to get a list of all the property keys in the query
0142      */
0143     const QList<QString> listPropertyKeys() const;
0144     /**
0145      * Returns a given extended attribute
0146      */
0147     const QString operator[](const QString &key) const;
0148     /**
0149      * Sets a given extended attribute. NOTE(review): the result is a QString by value, so assigning to it cannot change the query - confirm intent.
0150      */
0151     QString operator[](const QString &key);
0152     /**
0153      * Get a specific property by key (is the same as using operator[] const)
0154      */
0155     QString getProperty(const QString &key) const;
0156     /**
0157      * Verify if a given DictQuery object has a search parameter of a
0158      * particular property.
0159      */
0160     bool hasProperty(const QString &key) const;
0161     /**
0162      * Set a particular property... this does significantly more error checking
0163      * than the operator[] version, and will return false if there was a
0164      * problem (an empty value or bad key)
0165      *
0166      * @param key the key for this entry
0167      * @param value the value to set this to, will overwrite the current contents of this location
0168      *
0169      * @returns false on failure
0170      */
0171     bool setProperty(const QString &key, const QString &value);
0172     /**
0173      * Remove all instances of a property.
0174      *
0175      * @returns true if the DictQuery had properties of the given type
0176      */
0177     bool removeProperty(const QString &key);
0178     /**
0179      * Returns and removes the property
0180      */
0181     QString takeProperty(const QString &key);
0182 
0183     /**
0184      * Returns a list of the dictionaries that this particular query
0185      * will target. An empty list (the default) will search all dictionaries
0186      * that the user has selected.
0187      */
0188     QStringList getDictionaries() const;
0189     /**
0190      * Set the list of dictionaries to search. This will be read and used
0191      * by the dictionary manager.
0192      */
0193     void setDictionaries(const QStringList &newDictionaries);
0194 
0195     /**
0196      * Accessor for the non-Japanese meaning field
0197      */
0198     QString getMeaning() const;
0199     /**
0200      * Mutator for the Meaning field. NOTE(review): the bool result presumably reports success - confirm.
0201      */
0202     bool setMeaning(const QString &newMeaning);
0203     /**
0204      * Accessor for the Pronunciation field (generally kana)
0205      */
0206     QString getPronunciation() const;
0207     /**
0208      * Mutator for the Pronunciation field. NOTE(review): the bool result presumably reports success - confirm.
0209      */
0210     bool setPronunciation(const QString &newPronunciation);
0211     /**
0212      * Accessor for the Word/Kanji field (this is usually used for anything
0213      * containing kanji).
0214      */
0215     QString getWord() const;
0216     /**
0217      * Mutator for the Word/Kanji field. If what you are setting contains
0218      * only kana, consider using the setPronunciation instead.
0219      */
0220     bool setWord(const QString &newWord);
0221 
0222     /**
0223      * A simple setwise comparison of two DictQuery objects.
0224      * Note that order is not important here... only that each element
0225      * of one of the DictQuery objects appears in the other.
0226      */
0227     KITEN_EXPORT friend bool operator==(const DictQuery &a, const DictQuery &b);
0228     /**
0229      * Convenient negation of operator==( DictQuery, DictQuery )
0230      */
0231     KITEN_EXPORT friend bool operator!=(const DictQuery &other, const DictQuery &query);
0232     /**
0233      * Set-wise strictly less than. A better way to think of this
0234      * might be the "subset" operator
0235      */
0236     KITEN_EXPORT friend bool operator<(const DictQuery &a, const DictQuery &b);
0237     /**
0238      * Convenient combination of operator<( DictQuery, DictQuery ) and operator==
0239      */
0240     KITEN_EXPORT friend bool operator<=(const DictQuery &a, const DictQuery &b);
0241     /**
0242      * This will append the properties and other elements of the added query
0243      * onto the elements of the current one. If regenerated as a string,
0244      * it should look something like concatenation
0245      */
0246     DictQuery &operator+=(const DictQuery &old);
0247     /**
0248      * A simple string parser; see the class description for the accepted format
0249      */
0250     DictQuery &operator=(const QString &old);
0251     /**
0252      * A simple overload of operator+=( const DictQuery& ) taking a string
0253      */
0254     DictQuery &operator+=(const QString &old);
0255     /**
0256      * Simple addition... similar to operator+=
0257      */
0258     KITEN_EXPORT friend DictQuery operator+(const DictQuery &a, const DictQuery &b);
0259 #ifndef QT_NO_CAST_ASCII
0260     /**
0261      * An ascii cast variant of the operator=
0262      * Only available if QT_NO_CAST_ASCII is not defined on lib compilation
0263      */
0264     DictQuery &operator=(const char *);
0265 #endif
0266 
0267     // Specify the type of matching
0268     /**
0269      * This enum is used to define the type of matching this query is supposed
0270      * to do. The names are fairly self-explanatory
0271      */
0272     enum MatchType { Exact, Beginning, Ending, Anywhere };
0273     /**
0274      * Get which match type is currently set on the DictQuery.
0275      */
0276     MatchType getMatchType() const;
0277     /**
0278      * Set a match type. If this is not called, the default is Exact.
0279      */
0280     void setMatchType(MatchType newType);
0281 
0282     /**
0283      * This enum is used to define the type of word this query is supposed
0284      * to match.
0285      */
0286     enum MatchWordType { Any, Verb, Noun, Adjective, Adverb, Prefix, Suffix, Expression };
0287     /**
0288      * Get which word type is currently set on the DictQuery.
0289      */
0290     MatchWordType getMatchWordType() const;
0291     /**
0292      * Set a word type. If this is not called, the default value is 'Any'.
0293      */
0294     void setMatchWordType(MatchWordType newType);
0295 
0296     enum FilterType { NoFilter, Rare, CommonUncommon }; // Result filter read and written by getFilterType()/setFilterType()
0297     /**
0298      * Get which filter is currently set on the DictQuery.
0299      */
0300     FilterType getFilterType() const;
0301     /**
0302      * Set whether or not the query should output results separated in
0303      * common and uncommon sections.
0304      */
0305     void setFilterType(FilterType newType);
0306 
0307     /**
0308      * This enum is used as the return type for the two utility functions,
0309      * stringTypeCheck and charTypeCheck.
0310      */
0311     enum StringTypeEnum { Kanji, Kana, Latin, Mixed, ParseError };
0312     /**
0313      * A simple utility routine to tell us what sort of string we have
0314      * If the string contains only kanji, kana or non-kanji/kana characters, the result is Kanji,
0315      * Kana or Latin (perhaps a misnomer... but so far it's valid).
0316      * If the string contains both kanji and kana, the type returned is Kanji
0317      * If the string contains any other combination, the return type is Mixed.
0318      */
0319     static StringTypeEnum stringTypeCheck(const QString &in);
0320     /**
0321      * This utility does the same thing for QChar as stringTypeCheck does for QString. At the moment
0322      * the implementation is rather simple, and it assumes that anything that is not latin1 or kana is
0323      * a kanji.
0324      */
0325     static StringTypeEnum charTypeCheck(const QChar &ch);
0326 
0327 private:
0328     class Private; // Only forward-declared here; the definition is not part of this header
0329     Private *const d; // Pointer to the implementation object; the pointer itself is const
0330 };
0331 
0332 // Currently... KDE doesn't seem to want to use exceptions
0333 #ifdef LIBKITEN_USING_EXCEPTIONS
0334 /** Exception payload pairing a value (presumably the rejected query string) with an explanatory message. */
0335 class InvalidQueryException
0336 {
0337 public:
0338     /** @param x value associated with the failure; the message defaults to "Invalid Query String" */
0339     InvalidQueryException(const QString &x)
0340         : m_val(x)
0341         , m_msg(QStringLiteral("Invalid Query String"))
0342     {
0343     }
0344     /** @param m message, @param x value; no default on m, since a default on a non-trailing parameter is ill-formed */
0345     InvalidQueryException(const QString &m, const QString &x)
0346         : m_val(x)
0347         , m_msg(m)
0348     {
0349     }
0350     /** @return the value passed as @p x at construction */
0351     QString value() const { return m_val; }
0352     /** @return the message: either the one passed as @p m or the default text */
0353     QString message() const { return m_msg; }
0354 
0355 protected:
0356     QString m_val; ///< value supplied by the thrower
0357     QString m_msg; ///< explanatory message
0358 };
0359 #endif
0360 
0361 #endif