File indexing completed on 2024-06-23 05:55:39

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Search_Lucene
0017  * @subpackage Search
0018  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0019  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0020  * @version    $Id$
0021  */
0022 
0023 /**
0024  * @category   Zend
0025  * @package    Zend_Search_Lucene
0026  * @subpackage Search
0027  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0028  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0029  */
class Zend_Search_Lucene_Search_QueryToken
{
    /**
     * Token types.
     */
    const TT_WORD                 = 0;  // Word
    const TT_PHRASE               = 1;  // Phrase (one or several quoted words)
    const TT_FIELD                = 2;  // Field name in 'field:word', field:<phrase> or field:(<subquery>) pairs
    const TT_FIELD_INDICATOR      = 3;  // ':'
    const TT_REQUIRED             = 4;  // '+'
    const TT_PROHIBITED           = 5;  // '-'
    const TT_FUZZY_PROX_MARK      = 6;  // '~'
    const TT_BOOSTING_MARK        = 7;  // '^'
    const TT_RANGE_INCL_START     = 8;  // '['
    const TT_RANGE_INCL_END       = 9;  // ']'
    const TT_RANGE_EXCL_START     = 10; // '{'
    const TT_RANGE_EXCL_END       = 11; // '}'
    const TT_SUBQUERY_START       = 12; // '('
    const TT_SUBQUERY_END         = 13; // ')'
    const TT_AND_LEXEME           = 14; // 'and' word (any letter case) or '&&'
    const TT_OR_LEXEME            = 15; // 'or'  word (any letter case) or '||'
    const TT_NOT_LEXEME           = 16; // 'not' word (any letter case) or '!'
    const TT_TO_LEXEME            = 17; // 'to'  word (any letter case)
    const TT_NUMBER               = 18; // Number, like: 10, 0.8, .64, ....


    /**
     * Returns all possible lexeme types.
     * It's used for syntax analyzer state machine initialization
     *
     * @return array List of every TT_* constant value
     */
    public static function getTypes()
    {
        return array(   self::TT_WORD,
                        self::TT_PHRASE,
                        self::TT_FIELD,
                        self::TT_FIELD_INDICATOR,
                        self::TT_REQUIRED,
                        self::TT_PROHIBITED,
                        self::TT_FUZZY_PROX_MARK,
                        self::TT_BOOSTING_MARK,
                        self::TT_RANGE_INCL_START,
                        self::TT_RANGE_INCL_END,
                        self::TT_RANGE_EXCL_START,
                        self::TT_RANGE_EXCL_END,
                        self::TT_SUBQUERY_START,
                        self::TT_SUBQUERY_END,
                        self::TT_AND_LEXEME,
                        self::TT_OR_LEXEME,
                        self::TT_NOT_LEXEME,
                        self::TT_TO_LEXEME,
                        self::TT_NUMBER
                     );
    }


    /**
     * Token categories.
     *
     * A category is what the caller passes to the constructor; the constructor
     * refines it into one of the TT_* token types.
     */
    const TC_WORD           = 0;   // Word
    const TC_PHRASE         = 1;   // Phrase (one or several quoted words)
    const TC_NUMBER         = 2;   // Numbers, which are used with syntax elements. Ex. roam~0.8
    const TC_SYNTAX_ELEMENT = 3;   // :  +  -  ( )  [ ]  { }  !  ||  && ~ ^


    /**
     * Token type (one of the TT_* constants).
     *
     * @var integer
     */
    public $type;

    /**
     * Token text, stored exactly as it was passed to the constructor.
     *
     * @var string
     */
    public $text;

    /**
     * Token position within query (1-based).
     *
     * @var integer
     */
    public $position;


    /**
     * Builds a token from its category and text and derives the token type.
     *
     * Words equal to 'and', 'or', 'not' or 'to' (in any letter case) become the
     * corresponding operator lexemes; every other word is a plain TT_WORD.
     * Syntax elements are mapped to a token type by their exact text.
     *
     * @param integer $tokenCategory One of the TC_* constants
     * @param string  $tokenText     Token text
     * @param integer $position      Zero-based position; stored incremented by one
     * @throws Zend_Search_Lucene_Exception If the syntax element text or the
     *                                      token category is not recognized
     */
    public function __construct($tokenCategory, $tokenText, $position)
    {
        $this->text     = $tokenText;
        $this->position = $position + 1; // Start from 1

        switch ($tokenCategory) {
            case self::TC_WORD:
                // Operator words are matched case-insensitively; lower-case only once.
                switch (strtolower($tokenText)) {
                    case 'and':
                        $this->type = self::TT_AND_LEXEME;
                        break;

                    case 'or':
                        $this->type = self::TT_OR_LEXEME;
                        break;

                    case 'not':
                        $this->type = self::TT_NOT_LEXEME;
                        break;

                    case 'to':
                        $this->type = self::TT_TO_LEXEME;
                        break;

                    default:
                        $this->type = self::TT_WORD;
                }
                break;

            case self::TC_PHRASE:
                $this->type = self::TT_PHRASE;
                break;

            case self::TC_NUMBER:
                $this->type = self::TT_NUMBER;
                break;

            case self::TC_SYNTAX_ELEMENT:
                // switch compares loosely; casting to string keeps a non-string
                // value (e.g. integer 0 or boolean true on PHP < 8) from being
                // mistaken for ':' instead of being rejected below.
                switch ((string) $tokenText) {
                    case ':':
                        $this->type = self::TT_FIELD_INDICATOR;
                        break;

                    case '+':
                        $this->type = self::TT_REQUIRED;
                        break;

                    case '-':
                        $this->type = self::TT_PROHIBITED;
                        break;

                    case '~':
                        $this->type = self::TT_FUZZY_PROX_MARK;
                        break;

                    case '^':
                        $this->type = self::TT_BOOSTING_MARK;
                        break;

                    case '[':
                        $this->type = self::TT_RANGE_INCL_START;
                        break;

                    case ']':
                        $this->type = self::TT_RANGE_INCL_END;
                        break;

                    case '{':
                        $this->type = self::TT_RANGE_EXCL_START;
                        break;

                    case '}':
                        $this->type = self::TT_RANGE_EXCL_END;
                        break;

                    case '(':
                        $this->type = self::TT_SUBQUERY_START;
                        break;

                    case ')':
                        $this->type = self::TT_SUBQUERY_END;
                        break;

                    case '!':
                        $this->type = self::TT_NOT_LEXEME;
                        break;

                    case '&&':
                        $this->type = self::TT_AND_LEXEME;
                        break;

                    case '||':
                        $this->type = self::TT_OR_LEXEME;
                        break;

                    default:
                        // require_once 'Zend/Search/Lucene/Exception.php';
                        throw new Zend_Search_Lucene_Exception('Unrecognized query syntax lexeme: \'' . $tokenText . '\'');
                }
                break;

            default:
                // require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Unrecognized lexeme type: \'' . $tokenCategory . '\'');
        }
    }
}