File indexing completed on 2024-06-23 05:55:39
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */

/**
 * A single query token: its resolved type, its original text and its
 * 1-based position within the query.
 *
 * The constructor receives a coarse token category (one of the TC_*
 * constants) together with the token text and resolves them into one of
 * the fine-grained TT_* token types.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_QueryToken
{
    /**
     * Token types.
     *
     * Note: TT_FIELD is declared and returned by getTypes(), but no branch
     * of the constructor in this class assigns it.
     */
    const TT_WORD                 = 0;  // Word
    const TT_PHRASE               = 1;  // Phrase (one or several quoted words)
    const TT_FIELD                = 2;  // Field name in 'field:word', field:<phrase> or field:(<subquery>) pairs
    const TT_FIELD_INDICATOR      = 3;  // ':'
    const TT_REQUIRED             = 4;  // '+'
    const TT_PROHIBITED           = 5;  // '-'
    const TT_FUZZY_PROX_MARK      = 6;  // '~'
    const TT_BOOSTING_MARK        = 7;  // '^'
    const TT_RANGE_INCL_START     = 8;  // '['
    const TT_RANGE_INCL_END       = 9;  // ']'
    const TT_RANGE_EXCL_START     = 10; // '{'
    const TT_RANGE_EXCL_END       = 11; // '}'
    const TT_SUBQUERY_START       = 12; // '('
    const TT_SUBQUERY_END         = 13; // ')'
    const TT_AND_LEXEME           = 14; // 'AND' or 'and' (also '&&')
    const TT_OR_LEXEME            = 15; // 'OR'  or 'or'  (also '||')
    const TT_NOT_LEXEME           = 16; // 'NOT' or 'not' (also '!')
    const TT_TO_LEXEME            = 17; // 'TO'  or 'to'
    const TT_NUMBER               = 18; // Number, like: 10, 0.8, .64, ....


    /**
     * Returns all possible lexeme types.
     * It's used for syntax analyzer state machine initialization
     *
     * @return array List of every TT_* constant, in ascending value order
     */
    public static function getTypes()
    {
        return array(
            self::TT_WORD,
            self::TT_PHRASE,
            self::TT_FIELD,
            self::TT_FIELD_INDICATOR,
            self::TT_REQUIRED,
            self::TT_PROHIBITED,
            self::TT_FUZZY_PROX_MARK,
            self::TT_BOOSTING_MARK,
            self::TT_RANGE_INCL_START,
            self::TT_RANGE_INCL_END,
            self::TT_RANGE_EXCL_START,
            self::TT_RANGE_EXCL_END,
            self::TT_SUBQUERY_START,
            self::TT_SUBQUERY_END,
            self::TT_AND_LEXEME,
            self::TT_OR_LEXEME,
            self::TT_NOT_LEXEME,
            self::TT_TO_LEXEME,
            self::TT_NUMBER
        );
    }


    /**
     * Token categories (the coarse classification passed to the constructor).
     */
    const TC_WORD           = 0;  // Word
    const TC_PHRASE         = 1;  // Phrase (one or several quoted words)
    const TC_NUMBER         = 2;  // Numbers, which are used with syntax elements. Ex. roam~0.8
    const TC_SYNTAX_ELEMENT = 3;  // : + - ( ) [ ] { } ! || && ~ ^


    /**
     * Token type (one of the TT_* constants).
     *
     * @var integer
     */
    public $type;

    /**
     * Token text, exactly as passed to the constructor.
     *
     * @var string
     */
    public $text;

    /**
     * Token position within query (1-based).
     *
     * @var integer
     */
    public $position;


    /**
     * Builds a token from its category, text and zero-based position.
     *
     * The category and the text are resolved into a TT_* type:
     *  - TC_WORD: the words 'and', 'or', 'not' and 'to' (compared
     *    case-insensitively) become the corresponding *_LEXEME types;
     *    any other word is TT_WORD;
     *  - TC_PHRASE and TC_NUMBER map directly to TT_PHRASE and TT_NUMBER;
     *  - TC_SYNTAX_ELEMENT is resolved from the exact token text.
     *
     * @param integer $tokenCategory One of the TC_* constants
     * @param string  $tokenText     Token text
     * @param integer $position      Zero-based position; stored as $position + 1
     * @throws Zend_Search_Lucene_Exception If the category is unknown, or the
     *                                      text is not a known syntax element
     */
    public function __construct($tokenCategory, $tokenText, $position)
    {
        $this->text     = $tokenText;
        $this->position = $position + 1; // Start from 1

        switch ($tokenCategory) {
            case self::TC_WORD:
                // Operator words are recognized regardless of letter case;
                // lowercase once instead of once per comparison.
                switch (strtolower($tokenText)) {
                    case 'and':
                        $this->type = self::TT_AND_LEXEME;
                        break;

                    case 'or':
                        $this->type = self::TT_OR_LEXEME;
                        break;

                    case 'not':
                        $this->type = self::TT_NOT_LEXEME;
                        break;

                    case 'to':
                        $this->type = self::TT_TO_LEXEME;
                        break;

                    default:
                        $this->type = self::TT_WORD;
                }
                break;

            case self::TC_PHRASE:
                $this->type = self::TT_PHRASE;
                break;

            case self::TC_NUMBER:
                $this->type = self::TT_NUMBER;
                break;

            case self::TC_SYNTAX_ELEMENT:
                switch ($tokenText) {
                    case ':':
                        $this->type = self::TT_FIELD_INDICATOR;
                        break;

                    case '+':
                        $this->type = self::TT_REQUIRED;
                        break;

                    case '-':
                        $this->type = self::TT_PROHIBITED;
                        break;

                    case '~':
                        $this->type = self::TT_FUZZY_PROX_MARK;
                        break;

                    case '^':
                        $this->type = self::TT_BOOSTING_MARK;
                        break;

                    case '[':
                        $this->type = self::TT_RANGE_INCL_START;
                        break;

                    case ']':
                        $this->type = self::TT_RANGE_INCL_END;
                        break;

                    case '{':
                        $this->type = self::TT_RANGE_EXCL_START;
                        break;

                    case '}':
                        $this->type = self::TT_RANGE_EXCL_END;
                        break;

                    case '(':
                        $this->type = self::TT_SUBQUERY_START;
                        break;

                    case ')':
                        $this->type = self::TT_SUBQUERY_END;
                        break;

                    // Symbolic spellings of the boolean operators share the
                    // types of their word forms.
                    case '!':
                        $this->type = self::TT_NOT_LEXEME;
                        break;

                    case '&&':
                        $this->type = self::TT_AND_LEXEME;
                        break;

                    case '||':
                        $this->type = self::TT_OR_LEXEME;
                        break;

                    default:
                        throw new Zend_Search_Lucene_Exception('Unrecognized query syntax lexeme: \'' . $tokenText . '\'');
                }
                break;

            default:
                throw new Zend_Search_Lucene_Exception('Unrecognized lexeme type: \'' . $tokenCategory . '\'');
        }
    }
}