File indexing completed on 2024-06-23 05:55:39
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */

/** Zend_Search_Lucene_Search_QueryToken */
// require_once 'Zend/Search/Lucene/Search/QueryToken.php';


/**
 * Parsing context for one (sub)query level of the Lucene query parser.
 *
 * The context collects query entries together with either per-entry signs
 * ('+term1 term2 -term3') or boolean operators ('term1 and term2 or term3')
 * and turns them into a query object through getQuery(). The two styles
 * cannot be mixed within one context.
 *
 * The require_once statements for the collaborating classes are commented
 * out in this file, so those classes have to be made available by the
 * caller (for example through an autoloader).
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_QueryParserContext
{
    /**
     * Default field for the context.
     *
     * null means that the term should be searched through all fields;
     * Zend_Search_Lucene_Search_Query::rewriteQuery($index) is expected to
     * translate such queries into several queries.
     *
     * @var string|null
     */
    private $_defaultField;

    /**
     * Field specified for the next entry (null when none was specified).
     *
     * @var string|null
     */
    private $_nextEntryField = null;

    /**
     * Sign specified for the next entry.
     *
     * True means that the term is required.
     * False means that the term is prohibited.
     * null means that the term is neither prohibited nor required.
     *
     * @var boolean|null
     */
    private $_nextEntrySign = null;


    /**
     * Entries grouping mode
     */
    const GM_SIGNS   = 0;  // Signs mode: '+term1 term2 -term3 +(subquery1) -(subquery2)'
    const GM_BOOLEAN = 1;  // Boolean operators mode: 'term1 and term2 or (subquery1) and not (subquery2)'

    /**
     * Grouping mode (one of the GM_* constants, null until a sign or a
     * logical operator has been seen).
     *
     * @var integer|null
     */
    private $_mode = null;

    /**
     * Entries signs.
     * Used in GM_SIGNS grouping mode; kept index-aligned with $_entries.
     *
     * @var array
     */
    private $_signs = array();

    /**
     * Query entries
     * Each entry is a Zend_Search_Lucene_Search_QueryEntry object or
     * boolean operator (Zend_Search_Lucene_Search_QueryToken class constant)
     *
     * @var array
     */
    private $_entries = array();

    /**
     * Query string encoding
     *
     * @var string
     */
    private $_encoding;


    /**
     * Context object constructor
     *
     * @param string      $encoding
     * @param string|null $defaultField
     */
    public function __construct($encoding, $defaultField = null)
    {
        $this->_encoding     = $encoding;
        $this->_defaultField = $defaultField;
    }


    /**
     * Get the field that applies to the next entry: the explicitly set
     * next-entry field if there is one, the context default field otherwise.
     *
     * @return string|null
     */
    public function getField()
    {
        return ($this->_nextEntryField !== null) ? $this->_nextEntryField : $this->_defaultField;
    }

    /**
     * Set field for next entry
     *
     * @param string $field
     */
    public function setNextEntryField($field)
    {
        $this->_nextEntryField = $field;
    }


    /**
     * Set sign for next entry and switch the context to signs mode.
     *
     * @param integer $sign  Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED or ::TT_PROHIBITED
     * @throws Zend_Search_Lucene_Search_QueryParserException  if the context is already in boolean mode
     * @throws Zend_Search_Lucene_Exception                    if the sign type is not recognized
     */
    public function setNextEntrySign($sign)
    {
        if ($this->_mode === self::GM_BOOLEAN) {
            throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
        }

        // Note: the mode is switched before the sign is validated, as in the original code.
        $this->_mode = self::GM_SIGNS;

        if ($sign == Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED) {
            $this->_nextEntrySign = true;
        } elseif ($sign == Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED) {
            $this->_nextEntrySign = false;
        } else {
            throw new Zend_Search_Lucene_Exception('Unrecognized sign type.');
        }
    }


    /**
     * Add entry to a query.
     *
     * Outside of boolean mode the pending sign is recorded alongside the
     * entry. The pending field and sign are reset afterwards.
     *
     * @param Zend_Search_Lucene_Search_QueryEntry $entry
     */
    public function addEntry(Zend_Search_Lucene_Search_QueryEntry $entry)
    {
        if ($this->_mode !== self::GM_BOOLEAN) {
            $this->_signs[] = $this->_nextEntrySign;
        }

        $this->_entries[] = $entry;

        $this->_nextEntryField = null;
        $this->_nextEntrySign  = null;
    }


    /**
     * Return the most recently added entry so that a modifier can be applied to it.
     *
     * The entry list is only inspected, never modified, so a failed check
     * leaves the context exactly as it was.
     *
     * @param string $errorMessage  message for the exception thrown when no entry is eligible
     * @return Zend_Search_Lucene_Search_QueryEntry
     * @throws Zend_Search_Lucene_Search_QueryParserException
     */
    private function _lastEntryForModifier($errorMessage)
    {
        // A pending field or sign means the modifier does not directly follow an entry
        if ($this->_nextEntryField !== null || $this->_nextEntrySign !== null) {
            throw new Zend_Search_Lucene_Search_QueryParserException($errorMessage);
        }

        // end() yields false for an empty list, which fails the instanceof check as well
        $lastEntry = end($this->_entries);

        if (!$lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry) {
            // there are no entries or last entry is boolean operator
            throw new Zend_Search_Lucene_Search_QueryParserException($errorMessage);
        }

        return $lastEntry;
    }

    /**
     * Process fuzzy search or proximity search modifier
     *
     * @param mixed $parameter  modifier parameter, passed through to the entry unchanged
     * @throws Zend_Search_Lucene_Search_QueryParserException  if the modifier does not follow an entry
     */
    public function processFuzzyProximityModifier($parameter = null)
    {
        $lastEntry = $this->_lastEntryForModifier('\'~\' modifier must follow word or phrase.');

        $lastEntry->processFuzzyProximityModifier($parameter);
    }

    /**
     * Set boost factor to the last entry
     *
     * @param float $boostFactor
     * @throws Zend_Search_Lucene_Search_QueryParserException  if the modifier does not follow an entry
     */
    public function boost($boostFactor)
    {
        $lastEntry = $this->_lastEntryForModifier('\'^\' modifier must follow word, phrase or subquery.');

        $lastEntry->boost($boostFactor);
    }

    /**
     * Process logical operator and switch the context to boolean mode.
     *
     * @param integer $operator  boolean operator (Zend_Search_Lucene_Search_QueryToken class constant)
     * @throws Zend_Search_Lucene_Search_QueryParserException  if the context is already in signs mode
     */
    public function addLogicalOperator($operator)
    {
        if ($this->_mode === self::GM_SIGNS) {
            throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
        }

        $this->_mode = self::GM_BOOLEAN;

        $this->_entries[] = $operator;
    }


    /**
     * Generate 'signs style' query from the context
     * '+term1 term2 -term3 +(<subquery1>) ...'
     *
     * Entries without an explicit sign get a default sign derived from the
     * query parser's default operator.
     *
     * The method is public despite its underscore prefix; the visibility is
     * left unchanged so that existing callers keep working.
     *
     * @return Zend_Search_Lucene_Search_Query
     */
    public function _signStyleExpressionQuery()
    {
        $query = new Zend_Search_Lucene_Search_Query_Boolean();

        if (Zend_Search_Lucene_Search_QueryParser::getDefaultOperator() == Zend_Search_Lucene_Search_QueryParser::B_AND) {
            $defaultSign = true; // required
        } else {
            // Zend_Search_Lucene_Search_QueryParser::B_OR
            $defaultSign = null; // optional
        }

        foreach ($this->_entries as $entryId => $entry) {
            // isset() is false both for a null sign and for a missing index,
            // so a missing sign falls back to the default without a notice
            $sign = isset($this->_signs[$entryId]) ? $this->_signs[$entryId] : $defaultSign;
            $query->addSubquery($entry->getQuery($this->_encoding), $sign);
        }

        return $query;
    }


    /**
     * Generate 'boolean style' query from the context
     * 'term1 and term2 or term3 and (<subquery1>) and not (<subquery2>)'
     *
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Search_QueryParserException  when a Zend_Search_Exception is raised
     *                                                         while the expression is recognized
     * @throws Zend_Search_Lucene_Exception                    on an unknown operator, unless that exception
     *                                                         is itself a Zend_Search_Exception and therefore
     *                                                         wrapped by the handler below
     */
    private function _booleanExpressionQuery()
    {
        /**
         * We treat each level of an expression as a boolean expression in
         * a Disjunctive Normal Form
         *
         * AND operator has higher precedence than OR
         *
         * Thus logical query is a disjunction of one or more conjunctions of
         * one or more query entries
         */

        $expressionRecognizer = new Zend_Search_Lucene_Search_BooleanExpressionRecognizer();

        try {
            foreach ($this->_entries as $entry) {
                if ($entry instanceof Zend_Search_Lucene_Search_QueryEntry) {
                    $expressionRecognizer->processLiteral($entry);
                } else {
                    switch ($entry) {
                        case Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME:
                            $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_AND_OPERATOR);
                            break;

                        case Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME:
                            $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_OR_OPERATOR);
                            break;

                        case Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME:
                            $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_NOT_OPERATOR);
                            break;

                        default:
                            // Throw the exception class used elsewhere in this file;
                            // the previous code instantiated Zend_Search_Lucene here.
                            throw new Zend_Search_Lucene_Exception('Boolean expression error. Unknown operator type.');
                    }
                }
            }

            $conjunctions = $expressionRecognizer->finishExpression();
        } catch (Zend_Search_Exception $e) {
            // It's a query syntax error message and it should be user friendly,
            // so the recognizer's own message is only kept as the previous exception.
            throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error.', 0, $e);
        }

        // Remove 'only negative' conjunctions
        foreach ($conjunctions as $conjunctionId => $conjunction) {
            $nonNegativeEntryFound = false;

            foreach ($conjunction as $conjunctionEntry) {
                if ($conjunctionEntry[1]) {
                    $nonNegativeEntryFound = true;
                    break;
                }
            }

            if (!$nonNegativeEntryFound) {
                unset($conjunctions[$conjunctionId]);
            }
        }


        $subqueries = array();
        foreach ($conjunctions as $conjunction) {
            // Check if it's a one-term conjunction
            if (count($conjunction) == 1) {
                $subqueries[] = $conjunction[0][0]->getQuery($this->_encoding);
            } else {
                $subquery = new Zend_Search_Lucene_Search_Query_Boolean();

                foreach ($conjunction as $conjunctionEntry) {
                    $subquery->addSubquery($conjunctionEntry[0]->getQuery($this->_encoding), $conjunctionEntry[1]);
                }

                $subqueries[] = $subquery;
            }
        }

        if (count($subqueries) == 0) {
            return new Zend_Search_Lucene_Search_Query_Insignificant();
        }

        if (count($subqueries) == 1) {
            return $subqueries[0];
        }


        $query = new Zend_Search_Lucene_Search_Query_Boolean();

        foreach ($subqueries as $subquery) {
            // Non-required entry/subquery
            $query->addSubquery($subquery);
        }

        return $query;
    }

    /**
     * Generate query from current context: boolean style when the context is
     * in boolean mode, signs style otherwise.
     *
     * @return Zend_Search_Lucene_Search_Query
     */
    public function getQuery()
    {
        if ($this->_mode === self::GM_BOOLEAN) {
            return $this->_booleanExpressionQuery();
        }

        return $this->_signStyleExpressionQuery();
    }
}