File indexing completed on 2024-06-23 05:55:39

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Search_Lucene
0017  * @subpackage Search
0018  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0019  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0020  * @version    $Id$
0021  */
0022 
0023 /** Zend_Search_Lucene_Search_QueryToken */
0024 // require_once 'Zend/Search/Lucene/Search/QueryToken.php';
0025 
0026 
/**
 * Query parser context.
 *
 * Collects the entries, entry signs and logical operators of a query
 * (sub)expression while it is being parsed, and builds a query object
 * from them on request (see getQuery()).
 *
 * A context works in one of two mutually exclusive grouping modes:
 *  - signs mode   ('+term1 term2 -term3 +(subquery1) -(subquery2)')
 *  - boolean mode ('term1 and term2  or  (subquery1) and not (subquery2)')
 * The mode is fixed by the first sign or logical operator that is registered;
 * mixing both styles raises a Zend_Search_Lucene_Search_QueryParserException.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_QueryParserContext
{
    /**
     * Entries grouping modes
     */
    const GM_SIGNS   = 0;  // Signs mode: '+term1 term2 -term3 +(subquery1) -(subquery2)'
    const GM_BOOLEAN = 1;  // Boolean operators mode: 'term1 and term2  or  (subquery1) and not (subquery2)'

    /**
     * Default field for the context.
     *
     * null means that the term should be searched through all fields
     * (per the original note, such queries are expanded into several
     * queries when the query is rewritten against an index).
     *
     * @var string|null
     */
    private $_defaultField;

    /**
     * Field specified for the next entry, or null if none was specified.
     *
     * @var string|null
     */
    private $_nextEntryField = null;

    /**
     * Sign specified for the next entry.
     *
     * true means that the term is required.
     * false means that the term is prohibited.
     * null means that the term is neither prohibited nor required.
     *
     * @var boolean|null
     */
    private $_nextEntrySign = null;

    /**
     * Grouping mode (one of the GM_* constants), null until it is determined.
     *
     * @var integer|null
     */
    private $_mode = null;

    /**
     * Entries signs.
     * Used in GM_SIGNS grouping mode; indexes match those of $_entries.
     *
     * @var array
     */
    private $_signs = array();

    /**
     * Query entries.
     * Each entry is a Zend_Search_Lucene_Search_QueryEntry object or
     * a boolean operator (Zend_Search_Lucene_Search_QueryToken class constant).
     *
     * @var array
     */
    private $_entries = array();

    /**
     * Query string encoding
     *
     * @var string
     */
    private $_encoding;


    /**
     * Context object constructor
     *
     * @param string      $encoding     Query string encoding, passed to every entry's getQuery()
     * @param string|null $defaultField Default field, null to search through all fields
     */
    public function __construct($encoding, $defaultField = null)
    {
        $this->_encoding     = $encoding;
        $this->_defaultField = $defaultField;
    }


    /**
     * Get the field that applies to the next entry.
     *
     * Returns the field set through setNextEntryField() if there is one,
     * otherwise the context default field.
     *
     * @return string|null
     */
    public function getField()
    {
        if ($this->_nextEntryField !== null) {
            return $this->_nextEntryField;
        }

        return $this->_defaultField;
    }

    /**
     * Set field for next entry
     *
     * @param string $field
     */
    public function setNextEntryField($field)
    {
        $this->_nextEntryField = $field;
    }


    /**
     * Set sign for next entry and switch the context into signs mode.
     *
     * @param integer $sign Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED or ::TT_PROHIBITED
     * @throws Zend_Search_Lucene_Search_QueryParserException if the context is already in boolean mode
     * @throws Zend_Search_Lucene_Exception                   if the sign type is not recognized
     */
    public function setNextEntrySign($sign)
    {
        if ($this->_mode === self::GM_BOOLEAN) {
            throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
        }

        // The mode is switched before the sign is validated (original behavior).
        $this->_mode = self::GM_SIGNS;

        if ($sign == Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED) {
            $this->_nextEntrySign = true;
        } else if ($sign == Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED) {
            $this->_nextEntrySign = false;
        } else {
            throw new Zend_Search_Lucene_Exception('Unrecognized sign type.');
        }
    }


    /**
     * Add entry to a query.
     *
     * Outside of boolean mode the pending sign is recorded alongside the entry.
     * The pending field and sign are reset afterwards.
     *
     * @param Zend_Search_Lucene_Search_QueryEntry $entry
     */
    public function addEntry(Zend_Search_Lucene_Search_QueryEntry $entry)
    {
        if ($this->_mode !== self::GM_BOOLEAN) {
            $this->_signs[] = $this->_nextEntrySign;
        }

        $this->_entries[] = $entry;

        $this->_nextEntryField = null;
        $this->_nextEntrySign  = null;
    }


    /**
     * Return the most recently added query entry a modifier can be applied to.
     *
     * The entry is only inspected, never removed, so a rejected modifier
     * leaves the list of entries untouched.
     *
     * @param string $errorMessage Message of the exception raised when no entry is available
     * @return Zend_Search_Lucene_Search_QueryEntry
     * @throws Zend_Search_Lucene_Search_QueryParserException
     */
    private function _lastEntryForModifier($errorMessage)
    {
        // A pending field or sign means the modifier does not directly follow an entry
        if ($this->_nextEntryField !== null  ||  $this->_nextEntrySign !== null) {
            throw new Zend_Search_Lucene_Search_QueryParserException($errorMessage);
        }

        // end() returns false for an empty list and the operator constant
        // when the last element is a boolean operator
        $lastEntry = end($this->_entries);

        if (!$lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry) {
            throw new Zend_Search_Lucene_Search_QueryParserException($errorMessage);
        }

        return $lastEntry;
    }

    /**
     * Process fuzzy search or proximity search modifier
     *
     * @param mixed $parameter Modifier parameter, forwarded unchanged to the last entry
     * @throws Zend_Search_Lucene_Search_QueryParserException if the modifier does not follow a query entry
     */
    public function processFuzzyProximityModifier($parameter = null)
    {
        $lastEntry = $this->_lastEntryForModifier('\'~\' modifier must follow word or phrase.');

        $lastEntry->processFuzzyProximityModifier($parameter);
    }

    /**
     * Set boost factor to the last entry
     *
     * @param float $boostFactor
     * @throws Zend_Search_Lucene_Search_QueryParserException if the modifier does not follow a query entry
     */
    public function boost($boostFactor)
    {
        $lastEntry = $this->_lastEntryForModifier('\'^\' modifier must follow word, phrase or subquery.');

        $lastEntry->boost($boostFactor);
    }

    /**
     * Process logical operator and switch the context into boolean mode.
     *
     * @param integer $operator Boolean operator (Zend_Search_Lucene_Search_QueryToken class constant)
     * @throws Zend_Search_Lucene_Search_QueryParserException if the context is already in signs mode
     */
    public function addLogicalOperator($operator)
    {
        if ($this->_mode === self::GM_SIGNS) {
            throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
        }

        $this->_mode = self::GM_BOOLEAN;

        $this->_entries[] = $operator;
    }


    /**
     * Generate 'signs style' query from the context
     * '+term1 term2 -term3 +(<subquery1>) ...'
     *
     * Entries without an explicit sign get a default derived from the
     * parser's default operator: required for B_AND, optional otherwise.
     *
     * NOTE: despite the underscore prefix this method is public in the
     * original code; the visibility is left as is so existing callers keep working.
     *
     * @return Zend_Search_Lucene_Search_Query
     */
    public function _signStyleExpressionQuery()
    {
        $query = new Zend_Search_Lucene_Search_Query_Boolean();

        if (Zend_Search_Lucene_Search_QueryParser::getDefaultOperator() == Zend_Search_Lucene_Search_QueryParser::B_AND) {
            $defaultSign = true; // required
        } else {
            // Zend_Search_Lucene_Search_QueryParser::B_OR
            $defaultSign = null; // optional
        }

        foreach ($this->_entries as $entryId => $entry) {
            // isset() is false both for a null sign and for a missing index,
            // so the default applies in either case without raising a notice
            $sign = isset($this->_signs[$entryId]) ? $this->_signs[$entryId] : $defaultSign;
            $query->addSubquery($entry->getQuery($this->_encoding), $sign);
        }

        return $query;
    }


    /**
     * Check whether a conjunction contains at least one non-negative entry.
     *
     * @param array $conjunction List of array(entry, sign) pairs
     * @return boolean
     */
    private function _hasNonNegativeEntry($conjunction)
    {
        foreach ($conjunction as $conjunctionEntry) {
            if ($conjunctionEntry[1]) {
                return true;
            }
        }

        return false;
    }

    /**
     * Generate 'boolean style' query from the context
     * 'term1 and term2   or   term3 and (<subquery1>) and not (<subquery2>)'
     *
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Search_QueryParserException on a malformed boolean expression
     */
    private function _booleanExpressionQuery()
    {
        /**
         * We treat each level of an expression as a boolean expression in
         * a Disjunctive Normal Form
         *
         * AND operator has higher precedence than OR
         *
         * Thus logical query is a disjunction of one or more conjunctions of
         * one or more query entries
         */
        $expressionRecognizer = new Zend_Search_Lucene_Search_BooleanExpressionRecognizer();

        try {
            foreach ($this->_entries as $entry) {
                if ($entry instanceof Zend_Search_Lucene_Search_QueryEntry) {
                    $expressionRecognizer->processLiteral($entry);
                    continue;
                }

                switch ($entry) {
                    case Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME:
                        $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_AND_OPERATOR);
                        break;

                    case Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME:
                        $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_OR_OPERATOR);
                        break;

                    case Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME:
                        $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_NOT_OPERATOR);
                        break;

                    default:
                        // Must be an exception class: Zend_Search_Lucene itself is the index class
                        throw new Zend_Search_Lucene_Exception('Boolean expression error. Unknown operator type.');
                }
            }

            $conjunctions = $expressionRecognizer->finishExpression();
        } catch (Zend_Search_Exception $e) {
            // It's a query syntax error message and it should be user friendly,
            // so the underlying message is omitted and only kept as the previous exception.
            throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error.', 0, $e);
        }

        $subqueries = array();
        foreach ($conjunctions as $conjunction) {
            // Skip 'only negative' conjunctions
            if (!$this->_hasNonNegativeEntry($conjunction)) {
                continue;
            }

            // Check, if it's a one term conjunction
            if (count($conjunction) == 1) {
                $subqueries[] = $conjunction[0][0]->getQuery($this->_encoding);
                continue;
            }

            $subquery = new Zend_Search_Lucene_Search_Query_Boolean();

            foreach ($conjunction as $conjunctionEntry) {
                $subquery->addSubquery($conjunctionEntry[0]->getQuery($this->_encoding), $conjunctionEntry[1]);
            }

            $subqueries[] = $subquery;
        }

        if (count($subqueries) == 0) {
            return new Zend_Search_Lucene_Search_Query_Insignificant();
        }

        if (count($subqueries) == 1) {
            return $subqueries[0];
        }

        $query = new Zend_Search_Lucene_Search_Query_Boolean();

        foreach ($subqueries as $subquery) {
            // Non-required entry/subquery
            $query->addSubquery($subquery);
        }

        return $query;
    }

    /**
     * Generate query from current context
     *
     * Uses the boolean-expression builder in boolean mode and the
     * signs-style builder otherwise (including when no mode was set).
     *
     * @return Zend_Search_Lucene_Search_Query
     */
    public function getQuery()
    {
        if ($this->_mode === self::GM_BOOLEAN) {
            return $this->_booleanExpressionQuery();
        }

        return $this->_signStyleExpressionQuery();
    }
}