File indexing completed on 2024-06-23 05:55:39

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Search_Lucene
0017  * @subpackage Search
0018  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0019  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0020  * @version    $Id$
0021  */
0022 
0023 /** Zend_Search_Lucene_FSM */
0024 // require_once 'Zend/Search/Lucene/FSM.php';
0025 
0026 /** Zend_Search_Lucene_Search_QueryToken */
0027 // require_once 'Zend/Search/Lucene/Search/QueryToken.php';
0028 
0029 /**
0030  * @category   Zend
0031  * @package    Zend_Search_Lucene
0032  * @subpackage Search
0033  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0034  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0035  */
class Zend_Search_Lucene_Search_QueryLexer extends Zend_Search_Lucene_FSM
{
    /** State Machine states */
    const ST_WHITE_SPACE     = 0;
    const ST_SYNT_LEXEME     = 1;
    const ST_LEXEME          = 2;
    const ST_QUOTED_LEXEME   = 3;
    const ST_ESCAPED_CHAR    = 4;
    const ST_ESCAPED_QCHAR   = 5;
    const ST_LEXEME_MODIFIER = 6;
    const ST_NUMBER          = 7;
    const ST_MANTISSA        = 8;
    const ST_ERROR           = 9;

    /** Input symbols */
    const IN_WHITE_SPACE     = 0;
    const IN_SYNT_CHAR       = 1;
    const IN_LEXEME_MODIFIER = 2;
    const IN_ESCAPE_CHAR     = 3;
    const IN_QUOTE           = 4;
    const IN_DECIMAL_POINT   = 5;
    const IN_ASCII_DIGIT     = 6;
    const IN_CHAR            = 7;
    const IN_MUTABLE_CHAR    = 8;

    /** Character classes used by _translateInput() to map a character to an input symbol */
    const QUERY_WHITE_SPACE_CHARS      = " \n\r\t";
    const QUERY_SYNT_CHARS             = ':()[]{}!|&';
    const QUERY_MUTABLE_CHARS          = '+-';
    const QUERY_DOUBLECHARLEXEME_CHARS = '|&';
    const QUERY_LEXEMEMODIFIER_CHARS   = '~^';
    const QUERY_ASCIIDIGITS_CHARS      = '0123456789';

    /**
     * List of recognized lexemes
     *
     * @var array
     */
    private $_lexemes;

    /**
     * Query string (array of single- or non single-byte characters)
     *
     * @var array
     */
    private $_queryString;

    /**
     * Current position within a query string
     * Used to create appropriate error messages
     *
     * @var integer
     */
    private $_queryStringPosition;

    /**
     * Recognized part of current lexeme
     *
     * Starts out empty and is emptied again at the beginning of every
     * tokenize() call and whenever a complete lexeme has been emitted.
     *
     * @var string
     */
    private $_currentLexeme = '';

    /**
     * Build the lexer state machine.
     *
     * Registers the states and input symbols with the parent FSM, declares one
     * rule per {state, input symbol} pair for every state except ST_ERROR, and
     * binds the lexeme building methods of this class as entry/transition actions.
     *
     * Each rule is given as array(state, input symbol, target state[, action]).
     * The optional fourth element is only used for the "not allowed" inputs and
     * is always one of the *ErrException() actions, which throw.
     */
    public function __construct()
    {
        parent::__construct( array(self::ST_WHITE_SPACE,
                                   self::ST_SYNT_LEXEME,
                                   self::ST_LEXEME,
                                   self::ST_QUOTED_LEXEME,
                                   self::ST_ESCAPED_CHAR,
                                   self::ST_ESCAPED_QCHAR,
                                   self::ST_LEXEME_MODIFIER,
                                   self::ST_NUMBER,
                                   self::ST_MANTISSA,
                                   self::ST_ERROR),
                             array(self::IN_WHITE_SPACE,
                                   self::IN_SYNT_CHAR,
                                   self::IN_MUTABLE_CHAR,
                                   self::IN_LEXEME_MODIFIER,
                                   self::IN_ESCAPE_CHAR,
                                   self::IN_QUOTE,
                                   self::IN_DECIMAL_POINT,
                                   self::IN_ASCII_DIGIT,
                                   self::IN_CHAR));


        /** Error actions (each of them throws a query parser exception) */
        $lexemeModifierErrorAction    = new Zend_Search_Lucene_FSMAction($this, 'lexModifierErrException');
        $quoteWithinLexemeErrorAction = new Zend_Search_Lucene_FSMAction($this, 'quoteWithinLexemeErrException');
        $wrongNumberErrorAction       = new Zend_Search_Lucene_FSMAction($this, 'wrongNumberErrException');


        /** Rules: between lexemes */
        $this->addRules(array( array(self::ST_WHITE_SPACE,   self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
                               array(self::ST_WHITE_SPACE,   self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
                               array(self::ST_WHITE_SPACE,   self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
                               array(self::ST_WHITE_SPACE,   self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
                               array(self::ST_WHITE_SPACE,   self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_CHAR),
                               array(self::ST_WHITE_SPACE,   self::IN_QUOTE,           self::ST_QUOTED_LEXEME),
                               array(self::ST_WHITE_SPACE,   self::IN_DECIMAL_POINT,   self::ST_LEXEME),
                               array(self::ST_WHITE_SPACE,   self::IN_ASCII_DIGIT,     self::ST_LEXEME),
                               array(self::ST_WHITE_SPACE,   self::IN_CHAR,            self::ST_LEXEME)
                             ));
        /** Rules: right after a syntax lexeme */
        $this->addRules(array( array(self::ST_SYNT_LEXEME,   self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
                               array(self::ST_SYNT_LEXEME,   self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
                               array(self::ST_SYNT_LEXEME,   self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
                               array(self::ST_SYNT_LEXEME,   self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
                               array(self::ST_SYNT_LEXEME,   self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_CHAR),
                               array(self::ST_SYNT_LEXEME,   self::IN_QUOTE,           self::ST_QUOTED_LEXEME),
                               array(self::ST_SYNT_LEXEME,   self::IN_DECIMAL_POINT,   self::ST_LEXEME),
                               array(self::ST_SYNT_LEXEME,   self::IN_ASCII_DIGIT,     self::ST_LEXEME),
                               array(self::ST_SYNT_LEXEME,   self::IN_CHAR,            self::ST_LEXEME)
                             ));
        /** Rules: inside an unquoted lexeme ('+' and '-' are ordinary characters here) */
        $this->addRules(array( array(self::ST_LEXEME,        self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
                               array(self::ST_LEXEME,        self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
                               array(self::ST_LEXEME,        self::IN_MUTABLE_CHAR,    self::ST_LEXEME),
                               array(self::ST_LEXEME,        self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
                               array(self::ST_LEXEME,        self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_CHAR),

                               // IN_QUOTE     not allowed
                               array(self::ST_LEXEME,        self::IN_QUOTE,           self::ST_ERROR, $quoteWithinLexemeErrorAction),

                               array(self::ST_LEXEME,        self::IN_DECIMAL_POINT,   self::ST_LEXEME),
                               array(self::ST_LEXEME,        self::IN_ASCII_DIGIT,     self::ST_LEXEME),
                               array(self::ST_LEXEME,        self::IN_CHAR,            self::ST_LEXEME)
                             ));
        /** Rules: inside a quoted lexeme (only '\' and the closing quote are special) */
        $this->addRules(array( array(self::ST_QUOTED_LEXEME, self::IN_WHITE_SPACE,     self::ST_QUOTED_LEXEME),
                               array(self::ST_QUOTED_LEXEME, self::IN_SYNT_CHAR,       self::ST_QUOTED_LEXEME),
                               array(self::ST_QUOTED_LEXEME, self::IN_MUTABLE_CHAR,    self::ST_QUOTED_LEXEME),
                               array(self::ST_QUOTED_LEXEME, self::IN_LEXEME_MODIFIER, self::ST_QUOTED_LEXEME),
                               array(self::ST_QUOTED_LEXEME, self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_QCHAR),
                               array(self::ST_QUOTED_LEXEME, self::IN_QUOTE,           self::ST_WHITE_SPACE),
                               array(self::ST_QUOTED_LEXEME, self::IN_DECIMAL_POINT,   self::ST_QUOTED_LEXEME),
                               array(self::ST_QUOTED_LEXEME, self::IN_ASCII_DIGIT,     self::ST_QUOTED_LEXEME),
                               array(self::ST_QUOTED_LEXEME, self::IN_CHAR,            self::ST_QUOTED_LEXEME)
                             ));
        /** Rules: character following '\' outside quotes (any input continues the lexeme) */
        $this->addRules(array( array(self::ST_ESCAPED_CHAR,  self::IN_WHITE_SPACE,     self::ST_LEXEME),
                               array(self::ST_ESCAPED_CHAR,  self::IN_SYNT_CHAR,       self::ST_LEXEME),
                               array(self::ST_ESCAPED_CHAR,  self::IN_MUTABLE_CHAR,    self::ST_LEXEME),
                               array(self::ST_ESCAPED_CHAR,  self::IN_LEXEME_MODIFIER, self::ST_LEXEME),
                               array(self::ST_ESCAPED_CHAR,  self::IN_ESCAPE_CHAR,     self::ST_LEXEME),
                               array(self::ST_ESCAPED_CHAR,  self::IN_QUOTE,           self::ST_LEXEME),
                               array(self::ST_ESCAPED_CHAR,  self::IN_DECIMAL_POINT,   self::ST_LEXEME),
                               array(self::ST_ESCAPED_CHAR,  self::IN_ASCII_DIGIT,     self::ST_LEXEME),
                               array(self::ST_ESCAPED_CHAR,  self::IN_CHAR,            self::ST_LEXEME)
                             ));
        /** Rules: character following '\' inside quotes (any input continues the quoted lexeme) */
        $this->addRules(array( array(self::ST_ESCAPED_QCHAR, self::IN_WHITE_SPACE,     self::ST_QUOTED_LEXEME),
                               array(self::ST_ESCAPED_QCHAR, self::IN_SYNT_CHAR,       self::ST_QUOTED_LEXEME),
                               array(self::ST_ESCAPED_QCHAR, self::IN_MUTABLE_CHAR,    self::ST_QUOTED_LEXEME),
                               array(self::ST_ESCAPED_QCHAR, self::IN_LEXEME_MODIFIER, self::ST_QUOTED_LEXEME),
                               array(self::ST_ESCAPED_QCHAR, self::IN_ESCAPE_CHAR,     self::ST_QUOTED_LEXEME),
                               array(self::ST_ESCAPED_QCHAR, self::IN_QUOTE,           self::ST_QUOTED_LEXEME),
                               array(self::ST_ESCAPED_QCHAR, self::IN_DECIMAL_POINT,   self::ST_QUOTED_LEXEME),
                               array(self::ST_ESCAPED_QCHAR, self::IN_ASCII_DIGIT,     self::ST_QUOTED_LEXEME),
                               array(self::ST_ESCAPED_QCHAR, self::IN_CHAR,            self::ST_QUOTED_LEXEME)
                             ));
        /** Rules: right after a lexeme modifier ('~' or '^') */
        $this->addRules(array( array(self::ST_LEXEME_MODIFIER, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
                               array(self::ST_LEXEME_MODIFIER, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
                               array(self::ST_LEXEME_MODIFIER, self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
                               array(self::ST_LEXEME_MODIFIER, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),

                               // IN_ESCAPE_CHAR       not allowed
                               array(self::ST_LEXEME_MODIFIER, self::IN_ESCAPE_CHAR,     self::ST_ERROR, $lexemeModifierErrorAction),

                               // IN_QUOTE             not allowed
                               array(self::ST_LEXEME_MODIFIER, self::IN_QUOTE,           self::ST_ERROR, $lexemeModifierErrorAction),


                               array(self::ST_LEXEME_MODIFIER, self::IN_DECIMAL_POINT,   self::ST_MANTISSA),
                               array(self::ST_LEXEME_MODIFIER, self::IN_ASCII_DIGIT,     self::ST_NUMBER),

                               // IN_CHAR              not allowed
                               array(self::ST_LEXEME_MODIFIER, self::IN_CHAR,            self::ST_ERROR, $lexemeModifierErrorAction),
                             ));
        /** Rules: integer part of a number following a lexeme modifier */
        $this->addRules(array( array(self::ST_NUMBER, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
                               array(self::ST_NUMBER, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
                               array(self::ST_NUMBER, self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
                               array(self::ST_NUMBER, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),

                               // IN_ESCAPE_CHAR       not allowed
                               array(self::ST_NUMBER, self::IN_ESCAPE_CHAR,     self::ST_ERROR, $wrongNumberErrorAction),

                               // IN_QUOTE             not allowed
                               array(self::ST_NUMBER, self::IN_QUOTE,           self::ST_ERROR, $wrongNumberErrorAction),

                               array(self::ST_NUMBER, self::IN_DECIMAL_POINT,   self::ST_MANTISSA),
                               array(self::ST_NUMBER, self::IN_ASCII_DIGIT,     self::ST_NUMBER),

                               // IN_CHAR              not allowed
                               array(self::ST_NUMBER, self::IN_CHAR,            self::ST_ERROR, $wrongNumberErrorAction),
                             ));
        /** Rules: part of a number after the decimal point */
        $this->addRules(array( array(self::ST_MANTISSA, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
                               array(self::ST_MANTISSA, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
                               array(self::ST_MANTISSA, self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
                               array(self::ST_MANTISSA, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),

                               // IN_ESCAPE_CHAR       not allowed
                               array(self::ST_MANTISSA, self::IN_ESCAPE_CHAR,     self::ST_ERROR, $wrongNumberErrorAction),

                               // IN_QUOTE             not allowed
                               array(self::ST_MANTISSA, self::IN_QUOTE,           self::ST_ERROR, $wrongNumberErrorAction),

                               // IN_DECIMAL_POINT     not allowed
                               array(self::ST_MANTISSA, self::IN_DECIMAL_POINT,   self::ST_ERROR, $wrongNumberErrorAction),

                               array(self::ST_MANTISSA, self::IN_ASCII_DIGIT,     self::ST_MANTISSA),

                               // IN_CHAR              not allowed
                               array(self::ST_MANTISSA, self::IN_CHAR,            self::ST_ERROR, $wrongNumberErrorAction),
                             ));


        /** Actions */
        $syntaxLexemeAction    = new Zend_Search_Lucene_FSMAction($this, 'addQuerySyntaxLexeme');
        $lexemeModifierAction  = new Zend_Search_Lucene_FSMAction($this, 'addLexemeModifier');
        $addLexemeAction       = new Zend_Search_Lucene_FSMAction($this, 'addLexeme');
        $addQuotedLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addQuotedLexeme');
        $addNumberLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addNumberLexeme');
        $addLexemeCharAction   = new Zend_Search_Lucene_FSMAction($this, 'addLexemeChar');


        /** Syntax lexeme */
        $this->addEntryAction(self::ST_SYNT_LEXEME,  $syntaxLexemeAction);
        // Two lexemes in succession
        $this->addTransitionAction(self::ST_SYNT_LEXEME, self::ST_SYNT_LEXEME, $syntaxLexemeAction);


        /** Lexeme */
        $this->addEntryAction(self::ST_LEXEME,                       $addLexemeCharAction);
        $this->addTransitionAction(self::ST_LEXEME, self::ST_LEXEME, $addLexemeCharAction);
        // ST_ESCAPED_CHAR => ST_LEXEME transition is covered by ST_LEXEME entry action

        $this->addTransitionAction(self::ST_LEXEME, self::ST_WHITE_SPACE,     $addLexemeAction);
        $this->addTransitionAction(self::ST_LEXEME, self::ST_SYNT_LEXEME,     $addLexemeAction);
        $this->addTransitionAction(self::ST_LEXEME, self::ST_QUOTED_LEXEME,   $addLexemeAction);
        $this->addTransitionAction(self::ST_LEXEME, self::ST_LEXEME_MODIFIER, $addLexemeAction);
        $this->addTransitionAction(self::ST_LEXEME, self::ST_NUMBER,          $addLexemeAction);
        $this->addTransitionAction(self::ST_LEXEME, self::ST_MANTISSA,        $addLexemeAction);


        /** Quoted lexeme */
        // No entry action is needed (the opening quote is skipped)
        $this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_QUOTED_LEXEME, $addLexemeCharAction);
        $this->addTransitionAction(self::ST_ESCAPED_QCHAR, self::ST_QUOTED_LEXEME, $addLexemeCharAction);
        // Closing quote changes state to ST_WHITE_SPACE; other target states are not used
        $this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_WHITE_SPACE,   $addQuotedLexemeAction);


        /** Lexeme modifier */
        $this->addEntryAction(self::ST_LEXEME_MODIFIER, $lexemeModifierAction);


        /** Number */
        $this->addEntryAction(self::ST_NUMBER,                           $addLexemeCharAction);
        $this->addEntryAction(self::ST_MANTISSA,                         $addLexemeCharAction);
        $this->addTransitionAction(self::ST_NUMBER,   self::ST_NUMBER,   $addLexemeCharAction);
        // ST_NUMBER => ST_MANTISSA transition is covered by ST_MANTISSA entry action
        $this->addTransitionAction(self::ST_MANTISSA, self::ST_MANTISSA, $addLexemeCharAction);

        $this->addTransitionAction(self::ST_NUMBER,   self::ST_WHITE_SPACE,     $addNumberLexemeAction);
        $this->addTransitionAction(self::ST_NUMBER,   self::ST_SYNT_LEXEME,     $addNumberLexemeAction);
        $this->addTransitionAction(self::ST_NUMBER,   self::ST_LEXEME_MODIFIER, $addNumberLexemeAction);
        $this->addTransitionAction(self::ST_MANTISSA, self::ST_WHITE_SPACE,     $addNumberLexemeAction);
        $this->addTransitionAction(self::ST_MANTISSA, self::ST_SYNT_LEXEME,     $addNumberLexemeAction);
        $this->addTransitionAction(self::ST_MANTISSA, self::ST_LEXEME_MODIFIER, $addNumberLexemeAction);
    }


    /**
     * Translate input char to an input symbol of state machine
     *
     * The character classes are checked in a fixed order; anything that does not
     * belong to one of them (including multi-byte characters) is IN_CHAR.
     *
     * @param string $char
     * @return integer One of the IN_* constants
     */
    private function _translateInput($char)
    {
        if        (strpos(self::QUERY_WHITE_SPACE_CHARS,    $char) !== false) { return self::IN_WHITE_SPACE;
        } else if (strpos(self::QUERY_SYNT_CHARS,           $char) !== false) { return self::IN_SYNT_CHAR;
        } else if (strpos(self::QUERY_MUTABLE_CHARS,        $char) !== false) { return self::IN_MUTABLE_CHAR;
        } else if (strpos(self::QUERY_LEXEMEMODIFIER_CHARS, $char) !== false) { return self::IN_LEXEME_MODIFIER;
        } else if (strpos(self::QUERY_ASCIIDIGITS_CHARS,    $char) !== false) { return self::IN_ASCII_DIGIT;
        } else if ($char === '"' )                                            { return self::IN_QUOTE;
        } else if ($char === '.' )                                            { return self::IN_DECIMAL_POINT;
        } else if ($char === '\\')                                            { return self::IN_ESCAPE_CHAR;
        } else                                                                { return self::IN_CHAR;
        }
    }


    /**
     * This method is used to tokenize query string into lexemes
     *
     * The string is split into characters with iconv, each character is fed to
     * the state machine, and a final white space symbol is fed to flush the
     * lexeme in progress. If the machine does not end up in ST_WHITE_SPACE
     * (unterminated quoted phrase, trailing escape char) the query is rejected.
     *
     * @param string $inputString
     * @param string $encoding
     * @return array List of Zend_Search_Lucene_Search_QueryToken objects
     * @throws Zend_Search_Lucene_Search_QueryParserException
     */
    public function tokenize($inputString, $encoding)
    {
        $this->reset();

        $this->_lexemes     = array();
        $this->_queryString = array();

        // A previous call may have been aborted by an exception in the middle of
        // a lexeme; without this reset its characters would be prepended to the
        // first lexeme of this query.
        $this->_currentLexeme = '';

        if (PHP_OS == 'AIX' && $encoding == '') {
            $encoding = 'ISO8859-1';
        }
        $strLength = iconv_strlen($inputString, $encoding);

        // Workaround for iconv_substr bug
        $inputString .= ' ';

        for ($count = 0; $count < $strLength; $count++) {
            $this->_queryString[$count] = iconv_substr($inputString, $count, 1, $encoding);
        }

        // The position is a property (not a local) because addQuerySyntaxLexeme()
        // advances it to consume the second char of '&&' / '||'.
        $charCount = count($this->_queryString);
        for ($this->_queryStringPosition = 0;
             $this->_queryStringPosition < $charCount;
             $this->_queryStringPosition++) {
            $this->process($this->_translateInput($this->_queryString[$this->_queryStringPosition]));
        }

        // Terminating white space flushes the lexeme in progress. Note that the
        // position is now one past the last character of the query.
        $this->process(self::IN_WHITE_SPACE);

        if ($this->getState() != self::ST_WHITE_SPACE) {
            throw new Zend_Search_Lucene_Search_QueryParserException('Unexpected end of query');
        }

        $this->_queryString = null;

        return $this->_lexemes;
    }



    /*********************************************************************
     * Actions implementation
     *
     * Actions modify the list of recognized lexemes
     *********************************************************************/

    /**
     * Add query syntax lexeme
     *
     * '&' and '|' are only valid doubled ('&&', '||'); the second character is
     * consumed here by advancing the current position. A ':' is not stored as a
     * token of its own: it turns the preceding word token into a field token.
     *
     * @throws Zend_Search_Lucene_Search_QueryParserException
     */
    public function addQuerySyntaxLexeme()
    {
        $lexeme = $this->_queryString[$this->_queryStringPosition];

        // Process two char lexemes
        if (strpos(self::QUERY_DOUBLECHARLEXEME_CHARS, $lexeme) !== false) {
            // increase current position in a query string
            $this->_queryStringPosition++;

            // the next char must exist and repeat the current one
            if ($this->_queryStringPosition == count($this->_queryString)  ||
                $this->_queryString[$this->_queryStringPosition] !== $lexeme) {
                throw new Zend_Search_Lucene_Search_QueryParserException('Two chars lexeme expected. ' . $this->_positionMsg());
            }

            // duplicate character
            $lexeme .= $lexeme;
        }

        $token = new Zend_Search_Lucene_Search_QueryToken(
                                Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
                                $lexeme,
                                $this->_queryStringPosition);

        // Skip this lexeme if it's a field indicator ':' and treat previous as 'field' instead of 'word'
        if ($token->type == Zend_Search_Lucene_Search_QueryToken::TT_FIELD_INDICATOR) {
            // array_pop() returns null when no lexeme has been recognized yet
            $token = array_pop($this->_lexemes);
            if ($token === null  ||  $token->type != Zend_Search_Lucene_Search_QueryToken::TT_WORD) {
                throw new Zend_Search_Lucene_Search_QueryParserException('Field mark \':\' must follow field name. ' . $this->_positionMsg());
            }

            $token->type = Zend_Search_Lucene_Search_QueryToken::TT_FIELD;
        }

        $this->_lexemes[] = $token;
    }

    /**
     * Add lexeme modifier
     *
     * Stores the character at the current position as a syntax element token.
     */
    public function addLexemeModifier()
    {
        $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
                                    Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
                                    $this->_queryString[$this->_queryStringPosition],
                                    $this->_queryStringPosition);
    }


    /**
     * Add lexeme
     *
     * Emits the accumulated characters as a word token and starts a new lexeme.
     * The reported position is the one before the character that ended the lexeme.
     */
    public function addLexeme()
    {
        $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
                                    Zend_Search_Lucene_Search_QueryToken::TC_WORD,
                                    $this->_currentLexeme,
                                    $this->_queryStringPosition - 1);

        $this->_currentLexeme = '';
    }

    /**
     * Add quoted lexeme
     *
     * Emits the accumulated characters as a phrase token and starts a new lexeme.
     */
    public function addQuotedLexeme()
    {
        $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
                                    Zend_Search_Lucene_Search_QueryToken::TC_PHRASE,
                                    $this->_currentLexeme,
                                    $this->_queryStringPosition);

        $this->_currentLexeme = '';
    }

    /**
     * Add number lexeme
     *
     * Emits the accumulated characters as a number token and starts a new lexeme.
     * The reported position is the one before the character that ended the number.
     */
    public function addNumberLexeme()
    {
        $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
                                    Zend_Search_Lucene_Search_QueryToken::TC_NUMBER,
                                    $this->_currentLexeme,
                                    $this->_queryStringPosition - 1);
        $this->_currentLexeme = '';
    }

    /**
     * Extend lexeme by one char
     *
     * Does nothing when the current position is outside the query string. That
     * happens for the terminating white space tokenize() feeds after the last
     * character when the query ends inside a quoted phrase or right after an
     * escape char; reading the missing offset would raise a PHP notice/warning
     * ahead of the "Unexpected end of query" exception.
     */
    public function addLexemeChar()
    {
        if (isset($this->_queryString[$this->_queryStringPosition])) {
            $this->_currentLexeme .= $this->_queryString[$this->_queryStringPosition];
        }
    }


    /**
     * Position message
     *
     * @return string Current query string position formatted for error messages
     */
    private function _positionMsg()
    {
        return 'Position is ' . $this->_queryStringPosition . '.';
    }


    /*********************************************************************
     * Syntax errors actions
     *********************************************************************/

    /**
     * Error action: a lexeme modifier is followed by a character that is not allowed there
     *
     * @throws Zend_Search_Lucene_Search_QueryParserException always
     */
    public function lexModifierErrException()
    {
        throw new Zend_Search_Lucene_Search_QueryParserException('Lexeme modifier character can be followed only by number, white space or query syntax element. ' . $this->_positionMsg());
    }

    /**
     * Error action: an unescaped quote appears inside an unquoted lexeme
     *
     * @throws Zend_Search_Lucene_Search_QueryParserException always
     */
    public function quoteWithinLexemeErrException()
    {
        throw new Zend_Search_Lucene_Search_QueryParserException('Quote within lexeme must be escaped by \'\\\' char. ' . $this->_positionMsg());
    }

    /**
     * Error action: a number contains a character that is not allowed there
     *
     * @throws Zend_Search_Lucene_Search_QueryParserException always
     */
    public function wrongNumberErrException()
    {
        // Trailing space keeps the sentence apart from the position message,
        // like in the other error messages of this class.
        throw new Zend_Search_Lucene_Search_QueryParserException('Wrong number syntax. ' . $this->_positionMsg());
    }
}
0510