File indexing completed on 2025-01-19 05:21:25

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Search_Lucene
0017  * @subpackage Analysis
0018  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0019  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0020  * @version    $Id$
0021  */
0022 
0023 
0024 /** Zend_Search_Lucene_Analysis_Analyzer_Common */
0025 // require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
0026 
0027 
/**
 * Analyzer that splits its input into tokens made of ASCII letters only.
 *
 * On reset() the input is transliterated to ASCII; nextToken() then walks
 * the converted text and hands every run of [a-zA-Z] characters to
 * normalize(). Token offsets are therefore byte offsets into the converted
 * input, not into the text as originally supplied.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */

class Zend_Search_Lucene_Analysis_Analyzer_Common_Text extends Zend_Search_Lucene_Analysis_Analyzer_Common
{
    /**
     * Current position in a stream
     *
     * Byte offset at which the next token search starts. Defaults to 0 so
     * that nextToken() never passes null as the preg_match() offset, even
     * if it is called before reset().
     *
     * @var integer
     */
    private $_position = 0;

    /**
     * Reset token stream
     *
     * Rewinds the position to the start of the input and, when an input is
     * set, converts it to ASCII. Afterwards the stream encoding is always
     * recorded as 'ASCII'.
     *
     * @return void
     */
    public function reset()
    {
        $this->_position = 0;

        if ($this->_input === null) {
            return;
        }

        // Convert input into ASCII. The conversion is skipped on AIX
        // (NOTE(review): presumably because of iconv limitations on that
        // platform -- the reason is not visible in this file).
        if (PHP_OS !== 'AIX') {
            $converted = iconv($this->_encoding, 'ASCII//TRANSLIT', $this->_input);

            // iconv() returns false on failure. Storing that boolean would
            // discard the whole input, so keep the unconverted bytes
            // instead -- the same thing the AIX branch does.
            if ($converted !== false) {
                $this->_input = $converted;
            }
        }
        $this->_encoding = 'ASCII';
    }

    /**
     * Tokenization stream API
     * Get next token
     * Returns null at the end of stream
     *
     * Searches forward from the current position for the next run of ASCII
     * letters, advances the position past it and passes the resulting token
     * through normalize(). When normalize() returns null the token is
     * treated as skipped and the search continues with the next run.
     *
     * @return Zend_Search_Lucene_Analysis_Token|null
     */
    public function nextToken()
    {
        if ($this->_input === null) {
            return null;
        }

        do {
            if (! preg_match('/[a-zA-Z]+/', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_position)) {
                // It covers both cases a) there are no matches (preg_match(...) === 0)
                // b) error occurred (preg_match(...) === FALSE)
                return null;
            }

            // With PREG_OFFSET_CAPTURE each match is a [text, byte offset] pair.
            $str    = $match[0][0];
            $pos    = $match[0][1];
            $endpos = $pos + strlen($str);

            // Continue after this match on the next iteration / call.
            $this->_position = $endpos;

            $token = $this->normalize(new Zend_Search_Lucene_Analysis_Token($str, $pos, $endpos));
        } while ($token === null); // try again if token is skipped

        return $token;
    }
}
0096