File indexing completed on 2025-01-19 05:21:25
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */


/** Zend_Search_Lucene_Analysis_Analyzer_Common */
// require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';


/**
 * Analyzer that splits its input into tokens made of runs of ASCII letters
 * (a-z, A-Z). Every other byte acts as a token separator.
 *
 * The input is transliterated to ASCII in reset() before tokenization, so
 * token offsets refer to byte positions in the converted input.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */

class Zend_Search_Lucene_Analysis_Analyzer_Common_Text extends Zend_Search_Lucene_Analysis_Analyzer_Common
{
    /**
     * Current position in a stream (byte offset at which the next
     * nextToken() call starts searching).
     *
     * Starts at 0 so that nextToken() has a valid offset even if it is
     * called before reset().
     *
     * @var integer
     */
    private $_position = 0;

    /**
     * Reset token stream
     *
     * Rewinds the position to the start of the input and, when an input is
     * set, transliterates it to ASCII and records 'ASCII' as the encoding.
     */
    public function reset()
    {
        $this->_position = 0;

        if ($this->_input === null) {
            return;
        }

        // convert input into ascii
        // NOTE(review): the conversion is skipped on AIX; presumably its iconv
        // does not support //TRANSLIT - confirm.
        if (PHP_OS !== 'AIX') {
            $converted = iconv($this->_encoding, 'ASCII//TRANSLIT', $this->_input);

            // iconv() returns false on failure (e.g. illegal input sequence or
            // unknown encoding). Keep the original input in that case instead of
            // overwriting it with false, which would silently drop every token.
            // This mirrors the AIX path, where the input is also left unconverted.
            if ($converted !== false) {
                $this->_input = $converted;
            }
        }
        $this->_encoding = 'ASCII';
    }

    /**
     * Tokenization stream API
     * Get next token
     * Returns null at the end of stream
     *
     * Tokens for which normalize() returns null are skipped and the search
     * continues with the next run of letters.
     *
     * @return Zend_Search_Lucene_Analysis_Token|null
     */
    public function nextToken()
    {
        if ($this->_input === null) {
            return null;
        }


        do {
            if (! preg_match('/[a-zA-Z]+/', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_position)) {
                // It covers both cases a) there are no matches (preg_match(...) === 0)
                // b) error occurred (preg_match(...) === FALSE)
                return null;
            }

            // With PREG_OFFSET_CAPTURE each match is a [text, byte offset] pair.
            $str    = $match[0][0];
            $pos    = $match[0][1];
            $endpos = $pos + strlen($str);

            // Continue after this match on the next iteration / call.
            $this->_position = $endpos;

            $token = $this->normalize(new Zend_Search_Lucene_Analysis_Token($str, $pos, $endpos));
        } while ($token === null); // try again if token is skipped

        return $token;
    }
}
