File indexing completed on 2025-01-19 05:21:25
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */


/**
 * A single token: its term text, its start/end offsets in the source text,
 * and its position increment relative to the previous token.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Analysis_Token
{
    /**
     * The text of the term.
     *
     * @var string
     */
    private $_termText;

    /**
     * Start in source text.
     *
     * @var integer
     */
    private $_startOffset;

    /**
     * End in source text.
     *
     * @var integer
     */
    private $_endOffset;

    /**
     * The position of this token relative to the previous Token.
     *
     * The default value is one.
     *
     * Some common uses for this are:
     *
     * Set it to zero to put multiple terms in the same position. This is
     * useful if, e.g., a word has multiple stems. Searches for phrases
     * including either stem will match. In this case, all but the first stem's
     * increment should be set to zero: the increment of the first instance
     * should be one. Repeating a token with an increment of zero can also be
     * used to boost the scores of matches on that token.
     *
     * Set it to values greater than one to inhibit exact phrase matches.
     * If, for example, one does not want phrases to match across removed stop
     * words, then one could build a stop word filter that removes stop words and
     * also sets the increment to the number of stop words removed before each
     * non-stop word. Then exact phrase queries will only match when the terms
     * occur with no intervening stop words.
     *
     * @var integer
     */
    private $_positionIncrement;


    /**
     * Object constructor.
     *
     * Stores the term text and its offsets; the position increment starts
     * at its default value of one.
     *
     * @param string  $text  Term text of the token
     * @param integer $start Start offset in the source text
     * @param integer $end   End offset in the source text
     */
    public function __construct($text, $start, $end)
    {
        $this->_termText    = $text;
        $this->_startOffset = $start;
        $this->_endOffset   = $end;

        $this->_positionIncrement = 1;
    }


    /**
     * positionIncrement setter.
     *
     * Returns the token itself so that it can be chained, consistent with
     * setTermText().
     *
     * @param integer $positionIncrement
     * @return Zend_Search_Lucene_Analysis_Token Provides a fluent interface
     */
    public function setPositionIncrement($positionIncrement)
    {
        $this->_positionIncrement = $positionIncrement;
        return $this;
    }

    /**
     * Returns the position increment of this Token.
     *
     * @return integer
     */
    public function getPositionIncrement()
    {
        return $this->_positionIncrement;
    }

    /**
     * Returns the Token's term text.
     *
     * @return string
     */
    public function getTermText()
    {
        return $this->_termText;
    }

    /**
     * Sets the Token's term text.
     *
     * @param string $text
     * @return Zend_Search_Lucene_Analysis_Token Provides a fluent interface
     */
    public function setTermText($text)
    {
        $this->_termText = $text;
        return $this;
    }

    /**
     * Returns this Token's starting offset, the position of the first character
     * corresponding to this token in the source text.
     *
     * Note:
     * The difference between getEndOffset() and getStartOffset() may not be equal
     * to strlen(Zend_Search_Lucene_Analysis_Token::getTermText()), as the term text may have been altered
     * by a stemmer or some other filter.
     *
     * @return integer
     */
    public function getStartOffset()
    {
        return $this->_startOffset;
    }

    /**
     * Returns this Token's ending offset, one greater than the position of the
     * last character corresponding to this token in the source text.
     *
     * @return integer
     */
    public function getEndOffset()
    {
        return $this->_endOffset;
    }
}