Lucene/Analysis/Token.php

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Search_Lucene
0017  * @subpackage Analysis
0018  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0019  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0020  * @version    $Id$
0021  */
0022
0023
/**
 * Value holder for one token: its term text, its start/end offsets in the
 * source text, and its position increment relative to the previous token.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Analysis_Token
{
    /**
     * The text of the term.
     *
     * @var string
     */
    private $_termText;

    /**
     * Start offset of the token in the source text.
     *
     * @var integer
     */
    private $_startOffset;

    /**
     * End offset of the token in the source text.
     *
     * @var integer
     */
    private $_endOffset;

    /**
     * The position of this token relative to the previous Token.
     *
     * The default value is one.
     *
     * Some common uses for this are:
     * Set it to zero to put multiple terms in the same position.  This is
     * useful if, e.g., a word has multiple stems.  Searches for phrases
     * including either stem will match.  In this case, all but the first stem's
     * increment should be set to zero: the increment of the first instance
     * should be one.  Repeating a token with an increment of zero can also be
     * used to boost the scores of matches on that token.
     *
     * Set it to values greater than one to inhibit exact phrase matches.
     * If, for example, one does not want phrases to match across removed stop
     * words, then one could build a stop word filter that removes stop words and
     * also sets the increment to the number of stop words removed before each
     * non-stop word.  Then exact phrase queries will only match when the terms
     * occur with no intervening stop words.
     *
     * @var integer
     */
    private $_positionIncrement = 1;


    /**
     * Object constructor
     *
     * Stores the term text and its offsets as given; the position increment
     * starts at its default of one.
     *
     * @param string  $text  Term text
     * @param integer $start Start offset in the source text
     * @param integer $end   End offset in the source text
     */
    public function __construct($text, $start, $end)
    {
        $this->_termText    = $text;
        $this->_startOffset = $start;
        $this->_endOffset   = $end;

        // Every new token starts one position after the previous one.
        $this->_positionIncrement = 1;
    }


    /**
     * positionIncrement setter
     *
     * The value is stored as given, without validation.
     *
     * @param integer $positionIncrement
     * @return Zend_Search_Lucene_Analysis_Token Provides a fluent interface
     */
    public function setPositionIncrement($positionIncrement)
    {
        $this->_positionIncrement = $positionIncrement;

        // Return the instance so this setter chains like setTermText().
        return $this;
    }

    /**
     * Returns the position increment of this Token.
     *
     * @return integer
     */
    public function getPositionIncrement()
    {
        return $this->_positionIncrement;
    }

    /**
     * Returns the Token's term text.
     *
     * @return string
     */
    public function getTermText()
    {
        return $this->_termText;
    }

    /**
     * Sets the Token's term text.
     *
     * The stored offsets are not touched, so they keep describing the
     * original span in the source text.
     *
     * @param string $text
     * @return Zend_Search_Lucene_Analysis_Token Provides a fluent interface
     */
    public function setTermText($text)
    {
        $this->_termText = $text;

        return $this;
    }

    /**
     * Returns this Token's starting offset, the position of the first character
     * corresponding to this token in the source text.
     *
     * Note:
     * The difference between getEndOffset() and getStartOffset() may not be equal
     * to strlen(Zend_Search_Lucene_Analysis_Token::getTermText()), as the term text may have been altered
     * by a stemmer or some other filter.
     *
     * @return integer
     */
    public function getStartOffset()
    {
        return $this->_startOffset;
    }

    /**
     * Returns this Token's ending offset, one greater than the position of the
     * last character corresponding to this token in the source text.
     *
     * @return integer
     */
    public function getEndOffset()
    {
        return $this->_endOffset;
    }
}
0166