File indexing completed on 2025-01-19 05:21:26

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Search_Lucene
0017  * @subpackage Index
0018  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0019  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0020  * @version    $Id$
0021  */
0022 
0023 
0024 /**
0025  * A Term represents a word from text.  This is the unit of search.  It is
0026  * composed of two elements, the text of the word, as a string, and the name of
0027  * the field that the text occurred in, an interned string.
0028  *
0029  * Note that terms may represent more than words from text fields, but also
0030  * things like dates, email addresses, urls, etc.
0031  *
0032  * @category   Zend
0033  * @package    Zend_Search_Lucene
0034  * @subpackage Index
0035  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0036  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0037  */
class Zend_Search_Lucene_Index_Term
{
    /**
     * Field name or field number (depending on context)
     *
     * @var mixed
     */
    public $field;

    /**
     * Term value
     *
     * @var string
     */
    public $text;


    /**
     * Object constructor
     *
     * @param string $text  Term value
     * @param mixed  $field Field name or field number. When null, the value
     *                      returned by Zend_Search_Lucene::getDefaultSearchField()
     *                      is stored instead.
     */
    public function __construct($text, $field = null)
    {
        $this->field = ($field === null)?  Zend_Search_Lucene::getDefaultSearchField() : $field;
        $this->text  = $text;
    }


    /**
     * Returns term key
     *
     * The key is the field and the term text joined by a single NUL byte.
     *
     * @return string
     */
    public function key()
    {
        return $this->field . chr(0) . $this->text;
    }

    /**
     * Get term prefix
     *
     * Returns the leading part of $str that holds at most $length characters,
     * where character boundaries are derived from UTF-8 lead bytes (see
     * {@link _getCharBytes()}). A multi-byte sequence that is cut off by the
     * end of the string is not included in the result.
     *
     * @param string $str
     * @param integer $length Maximum number of characters in the prefix
     * @return string
     */
    public static function getPrefix($str, $length)
    {
        // The byte length never changes, so compute it once for the whole scan.
        $strBytes    = strlen($str);
        $prefixBytes = 0;
        $prefixChars = 0;

        while ($prefixBytes < $strBytes  &&  $prefixChars < $length) {
            $charBytes = self::_getCharBytes(ord($str[$prefixBytes]));

            if ($prefixBytes + $charBytes > $strBytes) {
                // wrong character: the sequence runs past the end of the string
                break;
            }

            $prefixChars++;
            $prefixBytes += $charBytes;
        }

        return substr($str, 0, $prefixBytes);
    }

    /**
     * Get UTF-8 string length
     *
     * Counts characters by walking the string lead byte by lead byte (see
     * {@link _getCharBytes()}). Counting stops at a multi-byte sequence that
     * is cut off by the end of the string; that sequence is not counted.
     *
     * @param string $str
     * @return integer Number of characters
     */
    public static function getLength($str)
    {
        // The byte length never changes, so compute it once for the whole scan.
        $strBytes = strlen($str);
        $bytes    = 0;
        $chars    = 0;

        while ($bytes < $strBytes) {
            $charBytes = self::_getCharBytes(ord($str[$bytes]));

            if ($bytes + $charBytes > $strBytes) {
                // wrong character: the sequence runs past the end of the string
                break;
            }

            $chars++;
            $bytes += $charBytes;
        }

        return $chars;
    }

    /**
     * Get the number of bytes occupied by a character, judged by its first byte
     *
     * Only the high bits of the given byte are inspected; following bytes are
     * not validated:
     *  - top two bits not both set -> 1 byte
     *  - 110xxxxx                  -> 2 bytes
     *  - 1110xxxx                  -> 3 bytes
     *  - 1111xxxx                  -> 4 bytes
     *
     * @param integer $leadByte Byte value (0..255) of the character's first byte
     * @return integer Byte count, between 1 and 4
     */
    private static function _getCharBytes($leadByte)
    {
        if (($leadByte & 0xC0) != 0xC0) {
            return 1;
        }

        if (!($leadByte & 0x20)) {
            return 2;
        }

        if (!($leadByte & 0x10)) {
            return 3;
        }

        return 4;
    }
}
0144