File indexing completed on 2025-01-19 05:21:26
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */


/**
 * A Term represents a word from text.  This is the unit of search.  It is
 * composed of two elements, the text of the word, as a string, and the name of
 * the field that the text occurred in, an interned string.
 *
 * Note that terms may represent more than words from text fields, but also
 * things like dates, email addresses, urls, etc.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_Term
{
    /**
     * Field name or field number (depending on context)
     *
     * @var mixed
     */
    public $field;

    /**
     * Term value
     *
     * @var string
     */
    public $text;


    /**
     * Object constructor
     *
     * When $field is null (or omitted) the field is taken from
     * Zend_Search_Lucene::getDefaultSearchField().
     *
     * @param string     $text  Term value
     * @param mixed|null $field Field name or field number; null selects the default search field
     */
    public function __construct($text, $field = null)
    {
        $this->field = ($field === null) ? Zend_Search_Lucene::getDefaultSearchField() : $field;
        $this->text  = $text;
    }


    /**
     * Returns term key
     *
     * The key is the field, a NUL byte separator and the term text,
     * concatenated in that order.
     *
     * @return string
     */
    public function key()
    {
        return $this->field . chr(0) . $this->text;
    }

    /**
     * Get term prefix
     *
     * Returns the leading part of $str that holds at most $length characters.
     * Character width is derived from each lead byte only (see the inline
     * comments); a trailing character whose declared width runs past the end
     * of the string is not included in the result.
     *
     * @param string  $str
     * @param integer $length Maximum number of characters (not bytes) to keep
     * @return string
     */
    public static function getPrefix($str, $length)
    {
        // The byte length does not change inside the loop, so compute it once.
        $strBytes    = strlen($str);
        $prefixBytes = 0;
        $prefixChars = 0;

        while ($prefixBytes < $strBytes  &&  $prefixChars < $length) {
            $leadByte  = ord($str[$prefixBytes]);
            $charBytes = 1;

            // Both top bits set => lead byte of a multi-byte sequence (2+ bytes).
            // Any other byte is counted as a one-byte character.
            if (($leadByte & 0xC0) == 0xC0) {
                $charBytes++;
                // Bit 0x20 set => at least 3 bytes.
                if ($leadByte & 0x20) {
                    $charBytes++;
                    // Bit 0x10 set => 4 bytes.
                    if ($leadByte & 0x10) {
                        $charBytes++;
                    }
                }
            }

            if ($prefixBytes + $charBytes > $strBytes) {
                // wrong character: the sequence is cut off by the end of the string
                break;
            }

            $prefixChars++;
            $prefixBytes += $charBytes;
        }

        return substr($str, 0, $prefixBytes);
    }

    /**
     * Get UTF-8 string length
     *
     * Counts characters by reading the width from each lead byte. Counting
     * stops at a trailing character whose declared width runs past the end of
     * the string; that character is not counted.
     *
     * @param string $str
     * @return integer Number of characters counted
     */
    public static function getLength($str)
    {
        // The byte length does not change inside the loop, so compute it once.
        $strBytes = strlen($str);
        $bytes    = 0;
        $chars    = 0;

        while ($bytes < $strBytes) {
            $leadByte  = ord($str[$bytes]);
            $charBytes = 1;

            // Both top bits set => lead byte of a multi-byte sequence (2+ bytes).
            // Any other byte is counted as a one-byte character.
            if (($leadByte & 0xC0) == 0xC0) {
                $charBytes++;
                // Bit 0x20 set => at least 3 bytes.
                if ($leadByte & 0x20) {
                    $charBytes++;
                    // Bit 0x10 set => 4 bytes.
                    if ($leadByte & 0x10) {
                        $charBytes++;
                    }
                }
            }

            if ($bytes + $charBytes > $strBytes) {
                // wrong character: the sequence is cut off by the end of the string
                break;
            }

            $chars++;
            $bytes += $charBytes;
        }

        return $chars;
    }
}