File indexing completed on 2025-01-19 05:21:27
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Document
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */


/**
 * A field is a section of a Document.  Each field has two parts,
 * a name and a value. Values may be free text or they may be atomic
 * keywords, which are not further processed. Such keywords may
 * be used to represent dates, urls, etc.  Fields are optionally
 * stored in the index, so that they may be returned with hits
 * on the document.
 *
 * Instances are normally created through the static factories
 * (keyword(), unIndexed(), binary(), text(), unStored()), each of which
 * calls the constructor with a fixed stored/indexed/tokenized combination.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Document
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Field
{
    /**
     * Field name
     *
     * @var string
     */
    public $name;

    /**
     * Field value
     *
     * @var string
     */
    public $value;

    /**
     * Field is to be stored in the index for return with search hits.
     *
     * @var boolean
     */
    public $isStored    = false;

    /**
     * Field is to be indexed, so that it may be searched on.
     *
     * @var boolean
     */
    public $isIndexed   = true;

    /**
     * Field should be tokenized as text prior to indexing.
     *
     * @var boolean
     */
    public $isTokenized = true;

    /**
     * Field is stored as binary.
     *
     * @var boolean
     */
    public $isBinary    = false;

    /**
     * Field is stored as a term vector
     *
     * @var boolean
     */
    public $storeTermVector = false;

    /**
     * Field boost factor
     * It's not stored directly in the index, but affects the normalization factor
     *
     * @var float
     */
    public $boost = 1.0;

    /**
     * Field value encoding.
     * The constructor forces this to an empty string for binary fields.
     *
     * @var string
     */
    public $encoding;

    /**
     * Object constructor
     *
     * For binary fields the supplied $encoding and $isTokenized are ignored:
     * the encoding is reset to '' and tokenization is switched off.
     * The term vector flag and the boost always start at false and 1.0.
     *
     * @param string  $name
     * @param string  $value
     * @param string  $encoding
     * @param boolean $isStored
     * @param boolean $isIndexed
     * @param boolean $isTokenized
     * @param boolean $isBinary
     */
    public function __construct($name, $value, $encoding, $isStored, $isIndexed, $isTokenized, $isBinary = false)
    {
        $this->name  = $name;
        $this->value = $value;

        if ($isBinary) {
            // Binary payloads carry no character encoding and are never tokenized.
            $this->encoding    = '';
            $this->isTokenized = false;
        } else {
            $this->encoding    = $encoding;
            $this->isTokenized = $isTokenized;
        }

        $this->isStored  = $isStored;
        $this->isIndexed = $isIndexed;
        $this->isBinary  = $isBinary;

        $this->storeTermVector = false;
        $this->boost           = 1.0;
    }


    /**
     * Constructs a String-valued Field that is not tokenized, but is indexed
     * and stored.  Useful for non-text fields, e.g. date or url.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return Zend_Search_Lucene_Field
     */
    public static function keyword($name, $value, $encoding = '')
    {
        // stored = true, indexed = true, tokenized = false
        return new self($name, $value, $encoding, true, true, false);
    }


    /**
     * Constructs a String-valued Field that is not tokenized nor indexed,
     * but is stored in the index, for return with hits.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return Zend_Search_Lucene_Field
     */
    public static function unIndexed($name, $value, $encoding = '')
    {
        // stored = true, indexed = false, tokenized = false
        return new self($name, $value, $encoding, true, false, false);
    }


    /**
     * Constructs a Binary String valued Field that is not tokenized nor indexed,
     * but is stored in the index, for return with hits.
     *
     * No encoding is accepted: binary fields always get an empty encoding.
     *
     * @param string $name
     * @param string $value
     * @return Zend_Search_Lucene_Field
     */
    public static function binary($name, $value)
    {
        // stored = true, indexed = false, tokenized = false, binary = true
        return new self($name, $value, '', true, false, false, true);
    }

    /**
     * Constructs a String-valued Field that is tokenized and indexed,
     * and is stored in the index, for return with hits.  Useful for short text
     * fields, like "title" or "subject". Term vector will not be stored for this field.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return Zend_Search_Lucene_Field
     */
    public static function text($name, $value, $encoding = '')
    {
        // stored = true, indexed = true, tokenized = true
        return new self($name, $value, $encoding, true, true, true);
    }


    /**
     * Constructs a String-valued Field that is tokenized and indexed,
     * but that is not stored in the index.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return Zend_Search_Lucene_Field
     */
    public static function unStored($name, $value, $encoding = '')
    {
        // stored = false, indexed = true, tokenized = true
        return new self($name, $value, $encoding, false, true, true);
    }

    /**
     * Get field value in UTF-8 encoding
     *
     * When the field encoding is already "utf8"/"utf-8" (compared
     * case-insensitively) the value is returned untouched; otherwise it is
     * converted with iconv().
     *
     * @return string Converted value; iconv() yields false if the conversion fails.
     */
    public function getUtf8Value()
    {
        // Coerce once so a null encoding (possible via the public constructor
        // or property) does not trigger "passing null" deprecations on PHP 8.1+.
        $encoding = (string) $this->encoding;

        if (strcasecmp($encoding, 'utf8') === 0 || strcasecmp($encoding, 'utf-8') === 0) {
            return $this->value;
        }

        if (PHP_OS !== 'AIX') {
            return iconv($encoding, 'UTF-8', $this->value);
        }

        // NOTE(review): on AIX the declared encoding is ignored and ISO8859-1 is
        // assumed - presumably a workaround for AIX's iconv; confirm before changing.
        return iconv('ISO8859-1', 'UTF-8', $this->value);
    }
}