File indexing completed on 2024-05-26 06:03:23

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Search_Lucene
0017  * @subpackage Document
0018  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0019  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0020  * @version    $Id$
0021  */
0022 
0023 
0024 /**
0025  * A field is a section of a Document.  Each field has two parts,
0026  * a name and a value. Values may be free text or they may be atomic
0027  * keywords, which are not further processed. Such keywords may
0028  * be used to represent dates, urls, etc.  Fields are optionally
0029  * stored in the index, so that they may be returned with hits
0030  * on the document.
0031  *
0032  * @category   Zend
0033  * @package    Zend_Search_Lucene
0034  * @subpackage Document
0035  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0036  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0037  */
class Zend_Search_Lucene_Field
{
    /**
     * Name of the field.
     *
     * @var string
     */
    public $name;

    /**
     * Value of the field (text, or raw bytes for binary fields).
     *
     * @var string
     */
    public $value;

    /**
     * Whether the value is stored in the index so it can be returned with hits.
     *
     * @var boolean
     */
    public $isStored    = false;

    /**
     * Whether the field is indexed, i.e. can be searched on.
     *
     * @var boolean
     */
    public $isIndexed   = true;

    /**
     * Whether the value is tokenized as text before it is indexed.
     *
     * @var boolean
     */
    public $isTokenized = true;

    /**
     * Whether the value is stored as binary data.
     *
     * @var boolean
     */
    public $isBinary    = false;

    /**
     * Whether the field is stored as a term vector.
     *
     * @var boolean
     */
    public $storeTermVector = false;

    /**
     * Boost factor of the field.
     * It is not stored directly in the index, but it affects the normalization factor.
     *
     * @var float
     */
    public $boost = 1.0;

    /**
     * Character encoding of the field value.
     *
     * @var string
     */
    public $encoding;

    /**
     * Creates a field from its name, value and storage/indexing flags.
     *
     * For a binary field the supplied $encoding and $isTokenized arguments are
     * ignored: the encoding is set to an empty string and tokenization is
     * switched off.
     *
     * @param string  $name
     * @param string  $value
     * @param string  $encoding
     * @param boolean $isStored
     * @param boolean $isIndexed
     * @param boolean $isTokenized
     * @param boolean $isBinary
     */
    public function __construct($name, $value, $encoding, $isStored, $isIndexed, $isTokenized, $isBinary = false)
    {
        $this->name      = $name;
        $this->value     = $value;
        $this->isStored  = $isStored;
        $this->isIndexed = $isIndexed;
        $this->isBinary  = $isBinary;

        // Binary payloads carry no character encoding and are never tokenized.
        $this->encoding    = $isBinary ? ''    : $encoding;
        $this->isTokenized = $isBinary ? false : $isTokenized;

        // Every new field starts without a term vector and with a neutral boost.
        $this->boost           = 1.0;
        $this->storeTermVector = false;
    }


    /**
     * Builds a string field that is stored and indexed but not tokenized.
     * Handy for non-text values such as a date or a URL.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return Zend_Search_Lucene_Field
     */
    public static function keyword($name, $value, $encoding = '')
    {
        return new self(
            $name,
            $value,
            $encoding,
            true,   // stored
            true,   // indexed
            false   // tokenized
        );
    }


    /**
     * Builds a string field that is stored in the index (for return with hits)
     * but is neither indexed nor tokenized.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return Zend_Search_Lucene_Field
     */
    public static function unIndexed($name, $value, $encoding = '')
    {
        return new self(
            $name,
            $value,
            $encoding,
            true,   // stored
            false,  // indexed
            false   // tokenized
        );
    }


    /**
     * Builds a binary field that is stored in the index (for return with hits)
     * but is neither indexed nor tokenized. The encoding is always empty.
     *
     * @param string $name
     * @param string $value
     * @return Zend_Search_Lucene_Field
     */
    public static function binary($name, $value)
    {
        return new self(
            $name,
            $value,
            '',
            true,   // stored
            false,  // indexed
            false,  // tokenized
            true    // binary
        );
    }

    /**
     * Builds a string field that is stored, indexed and tokenized.
     * Useful for short text fields such as "title" or "subject".
     * No term vector is stored for this field.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return Zend_Search_Lucene_Field
     */
    public static function text($name, $value, $encoding = '')
    {
        return new self(
            $name,
            $value,
            $encoding,
            true,   // stored
            true,   // indexed
            true    // tokenized
        );
    }


    /**
     * Builds a string field that is indexed and tokenized but not stored
     * in the index.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return Zend_Search_Lucene_Field
     */
    public static function unStored($name, $value, $encoding = '')
    {
        return new self(
            $name,
            $value,
            $encoding,
            false,  // stored
            true,   // indexed
            true    // tokenized
        );
    }

    /**
     * Returns the field value converted to UTF-8.
     *
     * When the field encoding is "utf8" or "utf-8" (compared case-insensitively)
     * the value is returned untouched. Otherwise the result of iconv() is
     * returned as-is; on AIX the source charset passed to iconv() is always
     * 'ISO8859-1' instead of the field encoding.
     *
     * @return string
     */
    public function getUtf8Value()
    {
        $alreadyUtf8 = strcasecmp($this->encoding, 'utf8') == 0
                    || strcasecmp($this->encoding, 'utf-8') == 0;

        if ($alreadyUtf8) {
            return $this->value;
        }

        // AIX is special-cased: the field encoding is replaced by ISO8859-1 there.
        $sourceCharset = (PHP_OS != 'AIX') ? $this->encoding : 'ISO8859-1';

        return iconv($sourceCharset, 'UTF-8', $this->value);
    }
}
0226