File indexing completed on 2024-06-23 05:55:39

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Search_Lucene
0017  * @subpackage Search
0018  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0019  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0020  * @version    $Id$
0021  */
0022 
0023 
0024 /**
0025  * @category   Zend
0026  * @package    Zend_Search_Lucene
0027  * @subpackage Search
0028  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0029  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0030  */
abstract class Zend_Search_Lucene_Search_Query
{
    /**
     * Query boost factor.
     *
     * Initialized to 1; {@link setBoost()} stores whatever value it is given.
     *
     * @var float
     */
    private $_boost = 1;

    /**
     * Query weight.
     *
     * Null until a weight has been created for this query; cleared again
     * by {@link reset()}.
     *
     * @var Zend_Search_Lucene_Search_Weight|null
     */
    protected $_weight = null;

    /**
     * Current highlight color
     *
     * @var integer
     */
    private $_currentColorIndex = 0;

    /**
     * Gets the boost for this clause.  Documents matching
     * this clause will (in addition to the normal weightings) have their score
     * multiplied by boost.   The boost is 1.0 by default.
     *
     * @return float
     */
    public function getBoost()
    {
        return $this->_boost;
    }

    /**
     * Sets the boost for this query clause to $boost.
     *
     * The value is stored as given (no validation or casting is performed).
     *
     * @param float $boost
     */
    public function setBoost($boost)
    {
        $this->_boost = $boost;
    }

    /**
     * Score specified document
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    abstract public function score($docId, Zend_Search_Lucene_Interface $reader);

    /**
     * Get document ids likely matching the query
     *
     * It's an array with document ids as keys (performance considerations)
     *
     * @return array
     */
    abstract public function matchedDocs();

    /**
     * Execute query in context of index reader
     * It also initializes necessary internal structures
     *
     * Query specific implementation
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
     */
    abstract public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null);

    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    abstract public function createWeight(Zend_Search_Lucene_Interface $reader);

    /**
     * Constructs and initializes a Weight for a _top-level_query_.
     *
     * If a weight is already present it is returned untouched. Otherwise a
     * weight is created via {@link createWeight()}, normalized with the query
     * norm obtained from the reader's similarity, and returned.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    protected function _initWeight(Zend_Search_Lucene_Interface $reader)
    {
        // Check, that it's a top-level query and query weight is not initialized yet.
        if ($this->_weight !== null) {
            return $this->_weight;
        }

        // createWeight() is documented to *return* the weight. Implementations
        // may additionally store it in $this->_weight themselves; when they do,
        // that stored instance wins (previous behaviour). When they only return
        // it, adopt the returned instance instead of dereferencing null below.
        $createdWeight = $this->createWeight($reader);
        if ($this->_weight === null) {
            $this->_weight = $createdWeight;
        }

        $sum       = $this->_weight->sumOfSquaredWeights();
        $queryNorm = $reader->getSimilarity()->queryNorm($sum);
        $this->_weight->normalize($queryNorm);

        // Return the weight on this path too, so both exits behave the same.
        return $this->_weight;
    }

    /**
     * Re-write query into primitive queries in the context of specified index
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    abstract public function rewrite(Zend_Search_Lucene_Interface $index);

    /**
     * Optimize query in the context of specified index
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    abstract public function optimize(Zend_Search_Lucene_Interface $index);

    /**
     * Reset query, so it can be reused within other queries or
     * with other indices.
     *
     * Only the cached weight is discarded here.
     */
    public function reset()
    {
        $this->_weight = null;
    }


    /**
     * Print a query
     *
     * @return string
     */
    abstract public function __toString();

    /**
     * Return query terms
     *
     * @return array
     */
    abstract public function getQueryTerms();

    /**
     * Query specific matches highlighting
     *
     * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
     */
    abstract protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter);

    /**
     * Highlight matches in $inputHTML
     *
     * The input is loaded into a Zend_Search_Lucene_Document_Html, handed to
     * the highlighter, processed by the query specific
     * {@link _highlightMatches()} and returned as a complete HTML document.
     *
     * @param string $inputHTML
     * @param string  $defaultEncoding   HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
     * @param Zend_Search_Lucene_Search_Highlighter_Interface|null $highlighter  Defaults to Zend_Search_Lucene_Search_Highlighter_Default when null
     * @return string
     */
    public function highlightMatches($inputHTML, $defaultEncoding = '', $highlighter = null)
    {
        if ($highlighter === null) {
            // require_once 'Zend/Search/Lucene/Search/Highlighter/Default.php';
            $highlighter = new Zend_Search_Lucene_Search_Highlighter_Default();
        }

        /** Zend_Search_Lucene_Document_Html */
        // require_once 'Zend/Search/Lucene/Document/Html.php';

        $doc = Zend_Search_Lucene_Document_Html::loadHTML($inputHTML, false, $defaultEncoding);
        $highlighter->setDocument($doc);

        $this->_highlightMatches($highlighter);

        return $doc->getHTML();
    }

    /**
     * Highlight matches in $inputHtmlFragment and return it (without HTML header and body tag)
     *
     * The fragment is converted from $encoding to UTF-8 and wrapped into a
     * minimal HTML document declaring a UTF-8 charset before it is loaded.
     *
     * @param string $inputHtmlFragment
     * @param string  $encoding   Input HTML string encoding
     * @param Zend_Search_Lucene_Search_Highlighter_Interface|null $highlighter  Defaults to Zend_Search_Lucene_Search_Highlighter_Default when null
     * @return string
     */
    public function htmlFragmentHighlightMatches($inputHtmlFragment, $encoding = 'UTF-8', $highlighter = null)
    {
        if ($highlighter === null) {
            // require_once 'Zend/Search/Lucene/Search/Highlighter/Default.php';
            $highlighter = new Zend_Search_Lucene_Search_Highlighter_Default();
        }

        // '//IGNORE' asks iconv to drop characters it cannot represent in UTF-8.
        $inputHTML = '<html><head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head><body>'
                   . iconv($encoding, 'UTF-8//IGNORE', $inputHtmlFragment) . '</body></html>';

        /** Zend_Search_Lucene_Document_Html */
        // require_once 'Zend/Search/Lucene/Document/Html.php';

        $doc = Zend_Search_Lucene_Document_Html::loadHTML($inputHTML);
        $highlighter->setDocument($doc);

        $this->_highlightMatches($highlighter);

        return $doc->getHtmlBody();
    }
}
0233