File indexing completed on 2025-01-19 05:21:25
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */

/** Zend_Search_Lucene_Index_SegmentInfo */
// require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';


/**
 * Merges a set of source segments into one new segment.
 *
 * Usage: construct with the target directory and new segment name, register
 * every source segment with addSource(), then call merge() exactly once.
 * All output goes through a Zend_Search_Lucene_Index_SegmentWriter_StreamWriter.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_SegmentMerger
{
    /**
     * Target segment writer
     *
     * @var Zend_Search_Lucene_Index_SegmentWriter_StreamWriter
     */
    private $_writer;

    /**
     * Number of non-deleted documents copied into the new segment.
     * Reset and recomputed by _mergeStoredFields().
     *
     * @var integer
     */
    private $_docCount;

    /**
     * Segments to be merged, keyed by segment name
     *
     * @var array Zend_Search_Lucene_Index_SegmentInfo
     */
    private $_segmentInfos = array();

    /**
     * Flag signalling that merge() has already completed
     *
     * @var boolean
     */
    private $_mergeDone = false;

    /**
     * Field map
     * [<segment_name>][<field_number>] => <target_field_number>
     *
     * @var array
     */
    private $_fieldsMap = array();



    /**
     * Object constructor.
     *
     * Creates a new segment merger with $directory as the target to merge
     * segments into and $name as the name of the new segment.
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param string $name
     */
    public function __construct($directory, $name)
    {
        /** Zend_Search_Lucene_Index_SegmentWriter_StreamWriter */
        // require_once 'Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php';
        $this->_writer = new Zend_Search_Lucene_Index_SegmentWriter_StreamWriter($directory, $name);
    }


    /**
     * Add a segment to the collection of segments to be merged.
     *
     * Segments are stored by name, so adding a segment whose name is already
     * registered replaces the earlier entry.
     *
     * @param Zend_Search_Lucene_Index_SegmentInfo $segmentInfo
     */
    public function addSource(Zend_Search_Lucene_Index_SegmentInfo $segmentInfo)
    {
        $this->_segmentInfos[$segmentInfo->getName()] = $segmentInfo;
    }


    /**
     * Do merge.
     *
     * Merges field infos, norms, stored fields and terms (in that order) of
     * all registered source segments, then closes the writer and returns its
     * result, i.e. the descriptor of the newly created segment.
     *
     * @return Zend_Search_Lucene_Index_SegmentInfo
     * @throws Zend_Search_Lucene_Exception if merge was already performed or
     *                                      no source segments were added
     */
    public function merge()
    {
        if ($this->_mergeDone) {
            // require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Merge is already done.');
        }

        if (count($this->_segmentInfos) < 1) {
            // require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Wrong number of segments to be merged ('
                                                 . count($this->_segmentInfos)
                                                 . ').');
        }

        $this->_mergeFields();
        $this->_mergeNorms();
        $this->_mergeStoredFields();
        $this->_mergeTerms();

        $this->_mergeDone = true;

        return $this->_writer->close();
    }


    /**
     * Merge fields information.
     *
     * Registers every field of every source segment with the writer and
     * records, per segment, which target field number the writer assigned
     * to each source field number.
     */
    private function _mergeFields()
    {
        foreach ($this->_segmentInfos as $segName => $segmentInfo) {
            foreach ($segmentInfo->getFieldInfos() as $fieldInfo) {
                $this->_fieldsMap[$segName][$fieldInfo->number] = $this->_writer->addFieldInfo($fieldInfo);
            }
        }
    }

    /**
     * Merge field's normalization factors.
     *
     * For each indexed field known to the writer, appends the norm vector of
     * every source segment. Entries belonging to deleted documents are
     * dropped so that the norms stay aligned with the documents written by
     * _mergeStoredFields().
     */
    private function _mergeNorms()
    {
        foreach ($this->_writer->getFieldInfos() as $fieldInfo) {
            if (!$fieldInfo->isIndexed) {
                continue;
            }

            foreach ($this->_segmentInfos as $segmentInfo) {
                if ($segmentInfo->hasDeletions()) {
                    // Rebuild the vector entry by entry, skipping deleted docs.
                    // NOTE(review): indexing assumes one entry per document id - confirm.
                    $srcNorm = $segmentInfo->normVector($fieldInfo->name);
                    $norm    = '';
                    $docs    = $segmentInfo->count();
                    for ($count = 0; $count < $docs; $count++) {
                        if (!$segmentInfo->isDeleted($count)) {
                            $norm .= $srcNorm[$count];
                        }
                    }
                    $this->_writer->addNorm($fieldInfo->name, $norm);
                } else {
                    // No deletions: the source vector can be copied as is.
                    $this->_writer->addNorm($fieldInfo->name, $segmentInfo->normVector($fieldInfo->name));
                }
            }
        }
    }

    /**
     * Merge stored fields.
     *
     * Reads every document record from each source segment's '.fdt' file and
     * writes the stored fields of non-deleted documents to the new segment.
     * Also recomputes $_docCount as the number of documents written.
     */
    private function _mergeStoredFields()
    {
        $this->_docCount = 0;

        foreach ($this->_segmentInfos as $segmentInfo) {
            $fdtFile = $segmentInfo->openCompoundFile('.fdt');

            // Document count is loop-invariant; read it once (as _mergeNorms() does).
            $docs = $segmentInfo->count();

            for ($count = 0; $count < $docs; $count++) {
                $fieldCount   = $fdtFile->readVInt();
                $storedFields = array();

                // The record is consumed even for deleted documents: reads are
                // sequential, so skipping it would misalign the next document.
                for ($count2 = 0; $count2 < $fieldCount; $count2++) {
                    $fieldNum  = $fdtFile->readVInt();
                    $bits      = $fdtFile->readByte();
                    $fieldInfo = $segmentInfo->getField($fieldNum);

                    // Bit value 2 selects binary vs. text payload; bit value 1 is
                    // forwarded to the field (presumably the "tokenized" flag - confirm).
                    if (!($bits & 2)) { // Text data
                        $storedFields[] =
                                 new Zend_Search_Lucene_Field($fieldInfo->name,
                                                              $fdtFile->readString(),
                                                              'UTF-8',
                                                              true,
                                                              $fieldInfo->isIndexed,
                                                              $bits & 1);
                    } else {            // Binary data
                        $storedFields[] =
                                 new Zend_Search_Lucene_Field($fieldInfo->name,
                                                              $fdtFile->readBinary(),
                                                              '',
                                                              true,
                                                              $fieldInfo->isIndexed,
                                                              $bits & 1,
                                                              true);
                    }
                }

                if (!$segmentInfo->isDeleted($count)) {
                    $this->_docCount++;
                    $this->_writer->addStoredFields($storedFields);
                }
            }
        }
    }


    /**
     * Merge term dictionaries and postings.
     *
     * Performs a k-way merge over the term streams of all source segments
     * using a priority queue: position data of equal terms coming from
     * different segments is combined and written to the new segment as a
     * single term.
     */
    private function _mergeTerms()
    {
        /** Zend_Search_Lucene_Index_TermsPriorityQueue */
        // require_once 'Zend/Search/Lucene/Index/TermsPriorityQueue.php';

        $segmentInfoQueue = new Zend_Search_Lucene_Index_TermsPriorityQueue();

        // Each reset returns the start id to be used for the following segment.
        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            $segmentStartId = $segmentInfo->resetTermsStream($segmentStartId, Zend_Search_Lucene_Index_SegmentInfo::SM_MERGE_INFO);

            // Skip "empty" segments
            if ($segmentInfo->currentTerm() !== null) {
                $segmentInfoQueue->put($segmentInfo);
            }
        }

        $this->_writer->initializeDictionaryFiles();

        $termDocs = array();
        while (($segmentInfo = $segmentInfoQueue->pop()) !== null) {
            // Merge positions array (array union keeps existing keys;
            // NOTE(review): relies on document ids being distinct across segments - confirm)
            $termDocs += $segmentInfo->currentTermPositions();

            $nextSegment = $segmentInfoQueue->top();
            if ($nextSegment === null ||
                $nextSegment->currentTerm()->key() != $segmentInfo->currentTerm()->key()) {
                // We got new term
                ksort($termDocs, SORT_NUMERIC);

                // Add term if it's contained in any document
                if (count($termDocs) > 0) {
                    $this->_writer->addTerm($segmentInfo->currentTerm(), $termDocs);
                }
                $termDocs = array();
            }

            $segmentInfo->nextTerm();
            // check, if segment dictionary is finished
            if ($segmentInfo->currentTerm() !== null) {
                // Put segment back into the priority queue
                $segmentInfoQueue->put($segmentInfo);
            }
        }

        $this->_writer->closeDictionaryFiles();
    }
}