File indexing completed on 2025-01-19 05:21:26
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */


/** Zend_Search_Lucene_LockManager */
// require_once 'Zend/Search/Lucene/LockManager.php';


/**
 * Index writer: buffers added documents into new segments, merges existing
 * segments and publishes every change as a new "segments_N" generation file.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_Writer
{
    /**
     * @todo Implement Analyzer substitution
     * @todo Implement Zend_Search_Lucene_Storage_DirectoryRAM and Zend_Search_Lucene_Storage_FileRAM to use it for
     *       temporary index files
     * @todo Directory lock processing
     */

    /**
     * Number of documents required before the buffered in-memory
     * documents are written into a new Segment
     *
     * Default value is 10
     *
     * @var integer
     */
    public $maxBufferedDocs = 10;

    /**
     * Largest number of documents ever merged by addDocument().
     * Small values (e.g., less than 10,000) are best for interactive indexing,
     * as this limits the length of pauses while indexing to a few seconds.
     * Larger values are best for batched indexing and speedier searches.
     *
     * Default value is PHP_INT_MAX
     *
     * @var integer
     */
    public $maxMergeDocs = PHP_INT_MAX;

    /**
     * Determines how often segment indices are merged by addDocument().
     *
     * With smaller values, less RAM is used while indexing,
     * and searches on unoptimized indices are faster,
     * but indexing speed is slower.
     *
     * With larger values, more RAM is used during indexing,
     * and while searches on unoptimized indices are slower,
     * indexing is faster.
     *
     * Thus larger values (> 10) are best for batch index creation,
     * and smaller values (< 10) for indices that are interactively maintained.
     *
     * Default value is 10
     *
     * @var integer
     */
    public $mergeFactor = 10;

    /**
     * File system adapter.
     *
     * @var Zend_Search_Lucene_Storage_Directory
     */
    private $_directory = null;


    /**
     * Changes counter.
     *
     * Incremented by addDocument() and added to the index version the next
     * time the segments file is rewritten.
     *
     * @var integer
     */
    private $_versionUpdate = 0;

    /**
     * List of the segments, created by index writer
     * Array of Zend_Search_Lucene_Index_SegmentInfo objects
     *
     * @var array
     */
    private $_newSegments = array();

    /**
     * List of segments to be deleted on commit
     *
     * Keyed by segment name; each value repeats the name.
     *
     * @var array
     */
    private $_segmentsToDelete = array();

    /**
     * Current segment to add documents
     *
     * @var Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter
     */
    private $_currentSegment = null;

    /**
     * Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
     *
     * It's a reference to the corresponding Zend_Search_Lucene::$_segmentInfos array
     *
     * @var array Zend_Search_Lucene_Index_SegmentInfo
     */
    private $_segmentInfos;

    /**
     * Index target format version
     *
     * @var integer
     */
    private $_targetFormatVersion;

    /**
     * List of index file extensions
     *
     * Keyed by extension so membership can be tested with isset().
     *
     * @var array
     */
    private static $_indexExtensions = array('.cfs' => '.cfs',
                                             '.cfx' => '.cfx',
                                             '.fnm' => '.fnm',
                                             '.fdx' => '.fdx',
                                             '.fdt' => '.fdt',
                                             '.tis' => '.tis',
                                             '.tii' => '.tii',
                                             '.frq' => '.frq',
                                             '.prx' => '.prx',
                                             '.tvx' => '.tvx',
                                             '.tvd' => '.tvd',
                                             '.tvf' => '.tvf',
                                             '.del' => '.del',
                                             '.sti' => '.sti' );


    /**
     * Create empty index
     *
     * With $generation == 0 the directory is first cleared of index files and
     * a pre-2.1 layout ('segments' + 'deletable') is written; otherwise
     * 'segments.gen' and the generation-specific segments file are written.
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param integer $generation
     * @param integer $nameCount
     */
    public static function createIndex(Zend_Search_Lucene_Storage_Directory $directory, $generation, $nameCount)
    {
        if ($generation == 0) {
            // Create index in pre-2.1 mode
            foreach ($directory->fileList() as $file) {
                if ($file == 'deletable' ||
                    $file == 'segments'  ||
                    isset(self::$_indexExtensions[ substr($file, strlen($file)-4)]) ||
                    preg_match('/\.f\d+$/i', $file) /* matches <segment_name>.f<decimal_number> file names */) {
                    $directory->deleteFile($file);
                }
            }

            $segmentsFile = $directory->createFile('segments');
            $segmentsFile->writeInt((int)0xFFFFFFFF);

            // write version (initialized by current time)
            $segmentsFile->writeLong(round(microtime(true)));

            // write name counter
            $segmentsFile->writeInt($nameCount);
            // write segment counter
            $segmentsFile->writeInt(0);

            $deletableFile = $directory->createFile('deletable');
            // write counter
            $deletableFile->writeInt(0);
        } else {
            $genFile = $directory->createFile('segments.gen');

            $genFile->writeInt((int)0xFFFFFFFE);
            // Write generation two times
            $genFile->writeLong($generation);
            $genFile->writeLong($generation);

            $segmentsFile = $directory->createFile(Zend_Search_Lucene::getSegmentFileName($generation));
            $segmentsFile->writeInt((int)0xFFFFFFFD);

            // write version (initialized by current time)
            $segmentsFile->writeLong(round(microtime(true)));

            // write name counter
            $segmentsFile->writeInt($nameCount);
            // write segment counter
            $segmentsFile->writeInt(0);
        }
    }

    /**
     * Open the index for writing
     *
     * $segmentInfos is taken by reference, so changes made by the writer are
     * visible through the caller's array.
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param array $segmentInfos
     * @param integer $targetFormatVersion
     */
    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, &$segmentInfos, $targetFormatVersion)
    {
        $this->_directory           = $directory;
        $this->_segmentInfos        = &$segmentInfos;
        $this->_targetFormatVersion = $targetFormatVersion;
    }

    /**
     * Adds a document to this index.
     *
     * The document goes into the current in-memory segment, which is committed
     * once it holds maxBufferedDocs documents. Auto-merging is attempted after
     * every added document.
     *
     * @param Zend_Search_Lucene_Document $document
     */
    public function addDocument(Zend_Search_Lucene_Document $document)
    {
        /** Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter */
        // require_once 'Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php';

        if ($this->_currentSegment === null) {
            $this->_currentSegment =
                new Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter($this->_directory, $this->_newSegmentName());
        }
        $this->_currentSegment->addDocument($document);

        if ($this->_currentSegment->count() >= $this->maxBufferedDocs) {
            $this->commit();
        }

        $this->_maybeMergeSegments();

        $this->_versionUpdate++;
    }


    /**
     * Check if we have anything to merge
     *
     * Walks the segments in ascending size order using the same size classes
     * as the auto-merge procedure, but only reports whether a merge would be
     * triggered; nothing is modified.
     *
     * @return boolean
     */
    private function _hasAnythingToMerge()
    {
        $segmentSizes = array();
        foreach ($this->_segmentInfos as $segName => $segmentInfo) {
            $segmentSizes[$segName] = $segmentInfo->count();
        }
        asort($segmentSizes, SORT_NUMERIC);

        $poolSize    = 0;
        $sizeToMerge = $this->maxBufferedDocs;
        foreach ($segmentSizes as $size) {
            // Check, if segment comes into a new merging block
            while ($size >= $sizeToMerge) {
                // Previous block is large enough to be merged
                if ($poolSize >= $sizeToMerge) {
                    return true;
                }
                $poolSize = 0;

                $sizeToMerge *= $this->mergeFactor;

                if ($sizeToMerge > $this->maxMergeDocs) {
                    return false;
                }
            }

            $poolSize += $size;
        }

        return $poolSize >= $sizeToMerge;
    }

    /**
     * Merge segments if necessary
     *
     * Does nothing when the optimization lock cannot be obtained. The lock is
     * released on every exit path, including when merging throws.
     */
    private function _maybeMergeSegments()
    {
        if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
            return;
        }

        try {
            if ($this->_hasAnythingToMerge()) {
                // Update segments list to be sure all segments are not merged yet by another process
                //
                // Segment merging functionality is concentrated in this class and surrounded
                // by optimization lock obtaining/releasing.
                // _updateSegments() refreshes segments list from the latest index generation.
                // So only new segments can be added to the index while we are merging some already existing
                // segments.
                // Newly added segments will be also included into the index by the _updateSegments() call
                // either by another process or by the current process with the commit() call at the end of _mergeSegments() method.
                // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
                $this->_updateSegments();

                $this->_mergeEligibleSegments();
            }
        } catch (Exception $e) {
            // Do not leave the optimization lock held when merging fails
            Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
            throw $e;
        }

        Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
    }

    /**
     * Perform standard auto-optimization procedure
     *
     * Segments are visited in ascending size order and pooled per size class
     * (maxBufferedDocs, then multiplied by mergeFactor for each next class).
     * A pool is merged when it holds at least the current class size; the
     * procedure stops once the class size exceeds maxMergeDocs.
     *
     * The caller is expected to hold the optimization lock.
     */
    private function _mergeEligibleSegments()
    {
        $segmentSizes = array();
        foreach ($this->_segmentInfos as $segName => $segmentInfo) {
            $segmentSizes[$segName] = $segmentInfo->count();
        }
        asort($segmentSizes, SORT_NUMERIC);

        $mergePool   = array();
        $poolSize    = 0;
        $sizeToMerge = $this->maxBufferedDocs;
        foreach ($segmentSizes as $segName => $size) {
            // Check, if segment comes into a new merging block
            while ($size >= $sizeToMerge) {
                // Merge previous block if it's large enough
                if ($poolSize >= $sizeToMerge) {
                    $this->_mergeSegments($mergePool);
                }
                $mergePool = array();
                $poolSize  = 0;

                $sizeToMerge *= $this->mergeFactor;

                if ($sizeToMerge > $this->maxMergeDocs) {
                    return;
                }
            }

            $mergePool[] = $this->_segmentInfos[$segName];
            $poolSize += $size;
        }

        if ($poolSize >= $sizeToMerge) {
            $this->_mergeSegments($mergePool);
        }
    }

    /**
     * Merge specified segments
     *
     * $segments is an array of SegmentInfo objects
     *
     * Source segments are scheduled for deletion, the merged segment (if the
     * merger produced one) is registered as new, and the result is committed.
     *
     * @param array $segments
     */
    private function _mergeSegments($segments)
    {
        $newName = $this->_newSegmentName();

        /** Zend_Search_Lucene_Index_SegmentMerger */
        // require_once 'Zend/Search/Lucene/Index/SegmentMerger.php';
        $merger = new Zend_Search_Lucene_Index_SegmentMerger($this->_directory,
                                                             $newName);
        foreach ($segments as $segmentInfo) {
            $merger->addSource($segmentInfo);
            $this->_segmentsToDelete[$segmentInfo->getName()] = $segmentInfo->getName();
        }

        $newSegment = $merger->merge();
        if ($newSegment !== null) {
            $this->_newSegments[$newSegment->getName()] = $newSegment;
        }

        $this->commit();
    }

    /**
     * Update segments file by adding current segment to a list
     *
     * Under the index write lock: copies the latest segments file into the
     * next generation (dropping segments scheduled for deletion, appending
     * newly created ones), updates 'segments.gen', and - when the read lock
     * can be escalated - removes files that are no longer referenced.
     *
     * If writing the new generation fails, the previous generation number is
     * restored in 'segments.gen' before the write lock is released.
     *
     * @throws Zend_Search_Lucene_Exception
     */
    private function _updateSegments()
    {
        // Get an exclusive index lock
        Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

        // Write down changes for the segments
        foreach ($this->_segmentInfos as $segInfo) {
            $segInfo->writeChanges();
        }


        $generation = Zend_Search_Lucene::getActualGeneration($this->_directory);
        $segmentsFile   = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);
        $newSegmentFile = $this->_directory->createFile(Zend_Search_Lucene::getSegmentFileName(++$generation), false);

        try {
            $genFile = $this->_directory->getFileObject('segments.gen', false);
        } catch (Zend_Search_Lucene_Exception $e) {
            if (strpos($e->getMessage(), 'is not readable') !== false) {
                $genFile = $this->_directory->createFile('segments.gen');
            } else {
                // Nothing has been published yet; just give the write lock back
                Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

                throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
            }
        }

        $genFile->writeInt((int)0xFFFFFFFE);
        // Write generation (first copy)
        $genFile->writeLong($generation);

        try {
            // Write format marker
            if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_1) {
                $newSegmentFile->writeInt((int)0xFFFFFFFD);
            } else if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                $newSegmentFile->writeInt((int)0xFFFFFFFC);
            }

            // Read src file format identifier
            $format = $segmentsFile->readInt();
            if ($format == (int)0xFFFFFFFF) {
                $srcFormat = Zend_Search_Lucene::FORMAT_PRE_2_1;
            } else if ($format == (int)0xFFFFFFFD) {
                $srcFormat = Zend_Search_Lucene::FORMAT_2_1;
            } else if ($format == (int)0xFFFFFFFC) {
                $srcFormat = Zend_Search_Lucene::FORMAT_2_3;
            } else {
                throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
            }

            $version = $segmentsFile->readLong() + $this->_versionUpdate;
            $this->_versionUpdate = 0;
            $newSegmentFile->writeLong($version);

            // Write segment name counter
            $newSegmentFile->writeInt($segmentsFile->readInt());

            // Get number of segments offset
            $numOfSegmentsOffset = $newSegmentFile->tell();
            // Write dummy data (segment counter)
            $newSegmentFile->writeInt(0);

            // Read number of segments
            $segmentsCount = $segmentsFile->readInt();

            $segments = array();
            for ($count = 0; $count < $segmentsCount; $count++) {
                $segName = $segmentsFile->readString();
                $segSize = $segmentsFile->readInt();

                if ($srcFormat == Zend_Search_Lucene::FORMAT_PRE_2_1) {
                    // pre-2.1 index format
                    $delGen            = 0;
                    $hasSingleNormFile = false;
                    $numField          = (int)0xFFFFFFFF;
                    $isCompoundByte    = 0;
                    $docStoreOptions   = null;
                } else {
                    $delGen = $segmentsFile->readLong();

                    if ($srcFormat == Zend_Search_Lucene::FORMAT_2_3) {
                        $docStoreOffset = $segmentsFile->readInt();

                        if ($docStoreOffset != (int)0xFFFFFFFF) {
                            $docStoreSegment        = $segmentsFile->readString();
                            $docStoreIsCompoundFile = $segmentsFile->readByte();

                            $docStoreOptions = array('offset'     => $docStoreOffset,
                                                     'segment'    => $docStoreSegment,
                                                     'isCompound' => ($docStoreIsCompoundFile == 1));
                        } else {
                            $docStoreOptions = null;
                        }
                    } else {
                        $docStoreOptions = null;
                    }

                    $hasSingleNormFile = $segmentsFile->readByte();
                    $numField          = $segmentsFile->readInt();

                    $normGens = array();
                    if ($numField != (int)0xFFFFFFFF) {
                        for ($count1 = 0; $count1 < $numField; $count1++) {
                            $normGens[] = $segmentsFile->readLong();
                        }
                    }
                    $isCompoundByte    = $segmentsFile->readByte();
                }

                // _segmentsToDelete is keyed by segment name
                if (!isset($this->_segmentsToDelete[$segName])) {
                    // Load segment if necessary
                    if (!isset($this->_segmentInfos[$segName])) {
                        // Default to "unknown" so an unexpected flag byte never reuses
                        // the value computed for a previously processed segment
                        $isCompound = null;
                        if ($isCompoundByte == 0xFF) {
                            // The segment is not a compound file
                            $isCompound = false;
                        } else if ($isCompoundByte == 0x00) {
                            // The status is unknown
                            $isCompound = null;
                        } else if ($isCompoundByte == 0x01) {
                            // The segment is a compound file
                            $isCompound = true;
                        }

                        /** Zend_Search_Lucene_Index_SegmentInfo */
                        // require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
                        $this->_segmentInfos[$segName] =
                                    new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                                             $segName,
                                                                             $segSize,
                                                                             $delGen,
                                                                             $docStoreOptions,
                                                                             $hasSingleNormFile,
                                                                             $isCompound);
                    } else {
                        // Retrieve actual deletions file generation number
                        $delGen = $this->_segmentInfos[$segName]->getDelGen();
                    }

                    $newSegmentFile->writeString($segName);
                    $newSegmentFile->writeInt($segSize);
                    $newSegmentFile->writeLong($delGen);
                    if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                        if ($docStoreOptions !== null) {
                            $newSegmentFile->writeInt($docStoreOffset);
                            $newSegmentFile->writeString($docStoreSegment);
                            $newSegmentFile->writeByte($docStoreIsCompoundFile);
                        } else {
                            // Set DocStoreOffset to -1
                            $newSegmentFile->writeInt((int)0xFFFFFFFF);
                        }
                    } else if ($docStoreOptions !== null) {
                        // The write lock is intentionally kept here: the catch block below
                        // restores the previous generation and then releases the lock
                        throw new Zend_Search_Lucene_Exception('Index conversion to lower format version is not supported.');
                    }

                    $newSegmentFile->writeByte($hasSingleNormFile);
                    $newSegmentFile->writeInt($numField);
                    if ($numField != (int)0xFFFFFFFF) {
                        foreach ($normGens as $normGen) {
                            $newSegmentFile->writeLong($normGen);
                        }
                    }
                    $newSegmentFile->writeByte($isCompoundByte);

                    $segments[$segName] = $segSize;
                }
            }
            $segmentsFile->close();

            $segmentsCount = count($segments) + count($this->_newSegments);

            foreach ($this->_newSegments as $segName => $segmentInfo) {
                $newSegmentFile->writeString($segName);
                $newSegmentFile->writeInt($segmentInfo->count());

                // delete file generation: -1 (there is no delete file yet)
                // (written as two 32-bit halves of a 64-bit value)
                $newSegmentFile->writeInt((int)0xFFFFFFFF);
                $newSegmentFile->writeInt((int)0xFFFFFFFF);
                if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                    // docStoreOffset: -1 (segment doesn't use shared doc store)
                    $newSegmentFile->writeInt((int)0xFFFFFFFF);
                }
                // HasSingleNormFile
                $newSegmentFile->writeByte($segmentInfo->hasSingleNormFile());
                // NumField
                $newSegmentFile->writeInt((int)0xFFFFFFFF);
                // IsCompoundFile
                $newSegmentFile->writeByte($segmentInfo->isCompound() ? 1 : -1);

                $segments[$segmentInfo->getName()] = $segmentInfo->count();
                $this->_segmentInfos[$segName] = $segmentInfo;
            }
            $this->_newSegments = array();

            $newSegmentFile->seek($numOfSegmentsOffset);
            $newSegmentFile->writeInt($segmentsCount);  // Update segments count
            $newSegmentFile->close();
        } catch (Exception $e) {
            /** Restore previous index generation */
            $generation--;
            $genFile->seek(4, SEEK_SET);
            // Write generation number twice
            $genFile->writeLong($generation);
            $genFile->writeLong($generation);

            // Release index write lock
            Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

            // Throw the exception
            // require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
        }

        // Write generation (second copy)
        $genFile->writeLong($generation);


        // Check if another update or read process is not running now
        // If yes, skip clean-up procedure
        if (Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory)) {
            /**
             * Clean-up directory
             */
            $filesToDelete = array();
            $filesTypes    = array();
            $filesNumbers  = array();

            // list of .del files of currently used segments
            // each segment can have several generations of .del files
            // only last should not be deleted
            $delFiles = array();

            foreach ($this->_directory->fileList() as $file) {
                if ($file == 'deletable') {
                    // 'deletable' file
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 0; // delete this file first, since it's not used starting from Lucene v2.1
                    $filesNumbers[]  = 0;
                } else if ($file == 'segments') {
                    // 'segments' file
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 1; // second file to be deleted "zero" version of segments file (Lucene pre-2.1)
                    $filesNumbers[]  = 0;
                } else if (preg_match('/^segments_[a-zA-Z0-9]+$/i', $file)) {
                    // 'segments_xxx' file
                    // Check if it's not a just created generation file
                    if ($file != Zend_Search_Lucene::getSegmentFileName($generation)) {
                        $filesToDelete[] = $file;
                        $filesTypes[]    = 2; // first group of files for deletions
                        $filesNumbers[]  = (int)base_convert(substr($file, 9), 36, 10); // ordered by segment generation numbers
                    }
                } else if (preg_match('/(^_([a-zA-Z0-9]+))\.f\d+$/i', $file, $matches)) {
                    // one of per segment files ('<segment_name>.f<decimal_number>')
                    // Check if it's not one of the segments in the current segments set
                    if (!isset($segments[$matches[1]])) {
                        $filesToDelete[] = $file;
                        $filesTypes[]    = 3; // second group of files for deletions
                        $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
                    }
                } else if (preg_match('/(^_([a-zA-Z0-9]+))(_([a-zA-Z0-9]+))\.del$/i', $file, $matches)) {
                    // one of per segment files ('<segment_name>_<del_generation>.del' where <segment_name> is '_<segment_number>')
                    // Check if it's not one of the segments in the current segments set
                    if (!isset($segments[$matches[1]])) {
                        $filesToDelete[] = $file;
                        $filesTypes[]    = 3; // second group of files for deletions
                        $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
                    } else {
                        $segmentNumber = (int)base_convert($matches[2], 36, 10);
                        $delGeneration = (int)base_convert($matches[4], 36, 10);
                        if (!isset($delFiles[$segmentNumber])) {
                            $delFiles[$segmentNumber] = array();
                        }
                        $delFiles[$segmentNumber][$delGeneration] = $file;
                    }
                } else if (isset(self::$_indexExtensions[substr($file, strlen($file)-4)])) {
                    // one of per segment files ('<segment_name>.<ext>')
                    $segmentName = substr($file, 0, strlen($file) - 4);
                    // Check if it's not one of the segments in the current segments set
                    if (!isset($segments[$segmentName])  &&
                        ($this->_currentSegment === null  ||  $this->_currentSegment->getName() != $segmentName)) {
                        $filesToDelete[] = $file;
                        $filesTypes[]    = 3; // second group of files for deletions
                        $filesNumbers[]  = (int)base_convert(substr($file, 1 /* skip '_' */, strlen($file)-5), 36, 10); // order by segment number
                    }
                }
            }

            $maxGenNumber = 0;
            // process .del files of currently used segments
            foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
                ksort($delFiles[$segmentNumber], SORT_NUMERIC);
                array_pop($delFiles[$segmentNumber]); // remove last delete file generation from candidates for deleting

                end($delFiles[$segmentNumber]);
                $lastGenNumber = key($delFiles[$segmentNumber]);
                if ($lastGenNumber > $maxGenNumber) {
                    $maxGenNumber = $lastGenNumber;
                }
            }
            foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
                foreach ($segmentDelFiles as $delGeneration => $file) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 4; // third group of files for deletions
                    $filesNumbers[]  = $segmentNumber*$maxGenNumber + $delGeneration; // order by <segment_number>,<del_generation> pair
                }
            }

            // Reorder files for deleting
            array_multisort($filesTypes,    SORT_ASC, SORT_NUMERIC,
                            $filesNumbers,  SORT_ASC, SORT_NUMERIC,
                            $filesToDelete, SORT_ASC, SORT_STRING);

            foreach ($filesToDelete as $file) {
                try {
                    /** Skip shared docstore segments deleting */
                    /** @todo Process '.cfx' files to check if them are already unused */
                    if (substr($file, strlen($file)-4) != '.cfx') {
                        $this->_directory->deleteFile($file);
                    }
                } catch (Zend_Search_Lucene_Exception $e) {
                    if (strpos($e->getMessage(), 'Can\'t delete file') === false) {
                        // That's not "file is under processing or already deleted" exception
                        // Pass it through
                        throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
                    }
                }
            }

            // Return read lock into the previous state
            Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
        } else {
            // Only release resources if another index reader is running now
            foreach ($this->_segmentsToDelete as $segName) {
                foreach (self::$_indexExtensions as $ext) {
                    $this->_directory->purgeFile($segName . $ext);
                }
            }
        }

        // Clean-up _segmentsToDelete container
        $this->_segmentsToDelete = array();


        // Release index write lock
        Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

        // Remove unused segments from segments list
        foreach ($this->_segmentInfos as $segName => $segmentInfo) {
            if (!isset($segments[$segName])) {
                unset($this->_segmentInfos[$segName]);
            }
        }
    }

    /**
     * Commit current changes
     *
     * Closes the segment currently being filled (if any), registers the
     * resulting segment as new and rewrites the segments file.
     */
    public function commit()
    {
        if ($this->_currentSegment !== null) {
            $newSegment = $this->_currentSegment->close();
            if ($newSegment !== null) {
                $this->_newSegments[$newSegment->getName()] = $newSegment;
            }
            $this->_currentSegment = null;
        }

        $this->_updateSegments();
    }


    /**
     * Merges the provided indexes into this index.
     *
     * Not implemented: the method body is empty.
     *
     * @param array $readers
     * @return void
     */
    public function addIndexes($readers)
    {
        /**
         * @todo implementation
         */
    }

    /**
     * Merges all segments together into new one
     *
     * Returns true on success and false if another optimization or auto-optimization process
     * is running now
     *
     * The optimization lock is released even when merging throws; the
     * exception is then rethrown unchanged.
     *
     * @return boolean
     */
    public function optimize()
    {
        if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
            return false;
        }

        try {
            // Update segments list to be sure all segments are not merged yet by another process
            //
            // Segment merging functionality is concentrated in this class and surrounded
            // by optimization lock obtaining/releasing.
            // _updateSegments() refreshes segments list from the latest index generation.
            // So only new segments can be added to the index while we are merging some already existing
            // segments.
            // Newly added segments will be also included into the index by the _updateSegments() call
            // either by another process or by the current process with the commit() call at the end of _mergeSegments() method.
            // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
            $this->_updateSegments();

            $this->_mergeSegments($this->_segmentInfos);
        } catch (Exception $e) {
            // Do not leave the optimization lock held when merging fails
            Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
            throw $e;
        }

        Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);

        return true;
    }

    /**
     * Get name for new segment
     *
     * Reads the segment name counter from the current segments file, stores
     * the incremented value back in place (under the write lock) and returns
     * '_' followed by the previous counter value in base 36.
     *
     * @return string
     */
    private function _newSegmentName()
    {
        Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

        $generation = Zend_Search_Lucene::getActualGeneration($this->_directory);
        $segmentsFile = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);

        $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
        $segmentNameCounter = $segmentsFile->readInt();

        $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
        $segmentsFile->writeInt($segmentNameCounter + 1);

        // Flush output to guarantee that wrong value will not be loaded between unlock and
        // return (which calls $segmentsFile destructor)
        $segmentsFile->flush();

        Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

        return '_' . base_convert($segmentNameCounter, 10, 36);
    }

}