File indexing completed on 2025-03-02 05:29:43

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Search_Lucene
0017  * @subpackage Index
0018  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0019  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0020  * @version    $Id$
0021  */
0022 
0023 /** Zend_Search_Lucene_Index_TermsStream_Interface */
0024 // require_once 'Zend/Search/Lucene/Index/TermsStream/Interface.php';
0025 
0026 
0027 /** Zend_Search_Lucene_Search_Similarity */
0028 // require_once 'Zend/Search/Lucene/Search/Similarity.php';
0029 
0030 /** Zend_Search_Lucene_Index_FieldInfo */
0031 // require_once 'Zend/Search/Lucene/Index/FieldInfo.php';
0032 
0033 /** Zend_Search_Lucene_Index_Term */
0034 // require_once 'Zend/Search/Lucene/Index/Term.php';
0035 
0036 /** Zend_Search_Lucene_Index_TermInfo */
0037 // require_once 'Zend/Search/Lucene/Index/TermInfo.php';
0038 
0039 /**
0040  * @category   Zend
0041  * @package    Zend_Search_Lucene
0042  * @subpackage Index
0043  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0044  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0045  */
0046 class Zend_Search_Lucene_Index_SegmentInfo implements Zend_Search_Lucene_Index_TermsStream_Interface
0047 {
0048     /**
0049      * "Full scan vs fetch" boundary.
0050      *
0051      * If filter selectivity is less than this value, then full scan is performed
0052      * (since term entries fetching has some additional overhead).
0053      */
0054     const FULL_SCAN_VS_FETCH_BOUNDARY = 5;
0055 
0056     /**
0057      * Number of docs in a segment
0058      *
0059      * @var integer
0060      */
0061     private $_docCount;
0062 
0063     /**
0064      * Segment name
0065      *
0066      * @var string
0067      */
0068     private $_name;
0069 
0070     /**
0071      * Term Dictionary Index
0072      *
0073      * Array of arrays (Zend_Search_Lucene_Index_Term objects are represented as arrays because
0074      * of performance considerations)
0075      * [0] -> $termFieldNum
0076      * [1] -> $termValue
0077      *
0078      * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
0079      *
0080      * @var array
0081      */
0082     private $_termDictionary;
0083 
0084     /**
0085      * Term Dictionary Index TermInfos
0086      *
0087      * Array of arrays (Zend_Search_Lucene_Index_TermInfo objects are represented as arrays because
0088      * of performance considerations)
0089      * [0] -> $docFreq
0090      * [1] -> $freqPointer
0091      * [2] -> $proxPointer
0092      * [3] -> $skipOffset
0093      * [4] -> $indexPointer
0094      *
0095      * @var array
0096      */
0097     private $_termDictionaryInfos;
0098 
0099     /**
0100      * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
0101      *
0102      * @var array
0103      */
0104     private $_fields;
0105 
0106     /**
0107      * Field positions in a dictionary.
0108      * (Term dictionary contains fields ordered by names)
0109      *
0110      * @var array
0111      */
0112     private $_fieldsDicPositions;
0113 
0114 
0115     /**
0116      * Associative array where the key is the file name and the value is data offset
0117      * in a compound segment file (.cfs).
0118      *
0119      * @var array
0120      */
0121     private $_segFiles;
0122 
0123     /**
0124      * Associative array where the key is the file name and the value is file size (.cfs).
0125      *
0126      * @var array
0127      */
0128     private $_segFileSizes;
0129 
0130     /**
0131      * Delete file generation number
0132      *
0133      * -2 means autodetect latest delete generation
0134      * -1 means 'there is no delete file'
0135      *  0 means pre-2.1 format delete file
0136      *  X specifies used delete file
0137      *
0138      * @var integer
0139      */
0140     private $_delGen;
0141 
0142     /**
0143      * Segment has single norms file
0144      *
0145      * If true then one .nrm file is used for all fields
0146      * Otherwise .fN files are used
0147      *
0148      * @var boolean
0149      */
0150     private $_hasSingleNormFile;
0151 
0152     /**
0153      * Use compound segment file (*.cfs) to collect all other segment files
0154      * (excluding .del files)
0155      *
0156      * @var boolean
0157      */
0158     private $_isCompound;
0159 
0160 
0161     /**
0162      * File system adapter.
0163      *
0164      * @var Zend_Search_Lucene_Storage_Directory_Filesystem
0165      */
0166     private $_directory;
0167 
0168     /**
0169      * Normalization factors.
0170      * An array fieldName => normVector
0171      * normVector is a binary string.
0172      * Each byte corresponds to an indexed document in a segment and
0173      * encodes normalization factor (float value, encoded by
0174      * Zend_Search_Lucene_Search_Similarity::encodeNorm())
0175      *
0176      * @var array
0177      */
0178     private $_norms = array();
0179 
0180     /**
0181      * List of deleted documents.
0182      * bitset if bitset extension is loaded or array otherwise.
0183      *
0184      * @var mixed
0185      */
0186     private $_deleted = null;
0187 
0188     /**
0189      * $this->_deleted update flag
0190      *
0191      * @var boolean
0192      */
0193     private $_deletedDirty = false;
0194 
0195     /**
0196      * True if segment uses shared doc store
0197      *
0198      * @var boolean
0199      */
0200     private $_usesSharedDocStore;
0201 
0202     /*
0203      * Shared doc store options.
0204      * It's an associative array with the following items:
0205      * - 'offset'     => $docStoreOffset           The starting document in the shared doc store files where this segment's documents begin
0206      * - 'segment'    => $docStoreSegment          The name of the segment that has the shared doc store files.
0207      * - 'isCompound' => $docStoreIsCompoundFile   True, if compound file format is used for the shared doc store files (.cfx file).
0208      */
0209     private $_sharedDocStoreOptions;
0210 
0211 
0212     /**
0213      * Zend_Search_Lucene_Index_SegmentInfo constructor
0214      *
0215      * @param Zend_Search_Lucene_Storage_Directory $directory
0216      * @param string     $name
0217      * @param integer    $docCount
0218      * @param integer    $delGen
0219      * @param array|null $docStoreOptions
0220      * @param boolean    $hasSingleNormFile
0221      * @param boolean    $isCompound
0222      */
0223     public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name, $docCount, $delGen = 0, $docStoreOptions = null, $hasSingleNormFile = false, $isCompound = null)
0224     {
0225         $this->_directory = $directory;
0226         $this->_name      = $name;
0227         $this->_docCount  = $docCount;
0228 
0229         if ($docStoreOptions !== null) {
0230             $this->_usesSharedDocStore    = true;
0231             $this->_sharedDocStoreOptions = $docStoreOptions;
0232 
0233             if ($docStoreOptions['isCompound']) {
0234                 $cfxFile       = $this->_directory->getFileObject($docStoreOptions['segment'] . '.cfx');
0235                 $cfxFilesCount = $cfxFile->readVInt();
0236 
0237                 $cfxFiles     = array();
0238                 $cfxFileSizes = array();
0239 
0240                 for ($count = 0; $count < $cfxFilesCount; $count++) {
0241                     $dataOffset = $cfxFile->readLong();
0242                     if ($count != 0) {
0243                         $cfxFileSizes[$fileName] = $dataOffset - end($cfxFiles);
0244                     }
0245                     $fileName            = $cfxFile->readString();
0246                     $cfxFiles[$fileName] = $dataOffset;
0247                 }
0248                 if ($count != 0) {
0249                     $cfxFileSizes[$fileName] = $this->_directory->fileLength($docStoreOptions['segment'] . '.cfx') - $dataOffset;
0250                 }
0251 
0252                 $this->_sharedDocStoreOptions['files']     = $cfxFiles;
0253                 $this->_sharedDocStoreOptions['fileSizes'] = $cfxFileSizes;
0254             }
0255         }
0256 
0257         $this->_hasSingleNormFile = $hasSingleNormFile;
0258         $this->_delGen            = $delGen;
0259         $this->_termDictionary    = null;
0260 
0261 
0262         if ($isCompound !== null) {
0263             $this->_isCompound    = $isCompound;
0264         } else {
0265             // It's a pre-2.1 segment or isCompound is set to 'unknown'
0266             // Detect if segment uses compound file
0267             // require_once 'Zend/Search/Lucene/Exception.php';
0268             try {
0269                 // Try to open compound file
0270                 $this->_directory->getFileObject($name . '.cfs');
0271 
0272                 // Compound file is found
0273                 $this->_isCompound = true;
0274             } catch (Zend_Search_Lucene_Exception $e) {
0275                 if (strpos($e->getMessage(), 'is not readable') !== false) {
0276                     // Compound file is not found or is not readable
0277                     $this->_isCompound = false;
0278                 } else {
0279                     throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
0280                 }
0281             }
0282         }
0283 
0284         $this->_segFiles = array();
0285         if ($this->_isCompound) {
0286             $cfsFile = $this->_directory->getFileObject($name . '.cfs');
0287             $segFilesCount = $cfsFile->readVInt();
0288 
0289             for ($count = 0; $count < $segFilesCount; $count++) {
0290                 $dataOffset = $cfsFile->readLong();
0291                 if ($count != 0) {
0292                     $this->_segFileSizes[$fileName] = $dataOffset - end($this->_segFiles);
0293                 }
0294                 $fileName = $cfsFile->readString();
0295                 $this->_segFiles[$fileName] = $dataOffset;
0296             }
0297             if ($count != 0) {
0298                 $this->_segFileSizes[$fileName] = $this->_directory->fileLength($name . '.cfs') - $dataOffset;
0299             }
0300         }
0301 
0302         $fnmFile = $this->openCompoundFile('.fnm');
0303         $fieldsCount = $fnmFile->readVInt();
0304         $fieldNames = array();
0305         $fieldNums  = array();
0306         $this->_fields = array();
0307 
0308         for ($count=0; $count < $fieldsCount; $count++) {
0309             $fieldName = $fnmFile->readString();
0310             $fieldBits = $fnmFile->readByte();
0311             $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
0312                                                                             $fieldBits & 0x01 /* field is indexed */,
0313                                                                             $count,
0314                                                                             $fieldBits & 0x02 /* termvectors are stored */,
0315                                                                             $fieldBits & 0x10 /* norms are omitted */,
0316                                                                             $fieldBits & 0x20 /* payloads are stored */);
0317             if ($fieldBits & 0x10) {
0318                 // norms are omitted for the indexed field
0319                 $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
0320             }
0321 
0322             $fieldNums[$count]  = $count;
0323             $fieldNames[$count] = $fieldName;
0324         }
0325         array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
0326         $this->_fieldsDicPositions = array_flip($fieldNums);
0327 
0328         if ($this->_delGen == -2) {
0329             // SegmentInfo constructor is invoked from index writer
0330             // Autodetect current delete file generation number
0331             $this->_delGen = $this->_detectLatestDelGen();
0332         }
0333 
0334         // Load deletions
0335         $this->_deleted = $this->_loadDelFile();
0336     }
0337 
0338     /**
0339      * Load deletions file
0340      *
0341      * Returns bitset or an array depending on bitset extension availability
0342      *
0343      * @return mixed
0344      * @throws Zend_Search_Lucene_Exception
0345      */
0346     private function _loadDelFile()
0347     {
0348         if ($this->_delGen == -1) {
0349             // There is no delete file for this segment
0350             return null;
0351         } else if ($this->_delGen == 0) {
0352             // It's a segment with pre-2.1 format delete file
0353             // Try to load deletions file
0354             return $this->_loadPre21DelFile();
0355         } else {
0356             // It's 2.1+ format deleteions file
0357             return $this->_load21DelFile();
0358         }
0359     }
0360 
0361     /**
0362      * Load pre-2.1 deletions file
0363      *
0364      * Returns bitset or an array depending on bitset extension availability
0365      *
0366      * @return mixed
0367      * @throws Zend_Search_Lucene_Exception
0368      */
0369     private function _loadPre21DelFile()
0370     {
0371         // require_once 'Zend/Search/Lucene/Exception.php';
0372         try {
0373             // '.del' files always stored in a separate file
0374             // Segment compound is not used
0375             $delFile = $this->_directory->getFileObject($this->_name . '.del');
0376 
0377             $byteCount = $delFile->readInt();
0378             $byteCount = ceil($byteCount/8);
0379             $bitCount  = $delFile->readInt();
0380 
0381             if ($bitCount == 0) {
0382                 $delBytes = '';
0383             } else {
0384                 $delBytes = $delFile->readBytes($byteCount);
0385             }
0386 
0387             if (extension_loaded('bitset')) {
0388                 return $delBytes;
0389             } else {
0390                 $deletions = array();
0391                 for ($count = 0; $count < $byteCount; $count++) {
0392                     $byte = ord($delBytes[$count]);
0393                     for ($bit = 0; $bit < 8; $bit++) {
0394                         if ($byte & (1<<$bit)) {
0395                             $deletions[$count*8 + $bit] = 1;
0396                         }
0397                     }
0398                 }
0399 
0400                 return $deletions;
0401             }
0402         } catch(Zend_Search_Lucene_Exception $e) {
0403             if (strpos($e->getMessage(), 'is not readable') === false) {
0404                 throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
0405             }
0406             // There is no deletion file
0407             $this->_delGen = -1;
0408 
0409             return null;
0410         }
0411     }
0412 
0413     /**
0414      * Load 2.1+ format deletions file
0415      *
0416      * Returns bitset or an array depending on bitset extension availability
0417      *
0418      * @return mixed
0419      */
0420     private function _load21DelFile()
0421     {
0422         $delFile = $this->_directory->getFileObject($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
0423 
0424         $format = $delFile->readInt();
0425 
0426         if ($format == (int)0xFFFFFFFF) {
0427             if (extension_loaded('bitset')) {
0428                 $deletions = bitset_empty();
0429             } else {
0430                 $deletions = array();
0431             }
0432 
0433             $byteCount = $delFile->readInt();
0434             $bitCount  = $delFile->readInt();
0435 
0436             $delFileSize = $this->_directory->fileLength($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
0437             $byteNum = 0;
0438 
0439             do {
0440                 $dgap = $delFile->readVInt();
0441                 $nonZeroByte = $delFile->readByte();
0442 
0443                 $byteNum += $dgap;
0444 
0445 
0446                 if (extension_loaded('bitset')) {
0447                     for ($bit = 0; $bit < 8; $bit++) {
0448                         if ($nonZeroByte & (1<<$bit)) {
0449                             bitset_incl($deletions, $byteNum*8 + $bit);
0450                         }
0451                     }
0452                     return $deletions;
0453                 } else {
0454                     for ($bit = 0; $bit < 8; $bit++) {
0455                         if ($nonZeroByte & (1<<$bit)) {
0456                             $deletions[$byteNum*8 + $bit] = 1;
0457                         }
0458                     }
0459                     return (count($deletions) > 0) ? $deletions : null;
0460                 }
0461 
0462             } while ($delFile->tell() < $delFileSize);
0463         } else {
0464             // $format is actually byte count
0465             $byteCount = ceil($format/8);
0466             $bitCount  = $delFile->readInt();
0467 
0468             if ($bitCount == 0) {
0469                 $delBytes = '';
0470             } else {
0471                 $delBytes = $delFile->readBytes($byteCount);
0472             }
0473 
0474             if (extension_loaded('bitset')) {
0475                 return $delBytes;
0476             } else {
0477                 $deletions = array();
0478                 for ($count = 0; $count < $byteCount; $count++) {
0479                     $byte = ord($delBytes[$count]);
0480                     for ($bit = 0; $bit < 8; $bit++) {
0481                         if ($byte & (1<<$bit)) {
0482                             $deletions[$count*8 + $bit] = 1;
0483                         }
0484                     }
0485                 }
0486 
0487                 return (count($deletions) > 0) ? $deletions : null;
0488             }
0489         }
0490     }
0491 
0492     /**
0493      * Opens index file stored within compound index file
0494      *
0495      * @param string $extension
0496      * @param boolean $shareHandler
0497      * @throws Zend_Search_Lucene_Exception
0498      * @return Zend_Search_Lucene_Storage_File
0499      */
0500     public function openCompoundFile($extension, $shareHandler = true)
0501     {
0502         if (($extension == '.fdx'  || $extension == '.fdt')  &&  $this->_usesSharedDocStore) {
0503             $fdxFName = $this->_sharedDocStoreOptions['segment'] . '.fdx';
0504             $fdtFName = $this->_sharedDocStoreOptions['segment'] . '.fdt';
0505 
0506             if (!$this->_sharedDocStoreOptions['isCompound']) {
0507                 $fdxFile = $this->_directory->getFileObject($fdxFName, $shareHandler);
0508                 $fdxFile->seek($this->_sharedDocStoreOptions['offset']*8, SEEK_CUR);
0509 
0510                 if ($extension == '.fdx') {
0511                     // '.fdx' file is requested
0512                     return $fdxFile;
0513                 } else {
0514                     // '.fdt' file is requested
0515                     $fdtStartOffset = $fdxFile->readLong();
0516 
0517                     $fdtFile = $this->_directory->getFileObject($fdtFName, $shareHandler);
0518                     $fdtFile->seek($fdtStartOffset, SEEK_CUR);
0519 
0520                     return $fdtFile;
0521                 }
0522             }
0523 
0524             if( !isset($this->_sharedDocStoreOptions['files'][$fdxFName]) ) {
0525                 // require_once 'Zend/Search/Lucene/Exception.php';
0526                 throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
0527                                        . $fdxFName . ' file.' );
0528             }
0529             if( !isset($this->_sharedDocStoreOptions['files'][$fdtFName]) ) {
0530                 // require_once 'Zend/Search/Lucene/Exception.php';
0531                 throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
0532                                        . $fdtFName . ' file.' );
0533             }
0534 
0535             // Open shared docstore segment file
0536             $cfxFile = $this->_directory->getFileObject($this->_sharedDocStoreOptions['segment'] . '.cfx', $shareHandler);
0537             // Seek to the start of '.fdx' file within compound file
0538             $cfxFile->seek($this->_sharedDocStoreOptions['files'][$fdxFName]);
0539             // Seek to the start of current segment documents section
0540             $cfxFile->seek($this->_sharedDocStoreOptions['offset']*8, SEEK_CUR);
0541 
0542             if ($extension == '.fdx') {
0543                 // '.fdx' file is requested
0544                 return $cfxFile;
0545             } else {
0546                 // '.fdt' file is requested
0547                 $fdtStartOffset = $cfxFile->readLong();
0548 
0549                 // Seek to the start of '.fdt' file within compound file
0550                 $cfxFile->seek($this->_sharedDocStoreOptions['files'][$fdtFName]);
0551                 // Seek to the start of current segment documents section
0552                 $cfxFile->seek($fdtStartOffset, SEEK_CUR);
0553 
0554                 return $fdtFile;
0555             }
0556         }
0557 
0558         $filename = $this->_name . $extension;
0559 
0560         if (!$this->_isCompound) {
0561             return $this->_directory->getFileObject($filename, $shareHandler);
0562         }
0563 
0564         if( !isset($this->_segFiles[$filename]) ) {
0565             // require_once 'Zend/Search/Lucene/Exception.php';
0566             throw new Zend_Search_Lucene_Exception('Segment compound file doesn\'t contain '
0567                                        . $filename . ' file.' );
0568         }
0569 
0570         $file = $this->_directory->getFileObject($this->_name . '.cfs', $shareHandler);
0571         $file->seek($this->_segFiles[$filename]);
0572         return $file;
0573     }
0574 
0575     /**
0576      * Get compound file length
0577      *
0578      * @param string $extension
0579      * @return integer
0580      */
0581     public function compoundFileLength($extension)
0582     {
0583         if (($extension == '.fdx'  || $extension == '.fdt')  &&  $this->_usesSharedDocStore) {
0584             $filename = $this->_sharedDocStoreOptions['segment'] . $extension;
0585 
0586             if (!$this->_sharedDocStoreOptions['isCompound']) {
0587                 return $this->_directory->fileLength($filename);
0588             }
0589 
0590             if( !isset($this->_sharedDocStoreOptions['fileSizes'][$filename]) ) {
0591                 // require_once 'Zend/Search/Lucene/Exception.php';
0592                 throw new Zend_Search_Lucene_Exception('Shared doc store compound file doesn\'t contain '
0593                                            . $filename . ' file.' );
0594             }
0595 
0596             return $this->_sharedDocStoreOptions['fileSizes'][$filename];
0597         }
0598 
0599 
0600         $filename = $this->_name . $extension;
0601 
0602         // Try to get common file first
0603         if ($this->_directory->fileExists($filename)) {
0604             return $this->_directory->fileLength($filename);
0605         }
0606 
0607         if( !isset($this->_segFileSizes[$filename]) ) {
0608             // require_once 'Zend/Search/Lucene/Exception.php';
0609             throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
0610                                        . $filename . ' file.' );
0611         }
0612 
0613         return $this->_segFileSizes[$filename];
0614     }
0615 
0616     /**
0617      * Returns field index or -1 if field is not found
0618      *
0619      * @param string $fieldName
0620      * @return integer
0621      */
0622     public function getFieldNum($fieldName)
0623     {
0624         foreach( $this->_fields as $field ) {
0625             if( $field->name == $fieldName ) {
0626                 return $field->number;
0627             }
0628         }
0629 
0630         return -1;
0631     }
0632 
0633     /**
0634      * Returns field info for specified field
0635      *
0636      * @param integer $fieldNum
0637      * @return Zend_Search_Lucene_Index_FieldInfo
0638      */
0639     public function getField($fieldNum)
0640     {
0641         return $this->_fields[$fieldNum];
0642     }
0643 
0644     /**
0645      * Returns array of fields.
0646      * if $indexed parameter is true, then returns only indexed fields.
0647      *
0648      * @param boolean $indexed
0649      * @return array
0650      */
0651     public function getFields($indexed = false)
0652     {
0653         $result = array();
0654         foreach( $this->_fields as $field ) {
0655             if( (!$indexed) || $field->isIndexed ) {
0656                 $result[ $field->name ] = $field->name;
0657             }
0658         }
0659         return $result;
0660     }
0661 
0662     /**
0663      * Returns array of FieldInfo objects.
0664      *
0665      * @return array
0666      */
0667     public function getFieldInfos()
0668     {
0669         return $this->_fields;
0670     }
0671 
0672     /**
0673      * Returns actual deletions file generation number.
0674      *
0675      * @return integer
0676      */
0677     public function getDelGen()
0678     {
0679         return $this->_delGen;
0680     }
0681 
0682     /**
0683      * Returns the total number of documents in this segment (including deleted documents).
0684      *
0685      * @return integer
0686      */
0687     public function count()
0688     {
0689         return $this->_docCount;
0690     }
0691 
0692     /**
0693      * Returns number of deleted documents.
0694      *
0695      * @return integer
0696      */
0697     private function _deletedCount()
0698     {
0699         if ($this->_deleted === null) {
0700             return 0;
0701         }
0702 
0703         if (extension_loaded('bitset')) {
0704             return count(bitset_to_array($this->_deleted));
0705         } else {
0706             return count($this->_deleted);
0707         }
0708     }
0709 
0710     /**
0711      * Returns the total number of non-deleted documents in this segment.
0712      *
0713      * @return integer
0714      */
0715     public function numDocs()
0716     {
0717         if ($this->hasDeletions()) {
0718             return $this->_docCount - $this->_deletedCount();
0719         } else {
0720             return $this->_docCount;
0721         }
0722     }
0723 
0724     /**
0725      * Get field position in a fields dictionary
0726      *
0727      * @param integer $fieldNum
0728      * @return integer
0729      */
0730     private function _getFieldPosition($fieldNum) {
0731         // Treat values which are not in a translation table as a 'direct value'
0732         return isset($this->_fieldsDicPositions[$fieldNum]) ?
0733                            $this->_fieldsDicPositions[$fieldNum] : $fieldNum;
0734     }
0735 
0736     /**
0737      * Return segment name
0738      *
0739      * @return string
0740      */
0741     public function getName()
0742     {
0743         return $this->_name;
0744     }
0745 
0746 
0747     /**
0748      * TermInfo cache
0749      *
0750      * Size is 1024.
0751      * Numbers are used instead of class constants because of performance considerations
0752      *
0753      * @var array
0754      */
0755     private $_termInfoCache = array();
0756 
0757     private function _cleanUpTermInfoCache()
0758     {
0759         // Clean 256 term infos
0760         foreach ($this->_termInfoCache as $key => $termInfo) {
0761             unset($this->_termInfoCache[$key]);
0762 
0763             // leave 768 last used term infos
0764             if (count($this->_termInfoCache) == 768) {
0765                 break;
0766             }
0767         }
0768     }
0769 
0770     /**
0771      * Load terms dictionary index
0772      *
0773      * @throws Zend_Search_Lucene_Exception
0774      */
0775     private function _loadDictionaryIndex()
0776     {
0777         // Check, if index is already serialized
0778         if ($this->_directory->fileExists($this->_name . '.sti')) {
0779             // Load serialized dictionary index data
0780             $stiFile = $this->_directory->getFileObject($this->_name . '.sti');
0781             $stiFileData = $stiFile->readBytes($this->_directory->fileLength($this->_name . '.sti'));
0782 
0783             // Load dictionary index data
0784             if (($unserializedData = @unserialize($stiFileData)) !== false) {
0785                 list($this->_termDictionary, $this->_termDictionaryInfos) = $unserializedData;
0786                 return;
0787             }
0788         }
0789 
0790         // Load data from .tii file and generate .sti file
0791 
0792         // Prefetch dictionary index data
0793         $tiiFile = $this->openCompoundFile('.tii');
0794         $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));
0795 
0796         /** Zend_Search_Lucene_Index_DictionaryLoader */
0797         // require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php';
0798 
0799         // Load dictionary index data
0800         list($this->_termDictionary, $this->_termDictionaryInfos) =
0801                     Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);
0802 
0803         $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
0804         $stiFile = $this->_directory->createFile($this->_name . '.sti');
0805         $stiFile->writeBytes($stiFileData);
0806     }
0807 
0808     /**
0809      * Scans terms dictionary and returns term info
0810      *
0811      * @param Zend_Search_Lucene_Index_Term $term
0812      * @return Zend_Search_Lucene_Index_TermInfo
0813      */
0814     public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
0815     {
0816         $termKey = $term->key();
0817         if (isset($this->_termInfoCache[$termKey])) {
0818             $termInfo = $this->_termInfoCache[$termKey];
0819 
0820             // Move termInfo to the end of cache
0821             unset($this->_termInfoCache[$termKey]);
0822             $this->_termInfoCache[$termKey] = $termInfo;
0823 
0824             return $termInfo;
0825         }
0826 
0827 
0828         if ($this->_termDictionary === null) {
0829             $this->_loadDictionaryIndex();
0830         }
0831 
0832         $searchField = $this->getFieldNum($term->field);
0833 
0834         if ($searchField == -1) {
0835             return null;
0836         }
0837         $searchDicField = $this->_getFieldPosition($searchField);
0838 
0839         // search for appropriate value in dictionary
0840         $lowIndex = 0;
0841         $highIndex = count($this->_termDictionary)-1;
0842         while ($highIndex >= $lowIndex) {
0843             // $mid = ($highIndex - $lowIndex)/2;
0844             $mid = ($highIndex + $lowIndex) >> 1;
0845             $midTerm = $this->_termDictionary[$mid];
0846 
0847             $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
0848             $delta = $searchDicField - $fieldNum;
0849             if ($delta == 0) {
0850                 $delta = strcmp($term->text, $midTerm[1] /* text */);
0851             }
0852 
0853             if ($delta < 0) {
0854                 $highIndex = $mid-1;
0855             } elseif ($delta > 0) {
0856                 $lowIndex  = $mid+1;
0857             } else {
0858                 // return $this->_termDictionaryInfos[$mid]; // We got it!
0859                 $a = $this->_termDictionaryInfos[$mid];
0860                 $termInfo = new Zend_Search_Lucene_Index_TermInfo($a[0], $a[1], $a[2], $a[3], $a[4]);
0861 
0862                 // Put loaded termInfo into cache
0863                 $this->_termInfoCache[$termKey] = $termInfo;
0864 
0865                 return $termInfo;
0866             }
0867         }
0868 
0869         if ($highIndex == -1) {
0870             // Term is out of the dictionary range
0871             return null;
0872         }
0873 
0874         $prevPosition = $highIndex;
0875         $prevTerm = $this->_termDictionary[$prevPosition];
0876         $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];
0877 
0878         $tisFile = $this->openCompoundFile('.tis');
0879         $tiVersion = $tisFile->readInt();
0880         if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */  &&
0881             $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
0882             // require_once 'Zend/Search/Lucene/Exception.php';
0883             throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
0884         }
0885 
0886         $termCount     = $tisFile->readLong();
0887         $indexInterval = $tisFile->readInt();
0888         $skipInterval  = $tisFile->readInt();
0889         if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
0890             $maxSkipLevels = $tisFile->readInt();
0891         }
0892 
0893         $tisFile->seek($prevTermInfo[4] /* indexPointer */ - (($tiVersion == (int)0xFFFFFFFD)? 24 : 20) /* header size*/, SEEK_CUR);
0894 
0895         $termValue    = $prevTerm[1] /* text */;
0896         $termFieldNum = $prevTerm[0] /* field */;
0897         $freqPointer = $prevTermInfo[1] /* freqPointer */;
0898         $proxPointer = $prevTermInfo[2] /* proxPointer */;
0899         for ($count = $prevPosition*$indexInterval + 1;
0900              $count <= $termCount &&
0901              ( $this->_getFieldPosition($termFieldNum) < $searchDicField ||
0902               ($this->_getFieldPosition($termFieldNum) == $searchDicField &&
0903                strcmp($termValue, $term->text) < 0) );
0904              $count++) {
0905             $termPrefixLength = $tisFile->readVInt();
0906             $termSuffix       = $tisFile->readString();
0907             $termFieldNum     = $tisFile->readVInt();
0908             $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($termValue, $termPrefixLength) . $termSuffix;
0909 
0910             $docFreq      = $tisFile->readVInt();
0911             $freqPointer += $tisFile->readVInt();
0912             $proxPointer += $tisFile->readVInt();
0913             if( $docFreq >= $skipInterval ) {
0914                 $skipOffset = $tisFile->readVInt();
0915             } else {
0916                 $skipOffset = 0;
0917             }
0918         }
0919 
0920         if ($termFieldNum == $searchField && $termValue == $term->text) {
0921             $termInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
0922         } else {
0923             $termInfo = null;
0924         }
0925 
0926         // Put loaded termInfo into cache
0927         $this->_termInfoCache[$termKey] = $termInfo;
0928 
0929         if (count($this->_termInfoCache) == 1024) {
0930             $this->_cleanUpTermInfoCache();
0931         }
0932 
0933         return $termInfo;
0934     }
0935 
0936     /**
0937      * Returns IDs of all the documents containing term.
0938      *
0939      * @param Zend_Search_Lucene_Index_Term $term
0940      * @param integer $shift
0941      * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
0942      * @return array
0943      */
0944     public function termDocs(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
0945     {
0946         $termInfo = $this->getTermInfo($term);
0947 
0948         if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
0949             if ($docsFilter !== null  &&  $docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
0950                 $docsFilter->segmentFilters[$this->_name] = array();
0951             }
0952             return array();
0953         }
0954 
0955         $frqFile = $this->openCompoundFile('.frq');
0956         $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
0957         $docId  = 0;
0958         $result = array();
0959 
0960         if ($docsFilter !== null) {
0961             if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
0962                 // require_once 'Zend/Search/Lucene/Exception.php';
0963                 throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
0964             }
0965 
0966             if (isset($docsFilter->segmentFilters[$this->_name])) {
0967                 // Filter already has some data for the current segment
0968 
0969                 // Make short name for the filter (which doesn't need additional dereferencing)
0970                 $filter = &$docsFilter->segmentFilters[$this->_name];
0971 
0972                 // Check if filter is not empty
0973                 if (count($filter) == 0) {
0974                     return array();
0975                 }
0976 
0977                 if ($this->_docCount/count($filter) < self::FULL_SCAN_VS_FETCH_BOUNDARY) {
0978                     // Perform fetching
0979 // ---------------------------------------------------------------
0980                     $updatedFilterData = array();
0981 
0982                     for( $count=0; $count < $termInfo->docFreq; $count++ ) {
0983                         $docDelta = $frqFile->readVInt();
0984                         if( $docDelta % 2 == 1 ) {
0985                             $docId += ($docDelta-1)/2;
0986                         } else {
0987                             $docId += $docDelta/2;
0988                             // read freq
0989                             $frqFile->readVInt();
0990                         }
0991 
0992                         if (isset($filter[$docId])) {
0993                            $result[] = $shift + $docId;
0994                            $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
0995                         }
0996                     }
0997                     $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
0998 // ---------------------------------------------------------------
0999                 } else {
1000                     // Perform full scan
1001                     $updatedFilterData = array();
1002 
1003                     for( $count=0; $count < $termInfo->docFreq; $count++ ) {
1004                         $docDelta = $frqFile->readVInt();
1005                         if( $docDelta % 2 == 1 ) {
1006                             $docId += ($docDelta-1)/2;
1007                         } else {
1008                             $docId += $docDelta/2;
1009                             // read freq
1010                             $frqFile->readVInt();
1011                         }
1012 
1013                         if (isset($filter[$docId])) {
1014                            $result[] = $shift + $docId;
1015                            $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1016                         }
1017                     }
1018                     $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
1019                 }
1020             } else {
1021                 // Filter is present, but doesn't has data for the current segment yet
1022                 $filterData = array();
1023                 for( $count=0; $count < $termInfo->docFreq; $count++ ) {
1024                     $docDelta = $frqFile->readVInt();
1025                     if( $docDelta % 2 == 1 ) {
1026                         $docId += ($docDelta-1)/2;
1027                     } else {
1028                         $docId += $docDelta/2;
1029                         // read freq
1030                         $frqFile->readVInt();
1031                     }
1032 
1033                     $result[] = $shift + $docId;
1034                     $filterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1035                 }
1036                 $docsFilter->segmentFilters[$this->_name] = $filterData;
1037             }
1038         } else {
1039             for( $count=0; $count < $termInfo->docFreq; $count++ ) {
1040                 $docDelta = $frqFile->readVInt();
1041                 if( $docDelta % 2 == 1 ) {
1042                     $docId += ($docDelta-1)/2;
1043                 } else {
1044                     $docId += $docDelta/2;
1045                     // read freq
1046                     $frqFile->readVInt();
1047                 }
1048 
1049                 $result[] = $shift + $docId;
1050             }
1051         }
1052 
1053         return $result;
1054     }
1055 
1056     /**
1057      * Returns term freqs array.
1058      * Result array structure: array(docId => freq, ...)
1059      *
1060      * @param Zend_Search_Lucene_Index_Term $term
1061      * @param integer $shift
1062      * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
1063      * @return Zend_Search_Lucene_Index_TermInfo
1064      */
1065     public function termFreqs(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
1066     {
1067         $termInfo = $this->getTermInfo($term);
1068 
1069         if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
1070             if ($docsFilter !== null  &&  $docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
1071                 $docsFilter->segmentFilters[$this->_name] = array();
1072             }
1073             return array();
1074         }
1075 
1076         $frqFile = $this->openCompoundFile('.frq');
1077         $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
1078         $result = array();
1079         $docId = 0;
1080 
1081         $result = array();
1082 
1083         if ($docsFilter !== null) {
1084             if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
1085                 // require_once 'Zend/Search/Lucene/Exception.php';
1086                 throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
1087             }
1088 
1089             if (isset($docsFilter->segmentFilters[$this->_name])) {
1090                 // Filter already has some data for the current segment
1091 
1092                 // Make short name for the filter (which doesn't need additional dereferencing)
1093                 $filter = &$docsFilter->segmentFilters[$this->_name];
1094 
1095                 // Check if filter is not empty
1096                 if (count($filter) == 0) {
1097                     return array();
1098                 }
1099 
1100 
1101                 if ($this->_docCount/count($filter) < self::FULL_SCAN_VS_FETCH_BOUNDARY) {
1102                     // Perform fetching
1103 // ---------------------------------------------------------------
1104                     $updatedFilterData = array();
1105 
1106                     for ($count = 0; $count < $termInfo->docFreq; $count++) {
1107                         $docDelta = $frqFile->readVInt();
1108                         if ($docDelta % 2 == 1) {
1109                             $docId += ($docDelta-1)/2;
1110                             if (isset($filter[$docId])) {
1111                                 $result[$shift + $docId] = 1;
1112                                 $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1113                             }
1114                         } else {
1115                             $docId += $docDelta/2;
1116                             $freq = $frqFile->readVInt();
1117                             if (isset($filter[$docId])) {
1118                                 $result[$shift + $docId] = $freq;
1119                                 $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1120                             }
1121                         }
1122                     }
1123                     $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
1124 // ---------------------------------------------------------------
1125                 } else {
1126                     // Perform full scan
1127                     $updatedFilterData = array();
1128 
1129                     for ($count = 0; $count < $termInfo->docFreq; $count++) {
1130                         $docDelta = $frqFile->readVInt();
1131                         if ($docDelta % 2 == 1) {
1132                             $docId += ($docDelta-1)/2;
1133                             if (isset($filter[$docId])) {
1134                                 $result[$shift + $docId] = 1;
1135                                 $updatedFilterData[$docId] = 1; // 1 is just some constant value, so we don't need additional var dereference here
1136                             }
1137                         } else {
1138                             $docId += $docDelta/2;
1139                             $freq = $frqFile->readVInt();
1140                             if (isset($filter[$docId])) {
1141                                 $result[$shift + $docId] = $freq;
1142                                 $updatedFilterData[$docId] = 1; // 1 is just some constant value, so we don't need additional var dereference here
1143                             }
1144                         }
1145                     }
1146                     $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
1147                 }
1148             } else {
1149                 // Filter doesn't has data for current segment
1150                 $filterData = array();
1151 
1152                 for ($count = 0; $count < $termInfo->docFreq; $count++) {
1153                     $docDelta = $frqFile->readVInt();
1154                     if ($docDelta % 2 == 1) {
1155                         $docId += ($docDelta-1)/2;
1156                         $result[$shift + $docId] = 1;
1157                         $filterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1158                     } else {
1159                         $docId += $docDelta/2;
1160                         $result[$shift + $docId] = $frqFile->readVInt();
1161                         $filterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1162                     }
1163                 }
1164 
1165                 $docsFilter->segmentFilters[$this->_name] = $filterData;
1166             }
1167         } else {
1168             for ($count = 0; $count < $termInfo->docFreq; $count++) {
1169                 $docDelta = $frqFile->readVInt();
1170                 if ($docDelta % 2 == 1) {
1171                     $docId += ($docDelta-1)/2;
1172                     $result[$shift + $docId] = 1;
1173                 } else {
1174                     $docId += $docDelta/2;
1175                     $result[$shift + $docId] = $frqFile->readVInt();
1176                 }
1177             }
1178         }
1179 
1180         return $result;
1181     }
1182 
1183     /**
1184      * Returns term positions array.
1185      * Result array structure: array(docId => array(pos1, pos2, ...), ...)
1186      *
1187      * @param Zend_Search_Lucene_Index_Term $term
1188      * @param integer $shift
1189      * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
1190      * @return Zend_Search_Lucene_Index_TermInfo
1191      */
1192     public function termPositions(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
1193     {
1194         $termInfo = $this->getTermInfo($term);
1195 
1196         if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
1197             if ($docsFilter !== null  &&  $docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
1198                 $docsFilter->segmentFilters[$this->_name] = array();
1199             }
1200             return array();
1201         }
1202 
1203         $frqFile = $this->openCompoundFile('.frq');
1204         $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
1205 
1206         $docId = 0;
1207         $freqs = array();
1208 
1209 
1210         if ($docsFilter !== null) {
1211             if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
1212                 // require_once 'Zend/Search/Lucene/Exception.php';
1213                 throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
1214             }
1215 
1216             if (isset($docsFilter->segmentFilters[$this->_name])) {
1217                 // Filter already has some data for the current segment
1218 
1219                 // Make short name for the filter (which doesn't need additional dereferencing)
1220                 $filter = &$docsFilter->segmentFilters[$this->_name];
1221 
1222                 // Check if filter is not empty
1223                 if (count($filter) == 0) {
1224                     return array();
1225                 }
1226 
1227                 if ($this->_docCount/count($filter) < self::FULL_SCAN_VS_FETCH_BOUNDARY) {
1228                     // Perform fetching
1229 // ---------------------------------------------------------------
1230                     for ($count = 0; $count < $termInfo->docFreq; $count++) {
1231                         $docDelta = $frqFile->readVInt();
1232                         if ($docDelta % 2 == 1) {
1233                             $docId += ($docDelta-1)/2;
1234                             $freqs[$docId] = 1;
1235                         } else {
1236                             $docId += $docDelta/2;
1237                             $freqs[$docId] = $frqFile->readVInt();
1238                         }
1239                     }
1240 
1241                     $updatedFilterData = array();
1242                     $result = array();
1243                     $prxFile = $this->openCompoundFile('.prx');
1244                     $prxFile->seek($termInfo->proxPointer, SEEK_CUR);
1245                     foreach ($freqs as $docId => $freq) {
1246                         $termPosition = 0;
1247                         $positions = array();
1248 
1249                         // we have to read .prx file to get right position for next doc
1250                         // even filter doesn't match current document
1251                         for ($count = 0; $count < $freq; $count++ ) {
1252                             $termPosition += $prxFile->readVInt();
1253                             $positions[] = $termPosition;
1254                         }
1255 
1256                         // Include into updated filter and into result only if doc is matched by filter
1257                         if (isset($filter[$docId])) {
1258                             $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1259                             $result[$shift + $docId] = $positions;
1260                         }
1261                     }
1262 
1263                     $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
1264 // ---------------------------------------------------------------
1265                 } else {
1266                     // Perform full scan
1267                     for ($count = 0; $count < $termInfo->docFreq; $count++) {
1268                         $docDelta = $frqFile->readVInt();
1269                         if ($docDelta % 2 == 1) {
1270                             $docId += ($docDelta-1)/2;
1271                             $freqs[$docId] = 1;
1272                         } else {
1273                             $docId += $docDelta/2;
1274                             $freqs[$docId] = $frqFile->readVInt();
1275                         }
1276                     }
1277 
1278                     $updatedFilterData = array();
1279                     $result = array();
1280                     $prxFile = $this->openCompoundFile('.prx');
1281                     $prxFile->seek($termInfo->proxPointer, SEEK_CUR);
1282                     foreach ($freqs as $docId => $freq) {
1283                         $termPosition = 0;
1284                         $positions = array();
1285 
1286                         // we have to read .prx file to get right position for next doc
1287                         // even filter doesn't match current document
1288                         for ($count = 0; $count < $freq; $count++ ) {
1289                             $termPosition += $prxFile->readVInt();
1290                             $positions[] = $termPosition;
1291                         }
1292 
1293                         // Include into updated filter and into result only if doc is matched by filter
1294                         if (isset($filter[$docId])) {
1295                             $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1296                             $result[$shift + $docId] = $positions;
1297                         }
1298                     }
1299 
1300                     $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
1301                 }
1302             } else {
1303                 // Filter doesn't has data for current segment
1304                 for ($count = 0; $count < $termInfo->docFreq; $count++) {
1305                     $docDelta = $frqFile->readVInt();
1306                     if ($docDelta % 2 == 1) {
1307                         $docId += ($docDelta-1)/2;
1308                         $freqs[$docId] = 1;
1309                     } else {
1310                         $docId += $docDelta/2;
1311                         $freqs[$docId] = $frqFile->readVInt();
1312                     }
1313                 }
1314 
1315                 $filterData = array();
1316                 $result = array();
1317                 $prxFile = $this->openCompoundFile('.prx');
1318                 $prxFile->seek($termInfo->proxPointer, SEEK_CUR);
1319                 foreach ($freqs as $docId => $freq) {
1320                     $filterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1321 
1322                     $termPosition = 0;
1323                     $positions = array();
1324 
1325                     for ($count = 0; $count < $freq; $count++ ) {
1326                         $termPosition += $prxFile->readVInt();
1327                         $positions[] = $termPosition;
1328                     }
1329 
1330                     $result[$shift + $docId] = $positions;
1331                 }
1332 
1333                 $docsFilter->segmentFilters[$this->_name] = $filterData;
1334             }
1335         } else {
1336             for ($count = 0; $count < $termInfo->docFreq; $count++) {
1337                 $docDelta = $frqFile->readVInt();
1338                 if ($docDelta % 2 == 1) {
1339                     $docId += ($docDelta-1)/2;
1340                     $freqs[$docId] = 1;
1341                 } else {
1342                     $docId += $docDelta/2;
1343                     $freqs[$docId] = $frqFile->readVInt();
1344                 }
1345             }
1346 
1347             $result = array();
1348             $prxFile = $this->openCompoundFile('.prx');
1349             $prxFile->seek($termInfo->proxPointer, SEEK_CUR);
1350             foreach ($freqs as $docId => $freq) {
1351                 $termPosition = 0;
1352                 $positions = array();
1353 
1354                 for ($count = 0; $count < $freq; $count++ ) {
1355                     $termPosition += $prxFile->readVInt();
1356                     $positions[] = $termPosition;
1357                 }
1358 
1359                 $result[$shift + $docId] = $positions;
1360             }
1361         }
1362 
1363         return $result;
1364     }
1365 
1366     /**
1367      * Load normalizatin factors from an index file
1368      *
1369      * @param integer $fieldNum
1370      * @throws Zend_Search_Lucene_Exception
1371      */
1372     private function _loadNorm($fieldNum)
1373     {
1374         if ($this->_hasSingleNormFile) {
1375             $normfFile = $this->openCompoundFile('.nrm');
1376 
1377             $header              = $normfFile->readBytes(3);
1378             $headerFormatVersion = $normfFile->readByte();
1379 
1380             if ($header != 'NRM'  ||  $headerFormatVersion != (int)0xFF) {
1381                 // require_once 'Zend/Search/Lucene/Exception.php';
1382                 throw new  Zend_Search_Lucene_Exception('Wrong norms file format.');
1383             }
1384 
1385             foreach ($this->_fields as $fNum => $fieldInfo) {
1386                 if ($fieldInfo->isIndexed) {
1387                     $this->_norms[$fNum] = $normfFile->readBytes($this->_docCount);
1388                 }
1389             }
1390         } else {
1391             $fFile = $this->openCompoundFile('.f' . $fieldNum);
1392             $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);
1393         }
1394     }
1395 
1396     /**
1397      * Returns normalization factor for specified documents
1398      *
1399      * @param integer $id
1400      * @param string $fieldName
1401      * @return float
1402      */
1403     public function norm($id, $fieldName)
1404     {
1405         $fieldNum = $this->getFieldNum($fieldName);
1406 
1407         if ( !($this->_fields[$fieldNum]->isIndexed) ) {
1408             return null;
1409         }
1410 
1411         if (!isset($this->_norms[$fieldNum])) {
1412             $this->_loadNorm($fieldNum);
1413         }
1414 
1415         return Zend_Search_Lucene_Search_Similarity::decodeNorm( ord($this->_norms[$fieldNum][$id]) );
1416     }
1417 
1418     /**
1419      * Returns norm vector, encoded in a byte string
1420      *
1421      * @param string $fieldName
1422      * @return string
1423      */
1424     public function normVector($fieldName)
1425     {
1426         $fieldNum = $this->getFieldNum($fieldName);
1427 
1428         if ($fieldNum == -1  ||  !($this->_fields[$fieldNum]->isIndexed)) {
1429             $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();
1430 
1431             return str_repeat(chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) )),
1432                               $this->_docCount);
1433         }
1434 
1435         if (!isset($this->_norms[$fieldNum])) {
1436             $this->_loadNorm($fieldNum);
1437         }
1438 
1439         return $this->_norms[$fieldNum];
1440     }
1441 
1442 
1443     /**
1444      * Returns true if any documents have been deleted from this index segment.
1445      *
1446      * @return boolean
1447      */
1448     public function hasDeletions()
1449     {
1450         return $this->_deleted !== null;
1451     }
1452 
1453 
1454     /**
1455      * Returns true if segment has single norms file.
1456      *
1457      * @return boolean
1458      */
1459     public function hasSingleNormFile()
1460     {
1461         return $this->_hasSingleNormFile ? true : false;
1462     }
1463 
    /**
     * Returns true if segment is stored using compound segment file.
     *
     * The stored flag is returned as-is, without a boolean cast.
     *
     * @return boolean
     */
    public function isCompound()
    {
        return $this->_isCompound;
    }
1473 
1474     /**
1475      * Deletes a document from the index segment.
1476      * $id is an internal document id
1477      *
1478      * @param integer
1479      */
1480     public function delete($id)
1481     {
1482         $this->_deletedDirty = true;
1483 
1484         if (extension_loaded('bitset')) {
1485             if ($this->_deleted === null) {
1486                 $this->_deleted = bitset_empty($id);
1487             }
1488             bitset_incl($this->_deleted, $id);
1489         } else {
1490             if ($this->_deleted === null) {
1491                 $this->_deleted = array();
1492             }
1493 
1494             $this->_deleted[$id] = 1;
1495         }
1496     }
1497 
1498     /**
1499      * Checks, that document is deleted
1500      *
1501      * @param integer
1502      * @return boolean
1503      */
1504     public function isDeleted($id)
1505     {
1506         if ($this->_deleted === null) {
1507             return false;
1508         }
1509 
1510         if (extension_loaded('bitset')) {
1511             return bitset_in($this->_deleted, $id);
1512         } else {
1513             return isset($this->_deleted[$id]);
1514         }
1515     }
1516 
1517     /**
1518      * Detect latest delete generation
1519      *
1520      * Is actualy used from writeChanges() method or from the constructor if it's invoked from
1521      * Index writer. In both cases index write lock is already obtained, so we shouldn't care
1522      * about it
1523      *
1524      * @return integer
1525      */
1526     private function _detectLatestDelGen()
1527     {
1528         $delFileList = array();
1529         foreach ($this->_directory->fileList() as $file) {
1530             if ($file == $this->_name . '.del') {
1531                 // Matches <segment_name>.del file name
1532                 $delFileList[] = 0;
1533             } else if (preg_match('/^' . $this->_name . '_([a-zA-Z0-9]+)\.del$/i', $file, $matches)) {
1534                 // Matches <segment_name>_NNN.del file names
1535                 $delFileList[] = (int)base_convert($matches[1], 36, 10);
1536             }
1537         }
1538 
1539         if (count($delFileList) == 0) {
1540             // There is no deletions file for current segment in the directory
1541             // Set deletions file generation number to 1
1542             return -1;
1543         } else {
1544             // There are some deletions files for current segment in the directory
1545             // Set deletions file generation number to the highest nuber
1546             return max($delFileList);
1547         }
1548     }
1549 
1550     /**
1551      * Write changes if it's necessary.
1552      *
1553      * This method must be invoked only from the Writer _updateSegments() method,
1554      * so index Write lock has to be already obtained.
1555      *
1556      * @internal
1557      * @throws Zend_Search_Lucene_Exceptions
1558      */
1559     public function writeChanges()
1560     {
1561         // Get new generation number
1562         $latestDelGen = $this->_detectLatestDelGen();
1563 
1564         if (!$this->_deletedDirty) {
1565             // There was no deletions by current process
1566 
1567             if ($latestDelGen == $this->_delGen) {
1568                 // Delete file hasn't been updated by any concurrent process
1569                 return;
1570             } else if ($latestDelGen > $this->_delGen) {
1571                 // Delete file has been updated by some concurrent process
1572                 // Reload deletions file
1573                 $this->_delGen  = $latestDelGen;
1574                 $this->_deleted = $this->_loadDelFile();
1575 
1576                 return;
1577             } else {
1578                 // require_once 'Zend/Search/Lucene/Exception.php';
1579                 throw new Zend_Search_Lucene_Exception('Delete file processing workflow is corrupted for the segment \'' . $this->_name . '\'.');
1580             }
1581         }
1582 
1583         if ($latestDelGen > $this->_delGen) {
1584             // Merge current deletions with latest deletions file
1585             $this->_delGen = $latestDelGen;
1586 
1587             $latestDelete = $this->_loadDelFile();
1588 
1589             if (extension_loaded('bitset')) {
1590                 $this->_deleted = bitset_union($this->_deleted, $latestDelete);
1591             } else {
1592                 $this->_deleted += $latestDelete;
1593             }
1594         }
1595 
1596         if (extension_loaded('bitset')) {
1597             $delBytes = $this->_deleted;
1598             $bitCount = count(bitset_to_array($delBytes));
1599         } else {
1600             $byteCount = floor($this->_docCount/8)+1;
1601             $delBytes = str_repeat(chr(0), $byteCount);
1602             for ($count = 0; $count < $byteCount; $count++) {
1603                 $byte = 0;
1604                 for ($bit = 0; $bit < 8; $bit++) {
1605                     if (isset($this->_deleted[$count*8 + $bit])) {
1606                         $byte |= (1<<$bit);
1607                     }
1608                 }
1609                 $delBytes[$count] = chr($byte);
1610             }
1611             $bitCount = count($this->_deleted);
1612         }
1613 
1614         if ($this->_delGen == -1) {
1615             // Set delete file generation number to 1
1616             $this->_delGen = 1;
1617         } else {
1618             // Increase delete file generation number by 1
1619             $this->_delGen++;
1620         }
1621 
1622         $delFile = $this->_directory->createFile($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
1623         $delFile->writeInt($this->_docCount);
1624         $delFile->writeInt($bitCount);
1625         $delFile->writeBytes($delBytes);
1626 
1627         $this->_deletedDirty = false;
1628     }
1629 
1630 
    /**
     * Term Dictionary (.tis) file object used for stream-like terms reading.
     *
     * Null while the terms stream is closed.
     *
     * @var Zend_Search_Lucene_Storage_File|null
     */
    private $_tisFile = null;

    /**
     * Position of the .tis data within the opened file, captured with tell()
     * right after the file is opened by resetTermsStream()
     *
     * @var integer
     */
    private $_tisFileOffset;

    /**
     * Frequencies (.frq) file object used for stream-like terms reading.
     *
     * Only opened for the SM_FULL_INFO and SM_MERGE_INFO scan modes.
     *
     * @var Zend_Search_Lucene_Storage_File|null
     */
    private $_frqFile = null;

    /**
     * Position of the .frq data within the opened file, captured with tell()
     * right after the file is opened by resetTermsStream()
     *
     * @var integer
     */
    private $_frqFileOffset;

    /**
     * Positions (.prx) file object used for stream-like terms reading.
     *
     * Only opened for the SM_FULL_INFO and SM_MERGE_INFO scan modes.
     *
     * @var Zend_Search_Lucene_Storage_File|null
     */
    private $_prxFile = null;

    /**
     * Position of the .prx data within the opened file, captured with tell()
     * right after the file is opened by resetTermsStream()
     *
     * @var integer
     */
    private $_prxFileOffset;


    /**
     * Number of terms still to be read from the terms stream
     * (decremented by each successful nextTerm() call)
     *
     * @var integer
     */
    private $_termCount = 0;

    /**
     * Overall number of terms in the terms stream, as read from the .tis header
     *
     * @var integer
     */
    private $_termNum = 0;

    /**
     * Segment index interval, as read from the .tis header
     *
     * @var integer
     */
    private $_indexInterval;

    /**
     * Segment skip interval, as read from the .tis header
     *
     * @var integer
     */
    private $_skipInterval;

    /**
     * TermInfo of the current (most recently read) term in the terms stream.
     *
     * Null when the stream is closed or exhausted.
     *
     * @var Zend_Search_Lucene_Index_TermInfo|null
     */
    private $_lastTermInfo = null;

    /**
     * Current (most recently read) Term in the terms stream.
     *
     * Null when the stream is closed or exhausted.
     *
     * @var Zend_Search_Lucene_Index_Term|null
     */
    private $_lastTerm = null;

    /**
     * Map of the document IDs: segment-local document number => resulting ID.
     * Used to get the new docID after removing deleted documents
     * (deleted documents have no entry in the map).
     * It is not very efficient in terms of memory usage,
     * but it is much faster than other methods.
     *
     * @var array|null
     */
    private $_docMap = null;

    /**
     * An array of all positions of the current term in the documents.
     * Array structure: array( docId => array( pos1, pos2, ...), ...)
     *
     * Is set to null if term positions loading has to be skipped
     *
     * @var array|null
     */
    private $_lastTermPositions;


    /**
     * Terms scan mode
     *
     * Values:
     *
     * self::SM_TERMS_ONLY - terms are scanned, no additional info is retrieved
     * self::SM_FULL_INFO  - terms are scanned, frequency and position info is retrieved
     * self::SM_MERGE_INFO - terms are scanned, frequency and position info is retrieved,
     *                       document numbers are compacted (shifted if segment has deleted documents)
     *
     * @var integer
     */
    private $_termsScanMode;

    /** Scan modes accepted by resetTermsStream() */
    const SM_TERMS_ONLY = 0;    // terms are scanned, no additional info is retrieved
    const SM_FULL_INFO  = 1;    // terms are scanned, frequency and position info is retrieved
    const SM_MERGE_INFO = 2;    // terms are scanned, frequency and position info is retrieved,
                                // document numbers are compacted (shifted if segment contains deleted documents)
1756 
1757     /**
1758      * Reset terms stream
1759      *
1760      * $startId - id for the fist document
1761      * $compact - remove deleted documents
1762      *
1763      * Returns start document id for the next segment
1764      *
1765      * @param integer $startId
1766      * @param integer $mode
1767      * @throws Zend_Search_Lucene_Exception
1768      * @return integer
1769      */
1770     public function resetTermsStream(/** $startId = 0, $mode = self::SM_TERMS_ONLY */)
1771     {
1772         /**
1773          * SegmentInfo->resetTermsStream() method actually takes two optional parameters:
1774          *   $startId (default value is 0)
1775          *   $mode (default value is self::SM_TERMS_ONLY)
1776          */
1777         $argList = func_get_args();
1778         if (count($argList) > 2) {
1779             // require_once 'Zend/Search/Lucene/Exception.php';
1780             throw new Zend_Search_Lucene_Exception('Wrong number of arguments');
1781         } else if (count($argList) == 2) {
1782             $startId = $argList[0];
1783             $mode    = $argList[1];
1784         } else if (count($argList) == 1) {
1785             $startId = $argList[0];
1786             $mode    = self::SM_TERMS_ONLY;
1787         } else {
1788             $startId = 0;
1789             $mode    = self::SM_TERMS_ONLY;
1790         }
1791 
1792         if ($this->_tisFile !== null) {
1793             $this->_tisFile = null;
1794         }
1795 
1796         $this->_tisFile = $this->openCompoundFile('.tis', false);
1797         $this->_tisFileOffset = $this->_tisFile->tell();
1798 
1799         $tiVersion = $this->_tisFile->readInt();
1800         if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */  &&
1801             $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
1802             // require_once 'Zend/Search/Lucene/Exception.php';
1803             throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
1804         }
1805 
1806         $this->_termCount     =
1807               $this->_termNum = $this->_tisFile->readLong(); // Read terms count
1808         $this->_indexInterval = $this->_tisFile->readInt();  // Read Index interval
1809         $this->_skipInterval  = $this->_tisFile->readInt();  // Read skip interval
1810         if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
1811             $maxSkipLevels = $this->_tisFile->readInt();
1812         }
1813 
1814         if ($this->_frqFile !== null) {
1815             $this->_frqFile = null;
1816         }
1817         if ($this->_prxFile !== null) {
1818             $this->_prxFile = null;
1819         }
1820         $this->_docMap = array();
1821 
1822         $this->_lastTerm          = new Zend_Search_Lucene_Index_Term('', -1);
1823         $this->_lastTermInfo      = new Zend_Search_Lucene_Index_TermInfo(0, 0, 0, 0);
1824         $this->_lastTermPositions = null;
1825 
1826         $this->_termsScanMode = $mode;
1827 
1828         switch ($mode) {
1829             case self::SM_TERMS_ONLY:
1830                 // Do nothing
1831                 break;
1832 
1833             case self::SM_FULL_INFO:
1834                 // break intentionally omitted
1835             case self::SM_MERGE_INFO:
1836                 $this->_frqFile = $this->openCompoundFile('.frq', false);
1837                 $this->_frqFileOffset = $this->_frqFile->tell();
1838 
1839                 $this->_prxFile = $this->openCompoundFile('.prx', false);
1840                 $this->_prxFileOffset = $this->_prxFile->tell();
1841 
1842                 for ($count = 0; $count < $this->_docCount; $count++) {
1843                     if (!$this->isDeleted($count)) {
1844                         $this->_docMap[$count] = $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $count);
1845                     }
1846                 }
1847                 break;
1848 
1849             default:
1850                 // require_once 'Zend/Search/Lucene/Exception.php';
1851                 throw new Zend_Search_Lucene_Exception('Wrong terms scaning mode specified.');
1852                 break;
1853         }
1854 
1855         // Calculate next segment start id (since $this->_docMap structure may be cleaned by $this->nextTerm() call)
1856         $nextSegmentStartId = $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $this->_docCount);
1857         $this->nextTerm();
1858 
1859         return $nextSegmentStartId;
1860     }
1861 
1862 
1863     /**
1864      * Skip terms stream up to the specified term preffix.
1865      *
1866      * Prefix contains fully specified field info and portion of searched term
1867      *
1868      * @param Zend_Search_Lucene_Index_Term $prefix
1869      * @throws Zend_Search_Lucene_Exception
1870      */
1871     public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
1872     {
1873         if ($this->_termDictionary === null) {
1874             $this->_loadDictionaryIndex();
1875         }
1876 
1877         $searchField = $this->getFieldNum($prefix->field);
1878 
1879         if ($searchField == -1) {
1880             /**
1881              * Field is not presented in this segment
1882              * Go to the end of dictionary
1883              */
1884             $this->_tisFile = null;
1885             $this->_frqFile = null;
1886             $this->_prxFile = null;
1887 
1888             $this->_lastTerm          = null;
1889             $this->_lastTermInfo      = null;
1890             $this->_lastTermPositions = null;
1891 
1892             return;
1893         }
1894         $searchDicField = $this->_getFieldPosition($searchField);
1895 
1896         // search for appropriate value in dictionary
1897         $lowIndex = 0;
1898         $highIndex = count($this->_termDictionary)-1;
1899         while ($highIndex >= $lowIndex) {
1900             // $mid = ($highIndex - $lowIndex)/2;
1901             $mid = ($highIndex + $lowIndex) >> 1;
1902             $midTerm = $this->_termDictionary[$mid];
1903 
1904             $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
1905             $delta = $searchDicField - $fieldNum;
1906             if ($delta == 0) {
1907                 $delta = strcmp($prefix->text, $midTerm[1] /* text */);
1908             }
1909 
1910             if ($delta < 0) {
1911                 $highIndex = $mid-1;
1912             } elseif ($delta > 0) {
1913                 $lowIndex  = $mid+1;
1914             } else {
1915                 // We have reached term we are looking for
1916                 break;
1917             }
1918         }
1919 
1920         if ($highIndex == -1) {
1921             // Term is out of the dictionary range
1922             $this->_tisFile = null;
1923             $this->_frqFile = null;
1924             $this->_prxFile = null;
1925 
1926             $this->_lastTerm          = null;
1927             $this->_lastTermInfo      = null;
1928             $this->_lastTermPositions = null;
1929 
1930             return;
1931         }
1932 
1933         $prevPosition = $highIndex;
1934         $prevTerm = $this->_termDictionary[$prevPosition];
1935         $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];
1936 
1937         if ($this->_tisFile === null) {
1938             // The end of terms stream is reached and terms dictionary file is closed
1939             // Perform mini-reset operation
1940             $this->_tisFile = $this->openCompoundFile('.tis', false);
1941 
1942             if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
1943                 $this->_frqFile = $this->openCompoundFile('.frq', false);
1944                 $this->_prxFile = $this->openCompoundFile('.prx', false);
1945             }
1946         }
1947         $this->_tisFile->seek($this->_tisFileOffset + $prevTermInfo[4], SEEK_SET);
1948 
1949         $this->_lastTerm     = new Zend_Search_Lucene_Index_Term($prevTerm[1] /* text */,
1950                                                                  ($prevTerm[0] == -1) ? '' : $this->_fields[$prevTerm[0] /* field */]->name);
1951         $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($prevTermInfo[0] /* docFreq */,
1952                                                                      $prevTermInfo[1] /* freqPointer */,
1953                                                                      $prevTermInfo[2] /* proxPointer */,
1954                                                                      $prevTermInfo[3] /* skipOffset */);
1955         $this->_termCount  =  $this->_termNum - $prevPosition*$this->_indexInterval;
1956 
1957         if ($highIndex == 0) {
1958             // skip start entry
1959             $this->nextTerm();
1960         } else if ($prefix->field == $this->_lastTerm->field  &&  $prefix->text  == $this->_lastTerm->text) {
1961             // We got exact match in the dictionary index
1962 
1963             if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
1964                 $this->_lastTermPositions = array();
1965 
1966                 $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
1967                 $freqs = array();   $docId = 0;
1968                 for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) {
1969                     $docDelta = $this->_frqFile->readVInt();
1970                     if( $docDelta % 2 == 1 ) {
1971                         $docId += ($docDelta-1)/2;
1972                         $freqs[ $docId ] = 1;
1973                     } else {
1974                         $docId += $docDelta/2;
1975                         $freqs[ $docId ] = $this->_frqFile->readVInt();
1976                     }
1977                 }
1978 
1979                 $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
1980                 foreach ($freqs as $docId => $freq) {
1981                     $termPosition = 0;  $positions = array();
1982 
1983                     for ($count = 0; $count < $freq; $count++ ) {
1984                         $termPosition += $this->_prxFile->readVInt();
1985                         $positions[] = $termPosition;
1986                     }
1987 
1988                     if (isset($this->_docMap[$docId])) {
1989                         $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
1990                     }
1991                 }
1992             }
1993 
1994             return;
1995         }
1996 
1997         // Search term matching specified prefix
1998         while ($this->_lastTerm !== null) {
1999             if ( strcmp($this->_lastTerm->field, $prefix->field) > 0  ||
2000                  ($prefix->field == $this->_lastTerm->field  &&  strcmp($this->_lastTerm->text, $prefix->text) >= 0) ) {
2001                     // Current term matches or greate than the pattern
2002                     return;
2003             }
2004 
2005             $this->nextTerm();
2006         }
2007     }
2008 
2009 
2010     /**
2011      * Scans terms dictionary and returns next term
2012      *
2013      * @return Zend_Search_Lucene_Index_Term|null
2014      */
2015     public function nextTerm()
2016     {
2017         if ($this->_tisFile === null  ||  $this->_termCount == 0) {
2018             $this->_lastTerm          = null;
2019             $this->_lastTermInfo      = null;
2020             $this->_lastTermPositions = null;
2021             $this->_docMap            = null;
2022 
2023             // may be necessary for "empty" segment
2024             $this->_tisFile = null;
2025             $this->_frqFile = null;
2026             $this->_prxFile = null;
2027 
2028             return null;
2029         }
2030 
2031         $termPrefixLength = $this->_tisFile->readVInt();
2032         $termSuffix       = $this->_tisFile->readString();
2033         $termFieldNum     = $this->_tisFile->readVInt();
2034         $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($this->_lastTerm->text, $termPrefixLength) . $termSuffix;
2035 
2036         $this->_lastTerm = new Zend_Search_Lucene_Index_Term($termValue, $this->_fields[$termFieldNum]->name);
2037 
2038         $docFreq     = $this->_tisFile->readVInt();
2039         $freqPointer = $this->_lastTermInfo->freqPointer + $this->_tisFile->readVInt();
2040         $proxPointer = $this->_lastTermInfo->proxPointer + $this->_tisFile->readVInt();
2041         if ($docFreq >= $this->_skipInterval) {
2042             $skipOffset = $this->_tisFile->readVInt();
2043         } else {
2044             $skipOffset = 0;
2045         }
2046 
2047         $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
2048 
2049 
2050         if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
2051             $this->_lastTermPositions = array();
2052 
2053             $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
2054             $freqs = array();   $docId = 0;
2055             for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) {
2056                 $docDelta = $this->_frqFile->readVInt();
2057                 if( $docDelta % 2 == 1 ) {
2058                     $docId += ($docDelta-1)/2;
2059                     $freqs[ $docId ] = 1;
2060                 } else {
2061                     $docId += $docDelta/2;
2062                     $freqs[ $docId ] = $this->_frqFile->readVInt();
2063                 }
2064             }
2065 
2066             $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
2067             foreach ($freqs as $docId => $freq) {
2068                 $termPosition = 0;  $positions = array();
2069 
2070                 for ($count = 0; $count < $freq; $count++ ) {
2071                     $termPosition += $this->_prxFile->readVInt();
2072                     $positions[] = $termPosition;
2073                 }
2074 
2075                 if (isset($this->_docMap[$docId])) {
2076                     $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
2077                 }
2078             }
2079         }
2080 
2081         $this->_termCount--;
2082         if ($this->_termCount == 0) {
2083             $this->_tisFile = null;
2084             $this->_frqFile = null;
2085             $this->_prxFile = null;
2086         }
2087 
2088         return $this->_lastTerm;
2089     }
2090 
2091     /**
2092      * Close terms stream
2093      *
2094      * Should be used for resources clean up if stream is not read up to the end
2095      */
2096     public function closeTermsStream()
2097     {
2098         $this->_tisFile = null;
2099         $this->_frqFile = null;
2100         $this->_prxFile = null;
2101 
2102         $this->_lastTerm          = null;
2103         $this->_lastTermInfo      = null;
2104         $this->_lastTermPositions = null;
2105 
2106         $this->_docMap            = null;
2107     }
2108 
2109 
2110     /**
2111      * Returns term in current position
2112      *
2113      * @return Zend_Search_Lucene_Index_Term|null
2114      */
2115     public function currentTerm()
2116     {
2117         return $this->_lastTerm;
2118     }
2119 
2120 
2121     /**
2122      * Returns an array of all term positions in the documents.
2123      * Return array structure: array( docId => array( pos1, pos2, ...), ...)
2124      *
2125      * @return array
2126      */
2127     public function currentTermPositions()
2128     {
2129         return $this->_lastTermPositions;
2130     }
2131 }
2132