File indexing completed on 2025-01-19 05:21:27

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Search_Lucene
0017  * @subpackage Storage
0018  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0019  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0020  * @version    $Id$
0021  */
0022 
0023 /**
0024  * @category   Zend
0025  * @package    Zend_Search_Lucene
0026  * @subpackage Storage
0027  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0028  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0029  */
abstract class Zend_Search_Lucene_Storage_File
{
    /**
     * Reads $length number of bytes at the current position in the
     * file and advances the file pointer.
     *
     * @param integer $length
     * @return string
     */
    abstract protected function _fread($length=1);


    /**
     * Sets the file position indicator.
     *
     * The new position, measured in bytes from the beginning of the file,
     * is obtained by adding offset to the position specified by whence,
     * whose values are defined as follows:
     * SEEK_SET - Set position equal to offset bytes.
     * SEEK_CUR - Set position to current location plus offset.
     * SEEK_END - Set position to end-of-file plus offset. (To move to
     * a position before the end-of-file, you need to pass a negative value
     * in offset.)
     * Upon success, returns 0; otherwise, returns -1
     *
     * @param integer $offset
     * @param integer $whence
     * @return integer
     */
    abstract public function seek($offset, $whence=SEEK_SET);

    /**
     * Get file position.
     *
     * @return integer
     */
    abstract public function tell();

    /**
     * Flush output.
     *
     * Returns true on success or false on failure.
     *
     * @return boolean
     */
    abstract public function flush();

    /**
     * Writes $length number of bytes (all, if $length===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $length
     */
    abstract protected function _fwrite($data, $length=null);

    /**
     * Lock file
     *
     * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
     *
     * @param integer $lockType
     * @param boolean $nonBlockinLock  Non-blocking mode flag (defaults to false);
     *                                 exact semantics are defined by the implementation
     * @return boolean
     */
    abstract public function lock($lockType, $nonBlockinLock = false);

    /**
     * Unlock file
     */
    abstract public function unlock();

    /**
     * Reads a byte from the current position in the file
     * and advances the file pointer.
     *
     * @return integer  Byte value in the range 0..255
     */
    public function readByte()
    {
        return ord($this->_fread(1));
    }

    /**
     * Writes a byte to the end of the file.
     *
     * @param integer $byte
     * @return mixed  Whatever the concrete _fwrite() implementation returns
     */
    public function writeByte($byte)
    {
        return $this->_fwrite(chr($byte), 1);
    }

    /**
     * Read num bytes from the current position in the file
     * and advances the file pointer.
     *
     * @param integer $num
     * @return string
     */
    public function readBytes($num)
    {
        return $this->_fread($num);
    }

    /**
     * Writes num bytes of data (all, if $num===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $num
     */
    public function writeBytes($data, $num=null)
    {
        $this->_fwrite($data, $num);
    }


    /**
     * Reads a big-endian 32-bit integer from the current position in the
     * file and advances the file pointer.
     *
     * The four bytes are combined without sign extension, so on 64-bit
     * builds a stored value with the highest bit set is returned as a
     * positive number (e.g. FF FF FF FF gives 4294967295).
     *
     * @return integer
     */
    public function readInt()
    {
        $str = $this->_fread(4);

        return  ord($str[0]) << 24 |
                ord($str[1]) << 16 |
                ord($str[2]) << 8  |
                ord($str[3]);
    }


    /**
     * Writes the low 32 bits of an integer to the end of file,
     * most significant byte first.
     *
     * @param integer $value
     */
    public function writeInt($value)
    {
        settype($value, 'integer');
        $this->_fwrite( chr($value>>24 & 0xFF) .
                        chr($value>>16 & 0xFF) .
                        chr($value>>8  & 0xFF) .
                        chr($value     & 0xFF),   4  );
    }


    /**
     * Returns a long (8-byte, big-endian) integer from the current position
     * in the file and advances the file pointer.
     *
     * @return integer|float
     * @throws Zend_Search_Lucene_Exception
     */
    public function readLong()
    {
        // Native integers are too narrow on 32-bit builds; use the
        // float-based fallback there.
        if (PHP_INT_SIZE <= 4) {
            return $this->readLong32Bit();
        }

        $str = $this->_fread(8);

        return  ord($str[0]) << 56  |
                ord($str[1]) << 48  |
                ord($str[2]) << 40  |
                ord($str[3]) << 32  |
                ord($str[4]) << 24  |
                ord($str[5]) << 16  |
                ord($str[6]) << 8   |
                ord($str[7]);
    }

    /**
     * Writes long (8-byte, big-endian) integer to the end of file
     *
     * @param integer $value
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeLong($value)
    {
        // Native integers are too narrow on 32-bit builds; use the
        // float-based fallback there.
        if (PHP_INT_SIZE <= 4) {
            $this->writeLong32Bit($value);
            return;
        }

        settype($value, 'integer');
        $this->_fwrite( chr($value>>56 & 0xFF) .
                        chr($value>>48 & 0xFF) .
                        chr($value>>40 & 0xFF) .
                        chr($value>>32 & 0xFF) .
                        chr($value>>24 & 0xFF) .
                        chr($value>>16 & 0xFF) .
                        chr($value>>8  & 0xFF) .
                        chr($value     & 0xFF),   8  );
    }


    /**
     * Returns a long integer from the current position in the file,
     * advances the file pointer and return it as float (for 32-bit platforms).
     *
     * The value is read as two 32-bit words (high word first). Values that
     * fit into an integer are returned as integer, larger ones as float.
     *
     * @return integer|float
     * @throws Zend_Search_Lucene_Exception
     */
    public function readLong32Bit()
    {
        $wordHigh = $this->readInt();
        $wordLow  = $this->readInt();

        if ($wordHigh & (int)0x80000000) {
            // It's a negative value since the highest bit is set.
            // Only values whose high word is all ones and whose low word
            // has its sign bit set are representable.
            if ($wordHigh == (int)0xFFFFFFFF  &&  ($wordLow & (int)0x80000000)) {
                return $wordLow;
            }

            throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
        }

        if ($wordLow < 0) {
            // Value is larger than 0x7FFF FFFF. Represent low word as float.
            $wordLow &= 0x7FFFFFFF;
            $wordLow += (float)0x80000000;
        }

        if ($wordHigh == 0) {
            // Return value as integer if possible
            return $wordLow;
        }

        return $wordHigh*(float)0x100000000/* 0x00000001 00000000 */ + $wordLow;
    }


    /**
     * Writes long integer to the end of file (32-bit platforms implementation)
     *
     * The value is written as two 32-bit words, high word first.
     *
     * @param integer|float $value
     * @throws Zend_Search_Lucene_Exception  If $value is lower than -2147483648
     */
    public function writeLong32Bit($value)
    {
        // Compare against the literal lower bound. The previous
        // (int)0x80000000 cast yields +2147483648 on 64-bit builds, which
        // made this method reject every value below 2^31 there.
        if ($value < -2147483648) {
            throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
        }

        if ($value < 0) {
            // Sign-extend: the high word is all ones (writeInt() emits FF FF FF FF for -1)
            $wordHigh = -1;
            $wordLow  = (int)$value;
        } else {
            $wordHigh = (int)($value/(float)0x100000000/* 0x00000001 00000000 */);
            $wordLow  = $value - $wordHigh*(float)0x100000000/* 0x00000001 00000000 */;

            if ($wordLow > 0x7FFFFFFF) {
                // Highest bit of low word is set. Translate it to the corresponding negative integer value
                $wordLow -= 0x80000000;
                $wordLow |= 0x80000000;
            }
        }

        $this->writeInt($wordHigh);
        $this->writeInt($wordLow);
    }


    /**
     * Returns a variable-length integer from the current
     * position in the file and advances the file pointer.
     *
     * Each byte carries seven payload bits, least significant group first;
     * a set high bit means that another byte follows.
     *
     * @return integer
     */
    public function readVInt()
    {
        $nextByte = ord($this->_fread(1));
        $val = $nextByte & 0x7F;

        for ($shift=7; ($nextByte & 0x80) != 0; $shift += 7) {
            $nextByte = ord($this->_fread(1));
            $val |= ($nextByte & 0x7F) << $shift;
        }
        return $val;
    }

    /**
     * Writes a variable-length integer to the end of file.
     *
     * Seven bits are written per byte, least significant group first;
     * the high bit is set on every byte except the last one.
     *
     * @param integer $value
     */
    public function writeVInt($value)
    {
        settype($value, 'integer');
        while ($value > 0x7F) {
            $this->_fwrite(chr( ($value & 0x7F)|0x80 ));
            $value >>= 7;
        }
        $this->_fwrite(chr($value));
    }


    /**
     * Reads a string from the current position in the file
     * and advances the file pointer.
     *
     * The stored form is a VInt character count followed by the characters
     * in Java 2 "modified UTF-8", where the null character is encoded as
     * the two bytes C0 80. Such pairs are converted back to a single
     * "\x00" byte in the returned string.
     *
     * This implementation supports only Basic Multilingual Plane
     * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
     * "supplementary characters" (characters whose code points are
     * greater than 0xFFFF).
     * Java 2 represents these characters as a pair of char (16-bit)
     * values, the first from the high-surrogates range (0xD800-0xDBFF),
     * the second from the low-surrogates range (0xDC00-0xDFFF). Then
     * they are encoded as usual UTF-8 characters in six bytes.
     * Standard UTF-8 representation uses four bytes for supplementary
     * characters.
     *
     * @return string
     */
    public function readString()
    {
        $strlen = $this->readVInt();
        if ($strlen == 0) {
            return '';
        }

        // The prefix counts characters, not bytes: read one byte per
        // character first and fetch the continuation bytes as lead bytes
        // are discovered. $strlen tracks the byte length of $str_val.
        $str_val = $this->_fread($strlen);

        for ($count = 0; $count < $strlen; $count++) {
            $leadByte = ord($str_val[$count]);
            if (($leadByte & 0xC0) != 0xC0) {
                // Single-byte character
                continue;
            }

            $addBytes = 1;
            if ($leadByte & 0x20) {
                $addBytes++;

                // Never used. Java2 doesn't encode strings in four bytes
                if ($leadByte & 0x10) {
                    $addBytes++;
                }
            }
            $str_val .= $this->_fread($addBytes);
            $strlen  += $addBytes;

            if ($leadByte == 0xC0 && ord($str_val[$count+1]) == 0x80) {
                // Java2 encodes the null character in two bytes. Collapse
                // them into one real NUL byte; the string gets one byte
                // shorter, and $count must not be advanced so that the
                // byte following the pair is still examined.
                $str_val = substr($str_val, 0, $count)
                         . "\x00"
                         . substr($str_val, $count+2);
                $strlen--;
            } else {
                // Skip the continuation bytes of this character
                $count += $addBytes;
            }
        }

        return $str_val;
    }

    /**
     * Writes a string to the end of file.
     *
     * The input is expected to be UTF-8 already; only the character count
     * is calculated and "\x00" bytes are replaced by the two-byte Java 2
     * form C0 80. The output is a VInt character count followed by the
     * encoded bytes.
     *
     * This implementation supports only Basic Multilingual Plane
     * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
     * "supplementary characters" (characters whose code points are
     * greater than 0xFFFF).
     * Java 2 represents these characters as a pair of char (16-bit)
     * values, the first from the high-surrogates range (0xD800-0xDBFF),
     * the second from the low-surrogates range (0xDC00-0xDFFF). Then
     * they are encoded as usual UTF-8 characters in six bytes.
     * Standard UTF-8 representation uses four bytes for supplementary
     * characters.
     *
     * @param string $str
     * @throws Zend_Search_Lucene_Exception  If the computed character count is negative
     */
    public function writeString($str)
    {
        // convert input to a string before iterating string characters
        settype($str, 'string');

        $chars = $strlen = strlen($str);

        for ($count = 0; $count < $strlen; $count++) {
            $leadByte = ord($str[$count]);
            if (($leadByte & 0xC0) != 0xC0) {
                // Single-byte character (a NUL still counts as one character)
                continue;
            }

            $addBytes = 1;
            if ($leadByte & 0x20) {
                $addBytes++;

                // Never used. Java2 doesn't encode strings in four bytes
                // and we don't support non-BMP characters
                if ($leadByte & 0x10) {
                    $addBytes++;
                }
            }

            // Continuation bytes are not characters of their own
            $chars -= $addBytes;
            $count += $addBytes;
        }

        if ($chars < 0) {
            throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
        }

        $this->writeVInt($chars);

        // str_replace() takes (search, replace, subject); it returns the
        // subject unchanged when the string contains no NUL bytes.
        $this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
    }


    /**
     * Reads binary data from the current position in the file
     * and advances the file pointer.
     *
     * The data is stored as a VInt byte count followed by that many bytes.
     *
     * @return string
     */
    public function readBinary()
    {
        return $this->_fread($this->readVInt());
    }
}
0472     }
0473 }