File indexing completed on 2025-01-19 05:21:27
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Storage
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */

/**
 * Base class for storage files.
 *
 * Concrete subclasses supply the raw primitives (_fread(), _fwrite(), seek(),
 * tell(), flush(), lock(), unlock()). This class builds the typed readers and
 * writers on top of them: single bytes, big-endian 32/64-bit integers,
 * variable-length integers, length-prefixed strings and binary blobs.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Storage
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Storage_File
{
    /**
     * Reads $length number of bytes at the current position in the
     * file and advances the file pointer.
     *
     * @param integer $length
     * @return string
     */
    abstract protected function _fread($length=1);


    /**
     * Sets the file position indicator and advances the file pointer.
     * The new position, measured in bytes from the beginning of the file,
     * is obtained by adding offset to the position specified by whence,
     * whose values are defined as follows:
     * SEEK_SET - Set position equal to offset bytes.
     * SEEK_CUR - Set position to current location plus offset.
     * SEEK_END - Set position to end-of-file plus offset. (To move to
     * a position before the end-of-file, you need to pass a negative value
     * in offset.)
     * Upon success, returns 0; otherwise, returns -1
     *
     * @param integer $offset
     * @param integer $whence
     * @return integer
     */
    abstract public function seek($offset, $whence=SEEK_SET);

    /**
     * Get file position.
     *
     * @return integer
     */
    abstract public function tell();

    /**
     * Flush output.
     *
     * Returns true on success or false on failure.
     *
     * @return boolean
     */
    abstract public function flush();

    /**
     * Writes $length number of bytes (all, if $length===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $length
     */
    abstract protected function _fwrite($data, $length=null);

    /**
     * Lock file
     *
     * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
     *
     * @param integer $lockType
     * @param boolean $nonBlockinLock  presumably requests a non-blocking lock
     *                                 attempt; exact semantics are defined by
     *                                 the implementing subclass
     * @return boolean
     */
    abstract public function lock($lockType, $nonBlockinLock = false);

    /**
     * Unlock file
     */
    abstract public function unlock();

    /**
     * Reads a byte from the current position in the file
     * and advances the file pointer.
     *
     * @return integer  byte value as returned by ord()
     */
    public function readByte()
    {
        return ord($this->_fread(1));
    }

    /**
     * Writes a byte to the end of the file.
     *
     * @param integer $byte
     * @return mixed  whatever the subclass' _fwrite() returns
     */
    public function writeByte($byte)
    {
        return $this->_fwrite(chr($byte), 1);
    }

    /**
     * Read num bytes from the current position in the file
     * and advances the file pointer.
     *
     * @param integer $num
     * @return string
     */
    public function readBytes($num)
    {
        return $this->_fread($num);
    }

    /**
     * Writes num bytes of data (all, if $num===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $num
     */
    public function writeBytes($data, $num=null)
    {
        $this->_fwrite($data, $num);
    }


    /**
     * Reads an integer from the current position in the file
     * and advances the file pointer.
     *
     * Four bytes are consumed and combined big-endian (the first byte is
     * the most significant one).
     *
     * @return integer
     */
    public function readInt()
    {
        $str = $this->_fread(4);

        return  ord($str[0]) << 24 |
                ord($str[1]) << 16 |
                ord($str[2]) << 8  |
                ord($str[3]);
    }


    /**
     * Writes an integer to the end of file.
     *
     * The value is cast to integer and its low 32 bits are emitted as four
     * bytes, most significant byte first.
     *
     * @param integer $value
     */
    public function writeInt($value)
    {
        settype($value, 'integer');
        $this->_fwrite( chr($value>>24 & 0xFF) .
                        chr($value>>16 & 0xFF) .
                        chr($value>>8  & 0xFF) .
                        chr($value     & 0xFF),   4  );
    }


    /**
     * Returns a long integer from the current position in the file
     * and advances the file pointer.
     *
     * @return integer|float
     * @throws Zend_Search_Lucene_Exception
     */
    public function readLong()
    {
        /**
         * Check, that we work in 64-bit mode.
         * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            $str = $this->_fread(8);

            return  ord($str[0]) << 56  |
                    ord($str[1]) << 48  |
                    ord($str[2]) << 40  |
                    ord($str[3]) << 32  |
                    ord($str[4]) << 24  |
                    ord($str[5]) << 16  |
                    ord($str[6]) << 8   |
                    ord($str[7]);
        } else {
            return $this->readLong32Bit();
        }
    }

    /**
     * Writes long integer to the end of file
     *
     * @param integer $value
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeLong($value)
    {
        /**
         * Check, that we work in 64-bit mode.
         * fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            settype($value, 'integer');
            $this->_fwrite( chr($value>>56 & 0xFF) .
                            chr($value>>48 & 0xFF) .
                            chr($value>>40 & 0xFF) .
                            chr($value>>32 & 0xFF) .
                            chr($value>>24 & 0xFF) .
                            chr($value>>16 & 0xFF) .
                            chr($value>>8  & 0xFF) .
                            chr($value     & 0xFF),   8  );
        } else {
            $this->writeLong32Bit($value);
        }
    }


    /**
     * Returns a long integer from the current position in the file,
     * advances the file pointer and returns it as float (for 32-bit platforms).
     *
     * The value is read as two 32-bit words (high word first). It is returned
     * as an integer when it fits into the low word, otherwise as a float.
     *
     * @return integer|float
     * @throws Zend_Search_Lucene_Exception
     */
    public function readLong32Bit()
    {
        $wordHigh = $this->readInt();
        $wordLow  = $this->readInt();

        if ($wordHigh & (int)0x80000000) {
            // It's a negative value since the highest bit is set
            if ($wordHigh == (int)0xFFFFFFFF  &&  ($wordLow & (int)0x80000000)) {
                return $wordLow;
            } else {
                // require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
            }

        }

        if ($wordLow < 0) {
            // Value is larger than 0x7FFF FFFF. Represent low word as float.
            $wordLow &= 0x7FFFFFFF;
            $wordLow += (float)0x80000000;
        }

        if ($wordHigh == 0) {
            // Return value as integer if possible
            return $wordLow;
        }

        return $wordHigh*(float)0x100000000/* 0x00000001 00000000 */ + $wordLow;
    }


    /**
     * Writes long integer to the end of file (32-bit platforms implementation)
     *
     * The value is split into a high and a low 32-bit word which are written
     * with writeInt(), high word first.
     *
     * @param integer|float $value
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeLong32Bit($value)
    {
        if ($value < (int)0x80000000) {
            // require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
        }

        if ($value < 0) {
            $wordHigh = (int)0xFFFFFFFF;
            $wordLow  = (int)$value;
        } else {
            $wordHigh = (int)($value/(float)0x100000000/* 0x00000001 00000000 */);
            $wordLow  = $value - $wordHigh*(float)0x100000000/* 0x00000001 00000000 */;

            if ($wordLow > 0x7FFFFFFF) {
                // Highest bit of low word is set. Translate it to the corresponding negative integer value
                $wordLow -= 0x80000000;
                $wordLow |= 0x80000000;
            }
        }

        $this->writeInt($wordHigh);
        $this->writeInt($wordLow);
    }


    /**
     * Returns a variable-length integer from the current
     * position in the file and advances the file pointer.
     *
     * Every byte contributes its low seven bits, least significant group
     * first; a set high bit means that another byte follows.
     *
     * @return integer
     */
    public function readVInt()
    {
        $nextByte = ord($this->_fread(1));
        $val = $nextByte & 0x7F;

        for ($shift=7; ($nextByte & 0x80) != 0; $shift += 7) {
            $nextByte = ord($this->_fread(1));
            $val |= ($nextByte & 0x7F) << $shift;
        }
        return $val;
    }

    /**
     * Writes a variable-length integer to the end of file.
     *
     * The value is emitted seven bits at a time, least significant group
     * first; every byte except the last one has its high bit set.
     *
     * @param integer $value
     */
    public function writeVInt($value)
    {
        settype($value, 'integer');
        while ($value > 0x7F) {
            $this->_fwrite(chr( ($value & 0x7F)|0x80 ));
            $value >>= 7;
        }
        $this->_fwrite(chr($value));
    }


    /**
     * Reads a string from the current position in the file
     * and advances the file pointer.
     *
     * The stored form is a VInt character count followed by the characters.
     * A NUL character is stored as the two-byte sequence \xC0\x80 and is
     * converted back to a single \x00 byte here.
     *
     * @return string
     */
    public function readString()
    {
        $strlen = $this->readVInt();
        if ($strlen == 0) {
            return '';
        } else {
            /**
             * This implementation supports only Basic Multilingual Plane
             * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
             * "supplementary characters" (characters whose code points are
             * greater than 0xFFFF)
             * Java 2 represents these characters as a pair of char (16-bit)
             * values, the first from the high-surrogates range (0xD800-0xDBFF),
             * the second from the low-surrogates range (0xDC00-0xDFFF). Then
             * they are encoded as usual UTF-8 characters in six bytes.
             * Standard UTF-8 representation uses four bytes for supplementary
             * characters.
             */

            // Every character occupies at least one byte, so the character
            // count is a safe lower bound for the first read. Continuation
            // bytes are fetched on demand while scanning; $strlen tracks the
            // number of bytes the buffer is expected to hold.
            $str_val = $this->_fread($strlen);

            for ($count = 0; $count < $strlen; $count++ ) {
                $leadByte = ord($str_val[$count]);

                if (($leadByte & 0xC0) == 0xC0) {
                    $addBytes = 1;
                    if ($leadByte & 0x20 ) {
                        $addBytes++;

                        // Never used. Java2 doesn't encode strings in four bytes
                        if ($leadByte & 0x10 ) {
                            $addBytes++;
                        }
                    }
                    $str_val .= $this->_fread($addBytes);
                    $strlen += $addBytes;

                    // Check for null character. Java2 encodes null character
                    // in two bytes.
                    if ($leadByte == 0xC0 &&
                        isset($str_val[$count+1]) &&
                        ord($str_val[$count+1]) == 0x80   ) {
                        // Collapse \xC0\x80 into a real NUL byte. The buffer
                        // becomes one byte shorter, so the expected length is
                        // reduced as well and $count stays on the NUL byte;
                        // the loop increment then moves to the next character.
                        $str_val = substr($str_val, 0, $count)
                                 . "\x00"
                                 . substr($str_val, $count+2);
                        $strlen--;
                    } else {
                        // Skip the continuation bytes of this character
                        $count += $addBytes;
                    }
                }
            }

            return $str_val;
        }
    }

    /**
     * Writes a string to the end of file.
     *
     * The stored form is a VInt character count followed by the characters.
     * NUL characters are written as the two-byte sequence \xC0\x80.
     *
     * @param string $str
     * @throws Zend_Search_Lucene_Exception  if the computed character count is negative
     */
    public function writeString($str)
    {
        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // convert input to a string before iterating string characters
        settype($str, 'string');

        $chars = $strlen = strlen($str);
        $containNullChars = false;

        for ($count = 0; $count < $strlen; $count++ ) {
            /**
             * String is already in Java 2 representation.
             * We should only calculate actual string length and replace
             * \x00 by \xC0\x80
             */
            $byte = ord($str[$count]);

            if (($byte & 0xC0) == 0xC0) {
                $addBytes = 1;
                if ($byte & 0x20 ) {
                    $addBytes++;

                    // Never used. Java2 doesn't encode strings in four bytes
                    // and we don't support non-BMP characters
                    if ($byte & 0x10 ) {
                        $addBytes++;
                    }
                }
                // Continuation bytes do not count as characters
                $chars -= $addBytes;
                $count += $addBytes;
            } elseif ($byte == 0) {
                // A NUL can never be a lead byte, so it has to be detected
                // outside of the multi-byte branch.
                $containNullChars = true;
            }
        }

        if ($chars < 0) {
            // require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
        }

        $this->writeVInt($chars);
        if ($containNullChars) {
            // str_replace() takes (search, replace, subject)
            $this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
        } else {
            $this->_fwrite($str);
        }
    }


    /**
     * Reads binary data from the current position in the file
     * and advances the file pointer.
     *
     * The data is stored as a VInt byte count followed by that many bytes.
     *
     * @return string
     */
    public function readBinary()
    {
        return $this->_fread($this->readVInt());
    }
}