File indexing completed on 2025-01-19 05:21:27
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Storage
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */

/** Zend_Search_Lucene_Storage_File */
// require_once 'Zend/Search/Lucene/Storage/File.php';

/**
 * In-memory implementation of the Lucene storage file abstraction.
 *
 * The whole "file" is kept in a PHP string. Reads are served from the
 * current position; every write appends to the end of the string and
 * moves the position to the new end (only append is supported).
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Storage
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Storage_File_Memory extends Zend_Search_Lucene_Storage_File
{
    /**
     * File data
     *
     * @var string
     */
    private $_data;

    /**
     * File position (byte offset into $_data)
     *
     * @var integer
     */
    private $_position = 0;


    /**
     * Object constructor
     *
     * @param string $data Initial content of the memory file
     */
    public function __construct($data)
    {
        $this->_data = $data;
    }

    /**
     * Reads $length number of bytes at the current position in the
     * file and advances the file pointer.
     *
     * The pointer is advanced by $length even if fewer bytes were available.
     *
     * @param integer $length
     * @return string
     */
    protected function _fread($length = 1)
    {
        $returnValue = substr($this->_data, $this->_position, $length);
        $this->_position += $length;
        return $returnValue;
    }


    /**
     * Sets the file position indicator.
     * The new position, measured in bytes from the beginning of the file,
     * is obtained by adding offset to the position specified by whence,
     * whose values are defined as follows:
     * SEEK_SET - Set position equal to offset bytes.
     * SEEK_CUR - Set position to current location plus offset.
     * SEEK_END - Set position to end-of-file plus offset. (To move to
     * a position before the end-of-file, you need to pass a negative value
     * in offset.)
     * Upon success, returns 0; otherwise (unknown $whence), returns -1
     * and leaves the position untouched.
     *
     * @param integer $offset
     * @param integer $whence
     * @return integer
     */
    public function seek($offset, $whence=SEEK_SET)
    {
        switch ($whence) {
            case SEEK_SET:
                $this->_position = $offset;
                break;

            case SEEK_CUR:
                $this->_position += $offset;
                break;

            case SEEK_END:
                $this->_position = strlen($this->_data) + $offset;
                break;

            default:
                // Unknown origin: report failure as documented
                return -1;
        }

        return 0;
    }

    /**
     * Get file position.
     *
     * @return integer
     */
    public function tell()
    {
        return $this->_position;
    }

    /**
     * Flush output.
     *
     * Nothing is buffered for a memory file, so this always returns true.
     *
     * @return boolean
     */
    public function flush()
    {
        // Do nothing

        return true;
    }

    /**
     * Writes $length number of bytes (all, if $length===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $length
     */
    protected function _fwrite($data, $length=null)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        if ($length !== null) {
            $this->_data .= substr($data, 0, $length);
        } else {
            $this->_data .= $data;
        }

        $this->_position = strlen($this->_data);
    }

    /**
     * Lock file
     *
     * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
     *
     * @param integer $lockType
     * @param boolean $nonBlockinLock Ignored by this implementation
     * @return boolean Always true
     */
    public function lock($lockType, $nonBlockinLock = false)
    {
        // Memory files can't be shared
        // do nothing

        return true;
    }

    /**
     * Unlock file
     */
    public function unlock()
    {
        // Memory files can't be shared
        // do nothing
    }

    /**
     * Reads a byte from the current position in the file
     * and advances the file pointer.
     *
     * @return integer
     */
    public function readByte()
    {
        return ord($this->_data[$this->_position++]);
    }

    /**
     * Writes a byte to the end of the file.
     *
     * @param integer $byte
     * @return integer Always 1
     */
    public function writeByte($byte)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        $this->_data .= chr($byte);
        $this->_position = strlen($this->_data);

        return 1;
    }

    /**
     * Read num bytes from the current position in the file
     * and advances the file pointer.
     *
     * @param integer $num
     * @return string
     */
    public function readBytes($num)
    {
        $returnValue = substr($this->_data, $this->_position, $num);
        $this->_position += $num;

        return $returnValue;
    }

    /**
     * Writes num bytes of data (all, if $num===null) to the end
     * of the string.
     *
     * @param string $data
     * @param integer $num
     */
    public function writeBytes($data, $num=null)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        if ($num !== null) {
            $this->_data .= substr($data, 0, $num);
        } else {
            $this->_data .= $data;
        }

        $this->_position = strlen($this->_data);
    }


    /**
     * Reads an integer (four bytes, most significant byte first) from the
     * current position in the file and advances the file pointer.
     *
     * @return integer
     */
    public function readInt()
    {
        $str = substr($this->_data, $this->_position, 4);
        $this->_position += 4;

        return  ord($str[0]) << 24 |
                ord($str[1]) << 16 |
                ord($str[2]) << 8  |
                ord($str[3]);
    }


    /**
     * Writes an integer (four bytes, most significant byte first)
     * to the end of file.
     *
     * @param integer $value
     */
    public function writeInt($value)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        settype($value, 'integer');
        $this->_data .= chr($value>>24 & 0xFF) .
                        chr($value>>16 & 0xFF) .
                        chr($value>>8  & 0xFF) .
                        chr($value     & 0xFF);

        $this->_position = strlen($this->_data);
    }


    /**
     * Returns a long integer (eight bytes, most significant byte first)
     * from the current position in the file and advances the file pointer.
     *
     * @return integer
     * @throws Zend_Search_Lucene_Exception
     */
    public function readLong()
    {
        /**
         * Check, that we work in 64-bit mode.
         * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            $str = substr($this->_data, $this->_position, 8);
            $this->_position += 8;

            return  ord($str[0]) << 56  |
                    ord($str[1]) << 48  |
                    ord($str[2]) << 40  |
                    ord($str[3]) << 32  |
                    ord($str[4]) << 24  |
                    ord($str[5]) << 16  |
                    ord($str[6]) << 8   |
                    ord($str[7]);
        } else {
            return $this->readLong32Bit();
        }
    }

    /**
     * Writes long integer (eight bytes, most significant byte first)
     * to the end of file
     *
     * @param integer $value
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeLong($value)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        /**
         * Check, that we work in 64-bit mode.
         * fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            settype($value, 'integer');
            $this->_data .= chr($value>>56 & 0xFF) .
                            chr($value>>48 & 0xFF) .
                            chr($value>>40 & 0xFF) .
                            chr($value>>32 & 0xFF) .
                            chr($value>>24 & 0xFF) .
                            chr($value>>16 & 0xFF) .
                            chr($value>>8  & 0xFF) .
                            chr($value     & 0xFF);
        } else {
            $this->writeLong32Bit($value);
        }

        $this->_position = strlen($this->_data);
    }


    /**
     * Returns a long integer from the current position in the file,
     * advances the file pointer and return it as float (for 32-bit platforms).
     *
     * @return integer|float
     * @throws Zend_Search_Lucene_Exception
     */
    public function readLong32Bit()
    {
        $wordHigh = $this->readInt();
        $wordLow  = $this->readInt();

        if ($wordHigh & (int)0x80000000) {
            // It's a negative value since the highest bit is set
            if ($wordHigh == (int)0xFFFFFFFF  &&  ($wordLow & (int)0x80000000)) {
                return $wordLow;
            } else {
                // require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
            }

        }

        if ($wordLow < 0) {
            // Value is larger than 0x7FFF FFFF. Represent low word as float.
            $wordLow &= 0x7FFFFFFF;
            $wordLow += (float)0x80000000;
        }

        if ($wordHigh == 0) {
            // Return value as integer if possible
            return $wordLow;
        }

        return $wordHigh*(float)0x100000000/* 0x00000001 00000000 */ + $wordLow;
    }


    /**
     * Writes long integer to the end of file (32-bit platforms implementation)
     *
     * @param integer|float $value
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeLong32Bit($value)
    {
        if ($value < (int)0x80000000) {
            // require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
        }

        if ($value < 0) {
            $wordHigh = (int)0xFFFFFFFF;
            $wordLow  = (int)$value;
        } else {
            $wordHigh = (int)($value/(float)0x100000000/* 0x00000001 00000000 */);
            $wordLow  = $value - $wordHigh*(float)0x100000000/* 0x00000001 00000000 */;

            if ($wordLow > 0x7FFFFFFF) {
                // Highest bit of low word is set. Translate it to the corresponding negative integer value
                $wordLow -= 0x80000000;
                $wordLow |= 0x80000000;
            }
        }

        $this->writeInt($wordHigh);
        $this->writeInt($wordLow);
    }

    /**
     * Returns a variable-length integer from the current
     * position in the file and advances the file pointer.
     *
     * Each byte contributes its low seven bits (least significant group
     * first); a set high bit means another byte follows.
     *
     * @return integer
     */
    public function readVInt()
    {
        $nextByte = ord($this->_data[$this->_position++]);
        $val = $nextByte & 0x7F;

        for ($shift=7; ($nextByte & 0x80) != 0; $shift += 7) {
            $nextByte = ord($this->_data[$this->_position++]);
            $val |= ($nextByte & 0x7F) << $shift;
        }
        return $val;
    }

    /**
     * Writes a variable-length integer to the end of file.
     *
     * @param integer $value
     */
    public function writeVInt($value)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        settype($value, 'integer');
        while ($value > 0x7F) {
            $this->_data .= chr( ($value & 0x7F)|0x80 );
            $value >>= 7;
        }
        $this->_data .= chr($value);

        $this->_position = strlen($this->_data);
    }


    /**
     * Reads a string from the current position in the file
     * and advances the file pointer.
     *
     * The stored length prefix counts characters, not bytes, so extra bytes
     * are pulled in for every multi-byte lead byte encountered. The two-byte
     * sequence 0xC0 0x80 is decoded to a single NUL byte.
     *
     * @return string
     */
    public function readString()
    {
        $strlen = $this->readVInt();
        if ($strlen == 0) {
            return '';
        }

        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        $str_val = substr($this->_data, $this->_position, $strlen);
        $this->_position += $strlen;

        for ($count = 0; $count < $strlen; $count++ ) {
            $leadByte = ord($str_val[$count]);

            if (($leadByte & 0xC0) != 0xC0) {
                // Single-byte character, nothing more to fetch
                continue;
            }

            $addBytes = 1;
            if ($leadByte & 0x20) {
                $addBytes++;

                // Never used. Java2 doesn't encode strings in four bytes
                if ($leadByte & 0x10) {
                    $addBytes++;
                }
            }
            $str_val .= substr($this->_data, $this->_position, $addBytes);
            $this->_position += $addBytes;
            $strlen += $addBytes;

            // Check for null character. Java2 encodes null character
            // in two bytes.
            if (substr($str_val, $count, 2) === "\xC0\x80") {
                // Collapse the pair into one real NUL byte. (Assigning the
                // integer 0 to a string offset would store the digit "0".)
                $str_val = substr($str_val, 0, $count)
                         . "\x00"
                         . substr($str_val, $count + 2);

                // The buffer is one byte shorter now and the character
                // occupies a single byte, so do not skip anything.
                $strlen--;
            } else {
                // Skip the continuation bytes of this character
                $count += $addBytes;
            }
        }

        return $str_val;
    }

    /**
     * Writes a string to the end of file.
     *
     * The string is prefixed with its character count as a variable-length
     * integer; NUL bytes are written as the two-byte sequence 0xC0 0x80.
     *
     * @param string $str
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeString($str)
    {
        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        // convert input to a string before iterating string characters
        settype($str, 'string');

        $chars = $strlen = strlen($str);

        for ($count = 0; $count < $strlen; $count++ ) {
            /**
             * String is already in Java 2 representation.
             * We should only calculate actual string length and replace
             * \x00 by \xC0\x80
             */
            $leadByte = ord($str[$count]);

            if (($leadByte & 0xC0) == 0xC0) {
                $addBytes = 1;
                if ($leadByte & 0x20) {
                    $addBytes++;

                    // Never used. Java2 doesn't encode strings in four bytes
                    // and we don't support non-BMP characters
                    if ($leadByte & 0x10) {
                        $addBytes++;
                    }
                }
                // Continuation bytes do not count as characters
                $chars -= $addBytes;
                $count += $addBytes;
            }
        }

        if ($chars < 0) {
            // require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
        }

        $this->writeVInt($chars);

        // A NUL byte is one character either way, so $chars is unaffected.
        // str_replace() returns the string unchanged when there is no NUL.
        $this->_data .= str_replace("\x00", "\xC0\x80", $str);

        $this->_position = strlen($this->_data);
    }


    /**
     * Reads binary data from the current position in the file
     * and advances the file pointer.
     *
     * The data is prefixed with its byte length as a variable-length integer.
     *
     * @return string
     */
    public function readBinary()
    {
        $length = $this->readVInt();
        $returnValue = substr($this->_data, $this->_position, $length);
        $this->_position += $length;
        return $returnValue;
    }
}