File indexing completed on 2024-12-22 05:36:56
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Pdf
 * @subpackage FileParser
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */

/**
 * Abstract utility class for parsing binary files.
 *
 * Provides a library of methods to quickly navigate and extract various data
 * types (signed and unsigned integers, floating- and fixed-point numbers,
 * strings, etc.) from the file.
 *
 * File access is managed via a {@link Zend_Pdf_FileParserDataSource} object.
 * This allows the same parser code to work with many different data sources:
 * in-memory objects, filesystem files, etc.
 *
 * @package    Zend_Pdf
 * @subpackage FileParser
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Pdf_FileParser
{
  /**** Class Constants ****/

    /**
     * Little-endian byte order (0x04 0x03 0x02 0x01).
     */
    const BYTE_ORDER_LITTLE_ENDIAN = 0;

    /**
     * Big-endian byte order (0x01 0x02 0x03 0x04).
     */
    const BYTE_ORDER_BIG_ENDIAN    = 1;



  /**** Instance Variables ****/


    /**
     * Flag indicating that the file has passed a cursory validation check.
     * @var boolean
     */
    protected $_isScreened = false;

    /**
     * Flag indicating that the file has been successfully parsed.
     * @var boolean
     */
    protected $_isParsed = false;

    /**
     * Object representing the data source to be parsed.
     * @var Zend_Pdf_FileParserDataSource
     */
    protected $_dataSource = null;



  /**** Public Interface ****/


  /* Abstract Methods */

    /**
     * Performs a cursory check to verify that the binary file is in the expected
     * format. Intended to quickly weed out obviously bogus files.
     *
     * Must set $this->_isScreened to true if successful.
     *
     * @throws Zend_Pdf_Exception
     */
    abstract public function screen();

    /**
     * Reads and parses the complete binary file.
     *
     * Must set $this->_isParsed to true if successful.
     *
     * @throws Zend_Pdf_Exception
     */
    abstract public function parse();


  /* Object Lifecycle */

    /**
     * Object constructor.
     *
     * Verifies that the data source has been properly initialized: a data
     * source reporting a size of zero is rejected.
     *
     * @param Zend_Pdf_FileParserDataSource $dataSource
     * @throws Zend_Pdf_Exception
     */
    public function __construct(Zend_Pdf_FileParserDataSource $dataSource)
    {
        if ($dataSource->getSize() == 0) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('The data source has not been properly initialized',
                                         Zend_Pdf_Exception::BAD_DATA_SOURCE);
        }
        $this->_dataSource = $dataSource;
    }

    /**
     * Object destructor.
     *
     * Discards the data source object.
     */
    public function __destruct()
    {
        $this->_dataSource = null;
    }


  /* Accessors */

    /**
     * Returns true if the file has passed a cursory validation check.
     *
     * @return boolean
     */
    public function isScreened()
    {
        return $this->_isScreened;
    }

    /**
     * Returns true if the file has been successfully parsed.
     *
     * @return boolean
     */
    public function isParsed()
    {
        return $this->_isParsed;
    }

    /**
     * Returns the data source object representing the file being parsed.
     *
     * @return Zend_Pdf_FileParserDataSource
     */
    public function getDataSource()
    {
        return $this->_dataSource;
    }


  /* Primitive Methods */

    /**
     * Convenience wrapper for the data source object's moveToOffset() method.
     *
     * @param integer $offset Destination byte offset.
     * @throws Zend_Pdf_Exception
     */
    public function moveToOffset($offset)
    {
        $this->_dataSource->moveToOffset($offset);
    }

    /**
     * Convenience wrapper for the data source object's getOffset() method.
     *
     * @return integer Whatever the data source reports as its current offset.
     */
    public function getOffset()
    {
        return $this->_dataSource->getOffset();
    }

    /**
     * Convenience wrapper for the data source object's getSize() method.
     *
     * @return integer Whatever the data source reports as its size.
     */
    public function getSize()
    {
        return $this->_dataSource->getSize();
    }

    /**
     * Convenience wrapper for the data source object's readBytes() method.
     *
     * @param integer $byteCount Number of bytes to read.
     * @return string
     * @throws Zend_Pdf_Exception
     */
    public function readBytes($byteCount)
    {
        return $this->_dataSource->readBytes($byteCount);
    }

    /**
     * Convenience wrapper for the data source object's skipBytes() method.
     *
     * @param integer $byteCount Number of bytes to skip.
     * @throws Zend_Pdf_Exception
     */
    public function skipBytes($byteCount)
    {
        $this->_dataSource->skipBytes($byteCount);
    }


  /* Parser Methods */

    /**
     * Reads the signed integer value from the binary file at the current byte
     * offset.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs. Both arguments are validated before any bytes are
     * read, so an invalid size or byte order does not move the offset.
     *
     * @param integer $size Size of integer in bytes: 1-4
     * @param integer $byteOrder (optional) Big- or little-endian byte order.
     *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
     *   If omitted, uses big-endian.
     * @return integer
     * @throws Zend_Pdf_Exception
     */
    public function readInt($size, $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
    {
        if (($size < 1) || ($size > 4)) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Invalid signed integer size: $size",
                                         Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
        }
        $this->_validateByteOrder($byteOrder);

        $bytes = $this->_dataSource->readBytes($size);
        /* unpack() will not work for this method because it always works in
         * the host byte order for signed integers. It also does not allow for
         * variable integer sizes.
         */
        if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) {
            /* Most significant byte comes first; its high bit is the sign.
             */
            $number = ord($bytes[0]);
            if (($number & 0x80) == 0x80) {
                /* This number is negative. Build the bitwise complement of
                 * the value byte by byte (always a small positive number).
                 */
                $number = (~ $number) & 0xff;
                for ($i = 1; $i < $size; $i++) {
                    $number = ($number << 8) | ((~ ord($bytes[$i])) & 0xff);
                }
                /* Now turn this back into a negative number by taking the
                 * two's complement (we didn't add one above so won't
                 * subtract it below). This works reliably on both 32- and
                 * 64-bit systems.
                 */
                $number = ~$number;
            } else {
                for ($i = 1; $i < $size; $i++) {
                    $number = ($number << 8) | ord($bytes[$i]);
                }
            }
            return $number;
        }

        /* Little-endian: the last byte is the most significant one and
         * carries the sign bit.
         */
        $isNegative = ((ord($bytes[$size - 1]) & 0x80) == 0x80);
        $number = 0;
        if ($isNegative) {
            /* Negative number. See discussion above.
             */
            for ($i = $size - 1; $i >= 0; $i--) {
                $number |= ((~ ord($bytes[$i])) & 0xff) << ($i * 8);
            }
            $number = ~$number;
        } else {
            for ($i = $size - 1; $i >= 0; $i--) {
                $number |= ord($bytes[$i]) << ($i * 8);
            }
        }
        return $number;
    }

    /**
     * Reads the unsigned integer value from the binary file at the current byte
     * offset.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs. Both arguments are validated before any bytes are
     * read, so an invalid size or byte order does not move the offset.
     *
     * NOTE: If you ask for a 4-byte unsigned integer on a 32-bit machine, the
     * resulting value WILL BE SIGNED because PHP uses signed integers internally
     * for everything. To guarantee portability, be sure to use bitwise
     * operators on large unsigned integers!
     *
     * @param integer $size Size of integer in bytes: 1-4
     * @param integer $byteOrder (optional) Big- or little-endian byte order.
     *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
     *   If omitted, uses big-endian.
     * @return integer
     * @throws Zend_Pdf_Exception
     */
    public function readUInt($size, $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
    {
        if (($size < 1) || ($size > 4)) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Invalid unsigned integer size: $size",
                                         Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
        }
        $this->_validateByteOrder($byteOrder);

        $bytes = $this->_dataSource->readBytes($size);
        /* unpack() is a bit heavyweight for this simple conversion. Just
         * work the bytes directly.
         */
        if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) {
            $number = ord($bytes[0]);
            for ($i = 1; $i < $size; $i++) {
                $number = ($number << 8) | ord($bytes[$i]);
            }
            return $number;
        }

        /* Little-endian: byte $i contributes bits ($i * 8) and upwards.
         */
        $number = 0;
        for ($i = $size - 1; $i >= 0; $i--) {
            $number |= ord($bytes[$i]) << ($i * 8);
        }
        return $number;
    }

    /**
     * Returns true if the specified bit is set in the integer bitfield.
     *
     * A bit number outside the width of a native PHP integer (negative, or
     * greater than or equal to PHP_INT_SIZE * 8) can never be set, so false is
     * returned for it. Without this guard the shift below would produce a
     * zero mask (making every test succeed) or raise an ArithmeticError on
     * PHP 7 and later.
     *
     * @param integer $bit Bit number to test (i.e. - 0-31)
     * @param integer $bitField
     * @return boolean
     */
    public function isBitSet($bit, $bitField)
    {
        if (($bit < 0) || ($bit >= (PHP_INT_SIZE * 8))) {
            return false;
        }
        $bitMask = 1 << $bit;
        return (($bitField & $bitMask) == $bitMask);
    }

    /**
     * Reads the signed fixed-point number from the binary file at the current
     * byte offset.
     *
     * Common fixed-point sizes are 2.14 and 16.16.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs.
     *
     * The value is read as a signed integer of (mantissa + fraction) bits via
     * {@link readInt()} and then divided by 2^fractionBits.
     *
     * @param integer $mantissaBits Number of bits in the mantissa
     * @param integer $fractionBits Number of bits in the fraction
     * @param integer $byteOrder (optional) Big- or little-endian byte order.
     *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
     *   If omitted, uses big-endian.
     * @return float
     * @throws Zend_Pdf_Exception
     */
    public function readFixed($mantissaBits, $fractionBits,
                              $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
    {
        $bitsToRead = $mantissaBits + $fractionBits;
        if (($bitsToRead % 8) !== 0) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Fixed-point numbers are whole bytes',
                                         Zend_Pdf_Exception::BAD_FIXED_POINT_SIZE);
        }
        $number = $this->readInt(($bitsToRead >> 3), $byteOrder) / (1 << $fractionBits);
        return $number;
    }

    /**
     * Reads the Unicode UTF-16-encoded string from the binary file at the
     * current byte offset.
     *
     * The byte order of the UTF-16 string must be specified. You must also
     * supply the desired resulting character set.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs. The byte order is validated before any bytes are read,
     * so an invalid byte order does not move the offset. A zero $byteCount
     * returns an empty string without reading or validating anything.
     *
     * When the requested character set already matches the source encoding
     * the raw bytes are returned unchanged; otherwise the result is whatever
     * {@link iconv()} returns, which is false if the conversion fails.
     *
     * @todo Consider changing $byteCount to a character count. They are not
     *   always equivalent (in the case of surrogates).
     * @todo Make $byteOrder optional if there is a byte-order mark (BOM) in the
     *   string being extracted.
     *
     * @param integer $byteCount Number of bytes (characters * 2) to return.
     * @param integer $byteOrder (optional) Big- or little-endian byte order.
     *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
     *   If omitted, uses big-endian.
     * @param string $characterSet (optional) Desired resulting character set.
     *   You may use any character set supported by {@link iconv()}. If omitted,
     *   uses 'current locale'.
     * @return string
     * @throws Zend_Pdf_Exception
     */
    public function readStringUTF16($byteCount,
                                    $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN,
                                    $characterSet = '')
    {
        if ($byteCount == 0) {
            return '';
        }
        $this->_validateByteOrder($byteOrder);

        $sourceCharacterSet = ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
                            ? 'UTF-16BE'
                            : 'UTF-16LE';

        $bytes = $this->_dataSource->readBytes($byteCount);
        if ($characterSet == $sourceCharacterSet) {
            return $bytes;
        }
        return iconv($sourceCharacterSet, $characterSet, $bytes);
    }

    /**
     * Reads the Mac Roman-encoded string from the binary file at the current
     * byte offset.
     *
     * You must supply the desired resulting character set.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs.
     *
     * When 'MacRoman' is requested the raw bytes are returned unchanged;
     * otherwise the result is whatever {@link iconv()} returns, which is
     * false if the conversion fails.
     *
     * @param integer $byteCount Number of bytes (characters) to return.
     * @param string $characterSet (optional) Desired resulting character set.
     *   You may use any character set supported by {@link iconv()}. If omitted,
     *   uses 'current locale'.
     * @return string
     * @throws Zend_Pdf_Exception
     */
    public function readStringMacRoman($byteCount, $characterSet = '')
    {
        if ($byteCount == 0) {
            return '';
        }
        $bytes = $this->_dataSource->readBytes($byteCount);
        if ($characterSet == 'MacRoman') {
            return $bytes;
        }
        return iconv('MacRoman', $characterSet, $bytes);
    }

    /**
     * Reads the Pascal string from the binary file at the current byte offset.
     *
     * The length of the Pascal string is determined by reading the length bytes
     * which precede the character data. You must supply the desired resulting
     * character set.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs.
     *
     * The length is read with {@link readUInt()} in its default (big-endian)
     * byte order. When 'ASCII' is requested the raw bytes are returned
     * unchanged; otherwise the result is whatever {@link iconv()} returns,
     * which is false if the conversion fails.
     *
     * @param string $characterSet (optional) Desired resulting character set.
     *   You may use any character set supported by {@link iconv()}. If omitted,
     *   uses 'current locale'.
     * @param integer $lengthBytes (optional) Number of bytes that make up the
     *   length. Default is 1.
     * @return string
     * @throws Zend_Pdf_Exception
     */
    public function readStringPascal($characterSet = '', $lengthBytes = 1)
    {
        $byteCount = $this->readUInt($lengthBytes);
        if ($byteCount == 0) {
            return '';
        }
        $bytes = $this->_dataSource->readBytes($byteCount);
        if ($characterSet == 'ASCII') {
            return $bytes;
        }
        return iconv('ASCII', $characterSet, $bytes);
    }



  /**** Internal Methods ****/


    /**
     * Throws an exception unless the value matches one of the BYTE_ORDER_
     * constants.
     *
     * Loose comparison is used on purpose so that exactly the same values are
     * accepted as by the byte-order branches in the read methods.
     *
     * @param integer $byteOrder
     * @throws Zend_Pdf_Exception
     */
    private function _validateByteOrder($byteOrder)
    {
        if (($byteOrder != Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) &&
            ($byteOrder != Zend_Pdf_FileParser::BYTE_ORDER_LITTLE_ENDIAN)) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
                                         Zend_Pdf_Exception::INVALID_BYTE_ORDER);
        }
    }

}