File indexing completed on 2024-12-22 05:36:56

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Pdf
0017  * @subpackage FileParser
0018  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0019  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0020  * @version    $Id$
0021  */
0022 
0023 /**
0024  * Abstract utility class for parsing binary files.
0025  *
0026  * Provides a library of methods to quickly navigate and extract various data
0027  * types (signed and unsigned integers, floating- and fixed-point numbers,
0028  * strings, etc.) from the file.
0029  *
0030  * File access is managed via a {@link Zend_Pdf_FileParserDataSource} object.
0031  * This allows the same parser code to work with many different data sources:
0032  * in-memory objects, filesystem files, etc.
0033  *
0034  * @package    Zend_Pdf
0035  * @subpackage FileParser
0036  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0037  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0038  */
abstract class Zend_Pdf_FileParser
{
  /**** Class Constants ****/

    /**
     * Little-endian byte order (0x04 0x03 0x02 0x01).
     */
    const BYTE_ORDER_LITTLE_ENDIAN = 0;

    /**
     * Big-endian byte order (0x01 0x02 0x03 0x04).
     */
    const BYTE_ORDER_BIG_ENDIAN    = 1;



  /**** Instance Variables ****/


    /**
     * Flag indicating that the file has passed a cursory validation check.
     * @var boolean
     */
    protected $_isScreened = false;

    /**
     * Flag indicating that the file has been successfully parsed.
     * @var boolean
     */
    protected $_isParsed = false;

    /**
     * Object representing the data source to be parsed.
     * @var Zend_Pdf_FileParserDataSource
     */
    protected $_dataSource = null;



  /**** Public Interface ****/


  /* Abstract Methods */

    /**
     * Performs a cursory check to verify that the binary file is in the expected
     * format. Intended to quickly weed out obviously bogus files.
     *
     * Must set $this->_isScreened to true if successful.
     *
     * @throws Zend_Pdf_Exception
     */
    abstract public function screen();

    /**
     * Reads and parses the complete binary file.
     *
     * Must set $this->_isParsed to true if successful.
     *
     * @throws Zend_Pdf_Exception
     */
    abstract public function parse();


  /* Object Lifecycle */

    /**
     * Object constructor.
     *
     * Verifies that the data source has been properly initialized: a data
     * source reporting a size of zero is rejected.
     *
     * @param Zend_Pdf_FileParserDataSource $dataSource
     * @throws Zend_Pdf_Exception
     */
    public function __construct(Zend_Pdf_FileParserDataSource $dataSource)
    {
        if ($dataSource->getSize() == 0) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('The data source has not been properly initialized',
                                         Zend_Pdf_Exception::BAD_DATA_SOURCE);
        }
        $this->_dataSource = $dataSource;
    }

    /**
     * Object destructor.
     *
     * Discards the data source object.
     */
    public function __destruct()
    {
        $this->_dataSource = null;
    }


  /* Accessors */

    /**
     * Returns true if the file has passed a cursory validation check.
     *
     * @return boolean
     */
    public function isScreened()
    {
        return $this->_isScreened;
    }

    /**
     * Returns true if the file has been successfully parsed.
     *
     * @return boolean
     */
    public function isParsed()
    {
        return $this->_isParsed;
    }

    /**
     * Returns the data source object representing the file being parsed.
     *
     * @return Zend_Pdf_FileParserDataSource
     */
    public function getDataSource()
    {
        return $this->_dataSource;
    }


  /* Primitive Methods */

    /**
     * Convenience wrapper for the data source object's moveToOffset() method.
     *
     * @param integer $offset Destination byte offset.
     * @throws Zend_Pdf_Exception
     */
    public function moveToOffset($offset)
    {
        $this->_dataSource->moveToOffset($offset);
    }

    /**
     * Convenience wrapper for the data source object's getOffset() method.
     *
     * @return integer Whatever the data source reports (presumably the
     *   current byte offset; see the data source class to confirm).
     */
    public function getOffset()
    {
        return $this->_dataSource->getOffset();
    }

    /**
     * Convenience wrapper for the data source object's getSize() method.
     *
     * @return integer Whatever the data source reports (presumably the total
     *   size in bytes; see the data source class to confirm).
     */
    public function getSize()
    {
        return $this->_dataSource->getSize();
    }

    /**
     * Convenience wrapper for the data source object's readBytes() method.
     *
     * @param integer $byteCount Number of bytes to read.
     * @return string
     * @throws Zend_Pdf_Exception
     */
    public function readBytes($byteCount)
    {
        return $this->_dataSource->readBytes($byteCount);
    }

    /**
     * Convenience wrapper for the data source object's skipBytes() method.
     *
     * @param integer $byteCount Number of bytes to skip.
     * @throws Zend_Pdf_Exception
     */
    public function skipBytes($byteCount)
    {
        $this->_dataSource->skipBytes($byteCount);
    }


  /* Parser Methods */

    /**
     * Reads the signed integer value from the binary file at the current byte
     * offset.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs. Both arguments are validated before any bytes are read,
     * so an invalid size or byte order does not move the offset.
     *
     * @param integer $size Size of integer in bytes: 1-4
     * @param integer $byteOrder (optional) Big- or little-endian byte order.
     *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
     *   If omitted, uses big-endian.
     * @return integer
     * @throws Zend_Pdf_Exception
     */
    public function readInt($size, $byteOrder = self::BYTE_ORDER_BIG_ENDIAN)
    {
        if (($size < 1) || ($size > 4)) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Invalid signed integer size: $size",
                                         Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
        }
        $this->_requireValidByteOrder($byteOrder);

        $bytes = $this->_dataSource->readBytes($size);
        if ($byteOrder != self::BYTE_ORDER_BIG_ENDIAN) {
            /* Little-endian: reverse the bytes so that a single
             * most-significant-byte-first decoder serves both byte orders.
             */
            $bytes = strrev($bytes);
        }

        /* unpack() will not work for this method because it always works in
         * the host byte order for signed integers. It also does not allow for
         * variable integer sizes.
         */
        $number = ord($bytes[0]);
        if (($number & 0x80) == 0x80) {
            /* The sign bit is set, so this number is negative. Build the
             * one's complement of the value byte by byte...
             */
            $number = (~$number) & 0xff;
            for ($i = 1; $i < $size; $i++) {
                $number = ($number << 8) | ((~ord($bytes[$i])) & 0xff);
            }
            /* ...then complement the whole thing again to get the negative
             * number back (we didn't add one above so won't subtract it
             * here). This works reliably on both 32- and 64-bit systems.
             */
            $number = ~$number;
        } else {
            for ($i = 1; $i < $size; $i++) {
                $number = ($number << 8) | ord($bytes[$i]);
            }
        }
        return $number;
    }

    /**
     * Reads the unsigned integer value from the binary file at the current byte
     * offset.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs. Both arguments are validated before any bytes are read,
     * so an invalid size or byte order does not move the offset.
     *
     * NOTE: If you ask for a 4-byte unsigned integer on a 32-bit machine, the
     * resulting value WILL BE SIGNED because PHP uses signed integers internally
     * for everything. To guarantee portability, be sure to use bitwise
     * operators on large unsigned integers!
     *
     * @param integer $size Size of integer in bytes: 1-4
     * @param integer $byteOrder (optional) Big- or little-endian byte order.
     *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
     *   If omitted, uses big-endian.
     * @return integer
     * @throws Zend_Pdf_Exception
     */
    public function readUInt($size, $byteOrder = self::BYTE_ORDER_BIG_ENDIAN)
    {
        if (($size < 1) || ($size > 4)) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Invalid unsigned integer size: $size",
                                         Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
        }
        $this->_requireValidByteOrder($byteOrder);

        $bytes = $this->_dataSource->readBytes($size);
        if ($byteOrder != self::BYTE_ORDER_BIG_ENDIAN) {
            /* Little-endian: reverse into most-significant-byte-first order.
             */
            $bytes = strrev($bytes);
        }

        /* unpack() is a bit heavyweight for this simple conversion. Just
         * work the bytes directly.
         */
        $number = ord($bytes[0]);
        for ($i = 1; $i < $size; $i++) {
            $number = ($number << 8) | ord($bytes[$i]);
        }
        return $number;
    }

    /**
     * Returns true if the specified bit is set in the integer bitfield.
     *
     * @param integer $bit Bit number to test (i.e. - 0-31)
     * @param integer $bitField
     * @return boolean
     */
    public function isBitSet($bit, $bitField)
    {
        $bitMask = 1 << $bit;
        return (($bitField & $bitMask) == $bitMask);
    }

    /**
     * Reads the signed fixed-point number from the binary file at the current
     * byte offset.
     *
     * Common fixed-point sizes are 2.14 and 16.16.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs. The total width must be a whole number of bytes, and is
     * further limited by {@link readInt()}, which does the actual read.
     *
     * @param integer $mantissaBits Number of bits in the mantissa
     * @param integer $fractionBits Number of bits in the fraction
     * @param integer $byteOrder (optional) Big- or little-endian byte order.
     *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
     *   If omitted, uses big-endian.
     * @return float
     * @throws Zend_Pdf_Exception
     */
    public function readFixed($mantissaBits, $fractionBits,
                              $byteOrder = self::BYTE_ORDER_BIG_ENDIAN)
    {
        $bitsToRead = $mantissaBits + $fractionBits;
        if (($bitsToRead % 8) !== 0) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Fixed-point numbers are whole bytes',
                                         Zend_Pdf_Exception::BAD_FIXED_POINT_SIZE);
        }
        /* Read the raw bits as a signed integer, then scale by 2^fractionBits
         * to move the binary point into place.
         */
        $number = $this->readInt(($bitsToRead >> 3), $byteOrder) / (1 << $fractionBits);
        return $number;
    }

    /**
     * Reads the Unicode UTF-16-encoded string from the binary file at the
     * current byte offset.
     *
     * The byte order of the UTF-16 string must be specified. You must also
     * supply the desired resulting character set.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs. The byte order is validated before any bytes are read,
     * so an invalid byte order does not move the offset. A $byteCount of zero
     * returns an empty string without touching the data source.
     *
     * @todo Consider changing $byteCount to a character count. They are not
     *   always equivalent (in the case of surrogates).
     * @todo Make $byteOrder optional if there is a byte-order mark (BOM) in the
     *   string being extracted.
     *
     * @param integer $byteCount Number of bytes (characters * 2) to return.
     * @param integer $byteOrder (optional) Big- or little-endian byte order.
     *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
     *   If omitted, uses big-endian.
     * @param string $characterSet (optional) Desired resulting character set.
     *   You may use any character set supported by {@link iconv()}. If omitted,
     *   uses 'current locale'.
     * @return string The converted string. The result of iconv() is returned
     *   as-is, so this is false if iconv() fails to convert the data.
     * @throws Zend_Pdf_Exception
     */
    public function readStringUTF16($byteCount,
                                    $byteOrder = self::BYTE_ORDER_BIG_ENDIAN,
                                    $characterSet = '')
    {
        if ($byteCount == 0) {
            return '';
        }
        $this->_requireValidByteOrder($byteOrder);

        $sourceCharacterSet = ($byteOrder == self::BYTE_ORDER_BIG_ENDIAN)
                            ? 'UTF-16BE'
                            : 'UTF-16LE';

        $bytes = $this->_dataSource->readBytes($byteCount);
        if ($characterSet == $sourceCharacterSet) {
            /* Already in the requested encoding; no conversion needed.
             */
            return $bytes;
        }
        return iconv($sourceCharacterSet, $characterSet, $bytes);
    }

    /**
     * Reads the Mac Roman-encoded string from the binary file at the current
     * byte offset.
     *
     * You must supply the desired resulting character set.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs. A $byteCount of zero returns an empty string without
     * touching the data source.
     *
     * @param integer $byteCount Number of bytes (characters) to return.
     * @param string $characterSet (optional) Desired resulting character set.
     *   You may use any character set supported by {@link iconv()}. If omitted,
     *   uses 'current locale'.
     * @return string The converted string. The result of iconv() is returned
     *   as-is, so this is false if iconv() fails to convert the data.
     * @throws Zend_Pdf_Exception
     */
    public function readStringMacRoman($byteCount, $characterSet = '')
    {
        if ($byteCount == 0) {
            return '';
        }
        $bytes = $this->_dataSource->readBytes($byteCount);
        if ($characterSet == 'MacRoman') {
            return $bytes;
        }
        return iconv('MacRoman', $characterSet, $bytes);
    }

    /**
     * Reads the Pascal string from the binary file at the current byte offset.
     *
     * The length of the Pascal string is determined by reading the length bytes
     * which precede the character data (as a big-endian unsigned integer). The
     * character data is treated as ASCII. You must supply the desired resulting
     * character set.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs.
     *
     * @param string $characterSet (optional) Desired resulting character set.
     *   You may use any character set supported by {@link iconv()}. If omitted,
     *   uses 'current locale'.
     * @param integer $lengthBytes (optional) Number of bytes that make up the
     *   length. Default is 1.
     * @return string The converted string. The result of iconv() is returned
     *   as-is, so this is false if iconv() fails to convert the data.
     * @throws Zend_Pdf_Exception
     */
    public function readStringPascal($characterSet = '', $lengthBytes = 1)
    {
        $byteCount = $this->readUInt($lengthBytes);
        if ($byteCount == 0) {
            return '';
        }
        $bytes = $this->_dataSource->readBytes($byteCount);
        if ($characterSet == 'ASCII') {
            return $bytes;
        }
        return iconv('ASCII', $characterSet, $bytes);
    }



  /**** Internal Methods ****/

    /**
     * Throws an exception unless the byte order matches one of the
     * BYTE_ORDER_ constants.
     *
     * Called before any bytes are consumed so that a bad argument leaves the
     * data source offset untouched. Uses loose comparison, as the parser
     * methods themselves do, so values such as '1' or true keep working.
     *
     * @param integer $byteOrder
     * @throws Zend_Pdf_Exception
     */
    private function _requireValidByteOrder($byteOrder)
    {
        if (($byteOrder != self::BYTE_ORDER_BIG_ENDIAN) &&
            ($byteOrder != self::BYTE_ORDER_LITTLE_ENDIAN)) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
                                         Zend_Pdf_Exception::INVALID_BYTE_ORDER);
        }
    }

}