File indexing completed on 2024-05-12 06:02:54

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Pdf
0017  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0018  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0019  * @version    $Id$
0020  */
0021 
0022 /** Internally used classes */
0023 // require_once 'Zend/Pdf/Element.php';
0024 // require_once 'Zend/Pdf/Element/Numeric.php';
0025 
0026 
0027 /** Zend_Pdf_StringParser */
0028 // require_once 'Zend/Pdf/StringParser.php';
0029 
0030 
0031 /**
0032  * PDF file parser
0033  *
0034  * @package    Zend_Pdf
0035  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0036  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0037  */
class Zend_Pdf_Parser
{
    /**
     * String parser that holds the raw PDF data and the current read offset
     *
     * @var Zend_Pdf_StringParser
     */
    private $_stringParser;

    /**
     * Last PDF file trailer
     *
     * @var Zend_Pdf_Trailer_Keeper
     */
    private $_trailer;

    /**
     * PDF version specified in the file header
     *
     * @var string
     */
    private $_pdfVersion;


    /**
     * Get length of source PDF
     *
     * @return integer
     */
    public function getPDFLength()
    {
        return strlen($this->_stringParser->data);
    }

    /**
     * Get PDF String
     *
     * @return string
     */
    public function getPDFString()
    {
        return $this->_stringParser->data;
    }

    /**
     * PDF version specified in the file header
     *
     * @return string
     */
    public function getPDFVersion()
    {
        return $this->_pdfVersion;
    }

    /**
     * Return the byte at the current parser offset without consuming it.
     *
     * Cross-reference entries are read byte by byte; a truncated table must be
     * reported as a syntax error instead of reading past the end of the data.
     *
     * @throws Zend_Pdf_Exception if the current offset is past the end of the data
     * @return string
     */
    private function _peekXRefEntryByte()
    {
        $parser = $this->_stringParser;

        if ($parser->offset >= strlen($parser->data)) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Unexpected end of file.', $parser->offset));
        }

        return $parser->data[$parser->offset];
    }

    /**
     * Consume one separator byte of a cross-reference entry.
     *
     * @param boolean $isWhiteSpace  Result of the white space test for the current byte
     * @throws Zend_Pdf_Exception if the current byte is not white space
     */
    private function _skipXRefEntrySeparator($isWhiteSpace)
    {
        if (!$isWhiteSpace) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset));
        }

        $this->_stringParser->offset++;
    }

    /**
     * Read a fixed-width, zero-padded decimal field of a cross-reference entry
     * and advance the parser offset past it.
     *
     * Leading zeros are stripped (a single '0' is kept for an all-zero field),
     * so the value can't be mistaken for an octal number later on.
     *
     * @param integer $length     Field width in bytes
     * @param string  $fieldName  Field name used in the error message
     * @throws Zend_Pdf_Exception if the field contains anything but digits
     * @return string
     */
    private function _readXRefEntryNumber($length, $fieldName)
    {
        $parser = $this->_stringParser;
        $digits = substr($parser->data, $parser->offset, $length);

        if (!is_string($digits) || !ctype_digit($digits)) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. %s must contain only digits.', $parser->offset, $fieldName));
        }

        $number = ltrim($digits, '0');
        if ($number === '') {
            $number = '0';
        }

        $parser->offset += $length;

        return $number;
    }

    /**
     * Load XReference table and referenced objects
     *
     * Reads the cross-reference section located at $offset, then recursively
     * loads the section referenced by the trailer's /Prev entry (if any) and
     * chains the reference tables together.
     *
     * @param integer $offset
     * @throws Zend_Pdf_Exception
     * @return Zend_Pdf_Trailer_Keeper
     */
    private function _loadXRefTable($offset)
    {
        $this->_stringParser->offset = $offset;

        // require_once 'Zend/Pdf/Element/Reference/Table.php';
        $refTable = new Zend_Pdf_Element_Reference_Table();
        // require_once 'Zend/Pdf/Element/Reference/Context.php';
        $context  = new Zend_Pdf_Element_Reference_Context($this->_stringParser, $refTable);
        $this->_stringParser->setContext($context);

        $nextLexeme = $this->_stringParser->readLexeme();
        if ($nextLexeme == 'xref') {
            /**
             * Common cross-reference table
             */
            $this->_stringParser->skipWhiteSpace();
            while ( ($nextLexeme = $this->_stringParser->readLexeme()) != 'trailer' ) {
                // Subsection header: "<first object number> <entry count>"
                if (!ctype_digit($nextLexeme)) {
                    // require_once 'Zend/Pdf/Exception.php';
                    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $this->_stringParser->offset-strlen($nextLexeme)));
                }
                $objNum = (int)$nextLexeme;

                $refCount = $this->_stringParser->readLexeme();
                if (!ctype_digit($refCount)) {
                    // require_once 'Zend/Pdf/Exception.php';
                    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $this->_stringParser->offset-strlen($refCount)));
                }

                $this->_stringParser->skipWhiteSpace();

                // Each entry is read as: 10 digit offset, separator, 5 digit generation
                // number, separator, in-use key and two trailing white space bytes.
                for ($remaining = (int)$refCount; $remaining > 0; $remaining--, $objNum++) {
                    $objectOffset = $this->_readXRefEntryNumber(10, 'Offset');
                    $this->_skipXRefEntrySeparator(strpos("\x00\t\n\f\r ", $this->_peekXRefEntryByte()) !== false);

                    $genNumber = $this->_readXRefEntryNumber(5, 'Generation number');
                    $this->_skipXRefEntrySeparator(strpos("\x00\t\n\f\r ", $this->_peekXRefEntryByte()) !== false);

                    $inUseKey = $this->_peekXRefEntryByte();
                    $this->_stringParser->offset++;

                    // Any other key is silently ignored (no reference is registered).
                    switch ($inUseKey) {
                        case 'f':
                            // free entry
                            $refTable->addReference($objNum . ' ' . $genNumber . ' R',
                                                    $objectOffset,
                                                    false);
                            break;

                        case 'n':
                            // in-use entry
                            $refTable->addReference($objNum . ' ' . $genNumber . ' R',
                                                    $objectOffset,
                                                    true);
                            break;
                    }

                    // Two byte end-of-line marker
                    $this->_skipXRefEntrySeparator(Zend_Pdf_StringParser::isWhiteSpace(ord($this->_peekXRefEntryByte())));
                    $this->_skipXRefEntrySeparator(Zend_Pdf_StringParser::isWhiteSpace(ord($this->_peekXRefEntryByte())));
                }
            }

            $trailerDictOffset = $this->_stringParser->offset;
            $trailerDict = $this->_stringParser->readElement();
            if (!$trailerDict instanceof Zend_Pdf_Element_Dictionary) {
                // require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.  Dictionary expected after \'trailer\' keyword.', $trailerDictOffset));
            }
        } else {
            /**
             * Cross-reference stream. The stream is validated and walked,
             * but support is incomplete: this branch always ends with an exception.
             */
            $xrefStream = $this->_stringParser->getObject($offset, $context);

            if (!$xrefStream instanceof Zend_Pdf_Element_Object_Stream) {
                // require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.  Cross-reference stream expected.', $offset));
            }

            $trailerDict = $xrefStream->dictionary;
            if ($trailerDict->Type->value != 'XRef') {
                // require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.  Cross-reference stream object must have /Type property assigned to /XRef.', $offset));
            }
            if ($trailerDict->W === null  || $trailerDict->W->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
                // require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary doesn\'t have W entry or it\'s not an array.', $offset));
            }

            $entryField1Size = $trailerDict->W->items[0]->value;
            $entryField2Size = $trailerDict->W->items[1]->value;
            $entryField3Size = $trailerDict->W->items[2]->value;

            if ($entryField2Size == 0 || $entryField3Size == 0) {
                // require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Wrong W dictionary entry. Only type field of stream entries has default value and could be zero length.', $offset));
            }

            $xrefStreamData = $xrefStream->value;

            if ($trailerDict->Index !== null) {
                if ($trailerDict->Index->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
                    // require_once 'Zend/Pdf/Exception.php';
                    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary Index entry must be an array.', $offset));
                }
                // Index holds "<first object number> <entry count>" pairs
                $sections = count($trailerDict->Index->items)/2;
            } else {
                $sections = 1;
            }

            $streamOffset = 0;

            for ($count = 0; $count < $sections; $count++) {
                if ($trailerDict->Index !== null) {
                    $objNum  = $trailerDict->Index->items[$count*2    ]->value;
                    $entries = $trailerDict->Index->items[$count*2 + 1]->value;
                } else {
                    $objNum  = 0;
                    $entries = $trailerDict->Size->value;
                }

                for ($count2 = 0; $count2 < $entries; $count2++) {
                    if ($entryField1Size == 0) {
                        // Type field is omitted, default type is 1
                        $type = 1;
                    } else if ($entryField1Size == 1) { // Optimize one-byte field case
                        $type = ord($xrefStreamData[$streamOffset++]);
                    } else {
                        $type = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField1Size);
                        $streamOffset += $entryField1Size;
                    }

                    if ($entryField2Size == 1) { // Optimize one-byte field case
                        $field2 = ord($xrefStreamData[$streamOffset++]);
                    } else {
                        $field2 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField2Size);
                        $streamOffset += $entryField2Size;
                    }

                    if ($entryField3Size == 1) { // Optimize one-byte field case
                        $field3 = ord($xrefStreamData[$streamOffset++]);
                    } else {
                        $field3 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField3Size);
                        $streamOffset += $entryField3Size;
                    }

                    switch ($type) {
                        case 0:
                            // Free object
                            $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, false);
                            // Debug output:
                            // echo "Free object - $objNum $field3 R, next free - $field2\n";
                            break;

                        case 1:
                            // In use object
                            $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, true);
                            // Debug output:
                            // echo "In-use object - $objNum $field3 R, offset - $field2\n";
                            break;

                        case 2:
                            // Object in an object stream
                            // Debug output:
                            // echo "Compressed object - $objNum 0 R, object stream - $field2 0 R, offset - $field3\n";
                            break;
                    }

                    $objNum++;
                }
            }

            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Cross-reference streams are not supported yet.');
        }


        // require_once 'Zend/Pdf/Trailer/Keeper.php';
        $trailerObj = new Zend_Pdf_Trailer_Keeper($trailerDict, $context);
        if ($trailerDict->Prev instanceof Zend_Pdf_Element_Numeric ||
            $trailerDict->Prev instanceof Zend_Pdf_Element_Reference ) {
            // Load the previous cross-reference section and make its table the parent of this one
            $trailerObj->setPrev($this->_loadXRefTable($trailerDict->Prev->value));
            $context->getRefTable()->setParent($trailerObj->getPrev()->getRefTable());
        }

        /**
         * We set '/Prev' dictionary property to the current cross-reference section offset.
         * It doesn't correspond to the actual data, but is true when trailer will be used
         * as a trailer for next generated PDF section.
         */
        $trailerObj->Prev = new Zend_Pdf_Element_Numeric($offset);

        return $trailerObj;
    }


    /**
     * Get Trailer object
     *
     * @return Zend_Pdf_Trailer_Keeper
     */
    public function getTrailer()
    {
        return $this->_trailer;
    }

    /**
     * Read the whole PDF file into a string.
     *
     * The file handle is closed on every path, including the error ones.
     *
     * @param string $source  File name
     * @throws Zend_Pdf_Exception if the file can't be opened or completely read
     * @return string
     */
    private function _readSourceFile($source)
    {
        if (($pdfFile = @fopen($source, 'rb')) === false ) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception( "Can not open '$source' file for reading." );
        }

        $data = '';
        $byteCount = filesize($source);
        while ($byteCount > 0 && !feof($pdfFile)) {
            $nextBlock = fread($pdfFile, $byteCount);
            if ($nextBlock === false) {
                fclose($pdfFile);
                // require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception( "Error occured while '$source' file reading." );
            }

            $data .= $nextBlock;
            $byteCount -= strlen($nextBlock);
        }
        fclose($pdfFile);

        if ($byteCount != 0) {
            // Fewer bytes than the reported file size were read
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception( "Error occured while '$source' file reading." );
        }

        return $data;
    }

    /**
     * Object constructor
     *
     * Note: PHP duplicates a string which is passed by value only if it's updated.
     * Thus we don't need to care about overhead
     *
     * @param mixed $source  File name if $load is true, PDF data string otherwise
     * @param Zend_Pdf_ElementFactory_Interface $factory
     * @param boolean $load
     * @throws Zend_Pdf_Exception
     */
    public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory, $load)
    {
        if ($load) {
            $this->_stringParser = new Zend_Pdf_StringParser($this->_readSourceFile($source), $factory);
        } else {
            $this->_stringParser = new Zend_Pdf_StringParser($source, $factory);
        }

        $pdfVersionComment = $this->_stringParser->readComment();
        if (substr($pdfVersionComment, 0, 5) != '%PDF-') {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('File is not a PDF.');
        }

        $pdfVersion = substr($pdfVersionComment, 5);
        if (version_compare($pdfVersion, '0.9',  '<')  ||
            version_compare($pdfVersion, '1.61', '>=')
           ) {
            /**
             * @todo
             * To support PDF versions 1.5 (Acrobat 6) and PDF version 1.7 (Acrobat 7)
             * Stream compression filter must be implemented (for compressed object streams).
             * Cross reference streams must be implemented
             */
            // The version is a string taken from the header, so it is reported as is
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('Unsupported PDF version. Zend_Pdf supports PDF 1.0-1.4. Current version - \'%s\'', $pdfVersion));
        }
        $this->_pdfVersion = $pdfVersion;

        $this->_stringParser->offset = strrpos($this->_stringParser->data, '%%EOF');
        if ($this->_stringParser->offset === false ||
            strlen($this->_stringParser->data) - $this->_stringParser->offset > 7) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Pdf file syntax error. End-of-fle marker expected at the end of file.');
        }

        $this->_stringParser->offset--;
        /**
         * Go to end of cross-reference table offset
         */
        while (($this->_stringParser->offset > 0) &&
               Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) )) {
            $this->_stringParser->offset--;
        }
        /**
         * Go to the start of cross-reference table offset
         */
        while (($this->_stringParser->offset > 0) &&
               (!Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) ))) {
            $this->_stringParser->offset--;
        }
        /**
         * Go to the end of 'startxref' keyword
         */
        while (($this->_stringParser->offset > 0) &&
               Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) )) {
            $this->_stringParser->offset--;
        }
        /**
         * Go to the white space (eol marker) before 'startxref' keyword
         */
        $this->_stringParser->offset -= 9;
        if ($this->_stringParser->offset < 0) {
            // There is not enough data before the offset value to hold the keyword
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. \'startxref\' keyword expected. Offset - 0x%X.', 0));
        }

        $nextLexeme = $this->_stringParser->readLexeme();
        if ($nextLexeme != 'startxref') {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. \'startxref\' keyword expected. Offset - 0x%X.', $this->_stringParser->offset-strlen($nextLexeme)));
        }

        $startXref = $this->_stringParser->readLexeme();
        if (!ctype_digit($startXref)) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. Cross-reference table offset must contain only digits. Offset - 0x%X.', $this->_stringParser->offset-strlen($startXref)));
        }

        $this->_trailer = $this->_loadXRefTable($startXref);
        $factory->setObjectCount($this->_trailer->Size->value);
    }


    /**
     * Object destructor
     *
     * PHP runs the destructor even if the constructor threw before the string
     * parser was created, so the parser may be missing here.
     */
    public function __destruct()
    {
        if ($this->_stringParser !== null) {
            $this->_stringParser->cleanUp();
        }
    }
}