File indexing completed on 2025-01-19 05:21:23
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Pdf
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */

/** Internally used classes */
// require_once 'Zend/Pdf/Element.php';
// require_once 'Zend/Pdf/Element/Numeric.php';


/** Zend_Pdf_StringParser */
// require_once 'Zend/Pdf/StringParser.php';


/**
 * PDF file parser
 *
 * Wraps a Zend_Pdf_StringParser around the PDF data, validates the file
 * header and the trailing '%%EOF' / 'startxref' markers, and loads the chain
 * of cross-reference tables (following '/Prev' links) into trailer objects.
 *
 * @package    Zend_Pdf
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Pdf_Parser
{
    /**
     * String parser
     *
     * @var Zend_Pdf_StringParser
     */
    private $_stringParser;

    /**
     * Last PDF file trailer
     *
     * @var Zend_Pdf_Trailer_Keeper
     */
    private $_trailer;

    /**
     * PDF version specified in the file header
     *
     * @var string
     */
    private $_pdfVersion;


    /**
     * Get length of source PDF
     *
     * @return integer Length in bytes of the data held by the string parser
     */
    public function getPDFLength()
    {
        return strlen($this->_stringParser->data);
    }

    /**
     * Get PDF String
     *
     * @return string Raw PDF data held by the string parser
     */
    public function getPDFString()
    {
        return $this->_stringParser->data;
    }

    /**
     * PDF version specified in the file header
     *
     * @return string Text following '%PDF-' in the header comment
     */
    public function getPDFVersion()
    {
        return $this->_pdfVersion;
    }

    /**
     * Read one fixed-width decimal field of a cross-reference table entry
     *
     * Reads $length bytes at the current parser offset, checks that they are
     * all digits, advances the parser offset by $length and returns the value
     * with leading zeros removed. The value stays a string so that it is never
     * interpreted as an octal number.
     *
     * @param integer $length     Field width in bytes
     * @param string  $fieldName  Field name used in the error message
     * @throws Zend_Pdf_Exception If the field contains anything but digits
     * @return string
     */
    private function _readXRefNumberField($length, $fieldName)
    {
        $parser = $this->_stringParser;

        $field = substr($parser->data, $parser->offset, $length);
        if (!ctype_digit($field)) {
            throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. %s must contain only digits.', $parser->offset, $fieldName));
        }
        $parser->offset += $length;

        // Strip leading zeros, but keep a single '0' for an all-zero field
        $field = ltrim($field, '0');

        return ($field === '') ? '0' : $field;
    }

    /**
     * Consume the single white space byte separating two fields of a
     * cross-reference table entry
     *
     * @throws Zend_Pdf_Exception If the byte at the current offset is not white space
     */
    private function _skipXRefFieldSeparator()
    {
        $parser = $this->_stringParser;

        if (strpos("\x00\t\n\f\r ", $parser->data[$parser->offset]) === false) {
            throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $parser->offset));
        }
        $parser->offset++;
    }

    /**
     * Load XReference table and referenced objects
     *
     * Parses the cross-reference section located at $offset. A classic
     * 'xref' table is read entry by entry into a new reference table. A
     * cross-reference stream is decoded, but then rejected with an exception
     * because such streams are not supported. If the trailer dictionary has
     * a '/Prev' entry, the previous section is loaded recursively and linked
     * as the parent of this one.
     *
     * @param integer $offset  Offset of the cross-reference section in the PDF data
     * @throws Zend_Pdf_Exception
     * @return Zend_Pdf_Trailer_Keeper
     */
    private function _loadXRefTable($offset)
    {
        $this->_stringParser->offset = $offset;

        // require_once 'Zend/Pdf/Element/Reference/Table.php';
        $refTable = new Zend_Pdf_Element_Reference_Table();
        // require_once 'Zend/Pdf/Element/Reference/Context.php';
        $context  = new Zend_Pdf_Element_Reference_Context($this->_stringParser, $refTable);
        $this->_stringParser->setContext($context);

        $nextLexeme = $this->_stringParser->readLexeme();
        if ($nextLexeme == 'xref') {
            /**
             * Common cross-reference table
             */
            $this->_stringParser->skipWhiteSpace();
            while ( ($nextLexeme = $this->_stringParser->readLexeme()) != 'trailer' ) {
                // Subsection header: "<first object number> <entry count>"
                if (!ctype_digit($nextLexeme)) {
                    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $this->_stringParser->offset-strlen($nextLexeme)));
                }
                $objNum = (int)$nextLexeme;

                $refCount = $this->_stringParser->readLexeme();
                if (!ctype_digit($refCount)) {
                    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $this->_stringParser->offset-strlen($refCount)));
                }
                $refCount = (int)$refCount;

                $this->_stringParser->skipWhiteSpace();
                while ($refCount > 0) {
                    // Entry layout: 10-digit offset, separator, 5-digit
                    // generation number, separator, 'f'/'n' key, two
                    // white space bytes
                    $objectOffset = $this->_readXRefNumberField(10, 'Offset');
                    $this->_skipXRefFieldSeparator();

                    $genNumber = $this->_readXRefNumberField(5, 'Generation number');
                    $this->_skipXRefFieldSeparator();

                    $inUseKey = $this->_stringParser->data[$this->_stringParser->offset];
                    $this->_stringParser->offset++;

                    switch ($inUseKey) {
                        case 'f':
                            // free entry
                            $refTable->addReference($objNum . ' ' . $genNumber . ' R',
                                                    $objectOffset,
                                                    false);
                            break;

                        case 'n':
                            // in-use entry
                            $refTable->addReference($objNum . ' ' . $genNumber . ' R',
                                                    $objectOffset,
                                                    true);
                            break;
                    }

                    // The entry must end with two white space bytes
                    for ($eolByte = 0; $eolByte < 2; $eolByte++) {
                        if ( !Zend_Pdf_StringParser::isWhiteSpace(ord( $this->_stringParser->data[$this->_stringParser->offset] )) ) {
                            throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset));
                        }
                        $this->_stringParser->offset++;
                    }

                    $refCount--;
                    $objNum++;
                }
            }

            $trailerDictOffset = $this->_stringParser->offset;
            $trailerDict       = $this->_stringParser->readElement();
            if (!$trailerDict instanceof Zend_Pdf_Element_Dictionary) {
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Dictionary expected after \'trailer\' keyword.', $trailerDictOffset));
            }
        } else {
            /**
             * Cross-reference stream
             */
            $xrefStream = $this->_stringParser->getObject($offset, $context);

            if (!$xrefStream instanceof Zend_Pdf_Element_Object_Stream) {
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference stream expected.', $offset));
            }

            $trailerDict = $xrefStream->dictionary;
            if ($trailerDict->Type->value != 'XRef') {
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference stream object must have /Type property assigned to /XRef.', $offset));
            }
            if ($trailerDict->W === null  || $trailerDict->W->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary doesn\'t have W entry or it\'s not an array.', $offset));
            }

            // Byte widths of the three fields of each stream entry
            $entryField1Size = $trailerDict->W->items[0]->value;
            $entryField2Size = $trailerDict->W->items[1]->value;
            $entryField3Size = $trailerDict->W->items[2]->value;

            if ($entryField2Size == 0 || $entryField3Size == 0) {
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Wrong W dictionary entry. Only type field of stream entries has default value and could be zero length.', $offset));
            }

            $xrefStreamData = $xrefStream->value;

            if ($trailerDict->Index !== null) {
                if ($trailerDict->Index->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
                    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary Index entry must be an array.', $offset));
                }
                // Index holds (first object number, entry count) pairs
                $sections = count($trailerDict->Index->items)/2;
            } else {
                $sections = 1;
            }

            $streamOffset = 0;

            $size    = $entryField1Size + $entryField2Size + $entryField3Size;
            $entries = strlen($xrefStreamData)/$size;

            for ($count = 0; $count < $sections; $count++) {
                if ($trailerDict->Index !== null) {
                    $objNum  = $trailerDict->Index->items[$count*2    ]->value;
                    $entries = $trailerDict->Index->items[$count*2 + 1]->value;
                } else {
                    $objNum  = 0;
                    $entries = $trailerDict->Size->value;
                }

                for ($count2 = 0; $count2 < $entries; $count2++) {
                    if ($entryField1Size == 0) {
                        // A zero-width type field defaults to type 1
                        $type = 1;
                    } else if ($entryField1Size == 1) { // Optimize one-byte field case
                        $type = ord($xrefStreamData[$streamOffset++]);
                    } else {
                        $type = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField1Size);
                        $streamOffset += $entryField1Size;
                    }

                    if ($entryField2Size == 1) { // Optimize one-byte field case
                        $field2 = ord($xrefStreamData[$streamOffset++]);
                    } else {
                        $field2 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField2Size);
                        $streamOffset += $entryField2Size;
                    }

                    if ($entryField3Size == 1) { // Optimize one-byte field case
                        $field3 = ord($xrefStreamData[$streamOffset++]);
                    } else {
                        $field3 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField3Size);
                        $streamOffset += $entryField3Size;
                    }

                    switch ($type) {
                        case 0:
                            // Free object: $field2 is the next free object, $field3 the generation
                            $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, false);
                            break;

                        case 1:
                            // In use object: $field2 is the offset, $field3 the generation
                            $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, true);
                            break;

                        case 2:
                            // Object in an object stream: not registered
                            break;
                    }

                    $objNum++;
                }
            }

            // The stream is decoded above, but loading always stops here
            throw new Zend_Pdf_Exception('Cross-reference streams are not supported yet.');
        }


        // require_once 'Zend/Pdf/Trailer/Keeper.php';
        $trailerObj = new Zend_Pdf_Trailer_Keeper($trailerDict, $context);
        if ($trailerDict->Prev instanceof Zend_Pdf_Element_Numeric ||
            $trailerDict->Prev instanceof Zend_Pdf_Element_Reference ) {
            $trailerObj->setPrev($this->_loadXRefTable($trailerDict->Prev->value));
            $context->getRefTable()->setParent($trailerObj->getPrev()->getRefTable());
        }

        /**
         * We set '/Prev' dictionary property to the current cross-reference section offset.
         * It doesn't correspond to the actual data, but is true when trailer will be used
         * as a trailer for next generated PDF section.
         */
        $trailerObj->Prev = new Zend_Pdf_Element_Numeric($offset);

        return $trailerObj;
    }


    /**
     * Get Trailer object
     *
     * @return Zend_Pdf_Trailer_Keeper Trailer of the last cross-reference section
     */
    public function getTrailer()
    {
        return $this->_trailer;
    }

    /**
     * Object constructor
     *
     * Note: PHP duplicates a string which is sent by value only if it's updated.
     * Thus we don't need to care about overhead
     *
     * @param mixed $source  File name when $load is true, PDF data string otherwise
     * @param Zend_Pdf_ElementFactory_Interface $factory
     * @param boolean $load  Whether $source has to be read from the file system
     * @throws Zend_Exception
     */
    public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory, $load)
    {
        if ($load) {
            if (($pdfFile = @fopen($source, 'rb')) === false ) {
                throw new Zend_Pdf_Exception( "Can not open '$source' file for reading." );
            }

            $data = '';
            $byteCount = filesize($source);
            while ($byteCount > 0 && !feof($pdfFile)) {
                $nextBlock = fread($pdfFile, $byteCount);
                if ($nextBlock === false) {
                    // Release the handle before reporting the failure
                    fclose($pdfFile);
                    throw new Zend_Pdf_Exception( "Error occured while '$source' file reading." );
                }

                $data .= $nextBlock;
                $byteCount -= strlen($nextBlock);
            }
            // Close the handle before the size check so it is released on failure too
            fclose($pdfFile);

            if ($byteCount != 0) {
                throw new Zend_Pdf_Exception( "Error occured while '$source' file reading." );
            }

            $this->_stringParser = new Zend_Pdf_StringParser($data, $factory);
        } else {
            $this->_stringParser = new Zend_Pdf_StringParser($source, $factory);
        }

        $pdfVersionComment = $this->_stringParser->readComment();
        if (substr($pdfVersionComment, 0, 5) != '%PDF-') {
            throw new Zend_Pdf_Exception('File is not a PDF.');
        }

        $pdfVersion = substr($pdfVersionComment, 5);
        if (version_compare($pdfVersion, '0.9',  '<')  ||
            version_compare($pdfVersion, '1.61', '>=')
           ) {
            /**
             * @todo
             * To support PDF versions 1.5 (Acrobat 6) and PDF version 1.7 (Acrobat 7)
             * Stream compression filter must be implemented (for compressed object streams).
             * Cross reference streams must be implemented
             */
            // The version is a string such as '1.7'; '%s' reports it verbatim
            throw new Zend_Pdf_Exception(sprintf('Unsupported PDF version. Zend_Pdf supports PDF 1.0-1.4. Current version - \'%s\'', $pdfVersion));
        }
        $this->_pdfVersion = $pdfVersion;

        $this->_stringParser->offset = strrpos($this->_stringParser->data, '%%EOF');
        if ($this->_stringParser->offset === false ||
            strlen($this->_stringParser->data) - $this->_stringParser->offset > 7) {
            throw new Zend_Pdf_Exception('Pdf file syntax error. End-of-fle marker expected at the end of file.');
        }

        $this->_stringParser->offset--;
        /**
         * Go to end of cross-reference table offset
         */
        while (Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) )&&
               ($this->_stringParser->offset > 0)) {
            $this->_stringParser->offset--;
        }
        /**
         * Go to the start of cross-reference table offset
         */
        while ( (!Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) ))&&
               ($this->_stringParser->offset > 0)) {
            $this->_stringParser->offset--;
        }
        /**
         * Go to the end of 'startxref' keyword
         */
        while (Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) )&&
               ($this->_stringParser->offset > 0)) {
            $this->_stringParser->offset--;
        }
        /**
         * Go to the white space (eol marker) before 'startxref' keyword
         */
        $this->_stringParser->offset -= 9;

        $nextLexeme = $this->_stringParser->readLexeme();
        if ($nextLexeme != 'startxref') {
            throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. \'startxref\' keyword expected. Offset - 0x%X.', $this->_stringParser->offset-strlen($nextLexeme)));
        }

        $startXref = $this->_stringParser->readLexeme();
        if (!ctype_digit($startXref)) {
            // Report where the offending value starts, not the keyword before it
            throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. Cross-reference table offset must contain only digits. Offset - 0x%X.', $this->_stringParser->offset-strlen($startXref)));
        }

        $this->_trailer  = $this->_loadXRefTable($startXref);
        $factory->setObjectCount($this->_trailer->Size->value);
    }


    /**
     * Object destructor
     *
     * Releases the resources held by the string parser. The parser is not
     * set if construction failed before it was created, hence the guard.
     */
    public function __destruct()
    {
        if ($this->_stringParser !== null) {
            $this->_stringParser->cleanUp();
        }
    }
}