File indexing completed on 2024-12-22 05:36:57
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Pdf
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */


/** Internally used classes */
// require_once 'Zend/Pdf/Element/Array.php';
// require_once 'Zend/Pdf/Element/String/Binary.php';
// require_once 'Zend/Pdf/Element/Boolean.php';
// require_once 'Zend/Pdf/Element/Dictionary.php';
// require_once 'Zend/Pdf/Element/Name.php';
// require_once 'Zend/Pdf/Element/Null.php';
// require_once 'Zend/Pdf/Element/Numeric.php';
// require_once 'Zend/Pdf/Element/Object.php';
// require_once 'Zend/Pdf/Element/Object/Stream.php';
// require_once 'Zend/Pdf/Element/Reference.php';
// require_once 'Zend/Pdf/Element/String.php';


/**
 * PDF string parser
 *
 * Tokenizes a PDF byte string held in {@link $data}, starting at {@link $offset},
 * and builds Zend_Pdf_Element_* objects from the lexemes it reads.
 *
 * @package    Zend_Pdf
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Pdf_StringParser
{
    /**
     * Source PDF
     *
     * @var string
     */
    public $data       = '';

    /**
     * Current position in a data
     *
     * @var integer
     */
    public $offset     = 0;

    /**
     * Current reference context
     *
     * @var Zend_Pdf_Element_Reference_Context
     */
    private $_context  = null;

    /**
     * Array of elements of the currently parsed object/trailer
     *
     * @var array
     */
    private $_elements = array();

    /**
     * PDF objects factory.
     *
     * @var Zend_Pdf_ElementFactory_Interface
     */
    private $_objFactory = null;


    /**
     * Clean up resources.
     *
     * Clear current state to remove cyclic object references
     */
    public function cleanUp()
    {
        $this->_context    = null;
        $this->_elements   = array();
        $this->_objFactory = null;
    }

    /**
     * Character with code $chCode is white space
     *
     * @param integer $chCode
     * @return boolean
     */
    public static function isWhiteSpace($chCode)
    {
        return $chCode == 0x00     // null character
            || $chCode == 0x09     // Tab
            || $chCode == 0x0A     // Line feed
            || $chCode == 0x0C     // Form Feed
            || $chCode == 0x0D     // Carriage return
            || $chCode == 0x20;    // Space
    }


    /**
     * Character with code $chCode is a delimiter character
     *
     * @param integer $chCode
     * @return boolean
     */
    public static function isDelimiter($chCode)
    {
        return $chCode == 0x28     // '('
            || $chCode == 0x29     // ')'
            || $chCode == 0x3C     // '<'
            || $chCode == 0x3E     // '>'
            || $chCode == 0x5B     // '['
            || $chCode == 0x5D     // ']'
            || $chCode == 0x7B     // '{'
            || $chCode == 0x7D     // '}'
            || $chCode == 0x2F     // '/'
            || $chCode == 0x25;    // '%'
    }


    /**
     * Skip white space
     *
     * Advances {@link $offset} past white space characters. When $skipComment
     * is true, '%' comments (up to, but not including, the next CR or LF) are
     * skipped as well.
     *
     * @param boolean $skipComment
     */
    public function skipWhiteSpace($skipComment = true)
    {
        if (!$skipComment) {
            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
            return;
        }

        while (true) {
            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);

            if ($this->offset < strlen($this->data)  &&  $this->data[$this->offset] == '%') {
                // Skip comment; the terminating EOL is consumed as white space on the next pass
                $this->offset += strcspn($this->data, "\r\n", $this->offset);
            } else {
                // Non white space character not equal to '%' is found (or end of data is reached)
                return;
            }
        }
    }


    /**
     * Skip comment
     *
     * Advances {@link $offset} to the first line feed or carriage return at or
     * after the current position, or to the end of data if there is none.
     */
    public function skipComment()
    {
        // The previous implementation tested "!= LF || != CR", which is always true,
        // so it ran to the end of data instead of stopping at the end of the line.
        if ($this->offset < strlen($this->data)) {
            $this->offset += strcspn($this->data, "\r\n", $this->offset);
        }
    }


    /**
     * Read comment line
     *
     * Skips leading white space (but not comments). If the next character is '%',
     * returns the text from '%' up to (not including) the next CR or LF and leaves
     * {@link $offset} on that EOL character.
     *
     * @return string Comment text including the leading '%', or an empty string
     *                if the current position does not start a comment.
     */
    public function readComment()
    {
        $this->skipWhiteSpace(false);

        /** Check if it's a comment line (also guards against reading past the end of data) */
        if ($this->offset >= strlen($this->data)  ||  $this->data[$this->offset] != '%') {
            return '';
        }

        $start = $this->offset;
        $this->offset += strcspn($this->data, "\r\n", $this->offset);

        return substr($this->data, $start, $this->offset - $start);
    }


    /**
     * Returns next lexeme from a pdf stream
     *
     * A lexeme is either a delimiter ('<<' and '>>' are returned as two-character
     * lexemes, every other delimiter as a single character) or a run of regular
     * characters terminated by a delimiter or white space.
     *
     * @return string Next lexeme, or an empty string when end of data is reached.
     */
    public function readLexeme()
    {
        // Inlined equivalent of $this->skipWhiteSpace();
        while (true) {
            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);

            if ($this->offset < strlen($this->data)  &&  $this->data[$this->offset] == '%') {
                $this->offset += strcspn($this->data, "\r\n", $this->offset);
            } else {
                break;
            }
        }

        if ($this->offset >= strlen($this->data)) {
            return '';
        }

        $char = $this->data[$this->offset];

        if (strpos('()<>[]{}/%', $char) !== false) {
            // Delimiter: check for the two-character dictionary brackets first
            $pair = substr($this->data, $this->offset, 2);
            if ($pair === '<<'  ||  $pair === '>>') {
                $this->offset += 2;
                return $pair;
            }

            $this->offset++;
            return $char;
        }

        // Regular lexeme: read up to the next delimiter or white space character
        $start = $this->offset;
        if (version_compare(phpversion(), '5.2.5') >= 0) {
            $compare = "()<>[]{}/%\x00\t\n\f\r ";
        } else {
            // Form feed is left out of the mask on PHP versions older than 5.2.5
            $compare = "()<>[]{}/%\x00\t\n\r ";
        }

        $this->offset += strcspn($this->data, $compare, $this->offset);

        return substr($this->data, $start, $this->offset - $start);
    }


    /**
     * Read elemental object from a PDF stream
     *
     * @param string|null $nextLexeme Already-read first lexeme of the element;
     *                                when null, it is read from the stream.
     * @return Zend_Pdf_Element
     * @throws Zend_Pdf_Exception
     */
    public function readElement($nextLexeme = null)
    {
        if ($nextLexeme === null) {
            $nextLexeme = $this->readLexeme();
        }

        /**
         * Note: readElement() method is a public method and could be invoked from other classes.
         * If readElement() is used not by Zend_Pdf_StringParser::getObject() method, then we should not care
         * about _elements member management.
         */
        switch ($nextLexeme) {
            case '(':
                return ($this->_elements[] = $this->_readString());

            case '<':
                return ($this->_elements[] = $this->_readBinaryString());

            case '/':
                return ($this->_elements[] = new Zend_Pdf_Element_Name(
                                                Zend_Pdf_Element_Name::unescape( $this->readLexeme() )
                                                                      ));

            case '[':
                return ($this->_elements[] = $this->_readArray());

            case '<<':
                return ($this->_elements[] = $this->_readDictionary());

            case ')':
                // fall through to next case
            case '>':
                // fall through to next case
            case ']':
                // fall through to next case
            case '>>':
                // fall through to next case
            case '{':
                // fall through to next case
            case '}':
                // require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
                                                $this->offset));

            default:
                if (strcasecmp($nextLexeme, 'true') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(true));
                } else if (strcasecmp($nextLexeme, 'false') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(false));
                } else if (strcasecmp($nextLexeme, 'null') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Null());
                }

                $ref = $this->_readReference($nextLexeme);
                if ($ref !== null) {
                    return ($this->_elements[] = $ref);
                }

                return ($this->_elements[] = $this->_readNumeric($nextLexeme));
        }
    }


    /**
     * Read string PDF object
     * Also reads trailing ')' from a pdf stream
     *
     * @return Zend_Pdf_Element_String
     * @throws Zend_Pdf_Exception
     */
    private function _readString()
    {
        $start = $this->offset;
        $openedBrackets = 1;

        $this->offset += strcspn($this->data, '()\\', $this->offset);

        while ($this->offset < strlen($this->data)) {
            switch (ord( $this->data[$this->offset] )) {
                case 0x28: // '(' - opened bracket in the string, needs balanced pair.
                    $this->offset++;
                    $openedBrackets++;
                    break;

                case 0x29: // ')' - pair to the opened bracket
                    $this->offset++;
                    $openedBrackets--;
                    break;

                case 0x5C: // '\\' - escape sequence, skip next char from a check
                    $this->offset += 2;
            }

            if ($openedBrackets == 0) {
                break; // end of string
            }

            $this->offset += strcspn($this->data, '()\\', $this->offset);
        }
        if ($openedBrackets != 0) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
        }

        return new Zend_Pdf_Element_String(Zend_Pdf_Element_String::unescape( substr($this->data,
                                                                                     $start,
                                                                                     $this->offset - $start - 1) ));
    }


    /**
     * Read binary string PDF object
     * Also reads trailing '>' from a pdf stream
     *
     * @return Zend_Pdf_Element_String_Binary
     * @throws Zend_Pdf_Exception
     */
    private function _readBinaryString()
    {
        $start = $this->offset;

        $this->offset += strspn($this->data, "\x00\t\n\f\r 0123456789abcdefABCDEF", $this->offset);

        // End of data is reached only when there is no character left to read.
        // (The former "strlen - 1" bound rejected a valid string whose closing '>'
        // is the very last byte of the data.)
        if ($this->offset >= strlen($this->data)) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while reading binary string. Offset - 0x%X. \'>\' expected.', $start));
        }

        if ($this->data[$this->offset++] != '>') {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
        }

        return new Zend_Pdf_Element_String_Binary(
                       Zend_Pdf_Element_String_Binary::unescape( substr($this->data,
                                                                        $start,
                                                                        $this->offset - $start - 1) ));
    }


    /**
     * Read array PDF object
     * Also reads trailing ']' from a pdf stream
     *
     * @return Zend_Pdf_Element_Array
     * @throws Zend_Pdf_Exception
     */
    private function _readArray()
    {
        $elements = array();

        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
            if ($nextLexeme != ']') {
                $elements[] = $this->readElement($nextLexeme);
            } else {
                return new Zend_Pdf_Element_Array($elements);
            }
        }

        // require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
    }


    /**
     * Read dictionary PDF object
     * Also reads trailing '>>' from a pdf stream
     *
     * @return Zend_Pdf_Element_Dictionary
     * @throws Zend_Pdf_Exception
     */
    private function _readDictionary()
    {
        $dictionary = new Zend_Pdf_Element_Dictionary();

        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
            if ($nextLexeme != '>>') {
                // Remember where the key started for error reporting
                $nameStart = $this->offset - strlen($nextLexeme);

                $name  = $this->readElement($nextLexeme);
                $value = $this->readElement();

                if (!$name instanceof Zend_Pdf_Element_Name) {
                    // require_once 'Zend/Pdf/Exception.php';
                    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $nameStart));
                }

                $dictionary->add($name, $value);
            } else {
                return $dictionary;
            }
        }

        // require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
    }


    /**
     * Read reference PDF object
     *
     * Tries to read "<objNum> <genNum> R". If the lexemes do not form a reference,
     * {@link $offset} is rewound to its value on entry and null is returned.
     *
     * @param string $nextLexeme
     * @return Zend_Pdf_Element_Reference|null
     */
    private function _readReference($nextLexeme = null)
    {
        $start = $this->offset;

        if ($nextLexeme === null) {
            $objNum = $this->readLexeme();
        } else {
            $objNum = $nextLexeme;
        }
        if (!ctype_digit($objNum)) { // it's not a reference
            $this->offset = $start;
            return null;
        }

        $genNum = $this->readLexeme();
        if (!ctype_digit($genNum)) { // it's not a reference
            $this->offset = $start;
            return null;
        }

        $rMark  = $this->readLexeme();
        if ($rMark != 'R') {         // it's not a reference
            $this->offset = $start;
            return null;
        }

        $ref = new Zend_Pdf_Element_Reference((int)$objNum, (int)$genNum, $this->_context, $this->_objFactory->resolve());

        return $ref;
    }


    /**
     * Read numeric PDF object
     *
     * @param string $nextLexeme
     * @return Zend_Pdf_Element_Numeric
     */
    private function _readNumeric($nextLexeme = null)
    {
        if ($nextLexeme === null) {
            $nextLexeme = $this->readLexeme();
        }

        return new Zend_Pdf_Element_Numeric($nextLexeme);
    }


    /**
     * Read indirect object from a PDF stream
     *
     * The current {@link $offset} is saved on entry and restored before a
     * successful return.
     *
     * @param integer $offset
     * @param Zend_Pdf_Element_Reference_Context $context
     * @return Zend_Pdf_Element_Object
     * @throws Zend_Pdf_Exception
     */
    public function getObject($offset, Zend_Pdf_Element_Reference_Context $context)
    {
        if ($offset === null ) {
            return new Zend_Pdf_Element_Null();
        }

        // Save current offset to make getObject() reentrant
        $offsetSave = $this->offset;

        $this->offset    = $offset;
        $this->_context  = $context;
        $this->_elements = array();

        $objNum = $this->readLexeme();
        if (!ctype_digit($objNum)) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
        }

        $genNum = $this->readLexeme();
        if (!ctype_digit($genNum)) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
        }

        $objKeyword = $this->readLexeme();
        if ($objKeyword != 'obj') {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
        }

        $objValue = $this->readElement();

        $nextLexeme = $this->readLexeme();

        if( $nextLexeme == 'endobj' ) {
            /**
             * Object is not generated by factory (thus it's not marked as modified object).
             * But factory is assigned to the object.
             */
            $obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve());

            foreach ($this->_elements as $element) {
                $element->setParentObject($obj);
            }

            // Restore offset value
            $this->offset = $offsetSave;

            return $obj;
        }

        /**
         * It's a stream object
         */
        if ($nextLexeme != 'stream') {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme)));
        }

        if (!$objValue instanceof Zend_Pdf_Element_Dictionary) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme)));
        }

        /**
         * References are automatically dereferenced at this moment.
         */
        $streamLength = $objValue->Length->value;

        /**
         * 'stream' keyword must be followed by either cr-lf sequence or lf character only.
         * This restriction gives the possibility to recognize all cases exactly
         *
         * substr() is used instead of direct indexing so that a truncated file
         * falls through to the exception below without an out-of-range string offset.
         */
        if (substr($this->data, $this->offset, 2) === "\r\n") {
            $this->offset += 2;
        } else if (substr($this->data, $this->offset, 1) === "\n") {
            $this->offset++;
        } else {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme)));
        }

        $dataOffset = $this->offset;

        $this->offset += $streamLength;

        $nextLexeme = $this->readLexeme();
        if ($nextLexeme != 'endstream') {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme)));
        }

        $nextLexeme = $this->readLexeme();
        if ($nextLexeme != 'endobj') {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme)));
        }

        $obj = new Zend_Pdf_Element_Object_Stream(substr($this->data,
                                                         $dataOffset,
                                                         $streamLength),
                                                  (int)$objNum,
                                                  (int)$genNum,
                                                  $this->_objFactory->resolve(),
                                                  $objValue);

        foreach ($this->_elements as $element) {
            $element->setParentObject($obj);
        }

        // Restore offset value
        $this->offset = $offsetSave;

        return $obj;
    }


    /**
     * Get length of source string
     *
     * @return integer
     */
    public function getLength()
    {
        return strlen($this->data);
    }

    /**
     * Get source string
     *
     * @return string
     */
    public function getString()
    {
        return $this->data;
    }


    /**
     * Parse integer value from a binary stream
     *
     * Reads $size bytes starting at $offset and combines them most significant
     * byte first.
     *
     * @param string  $stream
     * @param integer $offset
     * @param integer $size
     * @return integer
     */
    public static function parseIntFromStream($stream, $offset, $size)
    {
        $value = 0;
        for ($count = 0; $count < $size; $count++) {
            $value *= 256;
            $value += ord($stream[$offset + $count]);
        }

        return $value;
    }



    /**
     * Set current context
     *
     * @param Zend_Pdf_Element_Reference_Context $context
     */
    public function setContext(Zend_Pdf_Element_Reference_Context $context)
    {
        $this->_context = $context;
    }

    /**
     * Object constructor
     *
     * Note: PHP duplicates a string which is passed by value only if it's updated.
     * Thus we don't need to care about overhead
     *
     * @param string $source
     * @param Zend_Pdf_ElementFactory_Interface $factory
     */
    public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory)
    {
        $this->data         = $source;
        $this->_objFactory  = $factory;
    }
}