File indexing completed on 2024-12-22 05:36:57

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Pdf
0017  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0018  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0019  * @version    $Id$
0020  */
0021 
0022 
0023 /** Internally used classes */
0024 // require_once 'Zend/Pdf/Element/Array.php';
0025 // require_once 'Zend/Pdf/Element/String/Binary.php';
0026 // require_once 'Zend/Pdf/Element/Boolean.php';
0027 // require_once 'Zend/Pdf/Element/Dictionary.php';
0028 // require_once 'Zend/Pdf/Element/Name.php';
0029 // require_once 'Zend/Pdf/Element/Null.php';
0030 // require_once 'Zend/Pdf/Element/Numeric.php';
0031 // require_once 'Zend/Pdf/Element/Object.php';
0032 // require_once 'Zend/Pdf/Element/Object/Stream.php';
0033 // require_once 'Zend/Pdf/Element/Reference.php';
0034 // require_once 'Zend/Pdf/Element/String.php';
0035 
0036 
0037 /**
0038  * PDF string parser
0039  *
0040  * @package    Zend_Pdf
0041  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0042  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0043  */
class Zend_Pdf_StringParser
{
    /**
     * Source PDF data being parsed
     *
     * @var string
     */
    public $data = '';

    /**
     * Current byte position within $data
     *
     * @var integer
     */
    public $offset = 0;

    /**
     * Current reference context, handed to every reference object created by the parser
     *
     * @var Zend_Pdf_Element_Reference_Context
     */
    private $_context = null;

    /**
     * Array of elements of the currently parsed object/trailer
     *
     * Filled by readElement() and consumed by getObject(), which assigns
     * the resulting top-level object as parent of each collected element.
     *
     * @var array
     */
    private $_elements = array();

    /**
     * PDF objects factory.
     *
     * @var Zend_Pdf_ElementFactory_Interface
     */
    private $_objFactory = null;


    /**
     * Clean up resources.
     *
     * Clear current state to remove cyclic object references
     */
    public function cleanUp()
    {
        $this->_context    = null;
        $this->_elements   = array();
        $this->_objFactory = null;
    }

    /**
     * Character with code $chCode is white space
     *
     * @param integer $chCode
     * @return boolean
     */
    public static function isWhiteSpace($chCode)
    {
        return $chCode == 0x00  // Null character
            || $chCode == 0x09  // Tab
            || $chCode == 0x0A  // Line feed
            || $chCode == 0x0C  // Form feed
            || $chCode == 0x0D  // Carriage return
            || $chCode == 0x20; // Space
    }


    /**
     * Character with code $chCode is a delimiter character
     *
     * @param integer $chCode
     * @return boolean
     */
    public static function isDelimiter($chCode)
    {
        return $chCode == 0x28  // '('
            || $chCode == 0x29  // ')'
            || $chCode == 0x3C  // '<'
            || $chCode == 0x3E  // '>'
            || $chCode == 0x5B  // '['
            || $chCode == 0x5D  // ']'
            || $chCode == 0x7B  // '{'
            || $chCode == 0x7D  // '}'
            || $chCode == 0x2F  // '/'
            || $chCode == 0x25; // '%'
    }


    /**
     * Skip white space
     *
     * Advances $offset past any run of white space characters. When
     * $skipComment is true, '%' comments (up to, but not including, the
     * terminating CR/LF) are skipped as well, together with the white space
     * that follows them.
     *
     * @param boolean $skipComment
     */
    public function skipWhiteSpace($skipComment = true)
    {
        $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);

        if (!$skipComment) {
            return;
        }

        $length = strlen($this->data);

        while ($this->offset < $length  &&  $this->data[$this->offset] == '%') {
            // Skip the comment body, then the white space following it
            $this->offset += strcspn($this->data, "\r\n", $this->offset);
            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
        }
    }


    /**
     * Skip comment
     *
     * Advances $offset to the next line feed or carriage return character
     * (which is not consumed), or to the end of data if there is none.
     */
    public function skipComment()
    {
        if ($this->offset < strlen($this->data)) {
            $this->offset += strcspn($this->data, "\r\n", $this->offset);
        }
    }


    /**
     * Read comment line
     *
     * Skips leading white space and, if the next character starts a comment,
     * returns the comment text including the leading '%' but excluding the
     * line terminator. Returns an empty string if there is no comment at the
     * current position (including when the end of data has been reached).
     *
     * @return string
     */
    public function readComment()
    {
        $this->skipWhiteSpace(false);

        /** Check if it's a comment line (guarding against reading past the end of data) */
        if ($this->offset >= strlen($this->data)  ||  $this->data[$this->offset] != '%') {
            return '';
        }

        $start = $this->offset;
        $this->offset += strcspn($this->data, "\r\n", $this->offset);

        return substr($this->data, $start, $this->offset - $start);
    }


    /**
     * Returns next lexeme from a pdf stream
     *
     * A lexeme is either '<<', '>>', a single delimiter character, or a run
     * of regular (non-delimiter, non-white-space) characters. An empty string
     * is returned when the end of data is reached.
     *
     * @return string
     */
    public function readLexeme()
    {
        // Character set terminating a regular lexeme. The form feed character
        // is only included on PHP >= 5.2.5, exactly as the original code did.
        // NOTE(review): presumably a workaround for older PHP versions - confirm.
        static $stopChars = null;
        if ($stopChars === null) {
            $stopChars = (version_compare(phpversion(), '5.2.5') >= 0)
                       ? "()<>[]{}/%\x00\t\n\f\r "
                       : "()<>[]{}/%\x00\t\n\r ";
        }

        $length = strlen($this->data);

        // White space and comment skipping is kept inline here
        // (same logic as skipWhiteSpace(true)).
        while (true) {
            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);

            if ($this->offset < $length  &&  $this->data[$this->offset] == '%') {
                $this->offset += strcspn($this->data, "\r\n", $this->offset);
            } else {
                break;
            }
        }

        if ($this->offset >= $length) {
            return '';
        }

        $char = $this->data[$this->offset];

        if (strpos('()<>[]{}/%', $char) !== false) {
            // Delimiter: '<<' and '>>' are two-character lexemes, everything else is one character
            $pair = substr($this->data, $this->offset, 2);
            if ($pair === '<<'  ||  $pair === '>>') {
                $this->offset += 2;
                return $pair;
            }

            $this->offset++;
            return $char;
        }

        // Regular lexeme: everything up to the next delimiter or white space character
        $start = $this->offset;
        $this->offset += strcspn($this->data, $stopChars, $this->offset);

        return substr($this->data, $start, $this->offset - $start);
    }


    /**
     * Read elemental object from a PDF stream
     *
     * Note: readElement() method is a public method and could be invoked from other classes.
     * If readElement() is used not by Zend_Pdf_StringParser::getObject() method, then we should not care
     * about _elements member management.
     *
     * @param string|null $nextLexeme  Already read first lexeme of the element,
     *                                 or null to read it from the current position
     * @return Zend_Pdf_Element
     * @throws Zend_Pdf_Exception
     */
    public function readElement($nextLexeme = null)
    {
        if ($nextLexeme === null) {
            $nextLexeme = $this->readLexeme();
        }

        switch ($nextLexeme) {
            case '(':
                return ($this->_elements[] = $this->_readString());

            case '<':
                return ($this->_elements[] = $this->_readBinaryString());

            case '/':
                return ($this->_elements[] = new Zend_Pdf_Element_Name(
                                                    Zend_Pdf_Element_Name::unescape( $this->readLexeme() )
                                                                      ));

            case '[':
                return ($this->_elements[] = $this->_readArray());

            case '<<':
                return ($this->_elements[] = $this->_readDictionary());

            // Closing delimiters and braces can never start an element
            case ')':
            case '>':
            case ']':
            case '>>':
            case '{':
            case '}':
                // require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
                                                $this->offset));

            default:
                if (strcasecmp($nextLexeme, 'true') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(true));
                } else if (strcasecmp($nextLexeme, 'false') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(false));
                } else if (strcasecmp($nextLexeme, 'null') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Null());
                }

                // "<objNum> <genNum> R" is a reference; otherwise the lexeme is treated as a number
                $ref = $this->_readReference($nextLexeme);
                if ($ref !== null) {
                    return ($this->_elements[] = $ref);
                }

                return ($this->_elements[] = $this->_readNumeric($nextLexeme));
        }
    }


    /**
     * Read string PDF object
     * Also reads trailing ')' from a pdf stream
     *
     * Expects $offset to point right after the opening '('.
     *
     * @return Zend_Pdf_Element_String
     * @throws Zend_Pdf_Exception
     */
    private function _readString()
    {
        $start          = $this->offset;
        $openedBrackets = 1;
        $length         = strlen($this->data);

        $this->offset += strcspn($this->data, '()\\', $this->offset);

        while ($this->offset < $length) {
            switch (ord( $this->data[$this->offset] )) {
                case 0x28: // '(' - opened bracket in the string, needs balanced pair.
                    $this->offset++;
                    $openedBrackets++;
                    break;

                case 0x29: // ')' - pair to the opened bracket
                    $this->offset++;
                    $openedBrackets--;
                    break;

                case 0x5C: // '\\' - escape sequence, skip next char from a check
                    $this->offset += 2;
                    break;
            }

            if ($openedBrackets == 0) {
                break; // end of string
            }

            $this->offset += strcspn($this->data, '()\\', $this->offset);
        }
        if ($openedBrackets != 0) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
        }

        // The closing ')' has been consumed but is not part of the string value
        return new Zend_Pdf_Element_String(Zend_Pdf_Element_String::unescape( substr($this->data,
                                                                                     $start,
                                                                                     $this->offset - $start - 1) ));
    }


    /**
     * Read binary string PDF object
     * Also reads trailing '>' from a pdf stream
     *
     * Expects $offset to point right after the opening '<'.
     *
     * @return Zend_Pdf_Element_String_Binary
     * @throws Zend_Pdf_Exception
     */
    private function _readBinaryString()
    {
        $start = $this->offset;

        $this->offset += strspn($this->data, "\x00\t\n\f\r 0123456789abcdefABCDEF", $this->offset);

        // End of data is reached only when no character is left to serve as closing '>'.
        // (A '>' located at the very last byte of the data is a valid terminator.)
        if ($this->offset >= strlen($this->data)) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while reading binary string. Offset - 0x%X. \'>\' expected.', $start));
        }

        if ($this->data[$this->offset++] != '>') {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
        }

        // The closing '>' has been consumed but is not part of the string value
        return new Zend_Pdf_Element_String_Binary(
                       Zend_Pdf_Element_String_Binary::unescape( substr($this->data,
                                                                        $start,
                                                                        $this->offset - $start - 1) ));
    }


    /**
     * Read array PDF object
     * Also reads trailing ']' from a pdf stream
     *
     * Expects $offset to point right after the opening '['.
     *
     * @return Zend_Pdf_Element_Array
     * @throws Zend_Pdf_Exception
     */
    private function _readArray()
    {
        $elements = array();

        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
            if ($nextLexeme == ']') {
                return new Zend_Pdf_Element_Array($elements);
            }

            $elements[] = $this->readElement($nextLexeme);
        }

        // require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
    }


    /**
     * Read dictionary PDF object
     * Also reads trailing '>>' from a pdf stream
     *
     * Expects $offset to point right after the opening '<<'.
     *
     * @return Zend_Pdf_Element_Dictionary
     * @throws Zend_Pdf_Exception
     */
    private function _readDictionary()
    {
        $dictionary = new Zend_Pdf_Element_Dictionary();

        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
            if ($nextLexeme == '>>') {
                return $dictionary;
            }

            $nameStart = $this->offset - strlen($nextLexeme);

            $name = $this->readElement($nextLexeme);

            // Validate the key before reading the value, so that a malformed key
            // is reported as such instead of as an unrelated error of the value parsing.
            if (!$name instanceof Zend_Pdf_Element_Name) {
                // require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $nameStart));
            }

            $value = $this->readElement();

            $dictionary->add($name, $value);
        }

        // require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
    }


    /**
     * Read reference PDF object
     *
     * Tries to read "<objNum> <genNum> R". If the input doesn't match this
     * pattern, $offset is rewound to the value it had on entry and null is returned.
     *
     * @param string $nextLexeme  Already read object number lexeme, or null to read it
     * @return Zend_Pdf_Element_Reference|null
     */
    private function _readReference($nextLexeme = null)
    {
        $start = $this->offset;

        if ($nextLexeme === null) {
            $objNum = $this->readLexeme();
        } else {
            $objNum = $nextLexeme;
        }
        if (!ctype_digit($objNum)) { // it's not a reference
            $this->offset = $start;
            return null;
        }

        $genNum = $this->readLexeme();
        if (!ctype_digit($genNum)) { // it's not a reference
            $this->offset = $start;
            return null;
        }

        $rMark  = $this->readLexeme();
        if ($rMark != 'R') { // it's not a reference
            $this->offset = $start;
            return null;
        }

        return new Zend_Pdf_Element_Reference((int)$objNum, (int)$genNum, $this->_context, $this->_objFactory->resolve());
    }


    /**
     * Read numeric PDF object
     *
     * @param string $nextLexeme  Already read lexeme, or null to read it from the current position
     * @return Zend_Pdf_Element_Numeric
     */
    private function _readNumeric($nextLexeme = null)
    {
        if ($nextLexeme === null) {
            $nextLexeme = $this->readLexeme();
        }

        return new Zend_Pdf_Element_Numeric($nextLexeme);
    }


    /**
     * Read indirect object from a PDF stream
     *
     * Parses "<objNum> <genNum> obj <value> endobj" or a stream object
     * ("... obj <dictionary> stream <data> endstream endobj") located at $offset.
     * The parser position ($offset property) is restored before a successful return.
     *
     * @param integer $offset
     * @param Zend_Pdf_Element_Reference_Context $context
     * @return Zend_Pdf_Element_Object
     * @throws Zend_Pdf_Exception
     */
    public function getObject($offset, Zend_Pdf_Element_Reference_Context $context)
    {
        if ($offset === null ) {
            return new Zend_Pdf_Element_Null();
        }

        // Save current offset to make getObject() reentrant
        $offsetSave = $this->offset;

        $this->offset    = $offset;
        $this->_context  = $context;
        $this->_elements = array();

        $objNum = $this->readLexeme();
        if (!ctype_digit($objNum)) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
        }

        $genNum = $this->readLexeme();
        if (!ctype_digit($genNum)) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
        }

        $objKeyword = $this->readLexeme();
        if ($objKeyword != 'obj') {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
        }

        $objValue = $this->readElement();

        // Snapshot the collected elements now: getObject() resets $this->_elements on entry,
        // so a nested call would otherwise replace this object's element list.
        // NOTE(review): a nested call presumably happens when an indirect stream /Length
        // is dereferenced through this same parser - confirm against Zend_Pdf_Element_Reference.
        $elements = $this->_elements;

        $nextLexeme = $this->readLexeme();

        if ($nextLexeme == 'endobj') {
            /**
             * Object is not generated by factory (thus it's not marked as modified object).
             * But factory is assigned to the object.
             */
            $obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve());
        } else {
            $obj = $this->_readStreamObject($objValue, (int)$objNum, (int)$genNum, $nextLexeme);
        }

        foreach ($elements as $element) {
            $element->setParentObject($obj);
        }

        // Leave the parser in the same state a non-nested call produces
        $this->_elements = $elements;
        $this->_context  = $context;

        // Restore offset value
        $this->offset = $offsetSave;

        return $obj;
    }


    /**
     * Read the stream part of an indirect object and build the stream object
     *
     * Helper of getObject(). Expects $offset to point right after the lexeme
     * which follows the object value.
     *
     * @param Zend_Pdf_Element $objValue  Parsed object value (must be a dictionary)
     * @param integer $objNum
     * @param integer $genNum
     * @param string $nextLexeme  Lexeme read after the object value ('stream' is expected)
     * @return Zend_Pdf_Element_Object_Stream
     * @throws Zend_Pdf_Exception
     */
    private function _readStreamObject($objValue, $objNum, $genNum, $nextLexeme)
    {
        if ($nextLexeme != 'stream') {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme)));
        }

        if (!$objValue instanceof Zend_Pdf_Element_Dictionary) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme)));
        }

        /**
         * References are automatically dereferenced at this moment.
         */
        $streamLength = $objValue->Length->value;

        /**
         * 'stream' keyword must be followed by either cr-lf sequence or lf character only.
         * This restriction gives the possibility to recognize all cases exactly
         *
         * substr() is used instead of direct indexing to stay within the data bounds.
         */
        if (substr($this->data, $this->offset, 2) === "\r\n") {
            $this->offset += 2;
        } else if (substr($this->data, $this->offset, 1) === "\n") {
            $this->offset++;
        } else {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme)));
        }

        $dataOffset = $this->offset;

        $this->offset += $streamLength;

        $nextLexeme = $this->readLexeme();
        if ($nextLexeme != 'endstream') {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme)));
        }

        $nextLexeme = $this->readLexeme();
        if ($nextLexeme != 'endobj') {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme)));
        }

        return new Zend_Pdf_Element_Object_Stream(substr($this->data,
                                                         $dataOffset,
                                                         $streamLength),
                                                  $objNum,
                                                  $genNum,
                                                  $this->_objFactory->resolve(),
                                                  $objValue);
    }


    /**
     * Get length of source string
     *
     * @return integer
     */
    public function getLength()
    {
        return strlen($this->data);
    }

    /**
     * Get source string
     *
     * @return string
     */
    public function getString()
    {
        return $this->data;
    }


    /**
     * Parse integer value from a binary stream
     *
     * Reads $size bytes starting at $offset and combines them
     * most significant byte first.
     *
     * @param string $stream
     * @param integer $offset
     * @param integer $size
     * @return integer
     */
    public static function parseIntFromStream($stream, $offset, $size)
    {
        $value = 0;
        for ($count = 0; $count < $size; $count++) {
            $value *= 256;
            $value += ord($stream[$offset + $count]);
        }

        return $value;
    }



    /**
     * Set current context
     *
     * @param Zend_Pdf_Element_Reference_Context $context
     */
    public function setContext(Zend_Pdf_Element_Reference_Context $context)
    {
        $this->_context = $context;
    }

    /**
     * Object constructor
     *
     * Note: PHP duplicates string, which is sent by value, only if it's updated.
     * Thus we don't need to care about overhead
     *
     * @param string $source  PDF data to parse
     * @param Zend_Pdf_ElementFactory_Interface $factory
     */
    public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory)
    {
        $this->data         = $source;
        $this->_objFactory  = $factory;
    }
}