File indexing completed on 2024-12-22 05:36:54

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Pdf
0017  * @subpackage Fonts
0018  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0019  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0020  * @version    $Id$
0021  */
0022 
0023 /** Zend_Pdf_Cmap */
0024 // require_once 'Zend/Pdf/Cmap.php';
0025 
0026 
0027 /**
0028  * Implements the "segment mapping to delta values" character map (type 4).
0029  *
0030  * This is the Microsoft standard mapping table type for OpenType fonts. It
0031  * provides the ability to cover multiple contiguous ranges of the Unicode
0032  * character set, with the exception of Unicode Surrogates (U+D800 - U+DFFF).
0033  *
0034  * @package    Zend_Pdf
0035  * @subpackage Fonts
0036  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0037  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0038  */
class Zend_Pdf_Cmap_SegmentToDelta extends Zend_Pdf_Cmap
{
  /**** Instance Variables ****/


    /**
     * The number of segments in the table.
     * @var integer
     */
    protected $_segmentCount = 0;

    /**
     * The size of the binary search range for segments, as recorded in the
     * table header (already divided by two).
     *
     * Parsed and kept for reference; lookups no longer depend on it because
     * the value comes straight from the font and may be inconsistent with the
     * real segment count.
     * @var integer
     */
    protected $_searchRange = 0;

    /**
     * The number of binary search steps required to cover the entire search
     * range, derived from the table header.
     *
     * Parsed and kept for reference; lookups no longer depend on it.
     * @var integer
     */
    protected $_searchIterations = 0;

    /**
     * Array of ending character codes for each segment. Keys are one-based
     * segment numbers.
     * @var array
     */
    protected $_segmentTableEndCodes = array();

    /**
     * The ending character code for the segment at the end of the low search
     * range, or 0 if the header's search range does not name a real segment.
     * @var integer
     */
    protected $_searchRangeEndCode = 0;

    /**
     * Array of starting character codes for each segment. Keys are one-based
     * segment numbers.
     * @var array
     */
    protected $_segmentTableStartCodes = array();

    /**
     * Array of character code to glyph delta values (signed) for each segment.
     * Keys are one-based segment numbers.
     * @var array
     */
    protected $_segmentTableIdDeltas = array();

    /**
     * Array of offsets into the glyph index array for each segment, expressed
     * in 16-bit words (the raw byte offset divided by two). Keys are one-based
     * segment numbers.
     * @var array
     */
    protected $_segmentTableIdRangeOffsets = array();

    /**
     * Glyph index array. Stores glyph numbers, used with range offset.
     * Keys are zero-based.
     * @var array
     */
    protected $_glyphIndexArray = array();



  /**** Public Interface ****/


  /* Concrete Class Implementation */

    /**
     * Returns an array of glyph numbers corresponding to the Unicode characters.
     *
     * If a particular character doesn't exist in this font, the special 'missing
     * character glyph' will be substituted. The keys of the input array are
     * preserved in the result.
     *
     * See also {@link glyphNumberForCharacter()}.
     *
     * @param array $characterCodes Array of Unicode character codes (code points).
     * @return array Array of glyph numbers.
     */
    public function glyphNumbersForCharacters($characterCodes)
    {
        $glyphNumbers = array();
        foreach ($characterCodes as $key => $characterCode) {
            $glyphNumbers[$key] = $this->_lookupGlyphNumber($characterCode);
        }
        return $glyphNumbers;
    }

    /**
     * Returns the glyph number corresponding to the Unicode character.
     *
     * If a particular character doesn't exist in this font, the special 'missing
     * character glyph' will be substituted.
     *
     * See also {@link glyphNumbersForCharacters()} for converting a whole
     * array of character codes in one call.
     *
     * @param integer $characterCode Unicode character code (code point).
     * @return integer Glyph number.
     */
    public function glyphNumberForCharacter($characterCode)
    {
        return $this->_lookupGlyphNumber($characterCode);
    }

    /**
     * Returns an array containing the Unicode characters that have entries in
     * this character map.
     *
     * Every code from each segment's start code through its end code is
     * listed, in segment order.
     *
     * @return array Unicode character codes.
     */
    public function getCoveredCharacters()
    {
        $characterCodes = array();
        for ($segmentNum = 1; $segmentNum <= $this->_segmentCount; $segmentNum++) {
            $endCode = $this->_segmentTableEndCodes[$segmentNum];
            for ($code = $this->_segmentTableStartCodes[$segmentNum]; $code <= $endCode; $code++) {
                $characterCodes[] = $code;
            }
        }
        return $characterCodes;
    }


    /**
     * Returns an array containing the glyph numbers that have entries in this
     * character map. Keys are Unicode character codes (integers).
     *
     * This functionality is partially covered by a
     * glyphNumbersForCharacters(getCoveredCharacters()) call, but this method
     * does it more efficiently: it walks the segments directly instead of
     * searching for the segment of each character code.
     *
     * @internal
     * @return array Array representing <Unicode character code> => <glyph number> pairs.
     */
    public function getCoveredCharactersGlyphs()
    {
        $glyphNumbers = array();

        for ($segmentNum = 1; $segmentNum <= $this->_segmentCount; $segmentNum++) {
            $endCode = $this->_segmentTableEndCodes[$segmentNum];
            for ($code = $this->_segmentTableStartCodes[$segmentNum]; $code <= $endCode; $code++) {
                $glyphNumbers[$code] = $this->_glyphNumberForSegment($segmentNum, $code);
            }
        }

        return $glyphNumbers;
    }



  /* Object Lifecycle */

    /**
     * Object constructor
     *
     * Parses the raw binary table data. Throws an exception if the table is
     * malformed.
     *
     * @param string $cmapData Raw binary cmap table data.
     * @throws Zend_Pdf_Exception
     */
    public function __construct($cmapData)
    {
        /* Sanity check: The table should be at least 23 bytes in size.
         */
        $actualLength = strlen($cmapData);
        if ($actualLength < 23) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Insufficient table data',
                                         Zend_Pdf_Exception::CMAP_TABLE_DATA_TOO_SMALL);
        }

        /* Sanity check: Make sure this is right data for this table type.
         */
        $type = $this->_extractUInt2($cmapData, 0);
        if ($type != Zend_Pdf_Cmap::TYPE_SEGMENT_TO_DELTA) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Wrong cmap table type',
                                         Zend_Pdf_Exception::CMAP_WRONG_TABLE_TYPE);
        }

        $length = $this->_extractUInt2($cmapData, 2);
        if ($length != $actualLength) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Table length ($length) does not match actual length ($actualLength)",
                                         Zend_Pdf_Exception::CMAP_WRONG_TABLE_LENGTH);
        }

        /* Bytes 4-5 hold the language field. Mapping tables should be
         * language-independent, and the font may not work as expected if they
         * are not. Unfortunately, many font files in the wild incorrectly
         * record a language ID in this field, so it is deliberately neither
         * read nor validated here.
         */

        /* These two values are stored premultiplied by two which is convenient
         * when using the binary data directly, but we're parsing it out to
         * native PHP data types, so divide by two.
         */
        $this->_segmentCount = $this->_extractUInt2($cmapData, 6) >> 1;
        $this->_searchRange  = $this->_extractUInt2($cmapData, 8) >> 1;

        $this->_searchIterations = $this->_extractUInt2($cmapData, 10) + 1;

        /* Sanity check: The four per-segment arrays (2 bytes per entry each)
         * plus the 14-byte header and the 2 reserved bytes must fit inside the
         * data, otherwise the extraction loops below would run off the end of
         * the string.
         */
        $requiredLength = 16 + ($this->_segmentCount * 8);
        if ($requiredLength > $actualLength) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Insufficient table data',
                                         Zend_Pdf_Exception::CMAP_TABLE_DATA_TOO_SMALL);
        }

        $offset = 14;
        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableEndCodes[$i] = $this->_extractUInt2($cmapData, $offset);
        }

        /* The search range comes straight from the font; only dereference it
         * when it actually names one of the segments parsed above.
         */
        if (isset($this->_segmentTableEndCodes[$this->_searchRange])) {
            $this->_searchRangeEndCode = $this->_segmentTableEndCodes[$this->_searchRange];
        } else {
            $this->_searchRangeEndCode = 0;
        }

        $offset += 2;    // reserved bytes

        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableStartCodes[$i] = $this->_extractUInt2($cmapData, $offset);
        }

        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableIdDeltas[$i] = $this->_extractInt2($cmapData, $offset);    // signed
        }

        /* The range offset helps determine the index into the glyph index array.
         * Like the segment count and search range above, it's stored as a byte
         * multiple in the font, so divide by two as we extract the values.
         */
        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableIdRangeOffsets[$i] = $this->_extractUInt2($cmapData, $offset) >> 1;
        }

        /* The size of the glyph index array varies by font and depends on the
         * extent of the usage of range offsets versus deltas. Some fonts may
         * not have any entries in this array.
         *
         * Only whole 2-byte entries are read. A trailing odd byte is left
         * unread so that the final sanity check below reports it.
         */
        for (; ($offset + 1) < $length; $offset += 2) {
            $this->_glyphIndexArray[] = $this->_extractUInt2($cmapData, $offset);
        }

        /* Sanity check: After reading all of the data, we should be at the end
         * of the table.
         */
        if ($offset != $length) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Ending offset ($offset) does not match length ($length)",
                                         Zend_Pdf_Exception::CMAP_FINAL_OFFSET_NOT_LENGTH);
        }
    }



  /**** Internal Methods ****/


    /**
     * Resolves a single character code to a glyph number.
     *
     * Shared by {@link glyphNumberForCharacter()} and
     * {@link glyphNumbersForCharacters()} so both always agree.
     *
     * @param integer $characterCode Unicode character code (code point).
     * @return integer Glyph number, or the missing character glyph.
     */
    protected function _lookupGlyphNumber($characterCode)
    {
        /* These tables only cover the 16-bit character range.
         */
        if (($characterCode < 0) || ($characterCode > 0xffff)) {
            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
        }

        $segmentNum = $this->_findSegment($characterCode);
        if ($segmentNum === 0) {
            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
        }

        return $this->_glyphNumberForSegment($segmentNum, $characterCode);
    }

    /**
     * Finds the segment that contains the character code.
     *
     * The segments are ordered from lowest-to-highest end code. A binary
     * search locates the first segment whose end code is greater than or
     * equal to the character code; the character is covered only if that
     * segment's start code is not greater than the character code.
     *
     * The search bounds are taken from the parsed segment count rather than
     * from the search hints in the table header, so the index can never leave
     * the range 1..segment count and the result is always defined, even when
     * the table lacks a terminating segment or carries wrong header hints.
     *
     * @param integer $characterCode Unicode character code (code point).
     * @return integer One-based segment number, or 0 if no segment covers the
     *   character code.
     */
    protected function _findSegment($characterCode)
    {
        $low        = 1;
        $high       = $this->_segmentCount;
        $segmentNum = 0;

        while ($low <= $high) {
            $middle = ($low + $high) >> 1;
            if ($this->_segmentTableEndCodes[$middle] >= $characterCode) {
                /* Candidate found; keep looking for an earlier one. */
                $segmentNum = $middle;
                $high       = $middle - 1;
            } else {
                $low = $middle + 1;
            }
        }

        /* If no segment ends at or after the character code, or the segment's
         * start code is greater than our character code, that character is
         * not represented in this font.
         */
        if (($segmentNum === 0) ||
            ($this->_segmentTableStartCodes[$segmentNum] > $characterCode)) {
            return 0;
        }

        return $segmentNum;
    }

    /**
     * Computes the glyph number for a character code known to lie within the
     * given segment.
     *
     * @param integer $segmentNum One-based segment number.
     * @param integer $characterCode Character code inside that segment.
     * @return integer Glyph number, or the missing character glyph if the
     *   segment points outside of the glyph index array.
     */
    protected function _glyphNumberForSegment($segmentNum, $characterCode)
    {
        $idDelta     = $this->_segmentTableIdDeltas[$segmentNum];
        $rangeOffset = $this->_segmentTableIdRangeOffsets[$segmentNum];

        if ($rangeOffset == 0) {
            /* This segment uses a simple mapping from character code to
             * glyph number. The delta is signed and the arithmetic is modulo
             * 65536; masking (instead of %) keeps the result non-negative
             * when the sum is negative.
             */
            return ($characterCode + $idDelta) & 0xffff;
        }

        /* This segment relies on the glyph index array to determine the
         * glyph number. The calculation below determines the correct
         * index into that array. It's a little odd because the range
         * offset in the font file is designed to quickly provide an
         * address of the index in the raw binary data instead of the
         * index itself. Since we've parsed the data into arrays, we
         * must process it a bit differently.
         */
        $glyphIndex = ($characterCode - $this->_segmentTableStartCodes[$segmentNum] +
                       $rangeOffset - $this->_segmentCount +
                       $segmentNum - 1);

        /* A malformed table may point before or past the glyph index array. */
        if (!isset($this->_glyphIndexArray[$glyphIndex])) {
            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
        }

        $glyphNumber = $this->_glyphIndexArray[$glyphIndex];

        /* Per the cmap format 4 definition, a stored value of 0 means the
         * glyph is missing; any other value has the segment's delta added to
         * it (modulo 65536) to obtain the final glyph number.
         */
        if ($glyphNumber != 0) {
            $glyphNumber = ($glyphNumber + $idDelta) & 0xffff;
        }

        return $glyphNumber;
    }

}