File indexing completed on 2024-12-22 05:36:54
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Pdf
 * @subpackage Fonts
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */

/** Zend_Pdf_Cmap */
// require_once 'Zend/Pdf/Cmap.php';


/**
 * Implements the "segment mapping to delta values" character map (type 4).
 *
 * This is the Microsoft standard mapping table type for OpenType fonts. It
 * provides the ability to cover multiple contiguous ranges of the Unicode
 * character set, with the exception of Unicode Surrogates (U+D800 - U+DFFF).
 *
 * All per-segment arrays in this class are indexed from 1 to
 * {@link $_segmentCount}; the glyph index array is indexed from 0.
 *
 * @package    Zend_Pdf
 * @subpackage Fonts
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Pdf_Cmap_SegmentToDelta extends Zend_Pdf_Cmap
{
  /**** Instance Variables ****/


    /**
     * The number of segments in the table.
     * @var integer
     */
    protected $_segmentCount = 0;

    /**
     * The size of the binary search range for segments.
     * @var integer
     */
    protected $_searchRange = 0;

    /**
     * The number of binary search steps required to cover the entire search
     * range.
     * @var integer
     */
    protected $_searchIterations = 0;

    /**
     * Array of ending character codes for each segment.
     * @var array
     */
    protected $_segmentTableEndCodes = array();

    /**
     * The ending character code for the segment at the end of the low search
     * range.
     * @var integer
     */
    protected $_searchRangeEndCode = 0;

    /**
     * Array of starting character codes for each segment.
     * @var array
     */
    protected $_segmentTableStartCodes = array();

    /**
     * Array of character code to glyph delta values for each segment.
     * @var array
     */
    protected $_segmentTableIdDeltas = array();

    /**
     * Array of offsets into the glyph index array for each segment.
     * @var array
     */
    protected $_segmentTableIdRangeOffsets = array();

    /**
     * Glyph index array. Stores glyph numbers, used with range offset.
     * @var array
     */
    protected $_glyphIndexArray = array();



  /**** Public Interface ****/


  /* Concrete Class Implementation */

    /**
     * Returns an array of glyph numbers corresponding to the Unicode characters.
     *
     * If a particular character doesn't exist in this font, the special 'missing
     * character glyph' will be substituted.
     *
     * The keys of the input array are preserved in the result.
     *
     * See also {@link glyphNumberForCharacter()}.
     *
     * @param array $characterCodes Array of Unicode character codes (code points).
     * @return array Array of glyph numbers.
     */
    public function glyphNumbersForCharacters($characterCodes)
    {
        $glyphNumbers = array();
        foreach ($characterCodes as $key => $characterCode) {
            /* Every character is resolved independently, so a failed lookup
             * can never reuse the segment found for a previous character.
             */
            $glyphNumbers[$key] = $this->_lookUpGlyphNumber($characterCode);
        }
        return $glyphNumbers;
    }

    /**
     * Returns the glyph number corresponding to the Unicode character.
     *
     * If a particular character doesn't exist in this font, the special 'missing
     * character glyph' will be substituted.
     *
     * See also {@link glyphNumbersForCharacters()} for resolving many
     * characters in one call.
     *
     * @param integer $characterCode Unicode character code (code point).
     * @return integer Glyph number.
     */
    public function glyphNumberForCharacter($characterCode)
    {
        return $this->_lookUpGlyphNumber($characterCode);
    }

    /**
     * Returns an array containing the Unicode characters that have entries in
     * this character map.
     *
     * Every code from each segment's start code through its end code
     * (inclusive) is listed, in segment order.
     *
     * @return array Unicode character codes.
     */
    public function getCoveredCharacters()
    {
        $characterCodes = array();
        for ($segmentNum = 1; $segmentNum <= $this->_segmentCount; $segmentNum++) {
            $endCode = $this->_segmentTableEndCodes[$segmentNum];
            for ($code = $this->_segmentTableStartCodes[$segmentNum]; $code <= $endCode; $code++) {
                $characterCodes[] = $code;
            }
        }
        return $characterCodes;
    }


    /**
     * Returns an array containing the glyph numbers that have entries in this character map.
     * Keys are Unicode character codes (integers).
     *
     * This functionality is partially covered by a
     * glyphNumbersForCharacters(getCoveredCharacters()) call, but this method
     * does it more efficiently: it walks the segments directly instead of
     * searching for the segment of each character code.
     *
     * @internal
     * @return array Array representing <Unicode character code> => <glyph number> pairs.
     */
    public function getCoveredCharactersGlyphs()
    {
        $glyphNumbers = array();

        for ($segmentNum = 1; $segmentNum <= $this->_segmentCount; $segmentNum++) {
            $endCode = $this->_segmentTableEndCodes[$segmentNum];
            for ($code = $this->_segmentTableStartCodes[$segmentNum]; $code <= $endCode; $code++) {
                $glyphNumbers[$code] = $this->_glyphNumberInSegment($segmentNum, $code);
            }
        }

        return $glyphNumbers;
    }



  /* Object Lifecycle */

    /**
     * Object constructor
     *
     * Parses the raw binary table data. Throws an exception if the table is
     * malformed.
     *
     * @param string $cmapData Raw binary cmap table data.
     * @throws Zend_Pdf_Exception
     */
    public function __construct($cmapData)
    {
        /* Sanity check: The table should be at least 23 bytes in size.
         */
        $actualLength = strlen($cmapData);
        if ($actualLength < 23) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Insufficient table data',
                                         Zend_Pdf_Exception::CMAP_TABLE_DATA_TOO_SMALL);
        }

        /* Sanity check: Make sure this is right data for this table type.
         */
        $type = $this->_extractUInt2($cmapData, 0);
        if ($type != Zend_Pdf_Cmap::TYPE_SEGMENT_TO_DELTA) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Wrong cmap table type',
                                         Zend_Pdf_Exception::CMAP_WRONG_TABLE_TYPE);
        }

        $length = $this->_extractUInt2($cmapData, 2);
        if ($length != $actualLength) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Table length ($length) does not match actual length ($actualLength)",
                                         Zend_Pdf_Exception::CMAP_WRONG_TABLE_LENGTH);
        }

        /* Mapping tables should be language-independent. The font may not work
         * as expected if they are not. Unfortunately, many font files in the
         * wild incorrectly record a language ID in this field, so we can't
         * call this a failure.
         */
        $language = $this->_extractUInt2($cmapData, 4);
        if ($language != 0) {
            // Record a warning here somehow?
        }

        /* These two values are stored premultiplied by two which is convenient
         * when using the binary data directly, but we're parsing it out to
         * native PHP data types, so divide by two.
         */
        $this->_segmentCount = $this->_extractUInt2($cmapData, 6) >> 1;
        $this->_searchRange  = $this->_extractUInt2($cmapData, 8) >> 1;

        $this->_searchIterations = $this->_extractUInt2($cmapData, 10) + 1;

        /* The segment arrays follow the 14-byte header, each holding one
         * 2-byte entry per segment. They are stored 1-based.
         */
        $offset = 14;
        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableEndCodes[$i] = $this->_extractUInt2($cmapData, $offset);
        }

        /* A search range outside 1..segment count means the header is
         * inconsistent. Fall back to 0 rather than reading an undefined array
         * element; the segment search copes with an out-of-range start index.
         */
        if (isset($this->_segmentTableEndCodes[$this->_searchRange])) {
            $this->_searchRangeEndCode = $this->_segmentTableEndCodes[$this->_searchRange];
        } else {
            $this->_searchRangeEndCode = 0;
        }

        $offset += 2;    // reserved bytes

        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableStartCodes[$i] = $this->_extractUInt2($cmapData, $offset);
        }

        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableIdDeltas[$i] = $this->_extractInt2($cmapData, $offset);    // signed
        }

        /* The range offset helps determine the index into the glyph index array.
         * Like the segment count and search range above, it's stored as a byte
         * multiple in the font, so divide by two as we extract the values.
         */
        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableIdRangeOffsets[$i] = $this->_extractUInt2($cmapData, $offset) >> 1;
        }

        /* The size of the glyph index array varies by font and depends on the
         * extent of the usage of range offsets versus deltas. Some fonts may
         * not have any entries in this array.
         */
        for (; $offset < $length; $offset += 2) {
            $this->_glyphIndexArray[] = $this->_extractUInt2($cmapData, $offset);
        }

        /* Sanity check: After reading all of the data, we should be at the end
         * of the table.
         */
        if ($offset != $length) {
            // require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Ending offset ($offset) does not match length ($length)",
                                         Zend_Pdf_Exception::CMAP_FINAL_OFFSET_NOT_LENGTH);
        }
    }



  /**** Internal Methods ****/


    /**
     * Resolves a single character code to a glyph number.
     *
     * Shared implementation behind {@link glyphNumberForCharacter()} and
     * {@link glyphNumbersForCharacters()}.
     *
     * @param integer $characterCode Unicode character code (code point).
     * @return integer Glyph number, or Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH
     *   if the character is not covered by any segment.
     */
    private function _lookUpGlyphNumber($characterCode)
    {
        /* These tables only cover the 16-bit character range.
         */
        if ($characterCode > 0xffff) {
            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
        }

        $segmentIndex = $this->_findSegmentIndex($characterCode);

        /* If no segment was found, or the segment's start code is greater
         * than our character code, that character is not represented in this
         * font.
         */
        if ($segmentIndex === null
            || $this->_segmentTableStartCodes[$segmentIndex] > $characterCode) {
            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
        }

        return $this->_glyphNumberInSegment($segmentIndex, $characterCode);
    }

    /**
     * Binary-searches for the first segment whose end code is greater than or
     * equal to the character code.
     *
     * The segments are ordered from lowest-to-highest. No matter the number
     * of segments (there may be hundreds in a large font), only
     * {@link $_searchIterations} steps are performed.
     *
     * @param integer $characterCode Unicode character code (code point).
     * @return integer|null 1-based segment index, or null if no examined
     *   segment ends at or after the character code.
     */
    private function _findSegmentIndex($characterCode)
    {
        /* Determine where to start the search. If the end code at the top of
         * the search range is larger, then our target is probably below it.
         * If it is smaller, our target is probably above it, so move the
         * search range to the end of the segment list.
         */
        if ($this->_searchRangeEndCode >= $characterCode) {
            $searchIndex = $this->_searchRange;
        } else {
            $searchIndex = $this->_segmentCount;
        }

        $segmentIndex = null;
        for ($i = 1; $i <= $this->_searchIterations; $i++) {
            $step = $this->_searchRange >> $i;

            if ($searchIndex > $this->_segmentCount) {
                /* Past the end of the table; this only happens when the
                 * header's search values are inconsistent. Step back down
                 * without reading an undefined element.
                 */
                $searchIndex -= $step;
            } else if ($searchIndex >= 1
                       && $this->_segmentTableEndCodes[$searchIndex] >= $characterCode) {
                $segmentIndex = $searchIndex;
                $searchIndex -= $step;
            } else {
                $searchIndex += $step;
            }
        }

        return $segmentIndex;
    }

    /**
     * Maps a character code to a glyph number using one specific segment.
     *
     * The caller is responsible for making sure the character code lies
     * within the segment's start and end codes.
     *
     * @param integer $segmentIndex  1-based segment index.
     * @param integer $characterCode Unicode character code (code point).
     * @return integer Glyph number.
     */
    private function _glyphNumberInSegment($segmentIndex, $characterCode)
    {
        $delta       = $this->_segmentTableIdDeltas[$segmentIndex];
        $rangeOffset = $this->_segmentTableIdRangeOffsets[$segmentIndex];

        if ($rangeOffset == 0) {
            /* This segment uses a simple mapping from character code to
             * glyph number. The delta is signed and the arithmetic is modulo
             * 65536. Masking is used instead of the % operator because % keeps
             * the sign of a negative sum in PHP.
             */
            return ($characterCode + $delta) & 0xffff;
        }

        /* This segment relies on the glyph index array to determine the
         * glyph number. The calculation below determines the correct
         * index into that array. It's a little odd because the range
         * offset in the font file is designed to quickly provide an
         * address of the index in the raw binary data instead of the
         * index itself. Since we've parsed the data into arrays, we
         * must process it a bit differently.
         */
        $glyphIndex = ($characterCode - $this->_segmentTableStartCodes[$segmentIndex] +
                       $rangeOffset - $this->_segmentCount +
                       $segmentIndex - 1);

        /* A range offset pointing outside the glyph index array cannot be
         * resolved; report the character as missing.
         */
        if (!isset($this->_glyphIndexArray[$glyphIndex])) {
            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
        }

        $glyphNumber = $this->_glyphIndexArray[$glyphIndex];

        /* Per the format 4 specification, a non-zero entry has the segment's
         * delta added to it (modulo 65536); a zero entry is returned
         * unchanged.
         */
        if ($glyphNumber != 0) {
            $glyphNumber = ($glyphNumber + $delta) & 0xffff;
        }

        return $glyphNumber;
    }

}