// File indexing completed on 2025-01-19 03:55:17
0001 /*****************************************************************************/ 0002 // Copyright 2006-2019 Adobe Systems Incorporated 0003 // All Rights Reserved. 0004 // 0005 // NOTICE: Adobe permits you to use, modify, and distribute this file in 0006 // accordance with the terms of the Adobe license agreement accompanying it. 0007 /*****************************************************************************/ 0008 0009 #include "dng_string.h" 0010 0011 #include "dng_assertions.h" 0012 #include "dng_exceptions.h" 0013 #include "dng_flags.h" 0014 #include "dng_mutex.h" 0015 #include "dng_utils.h" 0016 #include "dng_safe_arithmetic.h" 0017 0018 #if qMacOS 0019 #include <CoreServices/CoreServices.h> 0020 #endif 0021 0022 #if qWinOS 0023 # include <windows.h> 0024 # ifndef SORT_DIGITSASNUMBERS 0025 // NOTE: SORT_DIGITSASNUMBERS is available since win7 0026 # define SORT_DIGITSASNUMBERS 8 0027 # endif 0028 #endif 0029 0030 #if qLinux || qiPhone || qAndroid 0031 #include <ctype.h> // for isdigit 0032 #endif 0033 0034 /*****************************************************************************/ 0035 0036 const uint32 kREPLACEMENT_CHARACTER = 0x0000FFFD; 0037 0038 /*****************************************************************************/ 0039 0040 // Returns the length of the zero-terminated string 's'. Throws a dng_exception 0041 // if the length of 's' is too large to be represented as a uint32. 0042 0043 static uint32 strlenAsUint32 (const char *s) 0044 { 0045 0046 uint32 lengthAsUint32 = 0; 0047 0048 ConvertUnsigned (strlen (s), &lengthAsUint32); 0049 0050 return lengthAsUint32; 0051 0052 } 0053 0054 /*****************************************************************************/ 0055 0056 // Checks whether there is enough space left in the buffer pointed to by 0057 // 'currentPos' to write at least 'space' elements of type T (to positions 0058 // currentPos[0] through currentPos[space - 1]. 
Throws a dng_exception if 0059 // there is not enough space left in the buffer. 'bufferEnd' should point one 0060 // element beyond the end of the buffer. For example, if the buffer is "T 0061 // buffer[3];", then bufferEnd should point to T + 3. 0062 0063 template <class T> 0064 static void CheckSpaceLeftInBuffer(const T *currentPos, 0065 const T *bufferEnd, 0066 size_t space) 0067 { 0068 0069 if (bufferEnd < currentPos || static_cast<size_t> (bufferEnd - currentPos) < space) 0070 { 0071 ThrowMemoryFull ("Buffer overrun"); 0072 } 0073 0074 } 0075 0076 /*****************************************************************************/ 0077 0078 #if qMacOS 0079 0080 static void Assign_Multibyte (dng_string &dngString, 0081 const char *otherString, 0082 TextEncoding encoding) 0083 { 0084 0085 dng_safe_uint32 aSize (strlenAsUint32 (otherString)); 0086 0087 if (aSize.Get () > 0) 0088 { 0089 0090 dng_safe_uint32 aBufSize = aSize * 6u + 256u; 0091 0092 dng_memory_data aBuf (aBufSize + 1u); 0093 0094 UnicodeMapping aMapping; 0095 0096 aMapping.unicodeEncoding = ::CreateTextEncoding (kTextEncodingUnicodeV3_0, 0097 kUnicodeNoSubset, 0098 kUnicodeUTF8Format); 0099 0100 aMapping.otherEncoding = encoding; 0101 aMapping.mappingVersion = kUnicodeUseLatestMapping; 0102 0103 TextToUnicodeInfo aInfo = NULL; 0104 0105 if (::CreateTextToUnicodeInfo (&aMapping, &aInfo) == noErr) 0106 { 0107 0108 ByteCount aInput = 0; 0109 ByteCount aOutput = 0; 0110 0111 ::ConvertFromTextToUnicode (aInfo, 0112 aSize.Get (), 0113 otherString, 0114 kUnicodeUseFallbacksMask | 0115 kUnicodeLooseMappingsMask, 0116 0, 0117 NULL, 0118 NULL, 0119 NULL, 0120 aBufSize.Get (), 0121 &aInput, 0122 &aOutput, 0123 (UniChar *) aBuf.Buffer ()); 0124 0125 ::DisposeTextToUnicodeInfo (&aInfo); 0126 0127 if (aOutput > 0 && aOutput <= aBufSize.Get ()) 0128 { 0129 0130 char *aBufChar = aBuf.Buffer_char (); 0131 0132 aBufChar [aOutput] = 0; 0133 0134 dngString.Set (aBufChar); 0135 0136 return; 0137 0138 } 0139 0140 } 0141 0142 } 
0143 0144 dngString.Clear (); 0145 0146 } 0147 0148 static uint32 Extract_Multibyte (const dng_string &dngString, 0149 dng_memory_data &buffer, 0150 TextEncoding encoding) 0151 { 0152 0153 dng_safe_uint32 aSize (dngString.Length ()); 0154 0155 if (aSize.Get () > 0) 0156 { 0157 0158 dng_safe_uint32 aBufSize = aSize * 2u + 256u; 0159 0160 dng_memory_data tempBuffer (aBufSize); 0161 0162 UnicodeMapping aMapping; 0163 0164 aMapping.unicodeEncoding = ::CreateTextEncoding (kTextEncodingUnicodeV3_0, 0165 kUnicodeNoSubset, 0166 kUnicodeUTF8Format); 0167 0168 aMapping.otherEncoding = encoding; 0169 aMapping.mappingVersion = kUnicodeUseLatestMapping; 0170 0171 UnicodeToTextInfo aInfo = NULL; 0172 0173 if (::CreateUnicodeToTextInfo (&aMapping, &aInfo) == noErr) 0174 { 0175 0176 ByteCount aInput = 0; 0177 ByteCount aOutput = 0; 0178 0179 ::ConvertFromUnicodeToText (aInfo, 0180 aSize.Get (), 0181 (const UniChar *) dngString.Get (), 0182 kUnicodeUseFallbacksMask | 0183 kUnicodeLooseMappingsMask | 0184 kUnicodeDefaultDirectionMask, 0185 0, 0186 NULL, 0187 NULL, 0188 NULL, 0189 aBufSize.Get (), 0190 &aInput, 0191 &aOutput, 0192 tempBuffer.Buffer_char ()); 0193 0194 ::DisposeUnicodeToTextInfo (&aInfo); 0195 0196 if (aOutput > 0) 0197 { 0198 0199 uint32 aOutputAsUint32 = 0; 0200 0201 ConvertUnsigned (aOutput, &aOutputAsUint32); 0202 0203 buffer.Allocate (dng_safe_uint32 (aOutputAsUint32) + 1u); 0204 0205 memcpy (buffer.Buffer (), 0206 tempBuffer.Buffer (), 0207 aOutputAsUint32); 0208 0209 buffer.Buffer_char () [aOutputAsUint32] = 0; 0210 0211 return aOutputAsUint32; 0212 0213 } 0214 0215 } 0216 0217 } 0218 0219 buffer.Allocate (1); 0220 0221 buffer.Buffer_char () [0] = 0; 0222 0223 return 0; 0224 0225 } 0226 0227 static void Assign_SystemEncoding (dng_string &dngString, 0228 const char *otherString) 0229 { 0230 0231 TextEncoding aEncoding; 0232 0233 ::UpgradeScriptInfoToTextEncoding (smSystemScript, 0234 kTextLanguageDontCare, 0235 kTextRegionDontCare, 0236 NULL, 0237 &aEncoding); 
0238 0239 Assign_Multibyte (dngString, 0240 otherString, 0241 aEncoding); 0242 0243 } 0244 0245 static uint32 Extract_SystemEncoding (const dng_string &dngString, 0246 dng_memory_data &buffer) 0247 { 0248 0249 TextEncoding aEncoding; 0250 0251 ::UpgradeScriptInfoToTextEncoding (smSystemScript, 0252 kTextLanguageDontCare, 0253 kTextRegionDontCare, 0254 NULL, 0255 &aEncoding); 0256 0257 return Extract_Multibyte (dngString, 0258 buffer, 0259 aEncoding); 0260 0261 } 0262 0263 static void Assign_JIS_X208_1990 (dng_string &dngString, 0264 const char *otherString) 0265 { 0266 0267 Assign_Multibyte (dngString, 0268 otherString, 0269 kTextEncodingJIS_X0208_90); 0270 0271 } 0272 0273 #endif 0274 0275 /*****************************************************************************/ 0276 0277 #if qWinOS 0278 0279 static void Assign_Multibyte (dng_string &dngString, 0280 const char *otherString, 0281 UINT encoding) 0282 { 0283 0284 DNG_ASSERT (sizeof (WCHAR) == 2, "WCHAR must be 2 bytes"); 0285 0286 const dng_safe_uint32 otherStringLen (strlenAsUint32 (otherString)); 0287 0288 const dng_safe_int32 aSize (otherStringLen); 0289 0290 if (aSize.Get () > 0) 0291 { 0292 0293 dng_safe_uint32 aBufCharsUint32 = otherStringLen * 3u + 128u; 0294 0295 dng_safe_int32 aBufChars (aBufCharsUint32); 0296 0297 dng_safe_uint32 bytesToAllocate = (aBufCharsUint32 + 1u) * 2u; 0298 0299 dng_memory_data aBuf (bytesToAllocate); 0300 0301 int aResult = ::MultiByteToWideChar (encoding, 0302 0, 0303 otherString, 0304 aSize.Get (), 0305 (WCHAR *) aBuf.Buffer (), 0306 aBufChars.Get ()); 0307 0308 if (aResult > 0 && aResult <= aBufChars.Get ()) 0309 { 0310 0311 uint16 * aUTF16 = aBuf.Buffer_uint16 (); 0312 0313 aUTF16 [aResult] = 0; 0314 0315 dngString.Set_UTF16 (aUTF16); 0316 0317 return; 0318 0319 } 0320 0321 } 0322 0323 dngString.Clear (); 0324 0325 } 0326 0327 static uint32 Extract_Multibyte (const dng_string &dngString, 0328 dng_memory_data &buffer, 0329 UINT encoding) 0330 { 0331 0332 DNG_ASSERT (sizeof 
(WCHAR) == 2, "WCHAR must be 2 bytes"); 0333 0334 dng_memory_data sBuffer; 0335 0336 int aCount = dngString.Get_UTF16 (sBuffer); 0337 0338 if (aCount < 0) 0339 { 0340 return 0; 0341 } 0342 0343 dng_safe_uint32 aCountAsUint32 (static_cast<uint32> (aCount)); 0344 0345 dng_safe_uint32 dBufSize = aCountAsUint32 * 2u + 256u; 0346 0347 dng_memory_data dBuffer (dBufSize); 0348 0349 int aResult = ::WideCharToMultiByte (encoding, 0350 0, 0351 (WCHAR *) sBuffer.Buffer (), 0352 aCount, 0353 dBuffer.Buffer_char (), 0354 dBufSize.Get (), 0355 NULL, 0356 NULL); 0357 0358 if (aResult < 0) 0359 aResult = 0; 0360 0361 dng_safe_uint32 aResultAsUint32 (static_cast<uint32> (aResult)); 0362 0363 buffer.Allocate (aResultAsUint32 + 1u); 0364 0365 memcpy (buffer.Buffer (), 0366 dBuffer.Buffer (), 0367 aResult); 0368 0369 buffer.Buffer_char () [aResult] = 0; 0370 0371 return aResultAsUint32.Get (); 0372 0373 } 0374 0375 static void Assign_SystemEncoding (dng_string &dngString, 0376 const char *otherString) 0377 { 0378 0379 Assign_Multibyte (dngString, 0380 otherString, 0381 ::GetACP ()); 0382 0383 } 0384 0385 static uint32 Extract_SystemEncoding (const dng_string &dngString, 0386 dng_memory_data &buffer) 0387 { 0388 0389 return Extract_Multibyte (dngString, 0390 buffer, 0391 ::GetACP ()); 0392 0393 } 0394 0395 static void Assign_JIS_X208_1990 (dng_string &dngString, 0396 const char *otherString) 0397 { 0398 0399 // From MSDN documentation: 20932 = JIS X 0208-1990 & 0121-1990 0400 0401 const UINT kJIS = 20932; 0402 0403 Assign_Multibyte (dngString, 0404 otherString, 0405 kJIS); 0406 0407 } 0408 0409 #endif 0410 0411 /*****************************************************************************/ 0412 0413 static bool IsASCII (const char *s) 0414 { 0415 0416 if (!s) 0417 { 0418 0419 return true; 0420 0421 } 0422 0423 while (true) 0424 { 0425 0426 uint8 c = (uint8) *(s++); 0427 0428 if (c == 0) 0429 { 0430 0431 break; 0432 0433 } 0434 0435 if (c & 0x80) 0436 { 0437 0438 return false; 0439 0440 
} 0441 0442 } 0443 0444 return true; 0445 0446 } 0447 0448 /*****************************************************************************/ 0449 0450 dng_string::dng_string () 0451 0452 : fData () 0453 0454 { 0455 0456 } 0457 0458 /*****************************************************************************/ 0459 0460 dng_string::dng_string (const dng_string &s) 0461 0462 : fData () 0463 0464 { 0465 0466 Set (s.Get ()); 0467 0468 } 0469 0470 /*****************************************************************************/ 0471 0472 dng_string & dng_string::operator= (const dng_string &s) 0473 { 0474 0475 if (this != &s) 0476 { 0477 0478 Set (s.Get ()); 0479 0480 } 0481 0482 return *this; 0483 0484 } 0485 0486 /*****************************************************************************/ 0487 0488 dng_string::~dng_string () 0489 { 0490 0491 } 0492 0493 /*****************************************************************************/ 0494 0495 const char * dng_string::Get () const 0496 { 0497 0498 if (fData.Buffer ()) 0499 { 0500 0501 return fData.Buffer_char (); 0502 0503 } 0504 0505 return ""; 0506 0507 } 0508 0509 /*****************************************************************************/ 0510 0511 bool dng_string::IsASCII () const 0512 { 0513 0514 return ::IsASCII (Get ()); 0515 0516 } 0517 0518 /*****************************************************************************/ 0519 0520 void dng_string::Set (const char *s) 0521 { 0522 0523 // Measure the new length. 0524 0525 uint32 newLen = (s != NULL ? strlenAsUint32 (s) : 0); 0526 0527 // If it is a NULL string, then clear the buffer. 0528 0529 if (newLen == 0) 0530 { 0531 0532 fData.Clear (); 0533 0534 } 0535 0536 // Else we need to copy the bytes. 0537 0538 else 0539 { 0540 0541 uint32 oldLen = Length (); 0542 0543 // We might be setting this string to a sub-string of itself, 0544 // so don't reallocate the data unless the string is getting 0545 // longer. 
0546 0547 if (newLen > oldLen) 0548 { 0549 0550 fData.Clear (); 0551 0552 fData.Allocate (dng_safe_uint32 (newLen) + 1u); 0553 0554 } 0555 0556 char *d = fData.Buffer_char (); 0557 0558 for (uint32 k = 0; k <= newLen; k++) 0559 { 0560 0561 d [k] = s [k]; 0562 0563 } 0564 0565 } 0566 0567 } 0568 0569 /*****************************************************************************/ 0570 0571 void dng_string::Set_ASCII (const char *s) 0572 { 0573 0574 if (::IsASCII (s)) 0575 { 0576 0577 Set (s); 0578 0579 } 0580 0581 else 0582 { 0583 0584 Set_SystemEncoding (s); 0585 0586 } 0587 0588 } 0589 0590 /*****************************************************************************/ 0591 0592 void dng_string::Set_UTF8 (const char *s) 0593 { 0594 0595 dng_safe_uint32 len (strlenAsUint32 (s)); 0596 0597 const char *sEnd = s + len.Get (); 0598 0599 // Worst case expansion is 1-byte characters expanding to 0600 // replacement character, which requires 3 bytes. 0601 0602 const dng_safe_uint32 destBufferLength = len * 3u + 1u; 0603 0604 dng_memory_data buffer (destBufferLength); 0605 0606 uint8 *d = buffer.Buffer_uint8 (); 0607 uint8 * const destEnd = d + destBufferLength.Get (); 0608 0609 while (s < sEnd) 0610 { 0611 0612 uint32 aChar = DecodeUTF8 (s, (uint32) (sEnd - s)); 0613 0614 if (aChar > 0x7FFFFFFF) 0615 { 0616 aChar = kREPLACEMENT_CHARACTER; 0617 } 0618 0619 #if qDNGValidate 0620 0621 if (aChar == kREPLACEMENT_CHARACTER) 0622 { 0623 ReportWarning ("Expected UTF-8 value is not valid UTF-8 (or contains a kREPLACEMENT_CHARACTER)"); 0624 } 0625 0626 #endif 0627 0628 if (aChar < 0x00000080) 0629 { 0630 CheckSpaceLeftInBuffer (d, destEnd, 1); 0631 *(d++) = (uint8) aChar; 0632 } 0633 0634 else if (aChar < 0x00000800) 0635 { 0636 CheckSpaceLeftInBuffer (d, destEnd, 2); 0637 *(d++) = (uint8) ((aChar >> 6) | 0x000000C0); 0638 *(d++) = (uint8) ((aChar & 0x0000003F) | 0x00000080); 0639 } 0640 0641 else if (aChar < 0x00010000) 0642 { 0643 CheckSpaceLeftInBuffer (d, destEnd, 3); 0644 
*(d++) = (uint8) ( (aChar >> 12) | 0x000000E0); 0645 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 0646 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 0647 } 0648 0649 else if (aChar < 0x00200000) 0650 { 0651 CheckSpaceLeftInBuffer (d, destEnd, 4); 0652 *(d++) = (uint8) ( (aChar >> 18) | 0x000000F0); 0653 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 0654 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 0655 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 0656 } 0657 0658 else if (aChar < 0x04000000) 0659 { 0660 CheckSpaceLeftInBuffer (d, destEnd, 5); 0661 *(d++) = (uint8) ( (aChar >> 24) | 0x000000F8); 0662 *(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080); 0663 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 0664 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 0665 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 0666 } 0667 0668 else 0669 { 0670 CheckSpaceLeftInBuffer (d, destEnd, 6); 0671 *(d++) = (uint8) ( (aChar >> 30) | 0x000000FC); 0672 *(d++) = (uint8) (((aChar >> 24) & 0x0000003F) | 0x00000080); 0673 *(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080); 0674 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 0675 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 0676 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 0677 } 0678 0679 } 0680 0681 CheckSpaceLeftInBuffer (d, destEnd, 1); 0682 *d = 0; 0683 0684 Set (buffer.Buffer_char ()); 0685 0686 } 0687 0688 /*****************************************************************************/ 0689 0690 uint32 dng_string::Get_SystemEncoding (dng_memory_data &buffer) const 0691 { 0692 0693 if (IsASCII ()) 0694 { 0695 0696 dng_safe_uint32 len (Length ()); 0697 0698 const dng_safe_uint32 destBufferLength = len + 1u; 0699 0700 buffer.Allocate (destBufferLength); 0701 0702 memcpy (buffer.Buffer (), Get (), destBufferLength.Get ()); 0703 0704 return len.Get (); 0705 0706 } 0707 0708 
else 0709 { 0710 0711 #if qMacOS || qWinOS 0712 0713 return Extract_SystemEncoding (*this, buffer); 0714 0715 #else 0716 0717 // Fallback logic to force the string to ASCII. 0718 0719 dng_string temp (*this); 0720 0721 temp.ForceASCII (); 0722 0723 return temp.Get_SystemEncoding (buffer); 0724 0725 #endif 0726 0727 } 0728 0729 } 0730 0731 /*****************************************************************************/ 0732 0733 void dng_string::Set_SystemEncoding (const char *s) 0734 { 0735 0736 if (::IsASCII (s)) 0737 { 0738 0739 Set (s); 0740 0741 } 0742 0743 else 0744 { 0745 0746 #if qMacOS || qWinOS 0747 0748 Assign_SystemEncoding (*this, s); 0749 0750 #else 0751 0752 // Fallback logic that just grabs the ASCII characters and 0753 // ignores the non-ASCII characters. 0754 0755 dng_safe_uint32 len = strlenAsUint32 (s); 0756 0757 const dng_safe_uint32 destBufferLength = len + 1u; 0758 0759 dng_memory_data buffer (destBufferLength); 0760 0761 uint8 *d = buffer.Buffer_uint8 (); 0762 uint8 * const destEnd = d + destBufferLength.Get (); 0763 0764 while (*s) 0765 { 0766 0767 uint8 c = (uint8) *(s++); 0768 0769 if ((c & 0x80) == 0) 0770 { 0771 0772 CheckSpaceLeftInBuffer (d, destEnd, 1); 0773 *(d++) = c; 0774 0775 } 0776 0777 } 0778 0779 CheckSpaceLeftInBuffer (d, destEnd, 1); 0780 *d = 0; 0781 0782 Set (buffer.Buffer_char ()); 0783 0784 #endif 0785 0786 } 0787 0788 } 0789 0790 /*****************************************************************************/ 0791 0792 bool dng_string::ValidSystemEncoding () const 0793 { 0794 0795 if (IsASCII ()) 0796 { 0797 0798 return true; 0799 0800 } 0801 0802 dng_memory_data buffer; 0803 0804 Get_SystemEncoding (buffer); 0805 0806 dng_string temp; 0807 0808 temp.Set_SystemEncoding (buffer.Buffer_char ()); 0809 0810 return (*this == temp); 0811 0812 } 0813 0814 /*****************************************************************************/ 0815 0816 void dng_string::Set_JIS_X208_1990 (const char *s) 0817 { 0818 0819 if (::IsASCII (s)) 
0820 { 0821 0822 Set (s); 0823 0824 } 0825 0826 else 0827 { 0828 0829 #if qMacOS || qWinOS 0830 0831 Assign_JIS_X208_1990 (*this, s); 0832 0833 #else 0834 0835 // Fallback to the ASCII extraction logic. 0836 0837 Set_SystemEncoding (s); 0838 0839 #endif 0840 0841 } 0842 0843 } 0844 0845 /*****************************************************************************/ 0846 0847 uint32 dng_string::DecodeUTF8 (const char *&s, 0848 uint32 maxBytes, 0849 bool *isValid) 0850 { 0851 0852 static const uint8 gUTF8Bytes [256] = 0853 { 0854 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 0855 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 0856 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 0857 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 0858 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0859 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0860 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 0861 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,6,6 0862 }; 0863 0864 if (isValid) 0865 { 0866 *isValid = true; 0867 } 0868 0869 const uint8 *nBuf = (const uint8 *) s; 0870 0871 uint32 aChar = nBuf [0]; 0872 0873 uint32 aSize = gUTF8Bytes [aChar]; 0874 0875 if (aSize > maxBytes) 0876 { 0877 0878 s += maxBytes; 0879 0880 if (isValid) 0881 { 0882 *isValid = false; 0883 } 0884 0885 return kREPLACEMENT_CHARACTER; 0886 0887 } 0888 0889 s += aSize; 0890 0891 for (uint32 extra = 1; extra < aSize; extra++) 0892 { 0893 0894 if ((nBuf [extra] & 0xC0) != 0x80) 0895 { 0896 0897 if (isValid) 0898 { 0899 *isValid = false; 0900 } 0901 0902 return kREPLACEMENT_CHARACTER; 0903 0904 } 0905 0906 } 0907 0908 switch (aSize) 0909 { 0910 0911 case 0: 0912 { 0913 0914 s++; // Don't get stuck in infinite loop 0915 0916 if (isValid) 0917 { 0918 *isValid = false; 0919 } 0920 0921 return kREPLACEMENT_CHARACTER; 0922 0923 } 0924 0925 case 1: 0926 { 0927 0928 return aChar; 0929 
0930 } 0931 0932 case 2: 0933 { 0934 0935 aChar = ((aChar << 6) + nBuf [1]) - (uint32) 0x00003080UL; 0936 0937 break; 0938 0939 } 0940 0941 case 3: 0942 { 0943 0944 aChar = ((((aChar << 6) + nBuf [1]) 0945 << 6) + nBuf [2]) - (uint32) 0x000E2080UL; 0946 0947 break; 0948 0949 } 0950 0951 case 4: 0952 { 0953 0954 aChar = ((((((aChar << 6) + nBuf [1]) 0955 << 6) + nBuf [2]) 0956 << 6) + nBuf [3]) - (uint32) 0x03C82080UL; 0957 0958 break; 0959 0960 } 0961 0962 case 5: 0963 { 0964 0965 aChar = ((((((((aChar << 6) + nBuf [1]) 0966 << 6) + nBuf [2]) 0967 << 6) + nBuf [3]) 0968 << 6) + nBuf [4]) - (uint32) 0xFA082080UL; 0969 0970 break; 0971 0972 } 0973 0974 case 6: 0975 { 0976 0977 aChar = ((((((((((aChar << 6) + nBuf [1]) 0978 << 6) + nBuf [2]) 0979 << 6) + nBuf [3]) 0980 << 6) + nBuf [4]) 0981 << 6) + nBuf [5]) - (uint32) 0x82082080UL; 0982 0983 break; 0984 0985 } 0986 0987 } 0988 0989 if (aChar < 0x7F || aChar > 0x0010FFFF) 0990 { 0991 0992 if (isValid) 0993 { 0994 *isValid = false; 0995 } 0996 0997 return kREPLACEMENT_CHARACTER; 0998 0999 } 1000 1001 return aChar; 1002 1003 } 1004 1005 /*****************************************************************************/ 1006 1007 bool dng_string::IsUTF8 (const char *s) 1008 { 1009 1010 uint32 len = strlenAsUint32 (s); 1011 1012 const char *sEnd = s + len; 1013 1014 while (s < sEnd) 1015 { 1016 1017 bool isValid = true; 1018 1019 (void) DecodeUTF8 (s, (uint32) (sEnd - s), &isValid); 1020 1021 if (!isValid) 1022 { 1023 return false; 1024 } 1025 1026 } 1027 1028 return true; 1029 1030 } 1031 1032 /*****************************************************************************/ 1033 1034 void dng_string::Set_UTF8_or_System (const char *s) 1035 { 1036 1037 if (::IsASCII (s)) 1038 { 1039 1040 Set (s); 1041 1042 } 1043 1044 else if (IsUTF8 (s)) 1045 { 1046 1047 Set_UTF8 (s); 1048 1049 } 1050 1051 else 1052 { 1053 1054 Set_SystemEncoding (s); 1055 1056 } 1057 1058 } 1059 1060 
/*****************************************************************************/ 1061 1062 uint32 dng_string::Get_UTF16 (dng_memory_data &buffer) const 1063 { 1064 1065 dng_safe_uint32 count = 0u; 1066 1067 const char *sPtr = Get (); 1068 1069 while (*sPtr) 1070 { 1071 1072 uint32 x = DecodeUTF8 (sPtr); 1073 1074 if (x <= 0x0000FFFF || 1075 x > 0x0010FFFF) 1076 { 1077 1078 count += 1u; 1079 1080 } 1081 1082 else 1083 { 1084 1085 count += 2u; 1086 1087 } 1088 1089 } 1090 1091 const dng_safe_uint32 destBufferLength = count + 1u; 1092 1093 buffer.Allocate (destBufferLength.Get (), 1094 sizeof (uint16)); 1095 1096 uint16 *dPtr = buffer.Buffer_uint16 (); 1097 uint16 * const destEnd = dPtr + destBufferLength.Get (); 1098 1099 sPtr = Get (); 1100 1101 while (*sPtr) 1102 { 1103 1104 uint32 x = DecodeUTF8 (sPtr); 1105 1106 if (x <= 0x0000FFFF) 1107 { 1108 1109 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 1110 *(dPtr++) = (uint16) x; 1111 1112 } 1113 1114 else if (x > 0x0010FFFF) 1115 { 1116 1117 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 1118 *(dPtr++) = (uint16) kREPLACEMENT_CHARACTER; 1119 1120 } 1121 1122 else 1123 { 1124 1125 x -= 0x00010000; 1126 1127 CheckSpaceLeftInBuffer (dPtr, destEnd, 2); 1128 *(dPtr++) = (uint16) ((x >> 10 ) + 0x0000D800); 1129 *(dPtr++) = (uint16) ((x & 0x000003FF) + 0x0000DC00); 1130 1131 } 1132 1133 } 1134 1135 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 1136 *dPtr = 0; 1137 1138 return count.Get (); 1139 1140 } 1141 1142 /*****************************************************************************/ 1143 1144 void dng_string::Set_UTF16 (const uint16 *s) 1145 { 1146 1147 if (!s) 1148 { 1149 Clear (); 1150 return; 1151 } 1152 1153 bool swap = false; 1154 1155 if (s [0] == 0xFFFE) // Swapped byte order marker 1156 { 1157 swap = true; 1158 s++; 1159 } 1160 1161 else if (s [0] == 0xFEFF) // Non-swapped byte order marker 1162 { 1163 s++; 1164 } 1165 1166 dng_safe_uint32 length16 (0u); 1167 1168 while (s [length16.Get ()] != 0) 1169 { 1170 length16 += 
1u; 1171 } 1172 1173 const uint16 *sEnd = s + length16.Get (); 1174 1175 const dng_safe_uint32 destBufferSize = length16 * 6u + 1u; 1176 1177 dng_memory_data buffer (destBufferSize); 1178 1179 uint8 *d = buffer.Buffer_uint8 (); 1180 uint8 * const destEnd = d + destBufferSize.Get (); 1181 1182 while (s < sEnd) 1183 { 1184 1185 uint32 aChar = *s++; 1186 1187 if (swap) 1188 { 1189 aChar = ((aChar << 8) | (aChar >> 8)) & 0x0000FFFF; 1190 } 1191 1192 if ((aChar >= 0x0000D800) && (aChar <= 0x0000DBFF) && (s < sEnd)) 1193 { 1194 1195 uint32 aLow = *s; 1196 1197 if (swap) 1198 { 1199 aLow = ((aLow << 8) | (aLow >> 8)) & 0x0000FFFF; 1200 } 1201 1202 if ((aLow >= 0x0000DC00) && (aLow <= 0x0000DFFF)) 1203 { 1204 1205 aChar = ((aChar - 0x0000D800) << 10) + 1206 (aLow - 0x0000DC00) + 1207 0x00010000; 1208 1209 s++; 1210 1211 } 1212 1213 } 1214 1215 if (aChar > 0x7FFFFFFF) 1216 { 1217 aChar = kREPLACEMENT_CHARACTER; 1218 } 1219 1220 if (aChar < 0x00000080) 1221 { 1222 CheckSpaceLeftInBuffer (d, destEnd, 1); 1223 *(d++) = (uint8) aChar; 1224 } 1225 1226 else if (aChar < 0x00000800) 1227 { 1228 CheckSpaceLeftInBuffer (d, destEnd, 2); 1229 *(d++) = (uint8) ((aChar >> 6) | 0x000000C0); 1230 *(d++) = (uint8) ((aChar & 0x0000003F) | 0x00000080); 1231 } 1232 1233 else if (aChar < 0x00010000) 1234 { 1235 CheckSpaceLeftInBuffer (d, destEnd, 3); 1236 *(d++) = (uint8) ( (aChar >> 12) | 0x000000E0); 1237 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 1238 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 1239 } 1240 1241 else if (aChar < 0x00200000) 1242 { 1243 CheckSpaceLeftInBuffer (d, destEnd, 4); 1244 *(d++) = (uint8) ( (aChar >> 18) | 0x000000F0); 1245 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 1246 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 1247 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 1248 } 1249 1250 else if (aChar < 0x04000000) 1251 { 1252 CheckSpaceLeftInBuffer (d, destEnd, 5); 1253 *(d++) = (uint8) ( (aChar >> 
24) | 0x000000F8); 1254 *(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080); 1255 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 1256 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 1257 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 1258 } 1259 1260 else 1261 { 1262 CheckSpaceLeftInBuffer (d, destEnd, 6); 1263 *(d++) = (uint8) ( (aChar >> 30) | 0x000000FC); 1264 *(d++) = (uint8) (((aChar >> 24) & 0x0000003F) | 0x00000080); 1265 *(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080); 1266 *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080); 1267 *(d++) = (uint8) (((aChar >> 6) & 0x0000003F) | 0x00000080); 1268 *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080); 1269 } 1270 1271 } 1272 1273 CheckSpaceLeftInBuffer (d, destEnd, 1); 1274 *d = 0; 1275 1276 Set (buffer.Buffer_char ()); 1277 1278 } 1279 1280 /*****************************************************************************/ 1281 1282 void dng_string::Clear () 1283 { 1284 1285 Set (NULL); 1286 1287 } 1288 1289 /*****************************************************************************/ 1290 1291 void dng_string::Truncate (uint32 maxBytes) 1292 { 1293 1294 uint32 len = Length (); 1295 1296 if (len > maxBytes) 1297 { 1298 1299 uint8 *s = fData.Buffer_uint8 (); 1300 1301 // Don't truncate on an extension character. Extensions characters 1302 // in UTF-8 have the 0x80 bit set and the 0x40 bit clear. 
1303 1304 while (maxBytes > 0 && ((s [maxBytes]) & 0xC0) == 0x80) 1305 { 1306 1307 maxBytes--; 1308 1309 } 1310 1311 s [maxBytes] = 0; 1312 1313 } 1314 1315 } 1316 1317 /*****************************************************************************/ 1318 1319 bool dng_string::TrimTrailingBlanks () 1320 { 1321 1322 bool didTrim = false; 1323 1324 if (fData.Buffer ()) 1325 { 1326 1327 char *s = fData.Buffer_char (); 1328 1329 uint32 len = strlenAsUint32 (s); 1330 1331 while (len > 0 && s [len - 1] == ' ') 1332 { 1333 len--; 1334 didTrim = true; 1335 } 1336 1337 s [len] = 0; 1338 1339 } 1340 1341 return didTrim; 1342 1343 } 1344 1345 /*****************************************************************************/ 1346 1347 bool dng_string::TrimLeadingBlanks () 1348 { 1349 1350 bool didTrim = false; 1351 1352 const char *s = Get (); 1353 1354 while (*s == ' ') 1355 { 1356 s++; 1357 didTrim = true; 1358 } 1359 1360 if (didTrim) 1361 { 1362 Set (s); 1363 } 1364 1365 return didTrim; 1366 1367 } 1368 1369 /*****************************************************************************/ 1370 1371 bool dng_string::IsEmpty () const 1372 { 1373 1374 const char *s = Get (); 1375 1376 return *s == 0; 1377 1378 } 1379 1380 /*****************************************************************************/ 1381 1382 uint32 dng_string::Length () const 1383 { 1384 1385 const char *s = Get (); 1386 1387 return strlenAsUint32 (s); 1388 1389 } 1390 1391 /*****************************************************************************/ 1392 1393 bool dng_string::operator== (const dng_string &s) const 1394 { 1395 1396 const char *s1 = Get (); 1397 const char *s2 = s.Get (); 1398 1399 return strcmp (s1, s2) == 0; 1400 1401 } 1402 1403 /*****************************************************************************/ 1404 1405 bool dng_string::Matches (const char *t, 1406 const char *s, 1407 bool case_sensitive) 1408 { 1409 1410 while (*s != 0) 1411 { 1412 1413 char c1 = *(s++); 1414 char c2 = *(t++); 
1415 1416 if (!case_sensitive) 1417 { 1418 c1 = ForceUppercase (c1); 1419 c2 = ForceUppercase (c2); 1420 } 1421 1422 if (c1 != c2) 1423 { 1424 return false; 1425 } 1426 1427 } 1428 1429 return (*t == 0); 1430 1431 } 1432 1433 /*****************************************************************************/ 1434 1435 bool dng_string::Matches (const char *s, 1436 bool case_sensitive) const 1437 { 1438 1439 return dng_string::Matches (Get (), s, case_sensitive); 1440 1441 } 1442 1443 /*****************************************************************************/ 1444 1445 bool dng_string::StartsWith (const char *s, 1446 bool case_sensitive) const 1447 { 1448 1449 const char *t = Get (); 1450 1451 while (*s != 0) 1452 { 1453 1454 char c1 = *(s++); 1455 char c2 = *(t++); 1456 1457 if (!case_sensitive) 1458 { 1459 c1 = ForceUppercase (c1); 1460 c2 = ForceUppercase (c2); 1461 } 1462 1463 if (c1 != c2) 1464 { 1465 return false; 1466 } 1467 1468 } 1469 1470 return true; 1471 1472 } 1473 1474 /*****************************************************************************/ 1475 1476 bool dng_string::EndsWith (const char *s, 1477 bool case_sensitive) const 1478 { 1479 1480 uint32 len1 = Length (); 1481 1482 uint32 len2 = strlenAsUint32 (s); 1483 1484 if (len1 < len2) 1485 { 1486 return false; 1487 } 1488 1489 const char *t = Get () + (len1 - len2); 1490 1491 while (*s != 0) 1492 { 1493 1494 char c1 = *(s++); 1495 char c2 = *(t++); 1496 1497 if (!case_sensitive) 1498 { 1499 c1 = ForceUppercase (c1); 1500 c2 = ForceUppercase (c2); 1501 } 1502 1503 if (c1 != c2) 1504 { 1505 return false; 1506 } 1507 1508 } 1509 1510 return true; 1511 1512 } 1513 1514 /*****************************************************************************/ 1515 1516 bool dng_string::Contains (const char *s, 1517 bool case_sensitive, 1518 int32 *match_offset) const 1519 { 1520 1521 if (match_offset) 1522 { 1523 *match_offset = -1; 1524 } 1525 1526 uint32 len1 = Length (); 1527 1528 uint32 len2 = strlenAsUint32 
(s); 1529 1530 if (len1 < len2) 1531 { 1532 return false; 1533 } 1534 1535 uint32 offsets = len1 - len2; 1536 1537 for (uint32 offset = 0; offset <= offsets; offset++) 1538 { 1539 1540 const char *ss = s; 1541 const char *tt = Get () + offset; 1542 1543 while (*ss != 0) 1544 { 1545 1546 char c1 = *(ss++); 1547 char c2 = *(tt++); 1548 1549 if (!case_sensitive) 1550 { 1551 c1 = ForceUppercase (c1); 1552 c2 = ForceUppercase (c2); 1553 } 1554 1555 if (c1 != c2) 1556 { 1557 goto tryNextOffset; 1558 } 1559 1560 } 1561 1562 if (match_offset) 1563 { 1564 *match_offset = offset; 1565 } 1566 1567 return true; 1568 1569 tryNextOffset: ; 1570 1571 } 1572 1573 return false; 1574 1575 } 1576 1577 /*****************************************************************************/ 1578 1579 bool dng_string::Replace (const char *old_string, 1580 const char *new_string, 1581 bool case_sensitive) 1582 { 1583 1584 int32 match_offset = -1; 1585 1586 if (Contains (old_string, 1587 case_sensitive, 1588 &match_offset)) 1589 { 1590 1591 uint32 len1 = Length (); 1592 1593 uint32 len2 = strlenAsUint32 (old_string); 1594 uint32 len3 = strlenAsUint32 (new_string); 1595 1596 if (len2 == len3) 1597 { 1598 1599 DNG_REQUIRE (fData.Buffer_char (), "Bad string in dng_string::Replace"); 1600 1601 strncpy (fData.Buffer_char () + match_offset, 1602 new_string, 1603 len3); 1604 1605 } 1606 1607 else if (len2 > len3) 1608 { 1609 1610 DNG_REQUIRE (fData.Buffer_char (), "Bad string in dng_string::Replace"); 1611 1612 strncpy (fData.Buffer_char () + match_offset, 1613 new_string, 1614 len3); 1615 1616 const char *s = fData.Buffer_char () + match_offset + len2; 1617 char *d = fData.Buffer_char () + match_offset + len3; 1618 1619 uint32 extra = len1 - match_offset - len2 + 1; // + 1 for NULL termination 1620 1621 for (uint32 j = 0; j < extra; j++) 1622 { 1623 *(d++) = *(s++); 1624 } 1625 1626 } 1627 1628 else 1629 { 1630 1631 // "len1 - len2" cannot wrap around because we know that if this 1632 // string contains 
old_string, len1 >= len2 must hold. 1633 1634 dng_memory_data tempBuffer 1635 (dng_safe_uint32 (len1 - len2) + len3 + 1u); 1636 1637 if (match_offset) 1638 { 1639 1640 strncpy (tempBuffer.Buffer_char (), 1641 fData .Buffer_char (), 1642 match_offset); 1643 1644 } 1645 1646 if (len3) 1647 { 1648 1649 strncpy (tempBuffer.Buffer_char () + match_offset, 1650 new_string, 1651 len3); 1652 1653 } 1654 1655 uint32 extra = len1 - match_offset - len2 + 1; // + 1 for NULL termination 1656 1657 DNG_REQUIRE (fData.Buffer_char (), "Bad string in dng_string::Replace"); 1658 1659 strncpy (tempBuffer.Buffer_char () + match_offset + len3, 1660 fData .Buffer_char () + match_offset + len2, 1661 extra); 1662 1663 Set (tempBuffer.Buffer_char ()); 1664 1665 } 1666 1667 return true; 1668 1669 } 1670 1671 return false; 1672 1673 } 1674 1675 /*****************************************************************************/ 1676 1677 void dng_string::ReplaceChars (char oldChar, 1678 char newChar) 1679 { 1680 1681 if (fData.Buffer ()) 1682 { 1683 1684 uint32 len = Length (); 1685 1686 char *dPtr = fData.Buffer_char (); 1687 1688 for (uint32 j = 0; j < len; j++) 1689 { 1690 1691 if (dPtr [j] == oldChar) 1692 { 1693 1694 dPtr [j] = newChar; 1695 1696 } 1697 1698 } 1699 1700 } 1701 1702 } 1703 1704 /*****************************************************************************/ 1705 1706 bool dng_string::TrimLeading (const char *s, 1707 bool case_sensitive) 1708 { 1709 1710 if (StartsWith (s, case_sensitive)) 1711 { 1712 1713 Set (Get () + strlenAsUint32 (s)); 1714 1715 return true; 1716 1717 } 1718 1719 return false; 1720 1721 } 1722 1723 /*****************************************************************************/ 1724 1725 void dng_string::Append (const char *s) 1726 { 1727 1728 dng_safe_uint32 len2 (strlenAsUint32 (s)); 1729 1730 if (len2.Get ()) 1731 { 1732 1733 dng_safe_uint32 len1 (Length ()); 1734 1735 dng_memory_data temp (len1 + len2 + 1u); 1736 1737 char *buffer = temp.Buffer_char (); 
1738 1739 if (len1.Get ()) 1740 { 1741 memcpy (buffer, Get (), len1.Get ()); 1742 } 1743 1744 memcpy (buffer + len1.Get (), s, (len2 + 1u).Get ()); 1745 1746 Set (buffer); 1747 1748 } 1749 1750 } 1751 1752 /*****************************************************************************/ 1753 1754 void dng_string::SetUppercase () 1755 { 1756 1757 if (fData.Buffer ()) 1758 { 1759 1760 uint32 len = Length (); 1761 1762 char *dPtr = fData.Buffer_char (); 1763 1764 for (uint32 j = 0; j < len; j++) 1765 { 1766 1767 char c = dPtr [j]; 1768 1769 if (c >= 'a' && c <= 'z') 1770 { 1771 1772 dPtr [j] = c - 'a' + 'A'; 1773 1774 } 1775 1776 } 1777 1778 } 1779 1780 } 1781 1782 /*****************************************************************************/ 1783 1784 void dng_string::SetLowercase () 1785 { 1786 1787 if (fData.Buffer ()) 1788 { 1789 1790 uint32 len = Length (); 1791 1792 char *dPtr = fData.Buffer_char (); 1793 1794 for (uint32 j = 0; j < len; j++) 1795 { 1796 1797 char c = dPtr [j]; 1798 1799 if (c >= 'A' && c <= 'Z') 1800 { 1801 1802 dPtr [j] = c - 'A' + 'a'; 1803 1804 } 1805 1806 } 1807 1808 } 1809 1810 } 1811 1812 /*****************************************************************************/ 1813 1814 void dng_string::SetLineEndings (char ending) 1815 { 1816 1817 if (fData.Buffer ()) 1818 { 1819 1820 const char *sPtr = fData.Buffer_char (); 1821 char *dPtr = fData.Buffer_char (); 1822 1823 while (*sPtr) 1824 { 1825 1826 char c = *(sPtr++); 1827 1828 char nc = sPtr [0]; 1829 1830 if ((c == '\r' && nc == '\n') || 1831 (c == '\n' && nc == '\r')) 1832 { 1833 1834 sPtr++; 1835 1836 if (ending) 1837 { 1838 *(dPtr++) = ending; 1839 } 1840 1841 } 1842 1843 else if (c == '\n' || 1844 c == '\r') 1845 { 1846 1847 if (ending) 1848 { 1849 *(dPtr++) = ending; 1850 } 1851 1852 } 1853 1854 else 1855 { 1856 1857 *(dPtr++) = c; 1858 1859 } 1860 1861 } 1862 1863 *dPtr = 0; 1864 1865 } 1866 1867 } 1868 1869 
/*****************************************************************************/ 1870 1871 void dng_string::StripLowASCII () 1872 { 1873 1874 if (fData.Buffer ()) 1875 { 1876 1877 const char *sPtr = fData.Buffer_char (); 1878 char *dPtr = fData.Buffer_char (); 1879 1880 while (*sPtr) 1881 { 1882 1883 char c = *(sPtr++); 1884 1885 if (c == '\r' || c == '\n' || (uint8) c >= ' ') 1886 { 1887 1888 *(dPtr++) = c; 1889 1890 } 1891 1892 } 1893 1894 *dPtr = 0; 1895 1896 } 1897 1898 } 1899 1900 /*****************************************************************************/ 1901 1902 void dng_string::NormalizeAsCommaSeparatedNumbers () 1903 { 1904 1905 if (fData.Buffer ()) 1906 { 1907 1908 const char *sPtr = fData.Buffer_char (); 1909 char *dPtr = fData.Buffer_char (); 1910 1911 bool commaInserted = false; 1912 1913 while (*sPtr) 1914 { 1915 1916 uint32 c = DecodeUTF8 (sPtr); 1917 1918 // Support number formats such as "3", "+3.0", "-3.1416", "314.16e-2", 1919 // "0.31416E1", but no hex/octal number representations. 1920 1921 if (isdigit ((int) c) || c == '.' || c == '-' || c == '+' || c == 'e' || c == 'E') 1922 { 1923 1924 *(dPtr++) = (char) c; 1925 1926 if (commaInserted) 1927 { 1928 1929 commaInserted = false; 1930 1931 } 1932 1933 } 1934 1935 else if (!commaInserted) 1936 { 1937 1938 *(dPtr++) = ','; 1939 1940 commaInserted = true; 1941 1942 } 1943 1944 } 1945 1946 *dPtr = 0; 1947 1948 } 1949 1950 } 1951 1952 /******************************************************************************/ 1953 1954 // Unicode to low-ASCII strings table. 1955 1956 struct UnicodeToLowASCIIEntry 1957 { 1958 uint32 unicode; 1959 const char *ascii; 1960 }; 1961 1962 static const UnicodeToLowASCIIEntry kUnicodeToLowASCII [] = 1963 { 1964 { 0x00A0, " " }, 1965 { 0x00A1, "!" }, 1966 { 0x00A9, "(C)" }, 1967 { 0x00AA, "a" }, 1968 { 0x00AB, "<<" }, 1969 { 0x00AC, "!" }, 1970 { 0x00AE, "(R)" }, 1971 { 0x00B0, "dg" }, 1972 { 0x00B1, "+-" }, 1973 { 0x00B7, "." 
}, 1974 { 0x00BA, "o" }, 1975 { 0x00BB, ">>" }, 1976 { 0x00BF, "?" }, 1977 { 0x00C0, "A" }, 1978 { 0x00C1, "A" }, 1979 { 0x00C2, "A" }, 1980 { 0x00C3, "A" }, 1981 { 0x00C4, "A" }, 1982 { 0x00C5, "A" }, 1983 { 0x00C6, "AE" }, 1984 { 0x00C7, "C" }, 1985 { 0x00C8, "E" }, 1986 { 0x00C9, "E" }, 1987 { 0x00CA, "E" }, 1988 { 0x00CB, "E" }, 1989 { 0x00CC, "I" }, 1990 { 0x00CD, "I" }, 1991 { 0x00CE, "I" }, 1992 { 0x00CF, "I" }, 1993 { 0x00D1, "N" }, 1994 { 0x00D2, "O" }, 1995 { 0x00D3, "O" }, 1996 { 0x00D4, "O" }, 1997 { 0x00D5, "O" }, 1998 { 0x00D6, "O" }, 1999 { 0x00D8, "O" }, 2000 { 0x00D9, "U" }, 2001 { 0x00DA, "U" }, 2002 { 0x00DB, "U" }, 2003 { 0x00DC, "U" }, 2004 { 0x00DD, "Y" }, 2005 { 0x00E0, "a" }, 2006 { 0x00E1, "a" }, 2007 { 0x00E2, "a" }, 2008 { 0x00E3, "a" }, 2009 { 0x00E4, "a" }, 2010 { 0x00E5, "a" }, 2011 { 0x00E6, "ae" }, 2012 { 0x00E7, "c" }, 2013 { 0x00E8, "e" }, 2014 { 0x00E9, "e" }, 2015 { 0x00EA, "e" }, 2016 { 0x00EB, "e" }, 2017 { 0x00EC, "i" }, 2018 { 0x00ED, "i" }, 2019 { 0x00EE, "i" }, 2020 { 0x00EF, "i" }, 2021 { 0x00F1, "n" }, 2022 { 0x00F2, "o" }, 2023 { 0x00F3, "o" }, 2024 { 0x00F4, "o" }, 2025 { 0x00F5, "o" }, 2026 { 0x00F6, "o" }, 2027 { 0x00F7, "/" }, 2028 { 0x00F8, "o" }, 2029 { 0x00F9, "u" }, 2030 { 0x00FA, "u" }, 2031 { 0x00FB, "u" }, 2032 { 0x00FC, "u" }, 2033 { 0x00FD, "y" }, 2034 { 0x00FF, "y" }, 2035 { 0x0131, "i" }, 2036 { 0x0152, "OE" }, 2037 { 0x0153, "oe" }, 2038 { 0x0178, "Y" }, 2039 { 0x2013, "-" }, 2040 { 0x2014, "-" }, 2041 { 0x2018, "'" }, 2042 { 0x2019, "'" }, 2043 { 0x201A, "," }, 2044 { 0x201C, "\"" }, 2045 { 0x201D, "\"" }, 2046 { 0x201E, ",," }, 2047 { 0x2022, "." }, 2048 { 0x2026, "..." 
}, 2049 { 0x2039, "<" }, 2050 { 0x203A, ">" }, 2051 { 0x2044, "/" }, 2052 { 0x2122, "TM" }, 2053 { 0x2206, "d" }, 2054 { 0x2211, "S" }, 2055 { 0x2260, "!=" }, 2056 { 0x2264, "<=" }, 2057 { 0x2265, ">=" }, 2058 { 0x2318, "#" }, 2059 { 0xFB01, "fi" }, 2060 { 0xFB02, "fl" } 2061 }; 2062 2063 /******************************************************************************/ 2064 2065 void dng_string::ForceASCII () 2066 { 2067 2068 if (!IsASCII ()) 2069 { 2070 2071 dng_safe_uint32 tempBufferSize = dng_safe_uint32 (Length ()) * 3u + 1u; 2072 2073 dng_memory_data tempBuffer (tempBufferSize); 2074 2075 char *dPtr = tempBuffer.Buffer_char (); 2076 char * const destEnd = dPtr + tempBufferSize.Get (); 2077 2078 const char *sPtr = Get (); 2079 2080 while (*sPtr) 2081 { 2082 2083 uint32 x = DecodeUTF8 (sPtr); 2084 2085 if (x <= 0x007F) 2086 { 2087 2088 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 2089 *(dPtr++) = (char) x; 2090 2091 } 2092 2093 else 2094 { 2095 2096 const char *ascii = NULL; 2097 2098 const uint32 kTableEntrys = sizeof (kUnicodeToLowASCII ) / 2099 sizeof (kUnicodeToLowASCII [0]); 2100 2101 for (uint32 entry = 0; entry < kTableEntrys; entry++) 2102 { 2103 2104 if (kUnicodeToLowASCII [entry] . unicode == x) 2105 { 2106 2107 ascii = kUnicodeToLowASCII [entry] . 
ascii; 2108 2109 break; 2110 2111 } 2112 2113 } 2114 2115 if (ascii) 2116 { 2117 2118 while (*ascii) 2119 { 2120 2121 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 2122 *(dPtr++) = *(ascii++); 2123 2124 } 2125 2126 } 2127 2128 else 2129 { 2130 2131 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 2132 *(dPtr++) ='?'; 2133 2134 } 2135 2136 } 2137 2138 } 2139 2140 CheckSpaceLeftInBuffer (dPtr, destEnd, 1); 2141 *dPtr = 0; 2142 2143 Set (tempBuffer.Buffer_char ()); 2144 2145 } 2146 2147 } 2148 2149 /******************************************************************************/ 2150 2151 static dng_std_mutex gProtectUCCalls; 2152 2153 /******************************************************************************/ 2154 2155 int32 dng_string::Compare (const dng_string &s, 2156 bool digitsAsNumber) const 2157 { 2158 2159 #if qMacOS 2160 2161 { 2162 2163 dng_memory_data aStrA; 2164 dng_memory_data aStrB; 2165 2166 uint32 aLenA = this->Get_UTF16 (aStrA); 2167 uint32 aLenB = s .Get_UTF16 (aStrB); 2168 2169 if (aLenA > 0) 2170 { 2171 2172 if (aLenB > 0) 2173 { 2174 2175 // For some Mac OS versions anyway, UCCompareTextDefault is not 2176 // thread safe. 2177 2178 dng_lock_std_mutex lockMutex (gProtectUCCalls); 2179 2180 UCCollateOptions aOptions = kUCCollateStandardOptions | 2181 kUCCollatePunctuationSignificantMask; 2182 2183 if (digitsAsNumber) 2184 { 2185 2186 aOptions |= kUCCollateDigitsOverrideMask | 2187 kUCCollateDigitsAsNumberMask; 2188 2189 } 2190 2191 SInt32 aOrder = -1; 2192 2193 Boolean aEqual = false; 2194 2195 OSStatus searchStatus = ::UCCompareTextDefault (aOptions, 2196 aStrA.Buffer_uint16 (), 2197 aLenA, 2198 aStrB.Buffer_uint16 (), 2199 aLenB, 2200 &aEqual, 2201 &aOrder); 2202 2203 if (searchStatus == noErr) 2204 { 2205 2206 if (aEqual || (aOrder == 0)) 2207 { 2208 return 0; 2209 } 2210 2211 else 2212 { 2213 return (aOrder > 0) ? 
1 : -1; 2214 } 2215 2216 } 2217 2218 else 2219 { 2220 2221 DNG_REPORT ("UCCompareTextDefault failed"); 2222 2223 return -1; 2224 2225 } 2226 2227 } 2228 2229 else 2230 { 2231 return 1; 2232 } 2233 2234 } 2235 2236 else 2237 { 2238 2239 if (aLenB > 0) 2240 { 2241 return -1; 2242 } 2243 2244 else 2245 { 2246 return 0; 2247 } 2248 2249 } 2250 2251 } 2252 2253 #elif qWinOS 2254 2255 { 2256 2257 dng_memory_data aStrA; 2258 dng_memory_data aStrB; 2259 2260 uint32 aLenA = this->Get_UTF16 (aStrA); 2261 uint32 aLenB = s .Get_UTF16 (aStrB); 2262 2263 if (aLenA > 0) 2264 { 2265 2266 if (aLenB > 0) 2267 { 2268 2269 LCID locale = LOCALE_SYSTEM_DEFAULT; 2270 2271 DWORD aFlags = NORM_IGNOREWIDTH; 2272 2273 if (digitsAsNumber) 2274 { 2275 aFlags |= SORT_DIGITSASNUMBERS; 2276 } 2277 2278 int aOrder = ::CompareStringW (locale, 2279 aFlags, 2280 (const WCHAR *) aStrA.Buffer_uint16 (), 2281 aLenA, 2282 (const WCHAR *) aStrB.Buffer_uint16 (), 2283 aLenB); 2284 2285 if (aOrder == CSTR_EQUAL) 2286 { 2287 return 0; 2288 } 2289 2290 else if (aOrder == CSTR_GREATER_THAN) 2291 { 2292 return 1; 2293 } 2294 2295 else 2296 { 2297 return -1; 2298 } 2299 2300 } 2301 2302 else 2303 { 2304 return 1; 2305 } 2306 2307 } 2308 2309 else 2310 { 2311 2312 if (aLenB > 0) 2313 { 2314 return -1; 2315 } 2316 else 2317 { 2318 return 0; 2319 } 2320 2321 } 2322 2323 } 2324 2325 #else 2326 2327 // Fallback to a pure Unicode sort order. 2328 2329 { 2330 2331 for (uint32 pass = 0; pass < 2; pass++) 2332 { 2333 2334 const char *aPtr = Get (); 2335 const char *bPtr = s.Get (); 2336 2337 while (*aPtr || *bPtr) 2338 { 2339 2340 if (!bPtr) 2341 { 2342 return 1; 2343 } 2344 2345 else if (!aPtr) 2346 { 2347 return -1; 2348 } 2349 2350 uint32 a = DecodeUTF8 (aPtr); 2351 uint32 b = DecodeUTF8 (bPtr); 2352 2353 // Ignore case on first compare pass. 
2354 2355 if (pass == 0) 2356 { 2357 2358 if (a >= (uint32) 'a' && a <= (uint32) 'z') 2359 { 2360 a = a - (uint32) 'a' + (uint32) 'A'; 2361 } 2362 2363 if (b >= (uint32) 'a' && b <= (uint32) 'z') 2364 { 2365 b = b - (uint32) 'a' + (uint32) 'A'; 2366 } 2367 2368 } 2369 2370 if (digitsAsNumber) 2371 { 2372 2373 uint32 aNumber = 0; 2374 uint32 aDigits = 0; 2375 2376 if (a >= (uint32) '0' && a <= (uint32) '9') 2377 { 2378 2379 aNumber = a - (uint32) '0'; 2380 aDigits = 1; 2381 2382 while (aDigits < 6 && *aPtr >= '0' && *aPtr <= '9') 2383 { 2384 aNumber = aNumber * 10 + ((uint32) *aPtr - 2385 (uint32) '0'); 2386 aDigits++; 2387 aPtr++; 2388 } 2389 2390 } 2391 2392 uint32 bNumber = 0; 2393 uint32 bDigits = 0; 2394 2395 if (b >= (uint32) '0' && b <= (uint32) '9') 2396 { 2397 2398 bNumber = b - (uint32) '0'; 2399 bDigits = 1; 2400 2401 while (bDigits < 6 && *bPtr >= '0' && *bPtr <= '9') 2402 { 2403 bNumber = bNumber * 10 + ((uint32) *bPtr - 2404 (uint32) '0'); 2405 bDigits++; 2406 bPtr++; 2407 } 2408 2409 } 2410 2411 if (aDigits > 0 && bDigits > 0) 2412 { 2413 2414 if (aNumber > bNumber) 2415 { 2416 return 1; 2417 } 2418 2419 if (aNumber < bNumber) 2420 { 2421 return -1; 2422 } 2423 2424 if (aDigits > bDigits) 2425 { 2426 return 1; 2427 } 2428 2429 if (aDigits < bDigits) 2430 { 2431 return -1; 2432 } 2433 2434 continue; 2435 2436 } 2437 2438 } 2439 2440 if (a > b) 2441 { 2442 return 1; 2443 } 2444 2445 else if (a < b) 2446 { 2447 return -1; 2448 } 2449 2450 } 2451 2452 } 2453 2454 } 2455 2456 #endif 2457 2458 return 0; 2459 2460 } 2461 2462 /*****************************************************************************/