File indexing completed on 2025-01-19 03:55:17

0001 /*****************************************************************************/
0002 // Copyright 2006-2019 Adobe Systems Incorporated
0003 // All Rights Reserved.
0004 //
0005 // NOTICE:  Adobe permits you to use, modify, and distribute this file in
0006 // accordance with the terms of the Adobe license agreement accompanying it.
0007 /*****************************************************************************/
0008 
0009 #include "dng_string.h"
0010 
0011 #include "dng_assertions.h"
0012 #include "dng_exceptions.h"
0013 #include "dng_flags.h"
0014 #include "dng_mutex.h"
0015 #include "dng_utils.h"
0016 #include "dng_safe_arithmetic.h"
0017 
0018 #if qMacOS
0019 #include <CoreServices/CoreServices.h>
0020 #endif
0021 
0022 #if qWinOS
0023 #   include <windows.h>
0024 #   ifndef SORT_DIGITSASNUMBERS
0025 //  NOTE: SORT_DIGITSASNUMBERS is available since win7
0026 #       define SORT_DIGITSASNUMBERS 8
0027 #   endif
0028 #endif
0029 
0030 #if qLinux || qiPhone || qAndroid
0031 #include <ctype.h> // for isdigit
0032 #endif
0033 
0034 /*****************************************************************************/
0035 
0036 const uint32 kREPLACEMENT_CHARACTER = 0x0000FFFD; // Code point U+FFFD; handed back by DecodeUTF8 for undecodable input and substituted by the setters below.
0037 
0038 /*****************************************************************************/
0039 
0040 // Returns the length of the zero-terminated string 's'. Throws a dng_exception
0041 // if the length of 's' is too large to be represented as a uint32.
0042 
0043 static uint32 strlenAsUint32 (const char *s)
0044     {
0045 
0046     uint32 lengthAsUint32 = 0;
0047 
0048     ConvertUnsigned (strlen (s), &lengthAsUint32);
0049 
0050     return lengthAsUint32;
0051 
0052     }
0053 
0054 /*****************************************************************************/
0055 
0056 // Checks whether there is enough space left in the buffer pointed to by
0057 // 'currentPos' to write at least 'space' elements of type T (to positions
0058 // currentPos[0] through currentPos[space - 1]. Throws a dng_exception if
0059 // there is not enough space left in the buffer. 'bufferEnd' should point one
0060 // element beyond the end of the buffer. For example, if the buffer is "T
0061 // buffer[3];", then bufferEnd should point to T + 3.
0062 
0063 template <class T>
0064 static void CheckSpaceLeftInBuffer(const T *currentPos,
0065                                    const T *bufferEnd,
0066                                    size_t space)
0067     {
0068 
0069     if (bufferEnd < currentPos || static_cast<size_t> (bufferEnd - currentPos) < space)
0070         {
0071         ThrowMemoryFull ("Buffer overrun");
0072         }
0073 
0074     }
0075 
0076 /*****************************************************************************/
0077 
0078 #if qMacOS
0079 
0080 static void Assign_Multibyte (dng_string &dngString,
0081                               const char *otherString,
0082                               TextEncoding encoding)
0083     {
0084 
0085     dng_safe_uint32 aSize (strlenAsUint32 (otherString));
0086 
0087     if (aSize.Get () > 0)
0088         {
0089 
0090         dng_safe_uint32 aBufSize = aSize * 6u + 256u;
0091 
0092         dng_memory_data aBuf (aBufSize + 1u);
0093 
0094         UnicodeMapping aMapping;
0095 
0096         aMapping.unicodeEncoding = ::CreateTextEncoding (kTextEncodingUnicodeV3_0,
0097                                                          kUnicodeNoSubset,
0098                                                          kUnicodeUTF8Format);
0099 
0100         aMapping.otherEncoding   = encoding;
0101         aMapping.mappingVersion  = kUnicodeUseLatestMapping;
0102 
0103         TextToUnicodeInfo aInfo = NULL;
0104 
0105         if (::CreateTextToUnicodeInfo (&aMapping, &aInfo) == noErr)
0106             {
0107 
0108             ByteCount aInput  = 0;
0109             ByteCount aOutput = 0;
0110 
0111             ::ConvertFromTextToUnicode (aInfo,
0112                                         aSize.Get (),
0113                                         otherString,
0114                                         kUnicodeUseFallbacksMask |
0115                                         kUnicodeLooseMappingsMask,
0116                                         0,
0117                                         NULL,
0118                                         NULL,
0119                                         NULL,
0120                                         aBufSize.Get (),
0121                                         &aInput,
0122                                         &aOutput,
0123                                         (UniChar *) aBuf.Buffer ());
0124 
0125             ::DisposeTextToUnicodeInfo (&aInfo);
0126 
0127             if (aOutput > 0 && aOutput <= aBufSize.Get ())
0128                 {
0129 
0130                 char *aBufChar = aBuf.Buffer_char ();
0131 
0132                 aBufChar [aOutput] = 0;
0133 
0134                 dngString.Set (aBufChar);
0135 
0136                 return;
0137 
0138                 }
0139 
0140             }
0141 
0142         }
0143 
0144     dngString.Clear ();
0145 
0146     }
0147 
0148 static uint32 Extract_Multibyte (const dng_string &dngString,
0149                                  dng_memory_data &buffer,
0150                                  TextEncoding encoding)
0151     {
0152 
0153     dng_safe_uint32 aSize (dngString.Length ());
0154 
0155     if (aSize.Get () > 0)
0156         {
0157 
0158         dng_safe_uint32 aBufSize = aSize * 2u + 256u;
0159 
0160         dng_memory_data tempBuffer (aBufSize);
0161 
0162         UnicodeMapping aMapping;
0163 
0164         aMapping.unicodeEncoding = ::CreateTextEncoding (kTextEncodingUnicodeV3_0,
0165                                                          kUnicodeNoSubset,
0166                                                          kUnicodeUTF8Format);
0167 
0168         aMapping.otherEncoding   = encoding;
0169         aMapping.mappingVersion  = kUnicodeUseLatestMapping;
0170 
0171         UnicodeToTextInfo aInfo = NULL;
0172 
0173         if (::CreateUnicodeToTextInfo (&aMapping, &aInfo) == noErr)
0174             {
0175 
0176             ByteCount aInput  = 0;
0177             ByteCount aOutput = 0;
0178 
0179             ::ConvertFromUnicodeToText (aInfo,
0180                                         aSize.Get (),
0181                                         (const UniChar *) dngString.Get (),
0182                                         kUnicodeUseFallbacksMask  |
0183                                         kUnicodeLooseMappingsMask |
0184                                         kUnicodeDefaultDirectionMask,
0185                                         0,
0186                                         NULL,
0187                                         NULL,
0188                                         NULL,
0189                                         aBufSize.Get (),
0190                                         &aInput,
0191                                         &aOutput,
0192                                         tempBuffer.Buffer_char ());
0193 
0194             ::DisposeUnicodeToTextInfo (&aInfo);
0195 
0196             if (aOutput > 0)
0197                 {
0198 
0199                 uint32 aOutputAsUint32 = 0;
0200 
0201                 ConvertUnsigned (aOutput, &aOutputAsUint32);
0202 
0203                 buffer.Allocate (dng_safe_uint32 (aOutputAsUint32) + 1u);
0204 
0205                 memcpy (buffer.Buffer (),
0206                         tempBuffer.Buffer (),
0207                         aOutputAsUint32);
0208 
0209                 buffer.Buffer_char () [aOutputAsUint32] = 0;
0210 
0211                 return aOutputAsUint32;
0212 
0213                 }
0214 
0215             }
0216 
0217         }
0218 
0219     buffer.Allocate (1);
0220 
0221     buffer.Buffer_char () [0] = 0;
0222 
0223     return 0;
0224 
0225     }
0226 
0227 static void Assign_SystemEncoding (dng_string &dngString,
0228                                    const char *otherString)
0229     {
0230 
0231     TextEncoding aEncoding;
0232 
0233     ::UpgradeScriptInfoToTextEncoding (smSystemScript,
0234                                        kTextLanguageDontCare,
0235                                        kTextRegionDontCare,
0236                                        NULL,
0237                                        &aEncoding);
0238 
0239     Assign_Multibyte (dngString,
0240                       otherString,
0241                       aEncoding);
0242 
0243     }
0244 
0245 static uint32 Extract_SystemEncoding (const dng_string &dngString,
0246                                       dng_memory_data &buffer)
0247     {
0248 
0249     TextEncoding aEncoding;
0250 
0251     ::UpgradeScriptInfoToTextEncoding (smSystemScript,
0252                                        kTextLanguageDontCare,
0253                                        kTextRegionDontCare,
0254                                        NULL,
0255                                        &aEncoding);
0256 
0257     return Extract_Multibyte (dngString,
0258                               buffer,
0259                               aEncoding);
0260 
0261     }
0262 
0263 static void Assign_JIS_X208_1990 (dng_string &dngString,
0264                                   const char *otherString)
0265     {
0266 
0267     Assign_Multibyte (dngString,
0268                       otherString,
0269                       kTextEncodingJIS_X0208_90);
0270 
0271     }
0272 
0273 #endif
0274 
0275 /*****************************************************************************/
0276 
0277 #if qWinOS
0278 
0279 static void Assign_Multibyte (dng_string &dngString,
0280                               const char *otherString,
0281                               UINT encoding)
0282     {
0283 
0284     DNG_ASSERT (sizeof (WCHAR) == 2, "WCHAR must be 2 bytes");
0285 
0286     const dng_safe_uint32 otherStringLen (strlenAsUint32 (otherString));
0287 
0288     const dng_safe_int32 aSize (otherStringLen);
0289 
0290     if (aSize.Get () > 0)
0291         {
0292 
0293         dng_safe_uint32 aBufCharsUint32 = otherStringLen * 3u + 128u;
0294 
0295         dng_safe_int32 aBufChars (aBufCharsUint32);
0296 
0297         dng_safe_uint32 bytesToAllocate = (aBufCharsUint32 + 1u) * 2u;
0298 
0299         dng_memory_data aBuf (bytesToAllocate);
0300 
0301         int aResult = ::MultiByteToWideChar (encoding,
0302                                              0,
0303                                              otherString,
0304                                              aSize.Get (),
0305                                              (WCHAR *) aBuf.Buffer (),
0306                                              aBufChars.Get ());
0307 
0308         if (aResult > 0 && aResult <= aBufChars.Get ())
0309             {
0310 
0311             uint16 * aUTF16 = aBuf.Buffer_uint16 ();
0312 
0313             aUTF16 [aResult] = 0;
0314 
0315             dngString.Set_UTF16 (aUTF16);
0316 
0317             return;
0318 
0319             }
0320 
0321         }
0322 
0323     dngString.Clear ();
0324 
0325     }
0326 
0327 static uint32 Extract_Multibyte (const dng_string &dngString,
0328                                  dng_memory_data &buffer,
0329                                  UINT encoding)
0330     {
0331 
0332     DNG_ASSERT (sizeof (WCHAR) == 2, "WCHAR must be 2 bytes");
0333 
0334     dng_memory_data sBuffer;
0335 
0336     int aCount = dngString.Get_UTF16 (sBuffer);
0337 
0338     if (aCount < 0)
0339         {
0340         return 0;
0341         }
0342 
0343     dng_safe_uint32 aCountAsUint32 (static_cast<uint32> (aCount));
0344 
0345     dng_safe_uint32 dBufSize = aCountAsUint32 * 2u + 256u;
0346 
0347     dng_memory_data dBuffer (dBufSize);
0348 
0349     int aResult = ::WideCharToMultiByte (encoding,
0350                                          0,
0351                                          (WCHAR *) sBuffer.Buffer (),
0352                                          aCount,
0353                                          dBuffer.Buffer_char (),
0354                                          dBufSize.Get (),
0355                                          NULL,
0356                                          NULL);
0357 
0358     if (aResult < 0)
0359         aResult = 0;
0360 
0361     dng_safe_uint32 aResultAsUint32 (static_cast<uint32> (aResult));
0362 
0363     buffer.Allocate (aResultAsUint32 + 1u);
0364 
0365     memcpy (buffer.Buffer (),
0366             dBuffer.Buffer (),
0367             aResult);
0368 
0369     buffer.Buffer_char () [aResult] = 0;
0370 
0371     return aResultAsUint32.Get ();
0372 
0373     }
0374 
0375 static void Assign_SystemEncoding (dng_string &dngString,
0376                                    const char *otherString)
0377     {
0378 
0379     Assign_Multibyte (dngString,
0380                       otherString,
0381                       ::GetACP ());
0382 
0383     }
0384 
0385 static uint32 Extract_SystemEncoding (const dng_string &dngString,
0386                                       dng_memory_data &buffer)
0387     {
0388 
0389     return Extract_Multibyte (dngString,
0390                               buffer,
0391                               ::GetACP ());
0392 
0393     }
0394 
0395 static void Assign_JIS_X208_1990 (dng_string &dngString,
0396                                   const char *otherString)
0397     {
0398 
0399     // From MSDN documentation: 20932 = JIS X 0208-1990 & 0121-1990
0400 
0401     const UINT kJIS = 20932;
0402 
0403     Assign_Multibyte (dngString,
0404                       otherString,
0405                       kJIS);
0406 
0407     }
0408 
0409 #endif
0410 
0411 /*****************************************************************************/
0412 
// Returns true when every byte of the zero-terminated string 's' has its
// high bit clear. A NULL pointer is treated like an empty string and also
// yields true.

static bool IsASCII (const char *s)
    {

    if (s == NULL)
        {
        return true;
        }

    for (const char *cursor = s; *cursor != 0; ++cursor)
        {

        const unsigned char byteValue = static_cast<unsigned char> (*cursor);

        if ((byteValue & 0x80) != 0)
            {
            return false;
            }

        }

    return true;

    }
0447 
0448 /*****************************************************************************/
0449 
0450 dng_string::dng_string ()
0451 
0452     :   fData ()
0453 
0454     {
0455 
0456     }
0457 
0458 /*****************************************************************************/
0459 
0460 dng_string::dng_string (const dng_string &s)
0461 
0462     :   fData ()
0463 
0464     {
0465 
0466     Set (s.Get ());
0467 
0468     }
0469 
0470 /*****************************************************************************/
0471 
0472 dng_string & dng_string::operator= (const dng_string &s)
0473     {
0474 
0475     if (this != &s)
0476         {
0477 
0478         Set (s.Get ());
0479 
0480         }
0481 
0482     return *this;
0483 
0484     }
0485 
0486 /*****************************************************************************/
0487 
0488 dng_string::~dng_string ()
0489     {
0490 
0491     }
0492 
0493 /*****************************************************************************/
0494 
0495 const char * dng_string::Get () const
0496     {
0497 
0498     if (fData.Buffer ())
0499         {
0500 
0501         return fData.Buffer_char ();
0502 
0503         }
0504 
0505     return "";
0506 
0507     }
0508 
0509 /*****************************************************************************/
0510 
0511 bool dng_string::IsASCII () const
0512     {
0513 
0514     return ::IsASCII (Get ());
0515 
0516     }
0517 
0518 /*****************************************************************************/
0519 
0520 void dng_string::Set (const char *s)
0521     {
0522 
0523     // Measure the new length.
0524 
0525     uint32 newLen = (s != NULL ? strlenAsUint32 (s) : 0);
0526 
0527     // If it is a NULL string, then clear the buffer.
0528 
0529     if (newLen == 0)
0530         {
0531 
0532         fData.Clear ();
0533 
0534         }
0535 
0536     // Else we need to copy the bytes.
0537 
0538     else
0539         {
0540 
0541         uint32 oldLen = Length ();
0542 
0543         // We might be setting this string to a sub-string of itself,
0544         // so don't reallocate the data unless the string is getting
0545         // longer.
0546 
0547         if (newLen > oldLen)
0548             {
0549 
0550             fData.Clear ();
0551 
0552             fData.Allocate (dng_safe_uint32 (newLen) + 1u);
0553 
0554             }
0555 
0556         char *d = fData.Buffer_char ();
0557 
0558         for (uint32 k = 0; k <= newLen; k++)
0559             {
0560 
0561             d [k] = s [k];
0562 
0563             }
0564 
0565         }
0566 
0567     }
0568 
0569 /*****************************************************************************/
0570 
0571 void dng_string::Set_ASCII (const char *s)
0572     {
0573 
0574     if (::IsASCII (s))
0575         {
0576 
0577         Set (s);
0578 
0579         }
0580 
0581     else
0582         {
0583 
0584         Set_SystemEncoding (s);
0585 
0586         }
0587 
0588     }
0589 
0590 /*****************************************************************************/
0591 
0592 void dng_string::Set_UTF8 (const char *s)
0593     {
0594 
0595     dng_safe_uint32 len (strlenAsUint32 (s));
0596 
0597     const char *sEnd = s + len.Get ();
0598 
0599     // Worst case expansion is 1-byte characters expanding to
0600     // replacement character, which requires 3 bytes.
0601 
0602     const dng_safe_uint32 destBufferLength = len * 3u + 1u;
0603 
0604     dng_memory_data buffer (destBufferLength);
0605 
0606     uint8 *d = buffer.Buffer_uint8 ();
0607     uint8 * const destEnd = d + destBufferLength.Get ();
0608 
0609     while (s < sEnd)
0610         {
0611 
0612         uint32 aChar = DecodeUTF8 (s, (uint32) (sEnd - s));
0613 
0614         if (aChar > 0x7FFFFFFF)
0615             {
0616             aChar = kREPLACEMENT_CHARACTER;
0617             }
0618 
0619         #if qDNGValidate
0620 
0621         if (aChar == kREPLACEMENT_CHARACTER)
0622             {
0623             ReportWarning ("Expected UTF-8 value is not valid UTF-8 (or contains a kREPLACEMENT_CHARACTER)");
0624             }
0625 
0626         #endif
0627 
0628         if (aChar < 0x00000080)
0629             {
0630             CheckSpaceLeftInBuffer (d, destEnd, 1);
0631             *(d++) = (uint8) aChar;
0632             }
0633 
0634         else if (aChar < 0x00000800)
0635             {
0636             CheckSpaceLeftInBuffer (d, destEnd, 2);
0637             *(d++) = (uint8) ((aChar >> 6) | 0x000000C0);
0638             *(d++) = (uint8) ((aChar & 0x0000003F) | 0x00000080);
0639             }
0640 
0641         else if (aChar < 0x00010000)
0642             {
0643             CheckSpaceLeftInBuffer (d, destEnd, 3);
0644             *(d++) = (uint8) ( (aChar >> 12) | 0x000000E0);
0645             *(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
0646             *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
0647             }
0648 
0649         else if (aChar < 0x00200000)
0650             {
0651             CheckSpaceLeftInBuffer (d, destEnd, 4);
0652             *(d++) = (uint8) ( (aChar >> 18) | 0x000000F0);
0653             *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
0654             *(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
0655             *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
0656             }
0657 
0658         else if (aChar < 0x04000000)
0659             {
0660             CheckSpaceLeftInBuffer (d, destEnd, 5);
0661             *(d++) = (uint8) ( (aChar >> 24) | 0x000000F8);
0662             *(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080);
0663             *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
0664             *(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
0665             *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
0666             }
0667 
0668         else
0669             {
0670             CheckSpaceLeftInBuffer (d, destEnd, 6);
0671             *(d++) = (uint8) ( (aChar >> 30) | 0x000000FC);
0672             *(d++) = (uint8) (((aChar >> 24) & 0x0000003F) | 0x00000080);
0673             *(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080);
0674             *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
0675             *(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
0676             *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
0677             }
0678 
0679         }
0680 
0681     CheckSpaceLeftInBuffer (d, destEnd, 1);
0682     *d = 0;
0683 
0684     Set (buffer.Buffer_char ());
0685 
0686     }
0687 
0688 /*****************************************************************************/
0689 
0690 uint32 dng_string::Get_SystemEncoding (dng_memory_data &buffer) const
0691     {
0692 
0693     if (IsASCII ())
0694         {
0695 
0696         dng_safe_uint32 len (Length ());
0697 
0698         const dng_safe_uint32 destBufferLength = len + 1u;
0699 
0700         buffer.Allocate (destBufferLength);
0701 
0702         memcpy (buffer.Buffer (), Get (), destBufferLength.Get ());
0703 
0704         return len.Get ();
0705 
0706         }
0707 
0708     else
0709         {
0710 
0711         #if qMacOS || qWinOS
0712 
0713         return Extract_SystemEncoding (*this, buffer);
0714 
0715         #else
0716 
0717         // Fallback logic to force the string to ASCII.
0718 
0719         dng_string temp (*this);
0720 
0721         temp.ForceASCII ();
0722 
0723         return temp.Get_SystemEncoding (buffer);
0724 
0725         #endif
0726 
0727         }
0728 
0729     }
0730 
0731 /*****************************************************************************/
0732 
0733 void dng_string::Set_SystemEncoding (const char *s)
0734     {
0735 
0736     if (::IsASCII (s))
0737         {
0738 
0739         Set (s);
0740 
0741         }
0742 
0743     else
0744         {
0745 
0746         #if qMacOS || qWinOS
0747 
0748         Assign_SystemEncoding (*this, s);
0749 
0750         #else
0751 
0752         // Fallback logic that just grabs the ASCII characters and
0753         // ignores the non-ASCII characters.
0754 
0755         dng_safe_uint32 len = strlenAsUint32 (s);
0756 
0757         const dng_safe_uint32 destBufferLength = len + 1u;
0758 
0759         dng_memory_data buffer (destBufferLength);
0760 
0761         uint8 *d = buffer.Buffer_uint8 ();
0762         uint8 * const destEnd = d + destBufferLength.Get ();
0763 
0764         while (*s)
0765             {
0766 
0767             uint8 c = (uint8) *(s++);
0768 
0769             if ((c & 0x80) == 0)
0770                 {
0771 
0772                 CheckSpaceLeftInBuffer (d, destEnd, 1);
0773                 *(d++) = c;
0774 
0775                 }
0776 
0777             }
0778 
0779         CheckSpaceLeftInBuffer (d, destEnd, 1);
0780         *d = 0;
0781 
0782         Set (buffer.Buffer_char ());
0783 
0784         #endif
0785 
0786         }
0787 
0788     }
0789 
0790 /*****************************************************************************/
0791 
0792 bool dng_string::ValidSystemEncoding () const
0793     {
0794 
0795     if (IsASCII ())
0796         {
0797 
0798         return true;
0799 
0800         }
0801 
0802     dng_memory_data buffer;
0803 
0804     Get_SystemEncoding (buffer);
0805 
0806     dng_string temp;
0807 
0808     temp.Set_SystemEncoding (buffer.Buffer_char ());
0809 
0810     return (*this == temp);
0811 
0812     }
0813 
0814 /*****************************************************************************/
0815 
0816 void dng_string::Set_JIS_X208_1990 (const char *s)
0817     {
0818 
0819     if (::IsASCII (s))
0820         {
0821 
0822         Set (s);
0823 
0824         }
0825 
0826     else
0827         {
0828 
0829         #if qMacOS || qWinOS
0830 
0831         Assign_JIS_X208_1990 (*this, s);
0832 
0833         #else
0834 
0835         // Fallback to the ASCII extraction logic.
0836 
0837         Set_SystemEncoding (s);
0838 
0839         #endif
0840 
0841         }
0842 
0843     }
0844 
0845 /*****************************************************************************/
0846 
0847 uint32 dng_string::DecodeUTF8 (const char *&s,
0848                                uint32 maxBytes,
0849                                bool *isValid)
0850     {
0851 
0852     static const uint8 gUTF8Bytes [256] =
0853         {
0854         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0855         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0856         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0857         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0858         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0859         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0860         2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
0861         3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,6,6
0862         };
0863 
0864     if (isValid)
0865         {
0866         *isValid = true;
0867         }
0868 
0869     const uint8 *nBuf = (const uint8 *) s;
0870 
0871     uint32 aChar = nBuf [0];
0872 
0873     uint32 aSize = gUTF8Bytes [aChar];
0874 
0875     if (aSize > maxBytes)
0876         {
0877 
0878         s += maxBytes;
0879 
0880         if (isValid)
0881             {
0882             *isValid = false;
0883             }
0884 
0885         return kREPLACEMENT_CHARACTER;
0886 
0887         }
0888 
0889     s += aSize;
0890 
0891     for (uint32 extra = 1; extra < aSize; extra++)
0892         {
0893 
0894         if ((nBuf [extra] & 0xC0) != 0x80)
0895             {
0896 
0897             if (isValid)
0898                 {
0899                 *isValid = false;
0900                 }
0901 
0902             return kREPLACEMENT_CHARACTER;
0903 
0904             }
0905 
0906         }
0907 
0908     switch (aSize)
0909         {
0910 
0911         case 0:
0912             {
0913 
0914             s++;        // Don't get stuck in infinite loop
0915 
0916             if (isValid)
0917                 {
0918                 *isValid = false;
0919                 }
0920 
0921             return kREPLACEMENT_CHARACTER;
0922 
0923             }
0924 
0925         case 1:
0926             {
0927 
0928             return aChar;
0929 
0930             }
0931 
0932         case 2:
0933             {
0934 
0935             aChar = ((aChar << 6) + nBuf [1]) - (uint32) 0x00003080UL;
0936 
0937             break;
0938 
0939             }
0940 
0941         case 3:
0942             {
0943 
0944             aChar =  ((((aChar << 6) + nBuf [1])
0945                                << 6) + nBuf [2]) - (uint32) 0x000E2080UL;
0946 
0947             break;
0948 
0949             }
0950 
0951         case 4:
0952             {
0953 
0954             aChar = ((((((aChar << 6) + nBuf [1])
0955                                 << 6) + nBuf [2])
0956                                 << 6) + nBuf [3]) - (uint32) 0x03C82080UL;
0957 
0958             break;
0959 
0960             }
0961 
0962         case 5:
0963             {
0964 
0965             aChar = ((((((((aChar << 6) + nBuf [1])
0966                                   << 6) + nBuf [2])
0967                                   << 6) + nBuf [3])
0968                                   << 6) + nBuf [4]) - (uint32) 0xFA082080UL;
0969 
0970             break;
0971 
0972             }
0973 
0974         case 6:
0975             {
0976 
0977             aChar = ((((((((((aChar << 6) + nBuf [1])
0978                                     << 6) + nBuf [2])
0979                                     << 6) + nBuf [3])
0980                                     << 6) + nBuf [4])
0981                                     << 6) + nBuf [5]) - (uint32) 0x82082080UL;
0982 
0983             break;
0984 
0985             }
0986 
0987         }
0988 
0989     if (aChar < 0x7F || aChar > 0x0010FFFF)
0990         {
0991 
0992         if (isValid)
0993             {
0994             *isValid = false;
0995             }
0996 
0997         return kREPLACEMENT_CHARACTER;
0998 
0999         }
1000 
1001     return aChar;
1002 
1003     }
1004 
1005 /*****************************************************************************/
1006 
1007 bool dng_string::IsUTF8 (const char *s)
1008     {
1009 
1010     uint32 len = strlenAsUint32 (s);
1011 
1012     const char *sEnd = s + len;
1013 
1014     while (s < sEnd)
1015         {
1016 
1017         bool isValid = true;
1018 
1019         (void) DecodeUTF8 (s, (uint32) (sEnd - s), &isValid);
1020 
1021         if (!isValid)
1022             {
1023             return false;
1024             }
1025 
1026         }
1027 
1028     return true;
1029 
1030     }
1031 
1032 /*****************************************************************************/
1033 
1034 void dng_string::Set_UTF8_or_System (const char *s)
1035     {
1036 
1037     if (::IsASCII (s))
1038         {
1039 
1040         Set (s);
1041 
1042         }
1043 
1044     else if (IsUTF8 (s))
1045         {
1046 
1047         Set_UTF8 (s);
1048 
1049         }
1050 
1051     else
1052         {
1053 
1054         Set_SystemEncoding (s);
1055 
1056         }
1057 
1058     }
1059 
1060 /*****************************************************************************/
1061 
1062 uint32 dng_string::Get_UTF16 (dng_memory_data &buffer) const
1063     {
1064 
1065     dng_safe_uint32 count = 0u;
1066 
1067     const char *sPtr = Get ();
1068 
1069     while (*sPtr)
1070         {
1071 
1072         uint32 x = DecodeUTF8 (sPtr);
1073 
1074         if (x <= 0x0000FFFF ||
1075             x >  0x0010FFFF)
1076             {
1077 
1078             count += 1u;
1079 
1080             }
1081 
1082         else
1083             {
1084 
1085             count += 2u;
1086 
1087             }
1088 
1089         }
1090 
1091     const dng_safe_uint32 destBufferLength = count + 1u;
1092 
1093     buffer.Allocate (destBufferLength.Get (),
1094                      sizeof (uint16));
1095 
1096     uint16 *dPtr = buffer.Buffer_uint16 ();
1097     uint16 * const destEnd = dPtr + destBufferLength.Get ();
1098 
1099     sPtr = Get ();
1100 
1101     while (*sPtr)
1102         {
1103 
1104         uint32 x = DecodeUTF8 (sPtr);
1105 
1106         if (x <= 0x0000FFFF)
1107             {
1108 
1109             CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
1110             *(dPtr++) = (uint16) x;
1111 
1112             }
1113 
1114         else if (x > 0x0010FFFF)
1115             {
1116 
1117             CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
1118             *(dPtr++) = (uint16) kREPLACEMENT_CHARACTER;
1119 
1120             }
1121 
1122         else
1123             {
1124 
1125             x -= 0x00010000;
1126 
1127             CheckSpaceLeftInBuffer (dPtr, destEnd, 2);
1128             *(dPtr++) = (uint16) ((x >> 10       ) + 0x0000D800);
1129             *(dPtr++) = (uint16) ((x & 0x000003FF) + 0x0000DC00);
1130 
1131             }
1132 
1133         }
1134 
1135     CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
1136     *dPtr = 0;
1137 
1138     return count.Get ();
1139 
1140     }
1141 
1142 /*****************************************************************************/
1143 
1144 void dng_string::Set_UTF16 (const uint16 *s)
1145     {
1146 
1147     if (!s)
1148         {
1149         Clear ();
1150         return;
1151         }
1152 
1153     bool swap = false;
1154 
1155     if (s [0] == 0xFFFE)        // Swapped byte order marker
1156         {
1157         swap = true;
1158         s++;
1159         }
1160 
1161     else if (s [0] == 0xFEFF)   // Non-swapped byte order marker
1162         {
1163         s++;
1164         }
1165 
1166     dng_safe_uint32 length16 (0u);
1167 
1168     while (s [length16.Get ()] != 0)
1169         {
1170         length16 += 1u;
1171         }
1172 
1173     const uint16 *sEnd = s + length16.Get ();
1174 
1175     const dng_safe_uint32 destBufferSize = length16 * 6u + 1u;
1176 
1177     dng_memory_data buffer (destBufferSize);
1178 
1179     uint8 *d = buffer.Buffer_uint8 ();
1180     uint8 * const destEnd = d + destBufferSize.Get ();
1181 
1182     while (s < sEnd)
1183         {
1184 
1185         uint32 aChar = *s++;
1186 
1187         if (swap)
1188             {
1189             aChar = ((aChar << 8) | (aChar >> 8)) & 0x0000FFFF;
1190             }
1191 
1192         if ((aChar >= 0x0000D800) && (aChar <= 0x0000DBFF) && (s < sEnd))
1193             {
1194 
1195             uint32 aLow = *s;
1196 
1197             if (swap)
1198                 {
1199                 aLow = ((aLow << 8) | (aLow >> 8)) & 0x0000FFFF;
1200                 }
1201 
1202             if ((aLow >= 0x0000DC00) && (aLow <= 0x0000DFFF))
1203                 {
1204 
1205                 aChar = ((aChar - 0x0000D800) << 10) +
1206                         (aLow - 0x0000DC00) +
1207                         0x00010000;
1208 
1209                 s++;
1210 
1211                 }
1212 
1213             }
1214 
1215         if (aChar > 0x7FFFFFFF)
1216             {
1217             aChar = kREPLACEMENT_CHARACTER;
1218             }
1219 
1220         if (aChar < 0x00000080)
1221             {
1222             CheckSpaceLeftInBuffer (d, destEnd, 1);
1223             *(d++) = (uint8) aChar;
1224             }
1225 
1226         else if (aChar < 0x00000800)
1227             {
1228             CheckSpaceLeftInBuffer (d, destEnd, 2);
1229             *(d++) = (uint8) ((aChar >> 6) | 0x000000C0);
1230             *(d++) = (uint8) ((aChar & 0x0000003F) | 0x00000080);
1231             }
1232 
1233         else if (aChar < 0x00010000)
1234             {
1235             CheckSpaceLeftInBuffer (d, destEnd, 3);
1236             *(d++) = (uint8) ( (aChar >> 12) | 0x000000E0);
1237             *(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
1238             *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
1239             }
1240 
1241         else if (aChar < 0x00200000)
1242             {
1243             CheckSpaceLeftInBuffer (d, destEnd, 4);
1244             *(d++) = (uint8) ( (aChar >> 18) | 0x000000F0);
1245             *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
1246             *(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
1247             *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
1248             }
1249 
1250         else if (aChar < 0x04000000)
1251             {
1252             CheckSpaceLeftInBuffer (d, destEnd, 5);
1253             *(d++) = (uint8) ( (aChar >> 24) | 0x000000F8);
1254             *(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080);
1255             *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
1256             *(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
1257             *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
1258             }
1259 
1260         else
1261             {
1262             CheckSpaceLeftInBuffer (d, destEnd, 6);
1263             *(d++) = (uint8) ( (aChar >> 30) | 0x000000FC);
1264             *(d++) = (uint8) (((aChar >> 24) & 0x0000003F) | 0x00000080);
1265             *(d++) = (uint8) (((aChar >> 18) & 0x0000003F) | 0x00000080);
1266             *(d++) = (uint8) (((aChar >> 12) & 0x0000003F) | 0x00000080);
1267             *(d++) = (uint8) (((aChar >>  6) & 0x0000003F) | 0x00000080);
1268             *(d++) = (uint8) ( (aChar & 0x0000003F) | 0x00000080);
1269             }
1270 
1271         }
1272 
1273     CheckSpaceLeftInBuffer (d, destEnd, 1);
1274     *d = 0;
1275 
1276     Set (buffer.Buffer_char ());
1277 
1278     }
1279 
1280 /*****************************************************************************/
1281 
// Resets this string by calling Set () with a NULL pointer.

void dng_string::Clear ()
    {

    Set (NULL);

    }
1288 
1289 /*****************************************************************************/
1290 
1291 void dng_string::Truncate (uint32 maxBytes)
1292     {
1293 
1294     uint32 len = Length ();
1295 
1296     if (len > maxBytes)
1297         {
1298 
1299         uint8 *s = fData.Buffer_uint8 ();
1300 
1301         // Don't truncate on an extension character.  Extensions characters
1302         // in UTF-8 have the 0x80 bit set and the 0x40 bit clear.
1303 
1304         while (maxBytes > 0 && ((s [maxBytes]) & 0xC0) == 0x80)
1305             {
1306 
1307             maxBytes--;
1308 
1309             }
1310 
1311         s [maxBytes] = 0;
1312 
1313         }
1314 
1315     }
1316 
1317 /*****************************************************************************/
1318 
1319 bool dng_string::TrimTrailingBlanks ()
1320     {
1321 
1322     bool didTrim = false;
1323 
1324     if (fData.Buffer ())
1325         {
1326 
1327         char *s = fData.Buffer_char ();
1328 
1329         uint32 len = strlenAsUint32 (s);
1330 
1331         while (len > 0 && s [len - 1] == ' ')
1332             {
1333             len--;
1334             didTrim = true;
1335             }
1336 
1337         s [len] = 0;
1338 
1339         }
1340 
1341     return didTrim;
1342 
1343     }
1344 
1345 /*****************************************************************************/
1346 
1347 bool dng_string::TrimLeadingBlanks ()
1348     {
1349 
1350     bool didTrim = false;
1351 
1352     const char *s = Get ();
1353 
1354     while (*s == ' ')
1355         {
1356         s++;
1357         didTrim = true;
1358         }
1359 
1360     if (didTrim)
1361         {
1362         Set (s);
1363         }
1364 
1365     return didTrim;
1366 
1367     }
1368 
1369 /*****************************************************************************/
1370 
// Returns true if the first character of the string returned by Get () is
// the terminating zero, i.e. the string has no characters.

bool dng_string::IsEmpty () const
    {

    const char *s = Get ();

    return *s == 0;

    }
1379 
1380 /*****************************************************************************/
1381 
// Returns the length of this string in bytes (not counting the terminating
// zero), as measured by strlenAsUint32, which throws a dng_exception if the
// length cannot be represented as a uint32.

uint32 dng_string::Length () const
    {

    const char *s = Get ();

    return strlenAsUint32 (s);

    }
1390 
1391 /*****************************************************************************/
1392 
// Returns true if this string and 's' contain exactly the same bytes. The
// comparison is a plain strcmp, so it is case sensitive.

bool dng_string::operator== (const dng_string &s) const
    {

    const char *s1 =   Get ();
    const char *s2 = s.Get ();

    return strcmp (s1, s2) == 0;

    }
1402 
1403 /*****************************************************************************/
1404 
1405 bool dng_string::Matches (const char *t,
1406                           const char *s,
1407                           bool case_sensitive)
1408     {
1409 
1410     while (*s != 0)
1411         {
1412 
1413         char c1 = *(s++);
1414         char c2 = *(t++);
1415 
1416         if (!case_sensitive)
1417             {
1418             c1 = ForceUppercase (c1);
1419             c2 = ForceUppercase (c2);
1420             }
1421 
1422         if (c1 != c2)
1423             {
1424             return false;
1425             }
1426 
1427         }
1428 
1429     return (*t == 0);
1430 
1431     }
1432 
1433 /*****************************************************************************/
1434 
// Returns true if this string's contents (as returned by Get ()) equal 's'.
// Forwards to the static Matches overload, passing 'case_sensitive' through.

bool dng_string::Matches (const char *s,
                          bool case_sensitive) const
    {

    return dng_string::Matches (Get (), s, case_sensitive);

    }
1442 
1443 /*****************************************************************************/
1444 
1445 bool dng_string::StartsWith (const char *s,
1446                              bool case_sensitive) const
1447     {
1448 
1449     const char *t = Get ();
1450 
1451     while (*s != 0)
1452         {
1453 
1454         char c1 = *(s++);
1455         char c2 = *(t++);
1456 
1457         if (!case_sensitive)
1458             {
1459             c1 = ForceUppercase (c1);
1460             c2 = ForceUppercase (c2);
1461             }
1462 
1463         if (c1 != c2)
1464             {
1465             return false;
1466             }
1467 
1468         }
1469 
1470     return true;
1471 
1472     }
1473 
1474 /*****************************************************************************/
1475 
1476 bool dng_string::EndsWith (const char *s,
1477                            bool case_sensitive) const
1478     {
1479 
1480     uint32 len1 = Length ();
1481 
1482     uint32 len2 = strlenAsUint32 (s);
1483 
1484     if (len1 < len2)
1485         {
1486         return false;
1487         }
1488 
1489     const char *t = Get () + (len1 - len2);
1490 
1491     while (*s != 0)
1492         {
1493 
1494         char c1 = *(s++);
1495         char c2 = *(t++);
1496 
1497         if (!case_sensitive)
1498             {
1499             c1 = ForceUppercase (c1);
1500             c2 = ForceUppercase (c2);
1501             }
1502 
1503         if (c1 != c2)
1504             {
1505             return false;
1506             }
1507 
1508         }
1509 
1510     return true;
1511 
1512     }
1513 
1514 /*****************************************************************************/
1515 
1516 bool dng_string::Contains (const char *s,
1517                            bool case_sensitive,
1518                            int32 *match_offset) const
1519     {
1520 
1521     if (match_offset)
1522         {
1523         *match_offset = -1;
1524         }
1525 
1526     uint32 len1 = Length ();
1527 
1528     uint32 len2 = strlenAsUint32 (s);
1529 
1530     if (len1 < len2)
1531         {
1532         return false;
1533         }
1534 
1535     uint32 offsets = len1 - len2;
1536 
1537     for (uint32 offset = 0; offset <= offsets; offset++)
1538         {
1539 
1540         const char *ss = s;
1541         const char *tt = Get () + offset;
1542 
1543         while (*ss != 0)
1544             {
1545 
1546             char c1 = *(ss++);
1547             char c2 = *(tt++);
1548 
1549             if (!case_sensitive)
1550                 {
1551                 c1 = ForceUppercase (c1);
1552                 c2 = ForceUppercase (c2);
1553                 }
1554 
1555             if (c1 != c2)
1556                 {
1557                 goto tryNextOffset;
1558                 }
1559 
1560             }
1561 
1562         if (match_offset)
1563             {
1564             *match_offset = offset;
1565             }
1566 
1567         return true;
1568 
1569         tryNextOffset:  ;
1570 
1571         }
1572 
1573     return false;
1574 
1575     }
1576 
1577 /*****************************************************************************/
1578 
1579 bool dng_string::Replace (const char *old_string,
1580                           const char *new_string,
1581                           bool case_sensitive)
1582     {
1583 
1584     int32 match_offset = -1;
1585 
1586     if (Contains (old_string,
1587                   case_sensitive,
1588                   &match_offset))
1589         {
1590 
1591         uint32 len1 = Length ();
1592 
1593         uint32 len2 = strlenAsUint32 (old_string);
1594         uint32 len3 = strlenAsUint32 (new_string);
1595 
1596         if (len2 == len3)
1597             {
1598 
1599             DNG_REQUIRE (fData.Buffer_char (), "Bad string in dng_string::Replace");
1600 
1601             strncpy (fData.Buffer_char () + match_offset,
1602                      new_string,
1603                      len3);
1604 
1605             }
1606 
1607         else if (len2 > len3)
1608             {
1609 
1610             DNG_REQUIRE (fData.Buffer_char (), "Bad string in dng_string::Replace");
1611 
1612             strncpy (fData.Buffer_char () + match_offset,
1613                      new_string,
1614                      len3);
1615 
1616             const char *s = fData.Buffer_char () + match_offset + len2;
1617                   char *d = fData.Buffer_char () + match_offset + len3;
1618 
1619             uint32 extra = len1 - match_offset - len2 + 1;  // + 1 for NULL termination
1620 
1621             for (uint32 j = 0; j < extra; j++)
1622                 {
1623                 *(d++) = *(s++);
1624                 }
1625 
1626             }
1627 
1628         else
1629             {
1630 
1631             // "len1 - len2" cannot wrap around because we know that if this
1632             // string contains old_string, len1 >= len2 must hold.
1633 
1634             dng_memory_data tempBuffer
1635                 (dng_safe_uint32 (len1 - len2) + len3 + 1u);
1636 
1637             if (match_offset)
1638                 {
1639 
1640                 strncpy (tempBuffer.Buffer_char (),
1641                          fData     .Buffer_char (),
1642                          match_offset);
1643 
1644                 }
1645 
1646             if (len3)
1647                 {
1648 
1649                 strncpy (tempBuffer.Buffer_char () + match_offset,
1650                          new_string,
1651                          len3);
1652 
1653                 }
1654 
1655             uint32 extra = len1 - match_offset - len2 + 1;  // + 1 for NULL termination
1656 
1657             DNG_REQUIRE (fData.Buffer_char (), "Bad string in dng_string::Replace");
1658 
1659             strncpy (tempBuffer.Buffer_char () + match_offset + len3,
1660                      fData     .Buffer_char () + match_offset + len2,
1661                      extra);
1662 
1663             Set (tempBuffer.Buffer_char ());
1664 
1665             }
1666 
1667         return true;
1668 
1669         }
1670 
1671     return false;
1672 
1673     }
1674 
1675 /*****************************************************************************/
1676 
1677 void dng_string::ReplaceChars (char oldChar,
1678                                char newChar)
1679     {
1680 
1681     if (fData.Buffer ())
1682         {
1683 
1684         uint32 len = Length ();
1685 
1686         char *dPtr = fData.Buffer_char ();
1687 
1688         for (uint32 j = 0; j < len; j++)
1689             {
1690 
1691             if (dPtr [j] == oldChar)
1692                 {
1693 
1694                 dPtr [j] = newChar;
1695 
1696                 }
1697 
1698             }
1699 
1700         }
1701 
1702     }
1703 
1704 /*****************************************************************************/
1705 
1706 bool dng_string::TrimLeading (const char *s,
1707                               bool case_sensitive)
1708     {
1709 
1710     if (StartsWith (s, case_sensitive))
1711         {
1712 
1713         Set (Get () + strlenAsUint32 (s));
1714 
1715         return true;
1716 
1717         }
1718 
1719     return false;
1720 
1721     }
1722 
1723 /*****************************************************************************/
1724 
1725 void dng_string::Append (const char *s)
1726     {
1727 
1728     dng_safe_uint32 len2 (strlenAsUint32 (s));
1729 
1730     if (len2.Get ())
1731         {
1732 
1733         dng_safe_uint32 len1 (Length ());
1734 
1735         dng_memory_data temp (len1 + len2 + 1u);
1736 
1737         char *buffer = temp.Buffer_char ();
1738 
1739         if (len1.Get ())
1740             {
1741             memcpy (buffer, Get (), len1.Get ());
1742             }
1743 
1744         memcpy (buffer + len1.Get (), s, (len2 + 1u).Get ());
1745 
1746         Set (buffer);
1747 
1748         }
1749 
1750     }
1751 
1752 /*****************************************************************************/
1753 
1754 void dng_string::SetUppercase ()
1755     {
1756 
1757     if (fData.Buffer ())
1758         {
1759 
1760         uint32 len = Length ();
1761 
1762         char *dPtr = fData.Buffer_char ();
1763 
1764         for (uint32 j = 0; j < len; j++)
1765             {
1766 
1767             char c = dPtr [j];
1768 
1769             if (c >= 'a' && c <= 'z')
1770                 {
1771 
1772                 dPtr [j] = c - 'a' + 'A';
1773 
1774                 }
1775 
1776             }
1777 
1778         }
1779 
1780     }
1781 
1782 /*****************************************************************************/
1783 
1784 void dng_string::SetLowercase ()
1785     {
1786 
1787     if (fData.Buffer ())
1788         {
1789 
1790         uint32 len = Length ();
1791 
1792         char *dPtr = fData.Buffer_char ();
1793 
1794         for (uint32 j = 0; j < len; j++)
1795             {
1796 
1797             char c = dPtr [j];
1798 
1799             if (c >= 'A' && c <= 'Z')
1800                 {
1801 
1802                 dPtr [j] = c - 'A' + 'a';
1803 
1804                 }
1805 
1806             }
1807 
1808         }
1809 
1810     }
1811 
1812 /*****************************************************************************/
1813 
1814 void dng_string::SetLineEndings (char ending)
1815     {
1816 
1817     if (fData.Buffer ())
1818         {
1819 
1820         const char *sPtr = fData.Buffer_char ();
1821               char *dPtr = fData.Buffer_char ();
1822 
1823         while (*sPtr)
1824             {
1825 
1826             char c = *(sPtr++);
1827 
1828             char nc = sPtr [0];
1829 
1830             if ((c == '\r' && nc == '\n') ||
1831                 (c == '\n' && nc == '\r'))
1832                 {
1833 
1834                 sPtr++;
1835 
1836                 if (ending)
1837                     {
1838                     *(dPtr++) = ending;
1839                     }
1840 
1841                 }
1842 
1843             else if (c == '\n' ||
1844                      c == '\r')
1845                 {
1846 
1847                 if (ending)
1848                     {
1849                     *(dPtr++) = ending;
1850                     }
1851 
1852                 }
1853 
1854             else
1855                 {
1856 
1857                 *(dPtr++) = c;
1858 
1859                 }
1860 
1861             }
1862 
1863         *dPtr = 0;
1864 
1865         }
1866 
1867     }
1868 
1869 /*****************************************************************************/
1870 
1871 void dng_string::StripLowASCII ()
1872     {
1873 
1874     if (fData.Buffer ())
1875         {
1876 
1877         const char *sPtr = fData.Buffer_char ();
1878               char *dPtr = fData.Buffer_char ();
1879 
1880         while (*sPtr)
1881             {
1882 
1883             char c = *(sPtr++);
1884 
1885             if (c == '\r' || c == '\n' || (uint8) c >= ' ')
1886                 {
1887 
1888                 *(dPtr++) = c;
1889 
1890                 }
1891 
1892             }
1893 
1894         *dPtr = 0;
1895 
1896         }
1897 
1898     }
1899 
1900 /*****************************************************************************/
1901 
1902 void dng_string::NormalizeAsCommaSeparatedNumbers ()
1903     {
1904 
1905     if (fData.Buffer ())
1906         {
1907 
1908         const char *sPtr = fData.Buffer_char ();
1909               char *dPtr = fData.Buffer_char ();
1910 
1911         bool commaInserted = false;
1912 
1913         while (*sPtr)
1914             {
1915 
1916             uint32 c = DecodeUTF8 (sPtr);
1917 
1918             // Support number formats such as "3", "+3.0", "-3.1416", "314.16e-2",
1919             // "0.31416E1", but no hex/octal number representations.
1920 
1921             if (isdigit ((int) c) || c == '.' || c == '-' || c == '+' || c == 'e' || c == 'E')
1922                 {
1923 
1924                 *(dPtr++) = (char) c;
1925 
1926                 if (commaInserted)
1927                     {
1928 
1929                     commaInserted = false;
1930 
1931                     }
1932 
1933                 }
1934 
1935             else if (!commaInserted)
1936                 {
1937 
1938                 *(dPtr++) = ',';
1939 
1940                 commaInserted = true;
1941 
1942                 }
1943 
1944             }
1945 
1946         *dPtr = 0;
1947 
1948         }
1949 
1950     }
1951 
1952 /******************************************************************************/
1953 
1954 // Unicode to low-ASCII strings table.
1955 
// One row of the Unicode to low-ASCII table: a Unicode code point and the
// zero-terminated ASCII string that stands in for it.

struct UnicodeToLowASCIIEntry
    {
    uint32 unicode;         // Unicode code point to be replaced.
    const char *ascii;      // ASCII replacement text for that code point.
    };
1961 
// Replacement table searched by dng_string::ForceASCII for code points above
// 0x7F. Entries are listed in ascending code point order and every
// replacement string is between one and three characters long. Code points
// that are not listed here are replaced by '?' in ForceASCII.

static const UnicodeToLowASCIIEntry kUnicodeToLowASCII [] =
    {
    {   0x00A0, " "     },
    {   0x00A1, "!"     },
    {   0x00A9, "(C)"   },
    {   0x00AA, "a"     },
    {   0x00AB, "<<"    },
    {   0x00AC, "!"     },
    {   0x00AE, "(R)"   },
    {   0x00B0, "dg"    },
    {   0x00B1, "+-"    },
    {   0x00B7, "."     },
    {   0x00BA, "o"     },
    {   0x00BB, ">>"    },
    {   0x00BF, "?"     },
    {   0x00C0, "A"     },
    {   0x00C1, "A"     },
    {   0x00C2, "A"     },
    {   0x00C3, "A"     },
    {   0x00C4, "A"     },
    {   0x00C5, "A"     },
    {   0x00C6, "AE"    },
    {   0x00C7, "C"     },
    {   0x00C8, "E"     },
    {   0x00C9, "E"     },
    {   0x00CA, "E"     },
    {   0x00CB, "E"     },
    {   0x00CC, "I"     },
    {   0x00CD, "I"     },
    {   0x00CE, "I"     },
    {   0x00CF, "I"     },
    {   0x00D1, "N"     },
    {   0x00D2, "O"     },
    {   0x00D3, "O"     },
    {   0x00D4, "O"     },
    {   0x00D5, "O"     },
    {   0x00D6, "O"     },
    {   0x00D8, "O"     },
    {   0x00D9, "U"     },
    {   0x00DA, "U"     },
    {   0x00DB, "U"     },
    {   0x00DC, "U"     },
    {   0x00DD, "Y"     },
    {   0x00E0, "a"     },
    {   0x00E1, "a"     },
    {   0x00E2, "a"     },
    {   0x00E3, "a"     },
    {   0x00E4, "a"     },
    {   0x00E5, "a"     },
    {   0x00E6, "ae"    },
    {   0x00E7, "c"     },
    {   0x00E8, "e"     },
    {   0x00E9, "e"     },
    {   0x00EA, "e"     },
    {   0x00EB, "e"     },
    {   0x00EC, "i"     },
    {   0x00ED, "i"     },
    {   0x00EE, "i"     },
    {   0x00EF, "i"     },
    {   0x00F1, "n"     },
    {   0x00F2, "o"     },
    {   0x00F3, "o"     },
    {   0x00F4, "o"     },
    {   0x00F5, "o"     },
    {   0x00F6, "o"     },
    {   0x00F7, "/"     },
    {   0x00F8, "o"     },
    {   0x00F9, "u"     },
    {   0x00FA, "u"     },
    {   0x00FB, "u"     },
    {   0x00FC, "u"     },
    {   0x00FD, "y"     },
    {   0x00FF, "y"     },
    {   0x0131, "i"     },
    {   0x0152, "OE"    },
    {   0x0153, "oe"    },
    {   0x0178, "Y"     },
    {   0x2013, "-"     },
    {   0x2014, "-"     },
    {   0x2018, "'"     },
    {   0x2019, "'"     },
    {   0x201A, ","     },
    {   0x201C, "\""    },
    {   0x201D, "\""    },
    {   0x201E, ",,"    },
    {   0x2022, "."     },
    {   0x2026, "..."   },
    {   0x2039, "<"     },
    {   0x203A, ">"     },
    {   0x2044, "/"     },
    {   0x2122, "TM"    },
    {   0x2206, "d"     },
    {   0x2211, "S"     },
    {   0x2260, "!="    },
    {   0x2264, "<="    },
    {   0x2265, ">="    },
    {   0x2318, "#"     },
    {   0xFB01, "fi"    },
    {   0xFB02, "fl"    }
    };
2062 
2063 /******************************************************************************/
2064 
2065 void dng_string::ForceASCII ()
2066     {
2067 
2068     if (!IsASCII ())
2069         {
2070 
2071         dng_safe_uint32 tempBufferSize = dng_safe_uint32 (Length ()) * 3u + 1u;
2072 
2073         dng_memory_data tempBuffer (tempBufferSize);
2074 
2075         char *dPtr = tempBuffer.Buffer_char ();
2076         char * const destEnd = dPtr + tempBufferSize.Get ();
2077 
2078         const char *sPtr = Get ();
2079 
2080         while (*sPtr)
2081             {
2082 
2083             uint32 x = DecodeUTF8 (sPtr);
2084 
2085             if (x <= 0x007F)
2086                 {
2087 
2088                 CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
2089                 *(dPtr++) = (char) x;
2090 
2091                 }
2092 
2093             else
2094                 {
2095 
2096                 const char *ascii = NULL;
2097 
2098                 const uint32 kTableEntrys = sizeof (kUnicodeToLowASCII    ) /
2099                                             sizeof (kUnicodeToLowASCII [0]);
2100 
2101                 for (uint32 entry = 0; entry < kTableEntrys; entry++)
2102                     {
2103 
2104                     if (kUnicodeToLowASCII [entry] . unicode == x)
2105                         {
2106 
2107                         ascii = kUnicodeToLowASCII [entry] . ascii;
2108 
2109                         break;
2110 
2111                         }
2112 
2113                     }
2114 
2115                 if (ascii)
2116                     {
2117 
2118                     while (*ascii)
2119                         {
2120 
2121                         CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
2122                         *(dPtr++) = *(ascii++);
2123 
2124                         }
2125 
2126                     }
2127 
2128                 else
2129                     {
2130 
2131                     CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
2132                     *(dPtr++) ='?';
2133 
2134                     }
2135 
2136                 }
2137 
2138             }
2139 
2140         CheckSpaceLeftInBuffer (dPtr, destEnd, 1);
2141         *dPtr = 0;
2142 
2143         Set (tempBuffer.Buffer_char ());
2144 
2145         }
2146 
2147     }
2148 
2149 /******************************************************************************/
2150 
// Mutex that dng_string::Compare locks around its call to
// UCCompareTextDefault in Mac OS builds, because that call is not thread
// safe on at least some Mac OS versions.

static dng_std_mutex gProtectUCCalls;
2152 
2153 /******************************************************************************/
2154 
2155 int32 dng_string::Compare (const dng_string &s,
2156                            bool digitsAsNumber) const
2157     {
2158 
2159     #if qMacOS
2160 
2161         {
2162 
2163         dng_memory_data aStrA;
2164         dng_memory_data aStrB;
2165 
2166         uint32 aLenA = this->Get_UTF16 (aStrA);
2167         uint32 aLenB = s    .Get_UTF16 (aStrB);
2168 
2169         if (aLenA > 0)
2170             {
2171 
2172             if (aLenB > 0)
2173                 {
2174 
2175                 // For some Mac OS versions anyway, UCCompareTextDefault is not
2176                 // thread safe.
2177 
2178                 dng_lock_std_mutex lockMutex (gProtectUCCalls);
2179 
2180                 UCCollateOptions aOptions = kUCCollateStandardOptions |
2181                                             kUCCollatePunctuationSignificantMask;
2182 
2183                 if (digitsAsNumber)
2184                     {
2185 
2186                     aOptions |= kUCCollateDigitsOverrideMask |
2187                                 kUCCollateDigitsAsNumberMask;
2188 
2189                     }
2190 
2191                 SInt32 aOrder = -1;
2192 
2193                 Boolean aEqual = false;
2194 
2195                 OSStatus searchStatus = ::UCCompareTextDefault (aOptions,
2196                                                                 aStrA.Buffer_uint16 (),
2197                                                                 aLenA,
2198                                                                 aStrB.Buffer_uint16 (),
2199                                                                 aLenB,
2200                                                                 &aEqual,
2201                                                                 &aOrder);
2202 
2203                 if (searchStatus == noErr)
2204                     {
2205 
2206                     if (aEqual || (aOrder == 0))
2207                         {
2208                         return 0;
2209                         }
2210 
2211                     else
2212                         {
2213                         return (aOrder > 0) ? 1 : -1;
2214                         }
2215 
2216                     }
2217 
2218                 else
2219                     {
2220 
2221                     DNG_REPORT ("UCCompareTextDefault failed");
2222 
2223                     return -1;
2224 
2225                     }
2226 
2227                 }
2228 
2229             else
2230                 {
2231                 return 1;
2232                 }
2233 
2234             }
2235 
2236         else
2237             {
2238 
2239             if (aLenB > 0)
2240                 {
2241                 return -1;
2242                 }
2243 
2244             else
2245                 {
2246                 return 0;
2247                 }
2248 
2249             }
2250 
2251         }
2252 
2253     #elif qWinOS
2254 
2255         {
2256 
2257         dng_memory_data aStrA;
2258         dng_memory_data aStrB;
2259 
2260         uint32 aLenA = this->Get_UTF16 (aStrA);
2261         uint32 aLenB = s    .Get_UTF16 (aStrB);
2262 
2263         if (aLenA > 0)
2264             {
2265 
2266             if (aLenB > 0)
2267                 {
2268 
2269                 LCID locale = LOCALE_SYSTEM_DEFAULT;
2270 
2271                 DWORD aFlags = NORM_IGNOREWIDTH;
2272 
2273                 if (digitsAsNumber)
2274                     {
2275                     aFlags |= SORT_DIGITSASNUMBERS;
2276                     }
2277 
2278                 int aOrder = ::CompareStringW (locale,
2279                                                aFlags,
2280                                                (const WCHAR *) aStrA.Buffer_uint16 (),
2281                                                aLenA,
2282                                                (const WCHAR *) aStrB.Buffer_uint16 (),
2283                                                aLenB);
2284 
2285                 if (aOrder == CSTR_EQUAL)
2286                     {
2287                     return 0;
2288                     }
2289 
2290                 else if (aOrder == CSTR_GREATER_THAN)
2291                     {
2292                     return 1;
2293                     }
2294 
2295                 else
2296                     {
2297                     return -1;
2298                     }
2299 
2300                 }
2301 
2302             else
2303                 {
2304                 return 1;
2305                 }
2306 
2307             }
2308 
2309         else
2310             {
2311 
2312             if (aLenB > 0)
2313                 {
2314                 return -1;
2315                 }
2316             else
2317                 {
2318                 return 0;
2319                 }
2320 
2321             }
2322 
2323         }
2324 
2325     #else
2326 
2327     // Fallback to a pure Unicode sort order.
2328 
2329         {
2330 
2331         for (uint32 pass = 0; pass < 2; pass++)
2332             {
2333 
2334             const char *aPtr =   Get ();
2335             const char *bPtr = s.Get ();
2336 
2337             while (*aPtr || *bPtr)
2338                 {
2339 
2340                 if (!bPtr)
2341                     {
2342                     return 1;
2343                     }
2344 
2345                 else if (!aPtr)
2346                     {
2347                     return -1;
2348                     }
2349 
2350                 uint32 a = DecodeUTF8 (aPtr);
2351                 uint32 b = DecodeUTF8 (bPtr);
2352 
2353                 // Ignore case on first compare pass.
2354 
2355                 if (pass == 0)
2356                     {
2357 
2358                     if (a >= (uint32) 'a' && a <= (uint32) 'z')
2359                         {
2360                         a = a - (uint32) 'a' + (uint32) 'A';
2361                         }
2362 
2363                     if (b >= (uint32) 'a' && b <= (uint32) 'z')
2364                         {
2365                         b = b - (uint32) 'a' + (uint32) 'A';
2366                         }
2367 
2368                     }
2369 
2370                 if (digitsAsNumber)
2371                     {
2372 
2373                     uint32 aNumber = 0;
2374                     uint32 aDigits = 0;
2375 
2376                     if (a >= (uint32) '0' && a <= (uint32) '9')
2377                         {
2378 
2379                         aNumber = a - (uint32) '0';
2380                         aDigits = 1;
2381 
2382                         while (aDigits < 6 && *aPtr >= '0' && *aPtr <= '9')
2383                             {
2384                             aNumber = aNumber * 10 + ((uint32) *aPtr -
2385                                                       (uint32) '0');
2386                             aDigits++;
2387                             aPtr++;
2388                             }
2389 
2390                         }
2391 
2392                     uint32 bNumber = 0;
2393                     uint32 bDigits = 0;
2394 
2395                     if (b >= (uint32) '0' && b <= (uint32) '9')
2396                         {
2397 
2398                         bNumber = b - (uint32) '0';
2399                         bDigits = 1;
2400 
2401                         while (bDigits < 6 && *bPtr >= '0' && *bPtr <= '9')
2402                             {
2403                             bNumber = bNumber * 10 + ((uint32) *bPtr -
2404                                                       (uint32) '0');
2405                             bDigits++;
2406                             bPtr++;
2407                             }
2408 
2409                         }
2410 
2411                     if (aDigits > 0 && bDigits > 0)
2412                         {
2413 
2414                         if (aNumber > bNumber)
2415                             {
2416                             return 1;
2417                             }
2418 
2419                         if (aNumber < bNumber)
2420                             {
2421                             return -1;
2422                             }
2423 
2424                         if (aDigits > bDigits)
2425                             {
2426                             return 1;
2427                             }
2428 
2429                         if (aDigits < bDigits)
2430                             {
2431                             return -1;
2432                             }
2433 
2434                         continue;
2435 
2436                         }
2437 
2438                     }
2439 
2440                 if (a > b)
2441                     {
2442                     return 1;
2443                     }
2444 
2445                 else if (a < b)
2446                     {
2447                     return -1;
2448                     }
2449 
2450                 }
2451 
2452             }
2453 
2454         }
2455 
2456     #endif
2457 
2458     return 0;
2459 
2460     }
2461 
2462 /*****************************************************************************/