File indexing completed on 2024-05-19 04:06:44

0001 /* stringprep.c     Core stringprep implementation.
0002  * Copyright (C) 2002, 2003  Simon Josefsson
0003  *
0004  * This file is part of GNU Libidn.
0005  *
0006  * GNU Libidn is free software; you can redistribute it and/or
0007  * modify it under the terms of the GNU Lesser General Public
0008  * License as published by the Free Software Foundation; either
0009  * version 2.1 of the License, or (at your option) any later version.
0010  *
0011  * GNU Libidn is distributed in the hope that it will be useful,
0012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0014  * Lesser General Public License for more details.
0015  *
0016  * You should have received a copy of the GNU Lesser General Public
0017  * License along with GNU Libidn; if not, write to the Free Software
0018  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
0019  *
0020  */
0021 
0022 #include "internal.h"
0023 
0024 static ssize_t
0025 stringprep_find_character_in_table (my_uint32_t ucs4,
0026                     Stringprep_table_element * table)
0027 {
0028   ssize_t i;
0029 
0030   for (i = 0; table[i].start; i++)
0031     if (ucs4 >= table[i].start &&
0032     ucs4 <= (table[i].end ? table[i].end : table[i].start))
0033       return i;
0034 
0035   return -1;
0036 }
0037 
0038 static ssize_t
0039 stringprep_find_string_in_table (my_uint32_t * ucs4,
0040                  size_t ucs4len,
0041                  size_t * tablepos,
0042                  Stringprep_table_element * table)
0043 {
0044   size_t j;
0045   ssize_t pos;
0046 
0047   for (j = 0; j < ucs4len; j++)
0048     if ((pos = stringprep_find_character_in_table (ucs4[j], table)) != -1)
0049       {
0050     if (tablepos)
0051       *tablepos = pos;
0052     return j;
0053       }
0054 
0055   return -1;
0056 }
0057 
0058 static int
0059 stringprep_apply_table_to_string (my_uint32_t * ucs4,
0060                   size_t * ucs4len,
0061                   size_t maxucs4len,
0062                   Stringprep_table_element * table,
0063                   const char *tablename)
0064 {
0065   ssize_t pos;
0066   size_t i, maplen;
0067 
0068   while ((pos = stringprep_find_string_in_table (ucs4, *ucs4len,
0069                          &i, table)) != -1)
0070     {
0071       for (maplen = STRINGPREP_MAX_MAP_CHARS;
0072        maplen > 0 && table[i].map[maplen - 1] == 0; maplen--)
0073     ;
0074 
0075       if (*ucs4len - 1 + maplen >= maxucs4len)
0076     return STRINGPREP_TOO_SMALL_BUFFER;
0077 
0078       memmove (&ucs4[pos + maplen], &ucs4[pos + 1],
0079            *ucs4len * sizeof (my_uint32_t) - (&ucs4[pos + 1] - ucs4));
0080       memcpy (&ucs4[pos], table[i].map, sizeof (my_uint32_t) * maplen);
0081       *ucs4len = *ucs4len - 1 + maplen;
0082     }
0083 
0084   return STRINGPREP_OK;
0085 }
0086 
/* INVERTED(x) yields X with the most significant bit of an unsigned
 * long masked off, i.e. it is non-zero when any other bit of X is set.
 * NOTE(review): the name suggests a test for an "inverted" flag set;
 * confirm the intended semantics against the profile tables.
 *
 * UNAPPLICAPLEFLAGS(flags, profileflags) is non-zero when either
 *   - INVERTED(profileflags) is zero, PROFILEFLAGS is non-zero and
 *     shares no bit with FLAGS, or
 *   - INVERTED(profileflags) is non-zero and PROFILEFLAGS shares at
 *     least one bit with FLAGS.
 * Both arguments are fully parenthesized so that expressions such as
 * `A | B' can be passed safely.  PROFILEFLAGS is evaluated more than
 * once; do not pass expressions with side effects.
 */
#define INVERTED(x) ((x) & ((~0UL) >> 1))
#define UNAPPLICAPLEFLAGS(flags, profileflags) \
  ((!INVERTED(profileflags) && !((profileflags) & (flags)) && (profileflags)) || \
   ( INVERTED(profileflags) && ((profileflags) & (flags))))
0091 
0092 /**
0093  * stringprep:
0094  * @in: input/ouput array with string to prepare.
0095  * @maxlen: maximum length of input/output array.
0096  * @flags: optional stringprep profile flags.
0097  * @profile: pointer to stringprep profile to use.
0098  *
0099  * Prepare the input UTF-8 string according to the stringprep profile.
0100  * Normally application programmers use stringprep profile macros such
0101  * as stringprep_nameprep(), stringprep_kerberos5() etc instead of
0102  * calling this function directly.
0103  *
0104  * Since the stringprep operation can expand the string, @maxlen
0105  * indicate how large the buffer holding the string is.  The @flags
0106  * are one of Stringprep_profile_flags, or 0.  The profile indicates
0107  * processing details specific to that profile.  Your application can
0108  * define new profiles, possibly re-using the generic stringprep
0109  * tables that always will be part of the library.
0110  *
0111  * Note that you must convert strings entered in the systems locale
0112  * into UTF-8 before using this function.
0113  *
0114  * Return value: Returns 0 iff successful, or an error code.
0115  **/
0116 int
0117 stringprep (char *in,
0118         size_t maxlen,
0119         Stringprep_profile_flags flags, Stringprep_profile * profile)
0120 {
0121   size_t i, j;
0122   ssize_t k;
0123   int rc;
0124   char *p = 0;
0125   my_uint32_t *q = 0;
0126   my_uint32_t *ucs4;
0127   size_t ucs4len, maxucs4len;
0128 
0129   ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
0130   maxucs4len = 4 * ucs4len + 10;    /* XXX */
0131   ucs4 = realloc (ucs4, 1 + maxucs4len * sizeof (my_uint32_t));
0132   if (!ucs4)
0133     {
0134       rc = STRINGPREP_MALLOC_ERROR;
0135       goto done;
0136     }
0137 
0138   for (i = 0; profile[i].operation; i++)
0139     {
0140       switch (profile[i].operation)
0141     {
0142     case STRINGPREP_NFKC:
0143       if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
0144         {
0145           break;
0146         }
0147 
0148       if (flags & STRINGPREP_NO_NFKC && !profile[i].flags)
0149         {
0150           /* Profile requires NFKC, but callee asked for no NFKC. */
0151           rc = STRINGPREP_FLAG_ERROR;
0152           goto done;
0153         }
0154 
0155       q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len);
0156 
0157       if (!q)
0158         {
0159           rc = STRINGPREP_NFKC_FAILED;
0160           goto done;
0161         }
0162 
0163       for (j = 0; q[j]; j++)
0164         ;
0165 
0166       free (ucs4);
0167       ucs4 = q;
0168       ucs4len = j;
0169       q = 0;
0170       break;
0171 
0172     case STRINGPREP_PROHIBIT_TABLE:
0173       k = stringprep_find_string_in_table (ucs4, ucs4len,
0174                            NULL, profile[i].table);
0175       if (k != -1)
0176         {
0177           rc = STRINGPREP_CONTAINS_PROHIBITED;
0178           goto done;
0179         }
0180       break;
0181 
0182     case STRINGPREP_UNASSIGNED_TABLE:
0183       if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
0184         break;
0185       if (flags & STRINGPREP_NO_UNASSIGNED)
0186         {
0187           k = stringprep_find_string_in_table
0188         (ucs4, ucs4len, NULL, profile[i].table);
0189           if (k != -1)
0190         {
0191           rc = STRINGPREP_CONTAINS_UNASSIGNED;
0192           goto done;
0193         }
0194         }
0195       break;
0196 
0197     case STRINGPREP_MAP_TABLE:
0198       if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
0199         break;
0200       rc = stringprep_apply_table_to_string
0201         (ucs4, &ucs4len, maxucs4len, profile[i].table, profile[i].name);
0202       if (rc != STRINGPREP_OK)
0203         goto done;
0204       break;
0205 
0206     case STRINGPREP_BIDI_PROHIBIT_TABLE:
0207     case STRINGPREP_BIDI_RAL_TABLE:
0208     case STRINGPREP_BIDI_L_TABLE:
0209       break;
0210 
0211     case STRINGPREP_BIDI:
0212       {
0213         int done_prohibited = 0;
0214         int done_ral = 0;
0215         int done_l = 0;
0216         int contains_ral = -1;
0217         int contains_l = -1;
0218 
0219         for (j = 0; profile[j].operation; j++)
0220           if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
0221         {
0222           done_prohibited = 1;
0223           k = stringprep_find_string_in_table (ucs4, ucs4len,
0224                                NULL,
0225                                profile[j].table);
0226           if (k != -1)
0227             {
0228               rc = STRINGPREP_BIDI_CONTAINS_PROHIBITED;
0229               goto done;
0230             }
0231         }
0232           else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE)
0233         {
0234           done_ral = 1;
0235           if (stringprep_find_string_in_table
0236               (ucs4, ucs4len, NULL, profile[j].table) != -1)
0237             contains_ral = j;
0238         }
0239           else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE)
0240         {
0241           done_l = 1;
0242           if (stringprep_find_string_in_table
0243               (ucs4, ucs4len, NULL, profile[j].table) != -1)
0244             contains_l = j;
0245         }
0246 
0247         if (!done_prohibited || !done_ral || !done_l)
0248           {
0249         rc = STRINGPREP_PROFILE_ERROR;
0250         goto done;
0251           }
0252 
0253         if (contains_ral != -1 && contains_l != -1)
0254           {
0255         rc = STRINGPREP_BIDI_BOTH_L_AND_RAL;
0256         goto done;
0257           }
0258 
0259         if (contains_ral != -1)
0260           {
0261         if (!(stringprep_find_character_in_table
0262               (ucs4[0], profile[contains_ral].table) != -1 &&
0263               stringprep_find_character_in_table
0264               (ucs4[ucs4len - 1], profile[contains_ral].table) != -1))
0265           {
0266             rc = STRINGPREP_BIDI_LEADTRAIL_NOT_RAL;
0267             goto done;
0268           }
0269           }
0270       }
0271       break;
0272 
0273     default:
0274       rc = STRINGPREP_PROFILE_ERROR;
0275       goto done;
0276       break;
0277     }
0278     }
0279 
0280   p = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
0281 
0282   if (strlen (p) >= maxlen)
0283     {
0284       rc = STRINGPREP_TOO_SMALL_BUFFER;
0285       goto done;
0286     }
0287 
0288   strcpy (in, p);       /* flawfinder: ignore */
0289 
0290   rc = STRINGPREP_OK;
0291 
0292 done:
0293   if (p)
0294     free (p);
0295   if (q)
0296     free (q);
0297   if (ucs4)
0298     free (ucs4);
0299   return rc;
0300 }
0301 
0302 /**
0303  * stringprep_profile:
0304  * @in: input/ouput array with string to prepare.
0305  * @out: output variable with newly allocate string.
0306  * @flags: optional stringprep profile flags.
0307  * @profile: name of stringprep profile to use.
0308  *
0309  * Prepare the input UTF-8 string according to the stringprep profile.
0310  * Normally application programmers use stringprep profile macros such
0311  * as stringprep_nameprep(), stringprep_kerberos5() etc instead of
0312  * calling this function directly.
0313  *
0314  * Note that you must convert strings entered in the systems locale
0315  * into UTF-8 before using this function.
0316  *
0317  * The output @out variable must be deallocated by the caller.
0318  *
0319  * Return value: Returns 0 iff successful, or an error code.
0320  **/
0321 int
0322 stringprep_profile (char *in,
0323             char **out, char *profile, Stringprep_profile_flags flags)
0324 {
0325   Stringprep_profiles *p;
0326   char *str;
0327   size_t len;
0328   int rc;
0329 
0330   for (p = &stringprep_profiles[0]; p->name; p++)
0331     if (strcmp (p->name, profile) == 0)
0332       break;
0333 
0334   if (!p || !p->name || !p->tables)
0335     return STRINGPREP_UNKNOWN_PROFILE;
0336 
0337   len = strlen (in) + BUFSIZ;
0338   str = (char *) malloc (len);
0339   if (str == NULL)
0340     return STRINGPREP_MALLOC_ERROR;
0341 
0342   strcpy (str, in);
0343 
0344   rc = stringprep (str, len, flags, p->tables);
0345 
0346   if (rc == STRINGPREP_OK)
0347     *out = str;
0348   else
0349     free (str);
0350 
0351   return rc;
0352 }
0353 
0354 /**
0355  * STRINGPREP_VERSION
0356  *
0357  * String defined via CPP denoting the header file version number.
0358  * Used together with stringprep_check_version() to verify header file
0359  * and run-time library consistency.
0360  */
0361 
0362 /**
0363  * STRINGPREP_MAX_MAP_CHARS
0364  *
0365  * Maximum number of code points that can replace a single code point,
0366  * during stringprep mapping.
0367  */
0368 
0369 /**
0370  * Stringprep_rc
0371  *
0372  * Enumerated return codes of stringprep(), stringprep_profile()
0373  * functions (and macros using those functions).  The value 0 is
0374  * guaranteed to always correspond to success.
0375  */
0376 
0377 /**
0378  * Stringprep_profile_flags:
0379  * @STRINGPREP_NO_NFKC: Disable the NFKC normalization, as well as
0380  *   selecting the non-NFKC case folding tables.  Usually the profile
0381  *   specifies BIDI and NFKC settings, and applications should not
0382  *   override it unless in special situations.
0383  * @STRINGPREP_NO_BIDI: Disable the BIDI step.  Usually the profile
0384  *   specifies BIDI and NFKC settings, and applications should not
0385  *   override it unless in special situations.
0386  * @STRINGPREP_NO_UNASSIGNED: Make the library return with an error if
0387  *   string contains unassigned characters according to profile.
0388  *
0389  * Stringprep profile flags.
0390  */
0391 
0392 /**
0393  * Stringprep_profile_steps:
0394  *
0395  * Various steps in the stringprep algorithm.  You really want to
0396  * study the source code to understand this one.  Only useful if you
0397  * want to add another profile.
0398  */
0399 
0400 /**
0401  * stringprep_nameprep:
0402  * @in: input/output array with string to prepare.
0403  * @maxlen: maximum length of input/output array.
0404  *
0405  * Prepare the input UTF-8 string according to the nameprep profile.
0406  * The AllowUnassigned flag is true, use
0407  * stringprep_nameprep_no_unassigned() for false AllowUnassigned.
0408  * Returns 0 iff successful, or an error code.
0409  **/
0410 
0411 /**
0412  * stringprep_nameprep_no_unassigned:
0413  * @in: input/output array with string to prepare.
0414  * @maxlen: maximum length of input/output array.
0415  *
0416  * Prepare the input UTF-8 string according to the nameprep profile.
0417  * The AllowUnassigned flag is false, use stringprep_nameprep() for
0418  * true AllowUnassigned.  Returns 0 iff successful, or an error code.
0419  **/
0420 
0421 /**
0422  * stringprep_iscsi:
0423  * @in: input/output array with string to prepare.
0424  * @maxlen: maximum length of input/output array.
0425  *
0426  * Prepare the input UTF-8 string according to the draft iSCSI
0427  * stringprep profile.  Returns 0 iff successful, or an error code.
0428  **/
0429 
0430 /**
0431  * stringprep_kerberos5:
0432  * @in: input/output array with string to prepare.
0433  * @maxlen: maximum length of input/output array.
0434  *
0435  * Prepare the input UTF-8 string according to the draft Kerberos5
0436  * stringprep profile.  Returns 0 iff successful, or an error code.
0437  **/
0438 
0439 /**
0440  * stringprep_plain:
0441  * @in: input/output array with string to prepare.
0442  * @maxlen: maximum length of input/output array.
0443  *
0444  * Prepare the input UTF-8 string according to the draft SASL
0445  * ANONYMOUS profile.  Returns 0 iff successful, or an error code.
0446  **/
0447 
0448 /**
0449  * stringprep_xmpp_nodeprep:
0450  * @in: input/output array with string to prepare.
0451  * @maxlen: maximum length of input/output array.
0452  *
0453  * Prepare the input UTF-8 string according to the draft XMPP node
0454  * identifier profile.  Returns 0 iff successful, or an error code.
0455  **/
0456 
0457 /**
0458  * stringprep_xmpp_resourceprep:
0459  * @in: input/output array with string to prepare.
0460  * @maxlen: maximum length of input/output array.
0461  *
0462  * Prepare the input UTF-8 string according to the draft XMPP resource
0463  * identifier profile.  Returns 0 iff successful, or an error code.
0464  **/
0465 
0466 /**
0467  * stringprep_generic:
0468  * @in: input/output array with string to prepare.
0469  * @maxlen: maximum length of input/output array.
0470  *
0471  * Prepare the input UTF-8 string according to a hypothetical "generic"
0472  * stringprep profile. This is mostly used for debugging or when
0473  * constructing new stringprep profiles. Returns 0 iff successful, or
0474  * an error code.
0475  **/