File indexing completed on 2025-02-02 04:26:01
0001 /* Copyright 2015 the unarr project authors (see AUTHORS file). 0002 License: LGPLv3 */ 0003 0004 /* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/XADRARParser.m */ 0005 0006 #include "rar.h" 0007 0008 static inline uint8_t uint8le(unsigned char *data) { return data[0]; } 0009 static inline uint16_t uint16le(unsigned char *data) { return data[0] | data[1] << 8; } 0010 static inline uint32_t uint32le(unsigned char *data) { return data[0] | data[1] << 8 | data[2] << 16 | data[3] << 24; } 0011 0012 bool rar_parse_header(ar_archive *ar, struct rar_header *header) 0013 { 0014 unsigned char header_data[7]; 0015 size_t read = ar_read(ar->stream, header_data, sizeof(header_data)); 0016 if (read == 0) { 0017 ar->at_eof = true; 0018 return false; 0019 } 0020 if (read < sizeof(header_data)) 0021 return false; 0022 0023 header->crc = uint16le(header_data + 0); 0024 header->type = uint8le(header_data + 2); 0025 header->flags = uint16le(header_data + 3); 0026 header->size = uint16le(header_data + 5); 0027 0028 header->datasize = 0; 0029 if ((header->flags & LHD_LONG_BLOCK) || header->type == 0x74) { 0030 unsigned char size_data[4]; 0031 if (!(header->flags & LHD_LONG_BLOCK)) 0032 log("File header without LHD_LONG_BLOCK set"); 0033 read += ar_read(ar->stream, size_data, sizeof(size_data)); 0034 if (read < sizeof(header_data) + sizeof(size_data)) 0035 return false; 0036 header->datasize = uint32le(size_data); 0037 } 0038 0039 if (header->size < read) { 0040 warn("Invalid header size %d", header->size); 0041 return false; 0042 } 0043 0044 return true; 0045 } 0046 0047 bool rar_check_header_crc(ar_archive *ar) 0048 { 0049 unsigned char buffer[256]; 0050 uint16_t crc16, size; 0051 uint32_t crc32; 0052 0053 if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET)) 0054 return false; 0055 if (ar_read(ar->stream, buffer, 7) != 7) 0056 return false; 0057 0058 crc16 = uint16le(buffer + 0); 0059 size = uint16le(buffer + 5); 0060 if (size < 7) 0061 return false; 0062 size -= 7; 0063 0064 crc32 = ar_crc32(0, buffer + 2, 5); 0065 while (size > 0) { 0066 if (ar_read(ar->stream, buffer, smin(size, sizeof(buffer))) != smin(size, sizeof(buffer))) 0067 return false; 0068 crc32 = ar_crc32(crc32, buffer, smin(size, sizeof(buffer))); 0069 size -= (uint16_t)smin(size, sizeof(buffer)); 0070 } 0071 return (crc32 & 0xFFFF) == crc16; 0072 } 0073 0074 bool rar_parse_header_entry(ar_archive_rar *rar, struct rar_header *header, struct rar_entry *entry) 0075 { 0076 unsigned char data[21]; 0077 if (ar_read(rar->super.stream, data, sizeof(data)) != sizeof(data)) 0078 return false; 0079 0080 entry->size = uint32le(data + 0); 0081 entry->os = uint8le(data + 4); 0082 entry->crc = uint32le(data + 5); 0083 entry->dosdate = uint32le(data + 9); 0084 entry->version = uint8le(data + 13); 0085 entry->method = uint8le(data + 14); 0086 entry->namelen = uint16le(data + 15); 0087 entry->attrs = uint32le(data + 17); 0088 if ((header->flags & LHD_LARGE)) { 0089 unsigned char more_data[8]; 0090 if (ar_read(rar->super.stream, more_data, sizeof(more_data)) != sizeof(more_data)) 0091 return false; 0092 header->datasize += (uint64_t)uint32le(more_data + 0); 0093 entry->size += (uint64_t)uint32le(more_data + 4); 0094 } 0095 if (!ar_skip(rar->super.stream, entry->namelen)) 0096 return false; 0097 if ((header->flags & LHD_SALT)) { 0098 log("Skipping LHD_SALT"); 0099 ar_skip(rar->super.stream, 8); 0100 } 0101 0102 rar->entry.version = entry->version; 0103 rar->entry.method = entry->method; 0104 rar->entry.crc = entry->crc; 0105 rar->entry.header_size = header->size; 0106 rar->entry.solid = entry->version < 20 ? (rar->archive_flags & MHD_SOLID) : (header->flags & LHD_SOLID); 0107 free(rar->entry.name); 0108 rar->entry.name = NULL; 0109 0110 return true; 0111 } 0112 0113 /* this seems to be what RAR considers "Unicode" */ 0114 static char *rar_conv_unicode_to_utf8(const char *data, uint16_t len) 0115 { 0116 #define Check(cond) if (!(cond)) { free(str); return NULL; } else ((void)0) 0117 0118 uint8_t highbyte, flagbyte, flagbits, size, length, i; 0119 const uint8_t *in = (uint8_t *)data + strlen(data) + 1; 0120 const uint8_t *end_in = (uint8_t *)data + len; 0121 char *str = calloc(len + 1, 3); 0122 char *out = str; 0123 char *end_out = str + len * 3; 0124 0125 if (!str) 0126 return NULL; 0127 if (end_in - in <= 1) { 0128 memcpy(str, data, len); 0129 return str; 0130 } 0131 0132 highbyte = *in++; 0133 flagbyte = 0; 0134 flagbits = 0; 0135 size = 0; 0136 0137 while (in < end_in && out < end_out) { 0138 if (flagbits == 0) { 0139 flagbyte = *in++; 0140 flagbits = 8; 0141 } 0142 flagbits -= 2; 0143 switch ((flagbyte >> flagbits) & 3) { 0144 case 0: 0145 Check(in + 1 <= end_in); 0146 out += ar_conv_rune_to_utf8(*in++, out, end_out - out); 0147 size++; 0148 break; 0149 case 1: 0150 Check(in + 1 <= end_in); 0151 out += ar_conv_rune_to_utf8(((uint16_t)highbyte << 8) | *in++, out, end_out - out); 0152 size++; 0153 break; 0154 case 2: 0155 Check(in + 2 <= end_in); 0156 out += ar_conv_rune_to_utf8(((uint16_t)*(in + 1) << 8) | *in, out, end_out - out); 0157 in += 2; 0158 size++; 0159 break; 0160 case 3: 0161 Check(in + 1 <= end_in); 0162 length = *in++; 0163 if ((length & 0x80)) { 0164 uint8_t correction = *in++; 0165 for (i = 0; i < (length & 0x7F) + 2; i++) { 0166 Check(size < len); 0167 out += ar_conv_rune_to_utf8(((uint16_t)highbyte << 8) | (data[size] + (correction & 0xFF)), out, end_out - out); 0168 size++; 0169 } 0170 } 0171 else { 0172 for (i = 0; i < (length & 0x7F) + 2; i++) { 0173 Check(size < len); 0174 out += ar_conv_rune_to_utf8(data[size], out, end_out - out); 0175 size++; 0176 } 0177 } 0178 break; 0179 } 0180 } 0181 0182 return str; 0183 0184 #undef Check 0185 } 0186 0187 const char *rar_get_name(ar_archive *ar) 0188 { 0189 ar_archive_rar *rar = (ar_archive_rar *)ar; 0190 if (!rar->entry.name) { 0191 unsigned char data[21]; 0192 uint16_t namelen; 0193 char *name; 0194 0195 struct rar_header header; 0196 if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET)) 0197 return NULL; 0198 if (!rar_parse_header(ar, &header)) 0199 return NULL; 0200 if (ar_read(ar->stream, data, sizeof(data)) != sizeof(data)) 0201 return NULL; 0202 if ((header.flags & LHD_LARGE) && !ar_skip(ar->stream, 8)) 0203 return NULL; 0204 0205 namelen = uint16le(data + 15); 0206 name = malloc(namelen + 1); 0207 if (!name || ar_read(ar->stream, name, namelen) != namelen) { 0208 free(name); 0209 return NULL; 0210 } 0211 name[namelen] = '\0'; 0212 0213 if (!(header.flags & LHD_UNICODE)) { 0214 rar->entry.name = ar_conv_dos_to_utf8(name); 0215 free(name); 0216 } 0217 else if (namelen == strlen(name)) { 0218 rar->entry.name = name; 0219 } 0220 else { 0221 rar->entry.name = rar_conv_unicode_to_utf8(name, namelen); 0222 free(name); 0223 } 0224 /* normalize path separators */ 0225 if (rar->entry.name) { 0226 char *p = rar->entry.name; 0227 while ((p = strchr(p, '\\')) != NULL) { 0228 *p = '/'; 0229 } 0230 } 0231 0232 if (!ar_seek(ar->stream, ar->entry_offset + rar->entry.header_size, SEEK_SET)) 0233 warn("Couldn't seek back to the end of the entry header"); 0234 } 0235 return rar->entry.name; 0236 }