File indexing completed on 2025-02-02 04:26:01

0001 /* Copyright 2015 the unarr project authors (see AUTHORS file).
0002    License: LGPLv3 */
0003 
0004 /* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/XADRARParser.m */
0005 
0006 #include "rar.h"
0007 
/* Little-endian field readers for raw header bytes.
   Bytes are widened to the unsigned result type before being shifted, so a
   byte with its top bit set is never shifted into the sign bit of a promoted
   int (which would be undefined behaviour for the << 24 case). */
static inline uint8_t uint8le(const unsigned char *data)
{
    return data[0];
}

static inline uint16_t uint16le(const unsigned char *data)
{
    return (uint16_t)(data[0] | data[1] << 8);
}

static inline uint32_t uint32le(const unsigned char *data)
{
    return (uint32_t)data[0]
         | (uint32_t)data[1] << 8
         | (uint32_t)data[2] << 16
         | (uint32_t)data[3] << 24;
}
0011 
0012 bool rar_parse_header(ar_archive *ar, struct rar_header *header)
0013 {
0014     unsigned char header_data[7];
0015     size_t read = ar_read(ar->stream, header_data, sizeof(header_data));
0016     if (read == 0) {
0017         ar->at_eof = true;
0018         return false;
0019     }
0020     if (read < sizeof(header_data))
0021         return false;
0022 
0023     header->crc = uint16le(header_data + 0);
0024     header->type = uint8le(header_data + 2);
0025     header->flags = uint16le(header_data + 3);
0026     header->size = uint16le(header_data + 5);
0027 
0028     header->datasize = 0;
0029     if ((header->flags & LHD_LONG_BLOCK) || header->type == 0x74) {
0030         unsigned char size_data[4];
0031         if (!(header->flags & LHD_LONG_BLOCK))
0032             log("File header without LHD_LONG_BLOCK set");
0033         read += ar_read(ar->stream, size_data, sizeof(size_data));
0034         if (read < sizeof(header_data) + sizeof(size_data))
0035             return false;
0036         header->datasize = uint32le(size_data);
0037     }
0038 
0039     if (header->size < read) {
0040         warn("Invalid header size %d", header->size);
0041         return false;
0042     }
0043 
0044     return true;
0045 }
0046 
0047 bool rar_check_header_crc(ar_archive *ar)
0048 {
0049     unsigned char buffer[256];
0050     uint16_t crc16, size;
0051     uint32_t crc32;
0052 
0053     if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET))
0054         return false;
0055     if (ar_read(ar->stream, buffer, 7) != 7)
0056         return false;
0057 
0058     crc16 = uint16le(buffer + 0);
0059     size = uint16le(buffer + 5);
0060     if (size < 7)
0061         return false;
0062     size -= 7;
0063 
0064     crc32 = ar_crc32(0, buffer + 2, 5);
0065     while (size > 0) {
0066         if (ar_read(ar->stream, buffer, smin(size, sizeof(buffer))) != smin(size, sizeof(buffer)))
0067             return false;
0068         crc32 = ar_crc32(crc32, buffer, smin(size, sizeof(buffer)));
0069         size -= (uint16_t)smin(size, sizeof(buffer));
0070     }
0071     return (crc32 & 0xFFFF) == crc16;
0072 }
0073 
0074 bool rar_parse_header_entry(ar_archive_rar *rar, struct rar_header *header, struct rar_entry *entry)
0075 {
0076     unsigned char data[21];
0077     if (ar_read(rar->super.stream, data, sizeof(data)) != sizeof(data))
0078         return false;
0079 
0080     entry->size = uint32le(data + 0);
0081     entry->os = uint8le(data + 4);
0082     entry->crc = uint32le(data + 5);
0083     entry->dosdate = uint32le(data + 9);
0084     entry->version = uint8le(data + 13);
0085     entry->method = uint8le(data + 14);
0086     entry->namelen = uint16le(data + 15);
0087     entry->attrs = uint32le(data + 17);
0088     if ((header->flags & LHD_LARGE)) {
0089         unsigned char more_data[8];
0090         if (ar_read(rar->super.stream, more_data, sizeof(more_data)) != sizeof(more_data))
0091             return false;
0092         header->datasize += (uint64_t)uint32le(more_data + 0);
0093         entry->size += (uint64_t)uint32le(more_data + 4);
0094     }
0095     if (!ar_skip(rar->super.stream, entry->namelen))
0096         return false;
0097     if ((header->flags & LHD_SALT)) {
0098         log("Skipping LHD_SALT");
0099         ar_skip(rar->super.stream, 8);
0100     }
0101 
0102     rar->entry.version = entry->version;
0103     rar->entry.method = entry->method;
0104     rar->entry.crc = entry->crc;
0105     rar->entry.header_size = header->size;
0106     rar->entry.solid = entry->version < 20 ? (rar->archive_flags & MHD_SOLID) : (header->flags & LHD_SOLID);
0107     free(rar->entry.name);
0108     rar->entry.name = NULL;
0109 
0110     return true;
0111 }
0112 
/* Decodes what RAR considers a "Unicode" file name into UTF-8.

   data holds len bytes laid out as: a NUL-terminated single-byte name,
   then one "high byte", then a stream of flag bytes and payload bytes.
   Each flag byte carries four 2-bit opcodes (most significant pair first):
     0 - one payload byte is the code point
     1 - one payload byte is the low byte, combined with the high byte
     2 - two payload bytes form a little-endian 16-bit code point
     3 - a run copied from the single-byte name: a length byte, and if its
         top bit is set a correction byte that is added (mod 256) to each
         name byte before it is combined with the high byte
   data must additionally be NUL-terminated (strlen is called on it).

   Returns a newly allocated NUL-terminated string the caller must free,
   or NULL on allocation failure or when the encoded data is truncated.
   If no encoded part follows the single-byte name, a plain copy of data
   is returned. */
static char *rar_conv_unicode_to_utf8(const char *data, uint16_t len)
{
#define Check(cond) if (!(cond)) { free(str); return NULL; } else ((void)0)

    /* all name bytes are read as unsigned so that bytes >= 0x80 are not
       sign-extended when they become (part of) a code point */
    const uint8_t *bytes = (const uint8_t *)data;
    const uint8_t *in = bytes + strlen(data) + 1;
    const uint8_t *end_in = bytes + len;
    uint8_t highbyte, flagbyte, flagbits, length, correction, i;
    /* index into the single-byte name; wider than uint8_t so that names
       longer than 255 characters do not wrap around */
    size_t size;
    char *str, *out, *end_out;

    /* up to 3 UTF-8 bytes per decoded character, plus a zeroed tail that
       serves as the terminator */
    str = calloc((size_t)len + 1, 3);
    if (!str)
        return NULL;
    out = str;
    end_out = str + (size_t)len * 3;

    if (end_in - in <= 1) {
        memcpy(str, data, len);
        return str;
    }

    highbyte = *in++;
    flagbyte = 0;
    flagbits = 0;
    size = 0;

    while (in < end_in && out < end_out) {
        if (flagbits == 0) {
            flagbyte = *in++;
            flagbits = 8;
        }
        flagbits -= 2;
        switch ((flagbyte >> flagbits) & 3) {
        case 0:
            Check(in + 1 <= end_in);
            out += ar_conv_rune_to_utf8(*in++, out, end_out - out);
            size++;
            break;
        case 1:
            Check(in + 1 <= end_in);
            out += ar_conv_rune_to_utf8(((uint16_t)highbyte << 8) | *in++, out, end_out - out);
            size++;
            break;
        case 2:
            Check(in + 2 <= end_in);
            out += ar_conv_rune_to_utf8(((uint16_t)*(in + 1) << 8) | *in, out, end_out - out);
            in += 2;
            size++;
            break;
        case 3:
            Check(in + 1 <= end_in);
            length = *in++;
            if ((length & 0x80)) {
                /* the correction byte must be inside the encoded data too */
                Check(in + 1 <= end_in);
                correction = *in++;
                for (i = 0; i < (length & 0x7F) + 2; i++) {
                    Check(size < len);
                    /* the corrected low byte wraps at 8 bits and must not
                       spill into the high byte */
                    out += ar_conv_rune_to_utf8(((uint16_t)highbyte << 8) | ((bytes[size] + correction) & 0xFF), out, end_out - out);
                    size++;
                }
            }
            else {
                for (i = 0; i < (length & 0x7F) + 2; i++) {
                    Check(size < len);
                    out += ar_conv_rune_to_utf8(bytes[size], out, end_out - out);
                    size++;
                }
            }
            break;
        }
    }

    return str;

#undef Check
}
0186 
0187 const char *rar_get_name(ar_archive *ar)
0188 {
0189     ar_archive_rar *rar = (ar_archive_rar *)ar;
0190     if (!rar->entry.name) {
0191         unsigned char data[21];
0192         uint16_t namelen;
0193         char *name;
0194 
0195         struct rar_header header;
0196         if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET))
0197             return NULL;
0198         if (!rar_parse_header(ar, &header))
0199             return NULL;
0200         if (ar_read(ar->stream, data, sizeof(data)) != sizeof(data))
0201             return NULL;
0202         if ((header.flags & LHD_LARGE) && !ar_skip(ar->stream, 8))
0203             return NULL;
0204 
0205         namelen = uint16le(data + 15);
0206         name = malloc(namelen + 1);
0207         if (!name || ar_read(ar->stream, name, namelen) != namelen) {
0208             free(name);
0209             return NULL;
0210         }
0211         name[namelen] = '\0';
0212 
0213         if (!(header.flags & LHD_UNICODE)) {
0214             rar->entry.name = ar_conv_dos_to_utf8(name);
0215             free(name);
0216         }
0217         else if (namelen == strlen(name)) {
0218             rar->entry.name = name;
0219         }
0220         else {
0221             rar->entry.name = rar_conv_unicode_to_utf8(name, namelen);
0222             free(name);
0223         }
0224         /* normalize path separators */
0225         if (rar->entry.name) {
0226             char *p = rar->entry.name;
0227             while ((p = strchr(p, '\\')) != NULL) {
0228                 *p = '/';
0229             }
0230         }
0231 
0232         if (!ar_seek(ar->stream, ar->entry_offset + rar->entry.header_size, SEEK_SET))
0233             warn("Couldn't seek back to the end of the entry header");
0234     }
0235     return rar->entry.name;
0236 }