File indexing completed on 2024-05-12 15:59:59
0001 /* This file is part of the KDE project 0002 SPDX-FileCopyrightText: 2005-2006 Ariya Hidayat <ariya@kde.org> 0003 SPDX-FileCopyrightText: 2015 Friedrich W. H. Kossebau <kossebau@kde.org> 0004 0005 SPDX-License-Identifier: LGPL-2.0-or-later 0006 */ 0007 0008 #include "KoLZF.h" 0009 0010 #include <QByteArray> 0011 #include <QDebug> 0012 0013 0014 namespace KoLZF { 0015 0016 #define HASH_LOG 12 0017 #define HASH_SIZE (1<< HASH_LOG) 0018 #define HASH_MASK (HASH_SIZE-1) 0019 0020 #ifdef __GNUC__ 0021 #pragma GCC diagnostic ignored "-Wcast-align" 0022 #endif 0023 #define UPDATE_HASH(v,p) { v = *((quint16*)p); v ^= *((quint16*)(p+1))^(v>>(16-HASH_LOG)); } 0024 0025 #define MAX_COPY 32 0026 #define MAX_LEN 264 /* 256 + 8 */ 0027 #define MAX_DISTANCE 8192 0028 0029 // Lossless compression using LZF algorithm, this is faster on modern CPU than 0030 // the original implementation in http://liblzf.plan9.de/ 0031 int compress(const void* input, int length, void* output, int maxout) 0032 { 0033 if (input == 0 || length < 1 || output == 0 || maxout < 2) { 0034 return 0; 0035 } 0036 0037 const quint8* ip = (const quint8*) input; 0038 const quint8* ip_limit = ip + length - MAX_COPY - 4; 0039 quint8* op = (quint8*) output; 0040 const quint8* last_op = (quint8*) output + maxout - 1; 0041 0042 const quint8* htab[HASH_SIZE]; 0043 const quint8** hslot; 0044 quint32 hval; 0045 0046 quint8* ref; 0047 qint32 copy; 0048 qint32 len; 0049 qint32 distance; 0050 quint8* anchor; 0051 0052 /* initializes hash table */ 0053 for (hslot = htab; hslot < htab + HASH_SIZE; ++hslot) { 0054 *hslot = ip; 0055 } 0056 0057 /* we start with literal copy */ 0058 copy = 0; 0059 *op++ = MAX_COPY - 1; 0060 0061 /* main loop */ 0062 while (ip < ip_limit) { 0063 /* find potential match */ 0064 UPDATE_HASH(hval, ip); 0065 hslot = htab + (hval & HASH_MASK); 0066 ref = (quint8*) * hslot; 0067 0068 /* update hash table */ 0069 *hslot = ip; 0070 0071 /* find itself? then it's no match */ 0072 if (ip == ref) 0073 goto literal; 0074 0075 /* is this a match? check the first 2 bytes */ 0076 if (*((quint16*)ref) != *((quint16*)ip)) 0077 goto literal; 0078 0079 /* now check the 3rd byte */ 0080 if (ref[2] != ip[2]) 0081 goto literal; 0082 0083 /* calculate distance to the match */ 0084 distance = ip - ref; 0085 0086 /* skip if too far away */ 0087 if (distance >= MAX_DISTANCE) 0088 goto literal; 0089 0090 /* here we have 3-byte matches */ 0091 anchor = (quint8*)ip; 0092 len = 3; 0093 ref += 3; 0094 ip += 3; 0095 0096 /* now we have to check how long the match is */ 0097 if (ip < ip_limit - MAX_LEN) { 0098 while (len < MAX_LEN - 8) { 0099 /* unroll 8 times */ 0100 if (*ref++ != *ip++) break; 0101 if (*ref++ != *ip++) break; 0102 if (*ref++ != *ip++) break; 0103 if (*ref++ != *ip++) break; 0104 if (*ref++ != *ip++) break; 0105 if (*ref++ != *ip++) break; 0106 if (*ref++ != *ip++) break; 0107 if (*ref++ != *ip++) break; 0108 len += 8; 0109 } 0110 --ip; 0111 } 0112 len = ip - anchor; 0113 0114 /* just before the last non-matching byte */ 0115 ip = anchor + len; 0116 0117 /* if we have copied something, adjust the copy count */ 0118 if (copy) { 0119 /* copy is biased, '0' means 1 byte copy */ 0120 anchor = anchor - copy - 1; 0121 *(op - copy - 1) = copy - 1; 0122 copy = 0; 0123 } else { 0124 /* back, to overwrite the copy count */ 0125 --op; 0126 } 0127 0128 /* length is biased, '1' means a match of 3 bytes */ 0129 len -= 2; 0130 0131 /* distance is also biased */ 0132 --distance; 0133 0134 /* encode the match */ 0135 if (len < 7) { 0136 if (op + 2 > last_op) { 0137 return 0; 0138 } 0139 *op++ = (len << 5) + (distance >> 8); 0140 } else { 0141 if (op + 3 > last_op) { 0142 return 0; 0143 } 0144 *op++ = (7 << 5) + (distance >> 8); 0145 *op++ = len - 7; 0146 } 0147 *op++ = (distance & 255); 0148 0149 /* assuming next will be literal copy */ 0150 *op++ = MAX_COPY - 1; 0151 0152 /* update the hash at match boundary */ 0153 --ip; 0154 UPDATE_HASH(hval, ip); 0155 htab[hval & HASH_MASK] = ip; 0156 ++ip; 0157 0158 continue; 0159 0160 literal: 0161 if (op + 1 > last_op) { 0162 return 0; 0163 } 0164 *op++ = *ip++; 0165 ++copy; 0166 if (copy >= MAX_COPY) { 0167 // start next literal copy item 0168 copy = 0; 0169 *op++ = MAX_COPY - 1; 0170 } 0171 } 0172 0173 /* left-over as literal copy */ 0174 ip_limit = (const quint8*)input + length; 0175 0176 // TODO: smart calculation to see here if enough output is left 0177 0178 while (ip < ip_limit) { 0179 if (op == last_op) { 0180 return 0; 0181 } 0182 *op++ = *ip++; 0183 ++copy; 0184 if (copy == MAX_COPY) { 0185 // start next literal copy item 0186 copy = 0; 0187 if (ip < ip_limit) { 0188 if (op == last_op) { 0189 return 0; 0190 } 0191 *op++ = MAX_COPY - 1; 0192 } else { 0193 // do not write possibly out of bounds 0194 // just pretend we moved one more, for the final treatment 0195 ++op; 0196 } 0197 } 0198 } 0199 0200 /* if we have copied something, adjust the copy length */ 0201 if (copy) { 0202 *(op - copy - 1) = copy - 1; 0203 } else { 0204 --op; 0205 } 0206 0207 return op - (quint8*)output; 0208 } 0209 0210 int decompress(const void* input, int length, void* output, int maxout) 0211 { 0212 if (input == 0 || length < 1) { 0213 return 0; 0214 } 0215 if (output == 0 || maxout < 1) { 0216 return 0; 0217 } 0218 0219 const quint8* ip = (const quint8*) input; 0220 const quint8* ip_limit = ip + length - 1; 0221 quint8* op = (quint8*) output; 0222 quint8* op_limit = op + maxout; 0223 quint8* ref; 0224 0225 while (ip < ip_limit) { 0226 quint32 ctrl = (*ip) + 1; 0227 quint32 ofs = ((*ip) & 31) << 8; 0228 quint32 len = (*ip++) >> 5; 0229 0230 if (ctrl < 33) { 0231 /* literal copy */ 0232 if (op + ctrl > op_limit) 0233 return 0; 0234 0235 /* crazy unrolling */ 0236 if (ctrl) { 0237 *op++ = *ip++; 0238 --ctrl; 0239 0240 if (ctrl) { 0241 *op++ = *ip++; 0242 --ctrl; 0243 0244 if (ctrl) { 0245 *op++ = *ip++; 0246 --ctrl; 0247 0248 for (;ctrl; --ctrl) 0249 *op++ = *ip++; 0250 } 0251 } 0252 } 0253 } else { 0254 /* back reference */ 0255 --len; 0256 ref = op - ofs; 0257 --ref; 0258 0259 if (len == 7 - 1) 0260 len += *ip++; 0261 0262 ref -= *ip++; 0263 0264 if (op + len + 3 > op_limit) 0265 return 0; 0266 0267 if (ref < (quint8 *)output) 0268 return 0; 0269 0270 *op++ = *ref++; 0271 *op++ = *ref++; 0272 *op++ = *ref++; 0273 if (len) 0274 for (; len; --len) 0275 *op++ = *ref++; 0276 } 0277 } 0278 0279 return op - (quint8*)output; 0280 } 0281 0282 0283 QByteArray compress(const QByteArray& input) 0284 { 0285 const void* const in_data = (const void*) input.constData(); 0286 unsigned int in_len = (unsigned int)input.size(); 0287 0288 QByteArray output; 0289 output.resize(in_len + 4 + 1); 0290 0291 // we use 4 bytes to store uncompressed length 0292 // and 1 extra byte as flag (0=uncompressed, 1=compressed) 0293 output[0] = in_len & 255; 0294 output[1] = (in_len >> 8) & 255; 0295 output[2] = (in_len >> 16) & 255; 0296 output[3] = (in_len >> 24) & 255; 0297 output[4] = 1; 0298 0299 unsigned int out_len = in_len - 1; 0300 unsigned char* out_data = (unsigned char*) output.data() + 5; 0301 0302 unsigned int len = compress(in_data, in_len, out_data, out_len); 0303 0304 if ((len > out_len) || (len == 0)) { 0305 // output buffer is too small, likely because the data can't 0306 // be compressed. so here just copy without compression 0307 output.replace(5, output.size()-5, input); 0308 0309 // flag must indicate "uncompressed block" 0310 output[4] = 0; 0311 } else { 0312 output.resize(len + 4 + 1); 0313 } 0314 0315 // minimize memory 0316 output.squeeze(); 0317 0318 return output; 0319 } 0320 0321 // will not squeeze output 0322 void decompress(const QByteArray& input, QByteArray& output) 0323 { 0324 Q_ASSERT(input.size() >= 5); 0325 0326 // read out first how big is the uncompressed size 0327 unsigned int unpack_size = 0; 0328 unpack_size |= ((quint8)input[0]); 0329 unpack_size |= ((quint8)input[1]) << 8; 0330 unpack_size |= ((quint8)input[2]) << 16; 0331 unpack_size |= ((quint8)input[3]) << 24; 0332 0333 // prepare the output 0334 output.resize(unpack_size); 0335 0336 // compression flag 0337 quint8 flag = (quint8)input[4]; 0338 0339 // prepare for lzf 0340 const void* const in_data = (const void*)(input.constData() + 5); 0341 unsigned int in_len = (unsigned int)input.size() - 5; 0342 unsigned char* out_data = (unsigned char*) output.data(); 0343 unsigned int out_len = (unsigned int)unpack_size; 0344 0345 if (flag == 0) { 0346 memcpy(output.data(), in_data, in_len); 0347 } else { 0348 unsigned int len = decompress(in_data, in_len, out_data, out_len); 0349 Q_ASSERT(len == out_len); 0350 Q_UNUSED(len); 0351 } 0352 } 0353 0354 }