Warning, file /office/calligra/libs/store/KoLZF.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 /* This file is part of the KDE project 0002 Copyright (C) 2005-2006 Ariya Hidayat <ariya@kde.org> 0003 Copyright (C) 2015 Friedrich W. H. Kossebau <kossebau@kde.org> 0004 0005 This library is free software; you can redistribute it and/or 0006 modify it under the terms of the GNU Library General Public 0007 License as published by the Free Software Foundation; either 0008 version 2 of the License, or (at your option) any later version. 0009 0010 This library is distributed in the hope that it will be useful, 0011 but WITHOUT ANY WARRANTY; without even the implied warranty of 0012 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0013 Library General Public License for more details. 0014 0015 You should have received a copy of the GNU Library General Public License 0016 along with this library; see the file COPYING.LIB. If not, write to 0017 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 0018 Boston, MA 02110-1301, USA. 0019 */ 0020 0021 #include "KoLZF.h" 0022 0023 #include <QByteArray> 0024 #include <QDebug> 0025 0026 0027 namespace KoLZF { 0028 0029 #define HASH_LOG 12 0030 #define HASH_SIZE (1<< HASH_LOG) 0031 #define HASH_MASK (HASH_SIZE-1) 0032 0033 #ifdef __GNUC__ 0034 #pragma GCC diagnostic ignored "-Wcast-align" 0035 #endif 0036 #define UPDATE_HASH(v,p) { v = *((quint16*)p); v ^= *((quint16*)(p+1))^(v>>(16-HASH_LOG)); } 0037 0038 #define MAX_COPY 32 0039 #define MAX_LEN 264 /* 256 + 8 */ 0040 #define MAX_DISTANCE 8192 0041 0042 // Lossless compression using LZF algorithm, this is faster on modern CPU than 0043 // the original implementation in http://liblzf.plan9.de/ 0044 int compress(const void* input, int length, void* output, int maxout) 0045 { 0046 if (input == 0 || length < 1 || output == 0 || maxout < 2) { 0047 return 0; 0048 } 0049 0050 const quint8* ip = (const quint8*) input; 0051 const quint8* ip_limit = ip + length - MAX_COPY - 4; 0052 quint8* op = (quint8*) output; 0053 const quint8* last_op = (quint8*) output + maxout - 1; 0054 0055 const quint8* htab[HASH_SIZE]; 0056 const quint8** hslot; 0057 quint32 hval; 0058 0059 quint8* ref; 0060 qint32 copy; 0061 qint32 len; 0062 qint32 distance; 0063 quint8* anchor; 0064 0065 /* initializes hash table */ 0066 for (hslot = htab; hslot < htab + HASH_SIZE; ++hslot) { 0067 *hslot = ip; 0068 } 0069 0070 /* we start with literal copy */ 0071 copy = 0; 0072 *op++ = MAX_COPY - 1; 0073 0074 /* main loop */ 0075 while (ip < ip_limit) { 0076 /* find potential match */ 0077 UPDATE_HASH(hval, ip); 0078 hslot = htab + (hval & HASH_MASK); 0079 ref = (quint8*) * hslot; 0080 0081 /* update hash table */ 0082 *hslot = ip; 0083 0084 /* find itself? then it's no match */ 0085 if (ip == ref) 0086 goto literal; 0087 0088 /* is this a match? check the first 2 bytes */ 0089 if (*((quint16*)ref) != *((quint16*)ip)) 0090 goto literal; 0091 0092 /* now check the 3rd byte */ 0093 if (ref[2] != ip[2]) 0094 goto literal; 0095 0096 /* calculate distance to the match */ 0097 distance = ip - ref; 0098 0099 /* skip if too far away */ 0100 if (distance >= MAX_DISTANCE) 0101 goto literal; 0102 0103 /* here we have 3-byte matches */ 0104 anchor = (quint8*)ip; 0105 len = 3; 0106 ref += 3; 0107 ip += 3; 0108 0109 /* now we have to check how long the match is */ 0110 if (ip < ip_limit - MAX_LEN) { 0111 while (len < MAX_LEN - 8) { 0112 /* unroll 8 times */ 0113 if (*ref++ != *ip++) break; 0114 if (*ref++ != *ip++) break; 0115 if (*ref++ != *ip++) break; 0116 if (*ref++ != *ip++) break; 0117 if (*ref++ != *ip++) break; 0118 if (*ref++ != *ip++) break; 0119 if (*ref++ != *ip++) break; 0120 if (*ref++ != *ip++) break; 0121 len += 8; 0122 } 0123 --ip; 0124 } 0125 len = ip - anchor; 0126 0127 /* just before the last non-matching byte */ 0128 ip = anchor + len; 0129 0130 /* if we have copied something, adjust the copy count */ 0131 if (copy) { 0132 /* copy is biased, '0' means 1 byte copy */ 0133 anchor = anchor - copy - 1; 0134 *(op - copy - 1) = copy - 1; 0135 copy = 0; 0136 } else { 0137 /* back, to overwrite the copy count */ 0138 --op; 0139 } 0140 0141 /* length is biased, '1' means a match of 3 bytes */ 0142 len -= 2; 0143 0144 /* distance is also biased */ 0145 --distance; 0146 0147 /* encode the match */ 0148 if (len < 7) { 0149 if (op + 2 > last_op) { 0150 return 0; 0151 } 0152 *op++ = (len << 5) + (distance >> 8); 0153 } else { 0154 if (op + 3 > last_op) { 0155 return 0; 0156 } 0157 *op++ = (7 << 5) + (distance >> 8); 0158 *op++ = len - 7; 0159 } 0160 *op++ = (distance & 255); 0161 0162 /* assuming next will be literal copy */ 0163 *op++ = MAX_COPY - 1; 0164 0165 /* update the hash at match boundary */ 0166 --ip; 0167 UPDATE_HASH(hval, ip); 0168 htab[hval & HASH_MASK] = ip; 0169 ++ip; 0170 0171 continue; 0172 0173 literal: 0174 if (op + 1 > last_op) { 0175 return 0; 0176 } 0177 *op++ = *ip++; 0178 ++copy; 0179 if (copy >= MAX_COPY) { 0180 // start next literal copy item 0181 copy = 0; 0182 *op++ = MAX_COPY - 1; 0183 } 0184 } 0185 0186 /* left-over as literal copy */ 0187 ip_limit = (const quint8*)input + length; 0188 0189 // TODO: smart calculation to see here if enough output is left 0190 0191 while (ip < ip_limit) { 0192 if (op == last_op) { 0193 return 0; 0194 } 0195 *op++ = *ip++; 0196 ++copy; 0197 if (copy == MAX_COPY) { 0198 // start next literal copy item 0199 copy = 0; 0200 if (ip < ip_limit) { 0201 if (op == last_op) { 0202 return 0; 0203 } 0204 *op++ = MAX_COPY - 1; 0205 } else { 0206 // do not write possibly out of bounds 0207 // just pretend we moved one more, for the final treatment 0208 ++op; 0209 } 0210 } 0211 } 0212 0213 /* if we have copied something, adjust the copy length */ 0214 if (copy) { 0215 *(op - copy - 1) = copy - 1; 0216 } else { 0217 --op; 0218 } 0219 0220 return op - (quint8*)output; 0221 } 0222 0223 int decompress(const void* input, int length, void* output, int maxout) 0224 { 0225 if (input == 0 || length < 1) { 0226 return 0; 0227 } 0228 if (output == 0 || maxout < 1) { 0229 return 0; 0230 } 0231 0232 const quint8* ip = (const quint8*) input; 0233 const quint8* ip_limit = ip + length - 1; 0234 quint8* op = (quint8*) output; 0235 quint8* op_limit = op + maxout; 0236 quint8* ref; 0237 0238 while (ip < ip_limit) { 0239 quint32 ctrl = (*ip) + 1; 0240 quint32 ofs = ((*ip) & 31) << 8; 0241 quint32 len = (*ip++) >> 5; 0242 0243 if (ctrl < 33) { 0244 /* literal copy */ 0245 if (op + ctrl > op_limit) 0246 return 0; 0247 0248 /* crazy unrolling */ 0249 if (ctrl) { 0250 *op++ = *ip++; 0251 --ctrl; 0252 0253 if (ctrl) { 0254 *op++ = *ip++; 0255 --ctrl; 0256 0257 if (ctrl) { 0258 *op++ = *ip++; 0259 --ctrl; 0260 0261 for (;ctrl; --ctrl) 0262 *op++ = *ip++; 0263 } 0264 } 0265 } 0266 } else { 0267 /* back reference */ 0268 --len; 0269 ref = op - ofs; 0270 --ref; 0271 0272 if (len == 7 - 1) 0273 len += *ip++; 0274 0275 ref -= *ip++; 0276 0277 if (op + len + 3 > op_limit) 0278 return 0; 0279 0280 if (ref < (quint8 *)output) 0281 return 0; 0282 0283 *op++ = *ref++; 0284 *op++ = *ref++; 0285 *op++ = *ref++; 0286 if (len) 0287 for (; len; --len) 0288 *op++ = *ref++; 0289 } 0290 } 0291 0292 return op - (quint8*)output; 0293 } 0294 0295 0296 QByteArray compress(const QByteArray& input) 0297 { 0298 const void* const in_data = (const void*) input.constData(); 0299 unsigned int in_len = (unsigned int)input.size(); 0300 0301 QByteArray output; 0302 output.resize(in_len + 4 + 1); 0303 0304 // we use 4 bytes to store uncompressed length 0305 // and 1 extra byte as flag (0=uncompressed, 1=compressed) 0306 output[0] = in_len & 255; 0307 output[1] = (in_len >> 8) & 255; 0308 output[2] = (in_len >> 16) & 255; 0309 output[3] = (in_len >> 24) & 255; 0310 output[4] = 1; 0311 0312 unsigned int out_len = in_len - 1; 0313 unsigned char* out_data = (unsigned char*) output.data() + 5; 0314 0315 unsigned int len = compress(in_data, in_len, out_data, out_len); 0316 0317 if ((len > out_len) || (len == 0)) { 0318 // output buffer is too small, likely because the data can't 0319 // be compressed. so here just copy without compression 0320 output.replace(5, output.size()-5, input); 0321 0322 // flag must indicate "uncompressed block" 0323 output[4] = 0; 0324 } else { 0325 output.resize(len + 4 + 1); 0326 } 0327 0328 // minimize memory 0329 output.squeeze(); 0330 0331 return output; 0332 } 0333 0334 // will not squeeze output 0335 void decompress(const QByteArray& input, QByteArray& output) 0336 { 0337 Q_ASSERT(input.size() >= 5); 0338 0339 // read out first how big is the uncompressed size 0340 unsigned int unpack_size = 0; 0341 unpack_size |= ((quint8)input[0]); 0342 unpack_size |= ((quint8)input[1]) << 8; 0343 unpack_size |= ((quint8)input[2]) << 16; 0344 unpack_size |= ((quint8)input[3]) << 24; 0345 0346 // prepare the output 0347 output.resize(unpack_size); 0348 0349 // compression flag 0350 quint8 flag = (quint8)input[4]; 0351 0352 // prepare for lzf 0353 const void* const in_data = (const void*)(input.constData() + 5); 0354 unsigned int in_len = (unsigned int)input.size() - 5; 0355 unsigned char* out_data = (unsigned char*) output.data(); 0356 unsigned int out_len = (unsigned int)unpack_size; 0357 0358 if (flag == 0) { 0359 memcpy(output.data(), in_data, in_len); 0360 } else { 0361 unsigned int len = decompress(in_data, in_len, out_data, out_len); 0362 Q_ASSERT(len == out_len); 0363 Q_UNUSED(len) 0364 } 0365 } 0366 0367 }