File indexing completed on 2024-12-22 04:04:12
0001 /* Copyright (C) 2001-2019 Peter Selinger. 0002 This file is part of Potrace. It is free software and it is covered 0003 by the GNU General Public License. See the file COPYING for details. */ 0004 0005 0006 /* code for adaptive LZW compression, as used in PostScript. */ 0007 0008 #ifdef HAVE_CONFIG_H 0009 #include <config.h> 0010 #endif 0011 0012 #include <stdlib.h> 0013 #include <stdio.h> 0014 #include <errno.h> 0015 #include <string.h> 0016 0017 #include "lists.h" 0018 #include "bitops.h" 0019 #include "lzw.h" 0020 0021 /* ---------------------------------------------------------------------- */ 0022 /* compression state specification */ 0023 0024 /* The compression algorithm follows the following specification, 0025 expressed as a state machine. A state is a triple {s,d,n}, where s 0026 is a string of input symbols, d is a dictionary, which is a 0027 function from strings to output symbols, and n is the dictionary 0028 size, or equivalently, the next unused output symbol. There are 0029 also special states init and stop. emit[b, code] is a function 0030 which emits the code 'code' as a b-bit value into the output 0031 bitstream. hibit(n) returns the least number of binary digits 0032 required to represent n. 0033 0034 init ---> {[], newdict, 258} 0035 0036 where [] is the empty string, and newdict maps each of the 256 0037 singleton strings to itself. (Note that there are two special 0038 output symbols 256 and 257, so that the next available one is 0039 258). Note: hibit(258)=9. 0040 0041 {[], d, n} (input c) ---> (emit[hibit(n), 256]) {c, d, n} 0042 0043 {s,d,n} (input c) ---> {s*c,d,n} 0044 0045 if s!=[], s*c is in the domain of d. Here s*c is the strings s 0046 extended by the character c. 0047 0048 {s,d,n} (input c) ---> (emit[hibit(n), d(s)]) {c,d',n+1} 0049 0050 if s!=[], s*c is not in the domain of d, and hibit(n+2) <= 12. 0051 Here d'=d+{s*c->n}. 0052 0053 {s,d,n} (input c) ---> 0054 (emit[hibit(n), d(s)]) (emit[hibit(n+1), 256]) {c, newdict, 258} 0055 0056 if s!=[], s*c is not in the domain of d, and hibit(n+2) > 12. 0057 0058 {s,d,n} (input EOD) ---> (emit[hibit(n), d(s)]) (emit[hibit(n+1), 257]) stop 0059 0060 where s != []. Here, EOD stands for end-of-data. 0061 0062 {[],d,n} (input EOD) ---> (emit[hibit(n), 256]) (emit[hibit(n), 257]) stop 0063 0064 Notes: 0065 0066 * Each reachable state {s,d,n} satisfies hibit(n+1) <= 12. 0067 * Only codes of 12 or fewer bits are emitted. 0068 * Each reachable state {s,d,n} satisfies s=[] or s is in the domain of d. 0069 * The domain of d is always prefix closed (except for the empty prefix) 0070 * The state machine is deterministic and non-blocking. 0071 0072 */ 0073 0074 0075 /* ---------------------------------------------------------------------- */ 0076 /* private state */ 0077 0078 #define BITBUF_TYPE unsigned int 0079 0080 /* the dictionary is implemented as a tree of strings under the prefix 0081 order. The tree is in turns represented as a linked list of 0082 lzw_dict_t structures, with "children" pointing to a node's first 0083 child, and "next" pointing to a node's next sibling. As an 0084 optimization, the top-level nodes (singleton strings) are 0085 implemented lazily, i.e., the corresponding entry is not actually 0086 created until it is accessed. */ 0087 0088 struct lzw_dict_s { 0089 char c; /* last character of string represented by this entry */ 0090 unsigned int code; /* code for the string represented by this entry */ 0091 int freq; /* how often searched? For optimization only */ 0092 struct lzw_dict_s *children; /* list of sub-entries */ 0093 struct lzw_dict_s *next; /* for making a linked list */ 0094 }; 0095 typedef struct lzw_dict_s lzw_dict_t; 0096 0097 /* A state {s,d,n} is represented by the "dictionary state" part of 0098 the lzw_state_t structure. Here, s is a pointer directly to the node 0099 of the dictionary structure corresponding to the string s, or NULL 0100 if s=[]. Further, the lzw_state_t structure contains a buffer of 0101 pending output bits, and a flag indicating whether the EOD (end of 0102 data) has been reached in the input. */ 0103 0104 struct lzw_state_s { 0105 /* dictionary state */ 0106 int n; /* current size of the dictionary */ 0107 lzw_dict_t *d; /* pointer to dictionary */ 0108 lzw_dict_t *s; /* pointer to current string, or NULL at beginning */ 0109 0110 /* buffers for pending output */ 0111 BITBUF_TYPE buf; /* bits scheduled for output - left aligned, 0 padded */ 0112 int bufsize; /* number of bits scheduled for output. */ 0113 int eod; /* flush buffer? */ 0114 }; 0115 typedef struct lzw_state_s lzw_state_t; 0116 0117 /* ---------------------------------------------------------------------- */ 0118 /* auxiliary functions which operate on dictionary states */ 0119 0120 /* recursively free an lzw_dict_t object */ 0121 static void lzw_free_dict(lzw_dict_t *s) { 0122 lzw_dict_t *e; 0123 0124 list_forall_unlink(e, s) { 0125 lzw_free_dict(e->children); 0126 free(e); 0127 } 0128 } 0129 0130 /* re-initialize the lzw state's dictionary state to "newdict", 0131 freeing any old dictionary. */ 0132 static void lzw_clear_table(lzw_state_t *st) { 0133 0134 lzw_free_dict(st->d); 0135 st->d = NULL; 0136 st->n = 258; 0137 st->s = NULL; 0138 } 0139 0140 /* ---------------------------------------------------------------------- */ 0141 /* auxiliary functions for reading/writing the bit buffer */ 0142 0143 /* write the code to the bit buffer. Precondition st->bufsize <= 7. 0144 Note: this does not change the dictionary state; in particular, 0145 n must be updated between consecutive calls. */ 0146 static inline void lzw_emit(unsigned int code, lzw_state_t *st) { 0147 BITBUF_TYPE mask; 0148 int bits = hibit(st->n); 0149 0150 /* fill bit buffer */ 0151 mask = (1 << bits) - 1; 0152 code &= mask; 0153 0154 st->buf |= code << (8*sizeof(BITBUF_TYPE) - st->bufsize - bits); 0155 st->bufsize += bits; 0156 } 0157 0158 /* transfer one byte from bit buffer to output. Precondition: 0159 s->avail_out > 0. */ 0160 static inline void lzw_read_bitbuf(lzw_stream_t *s) { 0161 int ch; 0162 lzw_state_t *st = (lzw_state_t *)s->internal; 0163 0164 ch = st->buf >> (8*sizeof(BITBUF_TYPE)-8); 0165 st->buf <<= 8; 0166 st->bufsize -= 8; 0167 0168 s->next_out[0] = ch; 0169 s->next_out++; 0170 s->avail_out--; 0171 } 0172 0173 /* ---------------------------------------------------------------------- */ 0174 /* The following functions implement the state machine. */ 0175 0176 /* perform state transition of the state st on input character 0177 ch. This updates the dictionary state and/or writes to the bit 0178 buffer. Precondition: st->bufsize <= 7. Return 0 on success, or 1 0179 on error with errno set. */ 0180 static int lzw_encode_char(lzw_state_t *st, char c) { 0181 lzw_dict_t *e; 0182 0183 /* st = {s,d,n}. hibit(n+1)<=12. */ 0184 0185 /* {[], d, n} (input c) ---> (emit[hibit(n), 256]) {c, d, n} */ 0186 if (st->s == NULL) { 0187 lzw_emit(256, st); 0188 goto singleton; /* enter singleton state c */ 0189 } 0190 0191 /* {s,d,n} (input c) ---> {s*c,d,n} */ 0192 list_find(e, st->s->children, e->c == c); 0193 if (e) { 0194 e->freq++; 0195 st->s = e; 0196 return 0; 0197 } 0198 0199 /* {s,d,n} (input c) ---> (emit[hibit(n), d(s)]) {c,d',n+1} */ 0200 /* {s,d,n} (input c) ---> 0201 (emit[hibit(n), d(s)]) (emit[hibit(n+1), 256]) {c, newdict, 258} */ 0202 0203 lzw_emit(st->s->code, st); /* 9-12 bits */ 0204 if (st->n >= 4094) { /* hibit(n+2) > 12 */ 0205 st->n++; 0206 lzw_emit(256, st); 0207 goto dictfull; /* reset dictionary and enter singleton state c */ 0208 } 0209 0210 /* insert new code in dictionary, if possible */ 0211 e = (lzw_dict_t *)malloc(sizeof(lzw_dict_t)); 0212 if (!e) { 0213 return 1; 0214 } 0215 e->c = c; 0216 e->code = st->n; 0217 e->freq = 1; 0218 e->children = NULL; 0219 list_prepend(st->s->children, e); 0220 st->n++; 0221 goto singleton; /* enter singleton state c */ 0222 0223 dictfull: /* reset dictionary and enter singleton state c */ 0224 lzw_clear_table(st); 0225 /* fall through */ 0226 0227 singleton: /* enter singleton state c */ 0228 list_find(e, st->d, e->c == c); 0229 if (!e) { /* not found: lazily add it */ 0230 e = (lzw_dict_t *)malloc(sizeof(lzw_dict_t)); 0231 if (!e) { 0232 return 1; 0233 } 0234 e->c = c; 0235 e->code = (unsigned char)c; 0236 e->freq = 0; 0237 e->children = NULL; 0238 list_prepend(st->d, e); 0239 } 0240 e->freq++; 0241 st->s = e; 0242 return 0; 0243 } 0244 0245 /* perform state transition of the state st on input EOD. The leaves 0246 the dictionary state undefined and writes to the bit buffer. 0247 Precondition: st->bufsize <= 7. This function must be called 0248 exactly once, at the end of the stream. */ 0249 static void lzw_encode_eod(lzw_state_t *st) { 0250 0251 /* {[],d,n} (input EOD) ---> 0252 (emit[hibit(n), 256]) (emit[hibit(n), 257]) stop */ 0253 if (st->s == NULL) { 0254 lzw_emit(256, st); /* 9 bits */ 0255 st->n=258; 0256 lzw_emit(257, st); /* 9 bits */ 0257 return; 0258 } 0259 0260 /* {s,d,n} (input EOD) ---> 0261 (emit[hibit(n), code]) (emit[hibit(n+1), 257]) stop */ 0262 0263 lzw_emit(st->s->code, st); /* 9-12 bits */ 0264 st->n++; 0265 lzw_emit(257, st); /* 9-12 bits */ 0266 return; 0267 } 0268 0269 /* ---------------------------------------------------------------------- */ 0270 /* User visible functions. These implement a buffer interface. See 0271 lzw.h for the API description. */ 0272 0273 lzw_stream_t *lzw_init(void) { 0274 lzw_stream_t *s = NULL; 0275 lzw_state_t *st = NULL; 0276 0277 s = (lzw_stream_t *)malloc(sizeof(lzw_stream_t)); 0278 if (s==NULL) { 0279 goto fail; 0280 } 0281 st = (lzw_state_t *)malloc(sizeof(lzw_state_t)); 0282 if (st==NULL) { 0283 goto fail; 0284 } 0285 st->buf = 0; 0286 st->bufsize = 0; 0287 st->eod = 0; 0288 st->d = NULL; 0289 lzw_clear_table(st); 0290 s->internal = (void *) st; 0291 return s; 0292 0293 fail: 0294 free(s); 0295 free(st); 0296 return NULL; 0297 } 0298 0299 int lzw_compress(lzw_stream_t *s, int mode) { 0300 int r; 0301 lzw_state_t *st = (lzw_state_t *)s->internal; 0302 0303 while (st->eod == 0) { 0304 /* empty bit buffer */ 0305 while (st->bufsize > 7) { 0306 if (s->avail_out == 0) { 0307 return 0; 0308 } else { 0309 lzw_read_bitbuf(s); 0310 } 0311 } 0312 /* fill bit buffer */ 0313 if (s->avail_in == 0) { 0314 break; 0315 } else { 0316 r = lzw_encode_char(st, s->next_in[0]); 0317 if (r) { 0318 if (r==2) { 0319 errno = EINVAL; 0320 } 0321 return 1; 0322 } 0323 s->next_in++; 0324 s->avail_in--; 0325 } 0326 } 0327 0328 if (mode==LZW_EOD && st->eod == 0) { 0329 st->eod = 1; 0330 lzw_encode_eod(st); 0331 } 0332 0333 /* flush bit buffer */ 0334 if (st->eod) { 0335 while (st->bufsize > 0) { 0336 if (s->avail_out == 0) { 0337 return 0; 0338 } else { 0339 lzw_read_bitbuf(s); 0340 } 0341 } 0342 } 0343 0344 return 0; 0345 } 0346 0347 void lzw_free(lzw_stream_t *s) { 0348 lzw_state_t *st = (lzw_state_t *)s->internal; 0349 0350 lzw_free_dict(st->d); 0351 free(st); 0352 free(s); 0353 } 0354 0355 /* ---------------------------------------------------------------------- */ 0356 /* main function for testing and illustration purposes */ 0357 0358 #ifdef LZW_MAIN 0359 0360 int main() { 0361 lzw_stream_t *s; 0362 int ch; 0363 char inbuf[100]; 0364 char outbuf[100]; 0365 int i, r; 0366 int mode; 0367 0368 s = lzw_init(); 0369 if (!s) { 0370 goto error; 0371 } 0372 mode = LZW_NORMAL; 0373 0374 while (1) { 0375 /* fill inbuf */ 0376 for (i=0; i<100; i++) { 0377 ch = fgetc(stdin); 0378 if (ch==EOF) { 0379 break; 0380 } 0381 inbuf[i] = ch; 0382 } 0383 if (i<100) { /* end of input */ 0384 mode = LZW_EOD; 0385 } 0386 0387 /* compress */ 0388 s->next_in = inbuf; 0389 s->avail_in = i; 0390 do { 0391 s->next_out = outbuf; 0392 s->avail_out = 100; 0393 r = lzw_compress(s, mode); 0394 if (r) { 0395 goto error; 0396 } 0397 fwrite(outbuf, 1, 100-s->avail_out, stdout); 0398 } while (s->avail_out==0); 0399 if (mode == LZW_EOD) { 0400 break; 0401 } 0402 } 0403 fflush(stdout); 0404 lzw_free(s); 0405 0406 return 0; 0407 0408 error: 0409 fprintf(stderr, "lzw: %s\n", strerror(errno)); 0410 lzw_free(s); 0411 return 1; 0412 0413 } 0414 #endif 0415