File indexing completed on 2024-04-21 03:48:30

0001 /* gzread.c -- zlib functions for reading gzip files
0002  * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler
0003  * For conditions of distribution and use, see copyright notice in zlib.h
0004  */
0005 
0006 #include "gzguts.h"
0007 
0008 /* Local functions */
0009 local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
0010 local int gz_avail OF((gz_statep));
0011 local int gz_look OF((gz_statep));
0012 local int gz_decomp OF((gz_statep));
0013 local int gz_fetch OF((gz_statep));
0014 local int gz_skip OF((gz_statep, z_off64_t));
0015 
0016 /* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
0017    state->fd, and update state->eof, state->err, and state->msg as appropriate.
0018    This function needs to loop on read(), since read() is not guaranteed to
0019    read the number of bytes requested, depending on the type of descriptor. */
0020 local int gz_load(state, buf, len, have)
0021     gz_statep state;
0022     unsigned char *buf;
0023     unsigned len;
0024     unsigned *have;
0025 {
0026     int ret;
0027 
0028     *have = 0;
0029     do {
0030         ret = read(state->fd, buf + *have, len - *have);
0031         if (ret <= 0)
0032             break;
0033         *have += ret;
0034     } while (*have < len);
0035     if (ret < 0) {
0036         gz_error(state, Z_ERRNO, zstrerror());
0037         return -1;
0038     }
0039     if (ret == 0)
0040         state->eof = 1;
0041     return 0;
0042 }
0043 
0044 /* Load up input buffer and set eof flag if last data loaded -- return -1 on
0045    error, 0 otherwise.  Note that the eof flag is set when the end of the input
0046    file is reached, even though there may be unused data in the buffer.  Once
0047    that data has been used, no more attempts will be made to read the file.
0048    If strm->avail_in != 0, then the current data is moved to the beginning of
0049    the input buffer, and then the remainder of the buffer is loaded with the
0050    available data from the input file. */
0051 local int gz_avail(state)
0052     gz_statep state;
0053 {
0054     unsigned got;
0055     z_streamp strm = &(state->strm);
0056 
0057     if (state->err != Z_OK && state->err != Z_BUF_ERROR)
0058         return -1;
0059     if (state->eof == 0) {
0060         if (strm->avail_in) {       /* copy what's there to the start */
0061             unsigned char *p = state->in;
0062             unsigned const char *q = strm->next_in;
0063             unsigned n = strm->avail_in;
0064             do {
0065                 *p++ = *q++;
0066             } while (--n);
0067         }
0068         if (gz_load(state, state->in + strm->avail_in,
0069                     state->size - strm->avail_in, &got) == -1)
0070             return -1;
0071         strm->avail_in += got;
0072         strm->next_in = state->in;
0073     }
0074     return 0;
0075 }
0076 
0077 /* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
0078    If this is the first time in, allocate required memory.  state->how will be
0079    left unchanged if there is no more input data available, will be set to COPY
0080    if there is no gzip header and direct copying will be performed, or it will
0081    be set to GZIP for decompression.  If direct copying, then leftover input
0082    data from the input buffer will be copied to the output buffer.  In that
0083    case, all further file reads will be directly to either the output buffer or
0084    a user buffer.  If decompressing, the inflate state will be initialized.
0085    gz_look() will return 0 on success or -1 on failure. */
0086 local int gz_look(state)
0087     gz_statep state;
0088 {
0089     z_streamp strm = &(state->strm);
0090 
0091     /* allocate read buffers and inflate memory */
0092     if (state->size == 0) {
0093         /* allocate buffers */
0094         state->in = (unsigned char *)malloc(state->want);
0095         state->out = (unsigned char *)malloc(state->want << 1);
0096         if (state->in == NULL || state->out == NULL) {
0097             if (state->out != NULL)
0098                 free(state->out);
0099             if (state->in != NULL)
0100                 free(state->in);
0101             gz_error(state, Z_MEM_ERROR, "out of memory");
0102             return -1;
0103         }
0104         state->size = state->want;
0105 
0106         /* allocate inflate memory */
0107         state->strm.zalloc = Z_NULL;
0108         state->strm.zfree = Z_NULL;
0109         state->strm.opaque = Z_NULL;
0110         state->strm.avail_in = 0;
0111         state->strm.next_in = Z_NULL;
0112         if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
0113             free(state->out);
0114             free(state->in);
0115             state->size = 0;
0116             gz_error(state, Z_MEM_ERROR, "out of memory");
0117             return -1;
0118         }
0119     }
0120 
0121     /* get at least the magic bytes in the input buffer */
0122     if (strm->avail_in < 2) {
0123         if (gz_avail(state) == -1)
0124             return -1;
0125         if (strm->avail_in == 0)
0126             return 0;
0127     }
0128 
0129     /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
0130        a logical dilemma here when considering the case of a partially written
0131        gzip file, to wit, if a single 31 byte is written, then we cannot tell
0132        whether this is a single-byte file, or just a partially written gzip
0133        file -- for here we assume that if a gzip file is being written, then
0134        the header will be written in a single operation, so that reading a
0135        single byte is sufficient indication that it is not a gzip file) */
0136     if (strm->avail_in > 1 &&
0137             strm->next_in[0] == 31 && strm->next_in[1] == 139) {
0138         inflateReset(strm);
0139         state->how = GZIP;
0140         state->direct = 0;
0141         return 0;
0142     }
0143 
0144     /* no gzip header -- if we were decoding gzip before, then this is trailing
0145        garbage.  Ignore the trailing garbage and finish. */
0146     if (state->direct == 0) {
0147         strm->avail_in = 0;
0148         state->eof = 1;
0149         state->x.have = 0;
0150         return 0;
0151     }
0152 
0153     /* doing raw i/o, copy any leftover input to output -- this assumes that
0154        the output buffer is larger than the input buffer, which also assures
0155        space for gzungetc() */
0156     state->x.next = state->out;
0157     if (strm->avail_in) {
0158         memcpy(state->x.next, strm->next_in, strm->avail_in);
0159         state->x.have = strm->avail_in;
0160         strm->avail_in = 0;
0161     }
0162     state->how = COPY;
0163     state->direct = 1;
0164     return 0;
0165 }
0166 
0167 /* Decompress from input to the provided next_out and avail_out in the state.
0168    On return, state->x.have and state->x.next point to the just decompressed
0169    data.  If the gzip stream completes, state->how is reset to LOOK to look for
0170    the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
0171    on success, -1 on failure. */
0172 local int gz_decomp(state)
0173     gz_statep state;
0174 {
0175     int ret = Z_OK;
0176     unsigned had;
0177     z_streamp strm = &(state->strm);
0178 
0179     /* fill output buffer up to end of deflate stream */
0180     had = strm->avail_out;
0181     do {
0182         /* get more input for inflate() */
0183         if (strm->avail_in == 0 && gz_avail(state) == -1)
0184             return -1;
0185         if (strm->avail_in == 0) {
0186             gz_error(state, Z_BUF_ERROR, "unexpected end of file");
0187             break;
0188         }
0189 
0190         /* decompress and handle errors */
0191         ret = inflate(strm, Z_NO_FLUSH);
0192         if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
0193             gz_error(state, Z_STREAM_ERROR,
0194                      "internal error: inflate stream corrupt");
0195             return -1;
0196         }
0197         if (ret == Z_MEM_ERROR) {
0198             gz_error(state, Z_MEM_ERROR, "out of memory");
0199             return -1;
0200         }
0201         if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
0202             gz_error(state, Z_DATA_ERROR,
0203                      strm->msg == NULL ? "compressed data error" : strm->msg);
0204             return -1;
0205         }
0206     } while (strm->avail_out && ret != Z_STREAM_END);
0207 
0208     /* update available output */
0209     state->x.have = had - strm->avail_out;
0210     state->x.next = strm->next_out - state->x.have;
0211 
0212     /* if the gzip stream completed successfully, look for another */
0213     if (ret == Z_STREAM_END)
0214         state->how = LOOK;
0215 
0216     /* good decompression */
0217     return 0;
0218 }
0219 
0220 /* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
0221    Data is either copied from the input file or decompressed from the input
0222    file depending on state->how.  If state->how is LOOK, then a gzip header is
0223    looked for to determine whether to copy or decompress.  Returns -1 on error,
0224    otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
0225    end of the input file has been reached and all data has been processed.  */
0226 local int gz_fetch(state)
0227     gz_statep state;
0228 {
0229     z_streamp strm = &(state->strm);
0230 
0231     do {
0232         switch(state->how) {
0233         case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
0234             if (gz_look(state) == -1)
0235                 return -1;
0236             if (state->how == LOOK)
0237                 return 0;
0238             break;
0239         case COPY:      /* -> COPY */
0240             if (gz_load(state, state->out, state->size << 1, &(state->x.have))
0241                     == -1)
0242                 return -1;
0243             state->x.next = state->out;
0244             return 0;
0245         case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
0246             strm->avail_out = state->size << 1;
0247             strm->next_out = state->out;
0248             if (gz_decomp(state) == -1)
0249                 return -1;
0250         }
0251     } while (state->x.have == 0 && (!state->eof || strm->avail_in));
0252     return 0;
0253 }
0254 
0255 /* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
0256 local int gz_skip(state, len)
0257     gz_statep state;
0258     z_off64_t len;
0259 {
0260     unsigned n;
0261 
0262     /* skip over len bytes or reach end-of-file, whichever comes first */
0263     while (len)
0264         /* skip over whatever is in output buffer */
0265         if (state->x.have) {
0266             n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
0267                 (unsigned)len : state->x.have;
0268             state->x.have -= n;
0269             state->x.next += n;
0270             state->x.pos += n;
0271             len -= n;
0272         }
0273 
0274         /* output buffer empty -- return if we're at the end of the input */
0275         else if (state->eof && state->strm.avail_in == 0)
0276             break;
0277 
0278         /* need more data to skip -- load up output buffer */
0279         else {
0280             /* get more output, looking for header if required */
0281             if (gz_fetch(state) == -1)
0282                 return -1;
0283         }
0284     return 0;
0285 }
0286 
0287 /* -- see zlib.h -- */
0288 int ZEXPORT gzread(file, buf, len)
0289     gzFile file;
0290     voidp buf;
0291     unsigned len;
0292 {
0293     unsigned got, n;
0294     gz_statep state;
0295     z_streamp strm;
0296 
0297     /* get internal structure */
0298     if (file == NULL)
0299         return -1;
0300     state = (gz_statep)file;
0301     strm = &(state->strm);
0302 
0303     /* check that we're reading and that there's no (serious) error */
0304     if (state->mode != GZ_READ ||
0305             (state->err != Z_OK && state->err != Z_BUF_ERROR))
0306         return -1;
0307 
0308     /* since an int is returned, make sure len fits in one, otherwise return
0309        with an error (this avoids the flaw in the interface) */
0310     if ((int)len < 0) {
0311         gz_error(state, Z_DATA_ERROR, "requested length does not fit in int");
0312         return -1;
0313     }
0314 
0315     /* if len is zero, avoid unnecessary operations */
0316     if (len == 0)
0317         return 0;
0318 
0319     /* process a skip request */
0320     if (state->seek) {
0321         state->seek = 0;
0322         if (gz_skip(state, state->skip) == -1)
0323             return -1;
0324     }
0325 
0326     /* get len bytes to buf, or less than len if at the end */
0327     got = 0;
0328     do {
0329         /* first just try copying data from the output buffer */
0330         if (state->x.have) {
0331             n = state->x.have > len ? len : state->x.have;
0332             memcpy(buf, state->x.next, n);
0333             state->x.next += n;
0334             state->x.have -= n;
0335         }
0336 
0337         /* output buffer empty -- return if we're at the end of the input */
0338         else if (state->eof && strm->avail_in == 0) {
0339             state->past = 1;        /* tried to read past end */
0340             break;
0341         }
0342 
0343         /* need output data -- for small len or new stream load up our output
0344            buffer */
0345         else if (state->how == LOOK || len < (state->size << 1)) {
0346             /* get more output, looking for header if required */
0347             if (gz_fetch(state) == -1)
0348                 return -1;
0349             continue;       /* no progress yet -- go back to copy above */
0350             /* the copy above assures that we will leave with space in the
0351                output buffer, allowing at least one gzungetc() to succeed */
0352         }
0353 
0354         /* large len -- read directly into user buffer */
0355         else if (state->how == COPY) {      /* read directly */
0356             if (gz_load(state, (unsigned char *)buf, len, &n) == -1)
0357                 return -1;
0358         }
0359 
0360         /* large len -- decompress directly into user buffer */
0361         else {  /* state->how == GZIP */
0362             strm->avail_out = len;
0363             strm->next_out = (unsigned char *)buf;
0364             if (gz_decomp(state) == -1)
0365                 return -1;
0366             n = state->x.have;
0367             state->x.have = 0;
0368         }
0369 
0370         /* update progress */
0371         len -= n;
0372         buf = (char *)buf + n;
0373         got += n;
0374         state->x.pos += n;
0375     } while (len);
0376 
0377     /* return number of bytes read into user buffer (will fit in int) */
0378     return (int)got;
0379 }
0380 
0381 /* -- see zlib.h -- */
0382 #ifdef Z_PREFIX_SET
0383 #  undef z_gzgetc
0384 #else
0385 #  undef gzgetc
0386 #endif
0387 int ZEXPORT gzgetc(file)
0388     gzFile file;
0389 {
0390     int ret;
0391     unsigned char buf[1];
0392     gz_statep state;
0393 
0394     /* get internal structure */
0395     if (file == NULL)
0396         return -1;
0397     state = (gz_statep)file;
0398 
0399     /* check that we're reading and that there's no (serious) error */
0400     if (state->mode != GZ_READ ||
0401         (state->err != Z_OK && state->err != Z_BUF_ERROR))
0402         return -1;
0403 
0404     /* try output buffer (no need to check for skip request) */
0405     if (state->x.have) {
0406         state->x.have--;
0407         state->x.pos++;
0408         return *(state->x.next)++;
0409     }
0410 
0411     /* nothing there -- try gzread() */
0412     ret = gzread(file, buf, 1);
0413     return ret < 1 ? -1 : buf[0];
0414 }
0415 
0416 int ZEXPORT gzgetc_(file)
0417 gzFile file;
0418 {
0419     return gzgetc(file);
0420 }
0421 
0422 /* -- see zlib.h -- */
0423 int ZEXPORT gzungetc(c, file)
0424     int c;
0425     gzFile file;
0426 {
0427     gz_statep state;
0428 
0429     /* get internal structure */
0430     if (file == NULL)
0431         return -1;
0432     state = (gz_statep)file;
0433 
0434     /* check that we're reading and that there's no (serious) error */
0435     if (state->mode != GZ_READ ||
0436         (state->err != Z_OK && state->err != Z_BUF_ERROR))
0437         return -1;
0438 
0439     /* process a skip request */
0440     if (state->seek) {
0441         state->seek = 0;
0442         if (gz_skip(state, state->skip) == -1)
0443             return -1;
0444     }
0445 
0446     /* can't push EOF */
0447     if (c < 0)
0448         return -1;
0449 
0450     /* if output buffer empty, put byte at end (allows more pushing) */
0451     if (state->x.have == 0) {
0452         state->x.have = 1;
0453         state->x.next = state->out + (state->size << 1) - 1;
0454         state->x.next[0] = c;
0455         state->x.pos--;
0456         state->past = 0;
0457         return c;
0458     }
0459 
0460     /* if no room, give up (must have already done a gzungetc()) */
0461     if (state->x.have == (state->size << 1)) {
0462         gz_error(state, Z_DATA_ERROR, "out of room to push characters");
0463         return -1;
0464     }
0465 
0466     /* slide output data if needed and insert byte before existing data */
0467     if (state->x.next == state->out) {
0468         unsigned char *src = state->out + state->x.have;
0469         unsigned char *dest = state->out + (state->size << 1);
0470         while (src > state->out)
0471             *--dest = *--src;
0472         state->x.next = dest;
0473     }
0474     state->x.have++;
0475     state->x.next--;
0476     state->x.next[0] = c;
0477     state->x.pos--;
0478     state->past = 0;
0479     return c;
0480 }
0481 
0482 /* -- see zlib.h -- */
0483 char * ZEXPORT gzgets(file, buf, len)
0484     gzFile file;
0485     char *buf;
0486     int len;
0487 {
0488     unsigned left, n;
0489     char *str;
0490     unsigned char *eol;
0491     gz_statep state;
0492 
0493     /* check parameters and get internal structure */
0494     if (file == NULL || buf == NULL || len < 1)
0495         return NULL;
0496     state = (gz_statep)file;
0497 
0498     /* check that we're reading and that there's no (serious) error */
0499     if (state->mode != GZ_READ ||
0500         (state->err != Z_OK && state->err != Z_BUF_ERROR))
0501         return NULL;
0502 
0503     /* process a skip request */
0504     if (state->seek) {
0505         state->seek = 0;
0506         if (gz_skip(state, state->skip) == -1)
0507             return NULL;
0508     }
0509 
0510     /* copy output bytes up to new line or len - 1, whichever comes first --
0511        append a terminating zero to the string (we don't check for a zero in
0512        the contents, let the user worry about that) */
0513     str = buf;
0514     left = (unsigned)len - 1;
0515     if (left) do {
0516         /* assure that something is in the output buffer */
0517         if (state->x.have == 0 && gz_fetch(state) == -1)
0518             return NULL;                /* error */
0519         if (state->x.have == 0) {       /* end of file */
0520             state->past = 1;            /* read past end */
0521             break;                      /* return what we have */
0522         }
0523 
0524         /* look for end-of-line in current output buffer */
0525         n = state->x.have > left ? left : state->x.have;
0526         eol = (unsigned char *)memchr(state->x.next, '\n', n);
0527         if (eol != NULL)
0528             n = (unsigned)(eol - state->x.next) + 1;
0529 
0530         /* copy through end-of-line, or remainder if not found */
0531         memcpy(buf, state->x.next, n);
0532         state->x.have -= n;
0533         state->x.next += n;
0534         state->x.pos += n;
0535         left -= n;
0536         buf += n;
0537     } while (left && eol == NULL);
0538 
0539     /* return terminated string, or if nothing, end of file */
0540     if (buf == str)
0541         return NULL;
0542     buf[0] = 0;
0543     return str;
0544 }
0545 
0546 /* -- see zlib.h -- */
0547 int ZEXPORT gzdirect(file)
0548     gzFile file;
0549 {
0550     gz_statep state;
0551 
0552     /* get internal structure */
0553     if (file == NULL)
0554         return 0;
0555     state = (gz_statep)file;
0556 
0557     /* if the state is not known, but we can find out, then do so (this is
0558        mainly for right after a gzopen() or gzdopen()) */
0559     if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
0560         (void)gz_look(state);
0561 
0562     /* return 1 if transparent, 0 if processing a gzip stream */
0563     return state->direct;
0564 }
0565 
0566 /* -- see zlib.h -- */
0567 int ZEXPORT gzclose_r(file)
0568     gzFile file;
0569 {
0570     int ret, err;
0571     gz_statep state;
0572 
0573     /* get internal structure */
0574     if (file == NULL)
0575         return Z_STREAM_ERROR;
0576     state = (gz_statep)file;
0577 
0578     /* check that we're reading */
0579     if (state->mode != GZ_READ)
0580         return Z_STREAM_ERROR;
0581 
0582     /* free memory and close file */
0583     if (state->size) {
0584         inflateEnd(&(state->strm));
0585         free(state->out);
0586         free(state->in);
0587     }
0588     err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
0589     gz_error(state, Z_OK, NULL);
0590     free(state->path);
0591     ret = close(state->fd);
0592     free(state);
0593     return ret ? Z_ERRNO : err;
0594 }