6 * Copyright (c) 1998 by Gilbert Ramirez <gram@alumni.rice.edu>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 2
11 * of the License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23 /* file_access interface based heavily on zlib gzread.c and gzlib.c from zlib
24 * Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
27 * This software is provided 'as-is', without any express or implied
28 * warranty. In no event will the authors be held liable for any damages
29 * arising from the use of this software.
31 * Permission is granted to anyone to use this software for any purpose,
32 * including commercial applications, and to alter it and redistribute it
33 * freely, subject to the following restrictions:
35 * 1. The origin of this software must not be misrepresented; you must not
36 * claim that you wrote the original software. If you use this software
37 * in a product, an acknowledgment in the product documentation would be
38 * appreciated but is not required.
39 * 2. Altered source versions must be plainly marked as such, and must not be
40 * misrepresented as being the original software.
41 * 3. This notice may not be removed or altered from any source distribution.
50 #endif /* HAVE_UNISTD_H */
56 #endif /* HAVE_FCNTL_H */
59 #include "file_wrappers.h"
60 #include <wsutil/file_util.h>
63 * See RFC 1952 for a description of the gzip file format.
65 * Some other compressed file formats we might want to support:
67 * XZ format: http://tukaani.org/xz/
69 * Bzip2 format: http://bzip.org/
/* Default buffer size in bytes.  It is the initial value of `want`, which is
 * the allocation size of the input buffer; the output buffer is allocated at
 * twice that size. */
72 /* #define GZBUFSIZE 8192 */
73 #define GZBUFSIZE 4096
/* Per-file reader state.  The routines in this file reach these fields
 * through a FILE_T pointer (state->..., file->..., stream->...). */
76 int fd; /* file descriptor */
77 gint64 pos; /* current position in uncompressed data */
78 unsigned size; /* buffer size: bytes requested per refill of the input buffer;
                  up to twice this is inflated into the output buffer */
79 unsigned char *in; /* input buffer */
80 unsigned char *out; /* output buffer (double-sized when reading) */
81 unsigned char *next; /* next output data to deliver or write */
83 unsigned have; /* amount of output data unused at next */
84 int eof; /* true if end of input file reached */
85 gint64 start; /* where the gzip data started, for rewinding */
86 gint64 raw; /* where the raw data started, for seeking */
87 int compression; /* 0: unknown (look for a gzip header), 1: uncompressed, 2: zlib */
89 gint64 skip; /* amount to skip (already rewound if backwards) */
90 int seek; /* true if seek request pending */
91 /* error information */
92 int err; /* error code: 0 when clear; decompression failures store
             WTAP_ERR_ZLIB plus the zlib error value */
94 unsigned int avail_in; /* number of bytes available at next_in */
95 unsigned char *next_in; /* next input byte */
97 /* zlib inflate stream */
98 z_stream strm; /* stream structure in-place (not a pointer) */
102 /* values for the reader state's `compression` field */
103 #define UNKNOWN 0 /* look for a gzip header */
104 #define UNCOMPRESSED 1 /* copy input directly */
106 #define ZLIB 2 /* decompress a zlib stream */
/* Read from state->fd into buf.  *have holds the running total of bytes
 * gathered; the visible loop keeps going while *have is below count.
 * Callers in this file treat a return value of -1 as failure. */
109 static int /* gz_load */
110 raw_read(FILE_T state, unsigned char *buf, unsigned int count, unsigned *have)
/* each pass reads into the still-unfilled tail of buf */
116 ret = read(state->fd, buf + *have, count - *have);
120 } while (*have < count);
/* Refill the input buffer.  Unless end of file has already been flagged,
 * reads up to state->size bytes into state->in (the byte count lands in
 * state->avail_in) and points state->next_in at the start of that buffer.
 * Callers in this file treat a return value of -1 as failure. */
130 static int /* gz_avail */
131 fill_in_buffer(FILE_T state)
135 if (state->eof == 0) {
136 if (raw_read(state, state->in, state->size, (unsigned *)&(state->avail_in)) == -1)
138 state->next_in = state->in;
145 /* Get next byte from input, or -1 if end or error.  When no input is
   buffered, fill_in_buffer() is called first; a byte is then consumed by
   decrementing avail_in and advancing next_in.  The expansion refers to a
   variable named `state`, which must be in scope where the macro is used. */
146 #define NEXT() ((state->avail_in == 0 && fill_in_buffer(state) == -1) ? -1 : \
147 (state->avail_in == 0 ? -1 : \
148 (state->avail_in--, *(state->next_in)++)))
150 /* Get a four-byte little-endian integer and return 0 on success and the value
151 in *ret. Otherwise -1 is returned and *ret is not modified. */
153 gz_next4(FILE_T state, guint32 *ret)
/* bytes are pulled with NEXT() least-significant first; each later byte is
   shifted a further 8 bits left before being added to val */
159 val += (unsigned)NEXT() << 8;
160 val += (guint32)NEXT() << 16;
/* the top byte comes from ch, a separately held value, rather than from an
   inline NEXT() call */
164 val += (guint32)ch << 24;
/* Inflate compressed input into buf, producing at most count bytes.
 *
 * Input is taken from the state's input buffer (refilled through
 * fill_in_buffer() when empty) and handed to zlib's inflate().  zlib failures
 * are recorded in state->err as WTAP_ERR_ZLIB plus a zlib error value.  When
 * inflate() reports the end of the deflate stream, two four-byte trailer
 * values are read and checked against the running CRC-32 and the low 32 bits
 * of the total output count.  Callers in this file treat -1 as failure. */
169 static int /* gz_decomp */
170 zlib_read(FILE_T state, unsigned char *buf, unsigned int count)
174 z_streamp strm = &(state->strm);
/* direct inflate()'s output at the caller-supplied buffer */
176 strm->avail_out = count;
177 strm->next_out = buf;
179 /* fill output buffer up to end of deflate stream */
181 /* get more input for inflate() */
182 if (state->avail_in == 0 && fill_in_buffer(state) == -1)
/* still no input after a refill attempt: the stream ended early, which is
   reported as a data error */
184 if (state->avail_in == 0) {
185 state->err = WTAP_ERR_ZLIB + Z_DATA_ERROR;
/* lend the buffered input to the z_stream for this inflate() call ... */
189 strm->avail_in = state->avail_in;
190 strm->next_in = state->next_in;
191 /* decompress and handle errors */
192 ret = inflate(strm, Z_NO_FLUSH);
/* ... and copy back how much of it inflate() left unconsumed */
193 state->avail_in = strm->avail_in;
194 state->next_in = strm->next_in;
/* Z_NEED_DICT is folded into the same error code as Z_STREAM_ERROR */
195 if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
196 state->err = WTAP_ERR_ZLIB + Z_STREAM_ERROR;
199 if (ret == Z_MEM_ERROR) {
200 state->err = WTAP_ERR_ZLIB + Z_MEM_ERROR; /* ENOMEM? */
203 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
204 state->err = WTAP_ERR_ZLIB + Z_DATA_ERROR;
/* keep inflating until the output space is used up or the stream ends */
207 } while (strm->avail_out && ret != Z_STREAM_END);
209 /* update available output and crc check value */
211 state->have = count - strm->avail_out;
/* strm->adler is used as the accumulator for the running CRC-32 of the
   output; it is later compared with the trailer's CRC value */
212 strm->adler = crc32(strm->adler, state->next, state->have);
214 /* check gzip trailer if at end of deflate stream */
215 if (ret == Z_STREAM_END) {
/* trailer order: CRC value first, then length */
216 if (gz_next4(state, &crc) == -1 || gz_next4(state, &len) == -1) {
217 state->err = WTAP_ERR_ZLIB + Z_DATA_ERROR;
220 if (crc != strm->adler) {
221 state->err = WTAP_ERR_ZLIB + Z_DATA_ERROR;
/* the trailer length is compared with only the low 32 bits of total_out */
224 if (len != (strm->total_out & 0xffffffffL)) {
225 state->err = WTAP_ERR_ZLIB + Z_DATA_ERROR;
228 state->compression = UNKNOWN; /* ready for next stream, once have is 0 */
231 /* good decompression */
/* Inspect the start of the input and choose how it will be read.
 *
 * If the data begins with the gzip magic bytes (31, 139), the rest of the gzip
 * header is validated and skipped, the inflate stream and running CRC are
 * reset, and state->compression becomes ZLIB.  Otherwise the data is treated
 * as raw: leftover input is copied to the output buffer and
 * state->compression becomes UNCOMPRESSED.  Header problems are recorded in
 * state->err as WTAP_ERR_ZLIB + Z_DATA_ERROR. */
237 gz_head(FILE_T state)
239 /* get some data in the input buffer */
240 if (state->avail_in == 0) {
241 if (fill_in_buffer(state) == -1)
243 if (state->avail_in == 0)
247 /* look for the gzip magic header bytes 31 and 139 */
249 if (state->next_in[0] == 31) {
/* the second magic byte may need another refill of the input buffer */
252 if (state->avail_in == 0 && fill_in_buffer(state) == -1)
254 if (state->avail_in && state->next_in[0] == 139) {
258 /* we have a gzip header, woo hoo! */
262 /* skip rest of header */
/* 8 is the only compression-method value accepted */
263 if (NEXT() != 8) { /* compression method */
264 state->err = WTAP_ERR_ZLIB + Z_DATA_ERROR;
/* any of the top three flag bits being set is rejected */
268 if (flags & 0xe0) { /* reserved flag bits */
269 state->err = WTAP_ERR_ZLIB + Z_DATA_ERROR;
/* fixed header bytes are consumed and their values discarded */
272 NEXT(); /* modification time */
276 NEXT(); /* extra flags */
277 NEXT(); /* operating system */
278 if (flags & 4) { /* extra field */
/* the extra field's length is a two-byte value, low byte first */
279 len = (unsigned)NEXT();
280 len += (unsigned)NEXT() << 8;
285 if (flags & 8) /* file name */
288 if (flags & 16) /* comment */
291 if (flags & 2) { /* header crc */
295 /* an unexpected end of file is not checked for here -- it will be
296 noticed on the first request for uncompressed data */
298 /* set up for decompression */
299 inflateReset(&(state->strm));
/* start the running CRC-32 from crc32()'s initial value */
300 state->strm.adler = crc32(0L, Z_NULL, 0);
301 state->compression = ZLIB;
305 /* not a gzip file -- save first byte (31) and fall to raw i/o */
/* reference note: the two comments below spell out the XZ signature bytes;
   no visible code here acts on them */
312 /* { 0xFD, '7', 'z', 'X', 'Z', 0x00 } */
313 /* FD 37 7A 58 5A 00 */
316 /* doing raw i/o, save start of raw data for seeking, copy any leftover
317 input to output -- this assumes that the output buffer is larger than
318 the input buffer, which also assures space for gzungetc() */
319 state->raw = state->pos;
320 state->next = state->out;
321 if (state->avail_in) {
/* leftover input is appended after the state->have bytes already counted */
322 memcpy(state->next + state->have, state->next_in, state->avail_in);
323 state->have += state->avail_in;
326 state->compression = UNCOMPRESSED;
/* Produce more data in the output buffer, according to state->compression:
 *   UNKNOWN      - run gz_head() first; it may itself leave data in the buffer
 *   UNCOMPRESSED - read up to state->size bytes straight into state->out
 *   ZLIB         - inflate up to twice state->size bytes into state->out
 * Callers in this file treat a return value of -1 as failure. */
330 static int /* gz_make */
331 fill_out_buffer(FILE_T state)
333 if (state->compression == UNKNOWN) { /* look for gzip header */
334 if (gz_head(state) == -1)
336 if (state->have) /* got some data from gz_head() */
339 if (state->compression == UNCOMPRESSED) { /* straight copy */
/* only state->size bytes are requested here; the doubling is commented out */
340 if (raw_read(state, state->out, state->size /* << 1 */, &(state->have)) == -1)
342 state->next = state->out;
345 else if (state->compression == ZLIB) { /* decompress */
346 if (zlib_read(state, state->out, state->size << 1) == -1)
/* Discard len bytes of uncompressed data, stopping early at end of input.
 * Data already in the output buffer is consumed first; when it is empty,
 * fill_out_buffer() is asked for more.  Callers in this file treat -1 as
 * failure. */
354 gz_skip(FILE_T state, gint64 len)
358 /* skip over len bytes or reach end-of-file, whichever comes first */
360 /* skip over whatever is in output buffer */
/* n = the smaller of the buffered amount and the remaining skip length */
362 n = (gint64)state->have > len ? (unsigned)len : state->have;
369 /* output buffer empty -- return if we're at the end of the input */
370 else if (state->eof && state->avail_in == 0)
373 /* need more data to skip -- load up output buffer */
375 /* get more output, looking for header if required */
376 if (fill_out_buffer(state) == -1)
/* Return the reader state to its start-of-stream condition: buffers counted
 * as empty, flags and error cleared, position zero, and the stream type set
 * back to UNKNOWN so the next read looks for a gzip header again. */
383 gz_reset(FILE_T state)
385 state->have = 0; /* no output data available */
386 state->eof = 0; /* not at end of file */
387 state->compression = UNKNOWN; /* look for gzip header */
389 state->seek = 0; /* no seek request pending */
390 state->err = 0; /* clear error */
391 state->pos = 0; /* no uncompressed data yet */
392 state->avail_in = 0; /* no input data yet */
/* Build a reader state around the descriptor `fd`: allocate the state, note
   the current file offset for rewinding, size and allocate the buffers, set
   up a raw inflate stream, and read the first chunk. */
398 #ifdef _STATBUF_ST_BLKSIZE /* XXX, _STATBUF_ST_BLKSIZE portable? */
/* buffer size to allocate; GZBUFSIZE unless replaced by the fstat() result */
401 int want = GZBUFSIZE;
407 /* allocate gzFile structure to return */
408 state = g_try_malloc(sizeof *state);
412 /* open the file with the appropriate mode (or just use fd) */
415 /* save the current position for rewinding (only if reading) */
416 state->start = ws_lseek64(state->fd, 0, SEEK_CUR);
/* if the offset query fails, fall back to offset 0 as the rewind point */
417 if (state->start == -1) state->start = 0;
419 /* initialize stream */
422 #ifdef _STATBUF_ST_BLKSIZE
/* when available, use the block size fstat() reports in place of GZBUFSIZE */
423 if (fstat(fd, &st) >= 0) {
424 want = st.st_blksize;
425 /* XXX, verify result? */
429 /* allocate buffers */
/* the output buffer is twice the size of the input buffer */
430 state->in = g_try_malloc(want);
431 state->out = g_try_malloc(want << 1);
433 if (state->in == NULL || state->out == NULL) {
442 /* allocate inflate memory */
443 state->strm.zalloc = Z_NULL;
444 state->strm.zfree = Z_NULL;
445 state->strm.opaque = Z_NULL;
446 state->strm.avail_in = 0;
447 state->strm.next_in = Z_NULL;
/* raw inflate: the gzip header and trailer are handled by this file's own
   code (gz_head() and zlib_read()) */
448 if (inflateInit2(&(state->strm), -15) != Z_OK) { /* raw inflate */
/* NOTE(review): the result of this gz_head() call is not examined on this
   line - confirm that an early read error is picked up later */
456 gz_head(state); /* read first chunk */
/* Open the file named by path with ws_open() (using the flags in oflag and
 * permission bits 0666), then wrap the resulting descriptor in a file
 * handle.  A ws_open() result of -1 is the failure case tested below. */
463 file_open(const char *path)
475 /* open file and do correct filename conversions */
476 if ((fd = ws_open(path, oflag, 0666)) == -1)
479 /* open file handle */
/* Move the read position in the uncompressed data.
 *
 * file   - reader handle
 * offset - target offset, interpreted according to whence
 * whence - SEEK_SET or SEEK_CUR only; any other value reaches
 *          g_assert_not_reached()
 * err    - error output parameter (its assignments are not among the lines
 *          visible here)
 *
 * The offset is first turned into one relative to the current position.
 * Within raw (uncompressed) data the descriptor is repositioned directly;
 * otherwise a backward seek rewinds to file->start, and a skip is then
 * requested.  The visible success path returns file->pos + offset. */
490 file_seek(FILE_T file, gint64 offset, int whence, int *err)
494 /* check that there's no error */
500 /* can only seek from start or relative to current position */
501 if (whence != SEEK_SET && whence != SEEK_CUR) {
502 g_assert_not_reached();
509 /* normalize offset to a SEEK_CUR specification */
510 if (whence == SEEK_SET)
/* a skip amount already recorded in file->skip is added to the offset */
513 offset += file->skip;
516 /* if within raw area while reading, just go there */
517 if (file->compression == UNCOMPRESSED && file->pos + offset >= file->raw) {
/* the descriptor is ahead of the logical position by the file->have bytes
   still buffered, so that amount is subtracted from the relative move */
518 if (ws_lseek64(file->fd, offset - file->have, SEEK_CUR) == -1) {
531 /* calculate skip amount, rewinding if needed for back seek when reading */
534 if (offset < 0) { /* before start of file! */
538 /* rewind, then skip to offset */
540 /* back up and start over */
541 if (ws_lseek64(file->fd, file->start, SEEK_SET) == -1) {
548 /* skip what's in output buffer (one less gzgetc() check) */
/* n = the smaller of the buffered amount and the forward distance */
549 n = (gint64)file->have > offset ? (unsigned)offset : file->have;
555 /* request skip (if not zero) */
560 return file->pos + offset;
/* Report the current position in the uncompressed data.  While a seek request
 * is pending, the not-yet-applied skip amount is included in the result. */
564 file_tell(FILE_T stream)
566 /* return position */
567 return stream->pos + (stream->seek ? stream->skip : 0);
/* Read up to len bytes of uncompressed data from file into buf.
 *
 * A pending skip request is carried out first.  Data is then taken from the
 * output buffer while it holds any.  When it is empty, the routine either
 * refills it through fill_out_buffer() (stream type still unknown, or len
 * smaller than twice file->size) or, for larger requests, reads or inflates
 * directly into the caller's buffer. */
571 file_read(void *buf, unsigned int len, FILE_T file)
575 /* check that we're reading and that there's no error */
579 /* if len is zero, avoid unnecessary operations */
583 /* process a skip request */
586 if (gz_skip(file, file->skip) == -1)
590 /* get len bytes to buf, or less than len if at the end */
593 /* first just try copying data from the output buffer */
/* n = the smaller of the buffered amount and the bytes still wanted */
595 n = file->have > len ? len : file->have;
596 memcpy(buf, file->next, n);
601 /* output buffer empty -- return if we're at the end of the input */
602 else if (file->eof && file->avail_in == 0)
605 /* need output data -- for small len or new stream load up our output buffer */
606 else if (file->compression == UNKNOWN || len < (file->size << 1)) {
607 /* get more output, looking for header if required */
608 if (fill_out_buffer(file) == -1)
610 continue; /* no progress yet -- go back to memcpy() above */
612 } else if (file->compression == UNCOMPRESSED) { /* large len -- read directly into user buffer */
613 if (raw_read(file, buf, len, &n) == -1)
617 /* large len -- decompress directly into user buffer */
618 else { /* file->compression == ZLIB */
619 if (zlib_read(file, buf, len) == -1)
625 /* update progress */
/* advance the destination pointer past the n bytes just delivered */
627 buf = (char *)buf + n;
/* Read one byte from file.  A byte already sitting in the output buffer is
 * returned directly; otherwise a one-byte file_read() is issued.  On that
 * slower path the result is -1 when file_read() delivers less than one byte,
 * and the byte's value otherwise. */
636 file_getc(FILE_T file)
638 unsigned char buf[1];
641 /* check that we're reading and that there's no error */
645 /* try output buffer (no need to check for skip request) */
/* fast path: hand back the byte at file->next and step past it */
649 return *(file->next)++;
652 ret = file_read(buf, 1, file);
653 return ret < 1 ? -1 : buf[0];
/* Read a line of text from file into buf, which has room for len bytes.
 *
 * Copying stops after a newline has been copied or once len - 1 bytes have
 * been stored.  A NULL buf or a len below 1 is caught by the parameter check.
 * NULL is returned when refilling the output buffer fails. */
657 file_gets(char *buf, int len, FILE_T file)
663 /* check parameters */
664 if (buf == NULL || len < 1)
667 /* check that there's no error */
671 /* process a skip request */
674 if (gz_skip(file, file->skip) == -1)
678 /* copy output bytes up to new line or len - 1, whichever comes first --
679 append a terminating zero to the string (we don't check for a zero in
680 the contents, let the user worry about that) */
/* one byte of the caller's space is held back */
682 left = (unsigned)len - 1;
684 /* assure that something is in the output buffer */
685 if (file->have == 0) {
686 if (fill_out_buffer(file) == -1)
687 return NULL; /* error */
688 if (file->have == 0) { /* end of file */
689 if (buf == str) /* got bupkus */
691 break; /* got something -- return it */
695 /* look for end-of-line in current output buffer */
/* search no further than the space left or the data buffered */
696 n = file->have > left ? left : file->have;
697 eol = memchr(file->next, '\n', n);
/* the copy length runs through the newline itself */
699 n = (unsigned)(eol - file->next) + 1;
701 /* copy through end-of-line, or remainder if not found */
702 memcpy(buf, file->next, n);
/* repeat while space remains and no newline has been seen */
708 } while (left && eol == NULL);
710 /* found end-of-line or out of space -- terminate string and return it */
/* Report whether the reader is at end of file.  The result is nonzero only
 * when the end-of-input flag is set and both the input buffer and the output
 * buffer are empty, i.e. there is no buffered data left to deliver. */
716 file_eof(FILE_T file)
718 /* return end-of-file state */
719 return (file->eof && file->avail_in == 0 && file->have == 0);
723 * Routine to return a Wiretap error code (0 for no error, an errno
724 * for a file error, or a WTAP_ERR_ code for other errors) for an
728 file_error(FILE_T fh)
734 file_clearerr(FILE_T stream)
736 /* clear error and end-of-file */
/* Tear down a reader handle. */
742 file_close(FILE_T file)
746 /* free memory and close file */
/* release the memory zlib holds for the inflate stream */
749 inflateEnd(&(file->strm));