diff options
Diffstat (limited to 'deps/zlib-1.3.1/examples')
-rw-r--r-- | deps/zlib-1.3.1/examples/README.examples | 54 | ||||
-rw-r--r-- | deps/zlib-1.3.1/examples/enough.c | 597 | ||||
-rw-r--r-- | deps/zlib-1.3.1/examples/fitblk.c | 233 | ||||
-rw-r--r-- | deps/zlib-1.3.1/examples/gun.c | 702 | ||||
-rw-r--r-- | deps/zlib-1.3.1/examples/gzappend.c | 504 | ||||
-rw-r--r-- | deps/zlib-1.3.1/examples/gzjoin.c | 449 | ||||
-rw-r--r-- | deps/zlib-1.3.1/examples/gzlog.c | 1061 | ||||
-rw-r--r-- | deps/zlib-1.3.1/examples/gzlog.h | 91 | ||||
-rw-r--r-- | deps/zlib-1.3.1/examples/gznorm.c | 470 | ||||
-rw-r--r-- | deps/zlib-1.3.1/examples/zlib_how.html | 549 | ||||
-rw-r--r-- | deps/zlib-1.3.1/examples/zpipe.c | 205 | ||||
-rw-r--r-- | deps/zlib-1.3.1/examples/zran.c | 533 | ||||
-rw-r--r-- | deps/zlib-1.3.1/examples/zran.h | 51 |
13 files changed, 5499 insertions, 0 deletions
diff --git a/deps/zlib-1.3.1/examples/README.examples b/deps/zlib-1.3.1/examples/README.examples new file mode 100644 index 0000000..e3a4b88 --- /dev/null +++ b/deps/zlib-1.3.1/examples/README.examples @@ -0,0 +1,54 @@ +This directory contains examples of the use of zlib and other relevant +programs and documentation. + +enough.c + calculation and justification of ENOUGH parameter in inftrees.h + - calculates the maximum table space used in inflate tree + construction over all possible Huffman codes + +fitblk.c + compress just enough input to nearly fill a requested output size + - zlib isn't designed to do this, but fitblk does it anyway + +gun.c + uncompress a gzip file + - illustrates the use of inflateBack() for high speed file-to-file + decompression using call-back functions + - is approximately twice as fast as gzip -d + - also provides Unix uncompress functionality, again twice as fast + +gzappend.c + append to a gzip file + - illustrates the use of the Z_BLOCK flush parameter for inflate() + - illustrates the use of deflatePrime() to start at any bit + +gzjoin.c + join gzip files without recalculating the crc or recompressing + - illustrates the use of the Z_BLOCK flush parameter for inflate() + - illustrates the use of crc32_combine() + +gzlog.c +gzlog.h + efficiently and robustly maintain a message log file in gzip format + - illustrates use of raw deflate, Z_PARTIAL_FLUSH, deflatePrime(), + and deflateSetDictionary() + - illustrates use of a gzip header extra field + +gznorm.c + normalize a gzip file by combining members into a single member + - demonstrates how to concatenate deflate streams using Z_BLOCK + +zlib_how.html + painfully comprehensive description of zpipe.c (see below) + - describes in excruciating detail the use of deflate() and inflate() + +zpipe.c + reads and writes zlib streams from stdin to stdout + - illustrates the proper use of deflate() and inflate() + - deeply commented in zlib_how.html (see above) + +zran.c +zran.h + index a zlib or gzip stream and randomly access it + - illustrates the use of Z_BLOCK, inflatePrime(), and + inflateSetDictionary() to provide random access diff --git a/deps/zlib-1.3.1/examples/enough.c b/deps/zlib-1.3.1/examples/enough.c new file mode 100644 index 0000000..8a3cade --- /dev/null +++ b/deps/zlib-1.3.1/examples/enough.c @@ -0,0 +1,597 @@ +/* enough.c -- determine the maximum size of inflate's Huffman code tables over + * all possible valid and complete prefix codes, subject to a length limit. + * Copyright (C) 2007, 2008, 2012, 2018 Mark Adler + * Version 1.5 5 August 2018 Mark Adler + */ + +/* Version history: + 1.0 3 Jan 2007 First version (derived from codecount.c version 1.4) + 1.1 4 Jan 2007 Use faster incremental table usage computation + Prune examine() search on previously visited states + 1.2 5 Jan 2007 Comments clean up + As inflate does, decrease root for short codes + Refuse cases where inflate would increase root + 1.3 17 Feb 2008 Add argument for initial root table size + Fix bug for initial root table size == max - 1 + Use a macro to compute the history index + 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!) + Clean up comparisons of different types + Clean up code indentation + 1.5 5 Aug 2018 Clean up code style, formatting, and comments + Show all the codes for the maximum, and only the maximum + */ + +/* + Examine all possible prefix codes for a given number of symbols and a + maximum code length in bits to determine the maximum table size for zlib's + inflate. Only complete prefix codes are counted. + + Two codes are considered distinct if the vectors of the number of codes per + length are not identical. So permutations of the symbol assignments result + in the same code for the counting, as do permutations of the assignments of + the bit values to the codes (i.e. only canonical codes are counted). + + We build a code from shorter to longer lengths, determining how many symbols + are coded at each length. At each step, we have how many symbols remain to + be coded, what the last code length used was, and how many bit patterns of + that length remain unused. Then we add one to the code length and double the + number of unused patterns to graduate to the next code length. We then + assign all portions of the remaining symbols to that code length that + preserve the properties of a correct and eventually complete code. Those + properties are: we cannot use more bit patterns than are available; and when + all the symbols are used, there are exactly zero possible bit patterns left + unused. + + The inflate Huffman decoding algorithm uses two-level lookup tables for + speed. There is a single first-level table to decode codes up to root bits + in length (root == 9 for literal/length codes and root == 6 for distance + codes, in the current inflate implementation). The base table has 1 << root + entries and is indexed by the next root bits of input. Codes shorter than + root bits have replicated table entries, so that the correct entry is + pointed to regardless of the bits that follow the short code. If the code is + longer than root bits, then the table entry points to a second-level table. + The size of that table is determined by the longest code with that root-bit + prefix. If that longest code has length len, then the table has size 1 << + (len - root), to index the remaining bits in that set of codes. Each + subsequent root-bit prefix then has its own sub-table. The total number of + table entries required by the code is calculated incrementally as the number + of codes at each bit length is populated. When all of the codes are shorter + than root bits, then root is reduced to the longest code length, resulting + in a single, smaller, one-level table. + + The inflate algorithm also provides for small values of root (relative to + the log2 of the number of symbols), where the shortest code has more bits + than root. In that case, root is increased to the length of the shortest + code. This program, by design, does not handle that case, so it is verified + that the number of symbols is less than 1 << (root + 1). + + In order to speed up the examination (by about ten orders of magnitude for + the default arguments), the intermediate states in the build-up of a code + are remembered and previously visited branches are pruned. The memory + required for this will increase rapidly with the total number of symbols and + the maximum code length in bits. However this is a very small price to pay + for the vast speedup. + + First, all of the possible prefix codes are counted, and reachable + intermediate states are noted by a non-zero count in a saved-results array. + Second, the intermediate states that lead to (root + 1) bit or longer codes + are used to look at all sub-codes from those junctures for their inflate + memory usage. (The amount of memory used is not affected by the number of + codes of root bits or less in length.) Third, the visited states in the + construction of those sub-codes and the associated calculation of the table + size is recalled in order to avoid recalculating from the same juncture. + Beginning the code examination at (root + 1) bit codes, which is enabled by + identifying the reachable nodes, accounts for about six of the orders of + magnitude of improvement for the default arguments. About another four + orders of magnitude come from not revisiting previous states. Out of + approximately 2x10^16 possible prefix codes, only about 2x10^6 sub-codes + need to be examined to cover all of the possible table memory usage cases + for the default arguments of 286 symbols limited to 15-bit codes. + + Note that the uintmax_t type is used for counting. It is quite easy to + exceed the capacity of an eight-byte integer with a large number of symbols + and a large maximum code length, so multiple-precision arithmetic would need + to replace the integer arithmetic in that case. This program will abort if + an overflow occurs. The big_t type identifies where the counting takes + place. + + The uintmax_t type is also used for calculating the number of possible codes + remaining at the maximum length. This limits the maximum code length to the + number of bits in a long long minus the number of bits needed to represent + the symbols in a flat code. The code_t type identifies where the bit-pattern + counting takes place. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <stdint.h> +#include <assert.h> + +#define local static + +// Special data types. +typedef uintmax_t big_t; // type for code counting +#define PRIbig "ju" // printf format for big_t +typedef uintmax_t code_t; // type for bit pattern counting +struct tab { // type for been-here check + size_t len; // allocated length of bit vector in octets + char *vec; // allocated bit vector +}; + +/* The array for saving results, num[], is indexed with this triplet: + + syms: number of symbols remaining to code + left: number of available bit patterns at length len + len: number of bits in the codes currently being assigned + + Those indices are constrained thusly when saving results: + + syms: 3..totsym (totsym == total symbols to code) + left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6) + len: 1..max - 1 (max == maximum code length in bits) + + syms == 2 is not saved since that immediately leads to a single code. left + must be even, since it represents the number of available bit patterns at + the current length, which is double the number at the previous length. left + ends at syms-1 since left == syms immediately results in a single code. + (left > sym is not allowed since that would result in an incomplete code.) + len is less than max, since the code completes immediately when len == max. + + The offset into the array is calculated for the three indices with the first + one (syms) being outermost, and the last one (len) being innermost. We build + the array with length max-1 lists for the len index, with syms-3 of those + for each symbol. There are totsym-2 of those, with each one varying in + length as a function of sym. See the calculation of index in map() for the + index, and the calculation of size in main() for the size of the array. + + For the deflate example of 286 symbols limited to 15-bit codes, the array + has 284,284 entries, taking up 2.17 MB for an 8-byte big_t. More than half + of the space allocated for saved results is actually used -- not all + possible triplets are reached in the generation of valid prefix codes. + */ + +/* The array for tracking visited states, done[], is itself indexed identically + to the num[] array as described above for the (syms, left, len) triplet. + Each element in the array is further indexed by the (mem, rem) doublet, + where mem is the amount of inflate table space used so far, and rem is the + remaining unused entries in the current inflate sub-table. Each indexed + element is simply one bit indicating whether the state has been visited or + not. Since the ranges for mem and rem are not known a priori, each bit + vector is of a variable size, and grows as needed to accommodate the visited + states. mem and rem are used to calculate a single index in a triangular + array. Since the range of mem is expected in the default case to be about + ten times larger than the range of rem, the array is skewed to reduce the + memory usage, with eight times the range for mem than for rem. See the + calculations for offset and bit in been_here() for the details. + + For the deflate example of 286 symbols limited to 15-bit codes, the bit + vectors grow to total 5.5 MB, in addition to the 4.3 MB done array itself. + */ + +// Type for a variable-length, allocated string. +typedef struct { + char *str; // pointer to allocated string + size_t size; // size of allocation + size_t len; // length of string, not including terminating zero +} string_t; + +// Clear a string_t. +local void string_clear(string_t *s) { + s->str[0] = 0; + s->len = 0; +} + +// Initialize a string_t. +local void string_init(string_t *s) { + s->size = 16; + s->str = malloc(s->size); + assert(s->str != NULL && "out of memory"); + string_clear(s); +} + +// Release the allocation of a string_t. +local void string_free(string_t *s) { + free(s->str); + s->str = NULL; + s->size = 0; + s->len = 0; +} + +// Save the results of printf with fmt and the subsequent argument list to s. +// Each call appends to s. The allocated space for s is increased as needed. +local void string_printf(string_t *s, char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + size_t len = s->len; + int ret = vsnprintf(s->str + len, s->size - len, fmt, ap); + assert(ret >= 0 && "out of memory"); + s->len += ret; + if (s->size < s->len + 1) { + do { + s->size <<= 1; + assert(s->size != 0 && "overflow"); + } while (s->size < s->len + 1); + s->str = realloc(s->str, s->size); + assert(s->str != NULL && "out of memory"); + vsnprintf(s->str + len, s->size - len, fmt, ap); + } + va_end(ap); +} + +// Globals to avoid propagating constants or constant pointers recursively. +struct { + int max; // maximum allowed bit length for the codes + int root; // size of base code table in bits + int large; // largest code table so far + size_t size; // number of elements in num and done + big_t tot; // total number of codes with maximum tables size + string_t out; // display of subcodes for maximum tables size + int *code; // number of symbols assigned to each bit length + big_t *num; // saved results array for code counting + struct tab *done; // states already evaluated array +} g; + +// Index function for num[] and done[]. +local inline size_t map(int syms, int left, int len) { + return ((size_t)((syms - 1) >> 1) * ((syms - 2) >> 1) + + (left >> 1) - 1) * (g.max - 1) + + len - 1; +} + +// Free allocated space in globals. +local void cleanup(void) { + if (g.done != NULL) { + for (size_t n = 0; n < g.size; n++) + if (g.done[n].len) + free(g.done[n].vec); + g.size = 0; + free(g.done); g.done = NULL; + } + free(g.num); g.num = NULL; + free(g.code); g.code = NULL; + string_free(&g.out); +} + +// Return the number of possible prefix codes using bit patterns of lengths len +// through max inclusive, coding syms symbols, with left bit patterns of length +// len unused -- return -1 if there is an overflow in the counting. Keep a +// record of previous results in num to prevent repeating the same calculation. +local big_t count(int syms, int left, int len) { + // see if only one possible code + if (syms == left) + return 1; + + // note and verify the expected state + assert(syms > left && left > 0 && len < g.max); + + // see if we've done this one already + size_t index = map(syms, left, len); + big_t got = g.num[index]; + if (got) + return got; // we have -- return the saved result + + // we need to use at least this many bit patterns so that the code won't be + // incomplete at the next length (more bit patterns than symbols) + int least = (left << 1) - syms; + if (least < 0) + least = 0; + + // we can use at most this many bit patterns, lest there not be enough + // available for the remaining symbols at the maximum length (if there were + // no limit to the code length, this would become: most = left - 1) + int most = (((code_t)left << (g.max - len)) - syms) / + (((code_t)1 << (g.max - len)) - 1); + + // count all possible codes from this juncture and add them up + big_t sum = 0; + for (int use = least; use <= most; use++) { + got = count(syms - use, (left - use) << 1, len + 1); + sum += got; + if (got == (big_t)-1 || sum < got) // overflow + return (big_t)-1; + } + + // verify that all recursive calls are productive + assert(sum != 0); + + // save the result and return it + g.num[index] = sum; + return sum; +} + +// Return true if we've been here before, set to true if not. Set a bit in a +// bit vector to indicate visiting this state. Each (syms,len,left) state has a +// variable size bit vector indexed by (mem,rem). The bit vector is lengthened +// as needed to allow setting the (mem,rem) bit. +local int been_here(int syms, int left, int len, int mem, int rem) { + // point to vector for (syms,left,len), bit in vector for (mem,rem) + size_t index = map(syms, left, len); + mem -= 1 << g.root; // mem always includes the root table + mem >>= 1; // mem and rem are always even + rem >>= 1; + size_t offset = (mem >> 3) + rem; + offset = ((offset * (offset + 1)) >> 1) + rem; + int bit = 1 << (mem & 7); + + // see if we've been here + size_t length = g.done[index].len; + if (offset < length && (g.done[index].vec[offset] & bit) != 0) + return 1; // done this! + + // we haven't been here before -- set the bit to show we have now + + // see if we need to lengthen the vector in order to set the bit + if (length <= offset) { + // if we have one already, enlarge it, zero out the appended space + char *vector; + if (length) { + do { + length <<= 1; + } while (length <= offset); + vector = realloc(g.done[index].vec, length); + assert(vector != NULL && "out of memory"); + memset(vector + g.done[index].len, 0, length - g.done[index].len); + } + + // otherwise we need to make a new vector and zero it out + else { + length = 16; + while (length <= offset) + length <<= 1; + vector = calloc(length, 1); + assert(vector != NULL && "out of memory"); + } + + // install the new vector + g.done[index].len = length; + g.done[index].vec = vector; + } + + // set the bit + g.done[index].vec[offset] |= bit; + return 0; +} + +// Examine all possible codes from the given node (syms, len, left). Compute +// the amount of memory required to build inflate's decoding tables, where the +// number of code structures used so far is mem, and the number remaining in +// the current sub-table is rem. +local void examine(int syms, int left, int len, int mem, int rem) { + // see if we have a complete code + if (syms == left) { + // set the last code entry + g.code[len] = left; + + // complete computation of memory used by this code + while (rem < left) { + left -= rem; + rem = 1 << (len - g.root); + mem += rem; + } + assert(rem == left); + + // if this is at the maximum, show the sub-code + if (mem >= g.large) { + // if this is a new maximum, update the maximum and clear out the + // printed sub-codes from the previous maximum + if (mem > g.large) { + g.large = mem; + string_clear(&g.out); + } + + // compute the starting state for this sub-code + syms = 0; + left = 1 << g.max; + for (int bits = g.max; bits > g.root; bits--) { + syms += g.code[bits]; + left -= g.code[bits]; + assert((left & 1) == 0); + left >>= 1; + } + + // print the starting state and the resulting sub-code to g.out + string_printf(&g.out, "<%u, %u, %u>:", + syms, g.root + 1, ((1 << g.root) - left) << 1); + for (int bits = g.root + 1; bits <= g.max; bits++) + if (g.code[bits]) + string_printf(&g.out, " %d[%d]", g.code[bits], bits); + string_printf(&g.out, "\n"); + } + + // remove entries as we drop back down in the recursion + g.code[len] = 0; + return; + } + + // prune the tree if we can + if (been_here(syms, left, len, mem, rem)) + return; + + // we need to use at least this many bit patterns so that the code won't be + // incomplete at the next length (more bit patterns than symbols) + int least = (left << 1) - syms; + if (least < 0) + least = 0; + + // we can use at most this many bit patterns, lest there not be enough + // available for the remaining symbols at the maximum length (if there were + // no limit to the code length, this would become: most = left - 1) + int most = (((code_t)left << (g.max - len)) - syms) / + (((code_t)1 << (g.max - len)) - 1); + + // occupy least table spaces, creating new sub-tables as needed + int use = least; + while (rem < use) { + use -= rem; + rem = 1 << (len - g.root); + mem += rem; + } + rem -= use; + + // examine codes from here, updating table space as we go + for (use = least; use <= most; use++) { + g.code[len] = use; + examine(syms - use, (left - use) << 1, len + 1, + mem + (rem ? 1 << (len - g.root) : 0), rem << 1); + if (rem == 0) { + rem = 1 << (len - g.root); + mem += rem; + } + rem--; + } + + // remove entries as we drop back down in the recursion + g.code[len] = 0; +} + +// Look at all sub-codes starting with root + 1 bits. Look at only the valid +// intermediate code states (syms, left, len). For each completed code, +// calculate the amount of memory required by inflate to build the decoding +// tables. Find the maximum amount of memory required and show the codes that +// require that maximum. +local void enough(int syms) { + // clear code + for (int n = 0; n <= g.max; n++) + g.code[n] = 0; + + // look at all (root + 1) bit and longer codes + string_clear(&g.out); // empty saved results + g.large = 1 << g.root; // base table + if (g.root < g.max) // otherwise, there's only a base table + for (int n = 3; n <= syms; n++) + for (int left = 2; left < n; left += 2) { + // look at all reachable (root + 1) bit nodes, and the + // resulting codes (complete at root + 2 or more) + size_t index = map(n, left, g.root + 1); + if (g.root + 1 < g.max && g.num[index]) // reachable node + examine(n, left, g.root + 1, 1 << g.root, 0); + + // also look at root bit codes with completions at root + 1 + // bits (not saved in num, since complete), just in case + if (g.num[index - 1] && n <= left << 1) + examine((n - left) << 1, (n - left) << 1, g.root + 1, + 1 << g.root, 0); + } + + // done + printf("maximum of %d table entries for root = %d\n", g.large, g.root); + fputs(g.out.str, stdout); +} + +// Examine and show the total number of possible prefix codes for a given +// maximum number of symbols, initial root table size, and maximum code length +// in bits -- those are the command arguments in that order. The default values +// are 286, 9, and 15 respectively, for the deflate literal/length code. The +// possible codes are counted for each number of coded symbols from two to the +// maximum. The counts for each of those and the total number of codes are +// shown. The maximum number of inflate table entries is then calculated across +// all possible codes. Each new maximum number of table entries and the +// associated sub-code (starting at root + 1 == 10 bits) is shown. +// +// To count and examine prefix codes that are not length-limited, provide a +// maximum length equal to the number of symbols minus one. +// +// For the deflate literal/length code, use "enough". For the deflate distance +// code, use "enough 30 6". +int main(int argc, char **argv) { + // set up globals for cleanup() + g.code = NULL; + g.num = NULL; + g.done = NULL; + string_init(&g.out); + + // get arguments -- default to the deflate literal/length code + int syms = 286; + g.root = 9; + g.max = 15; + if (argc > 1) { + syms = atoi(argv[1]); + if (argc > 2) { + g.root = atoi(argv[2]); + if (argc > 3) + g.max = atoi(argv[3]); + } + } + if (argc > 4 || syms < 2 || g.root < 1 || g.max < 1) { + fputs("invalid arguments, need: [sym >= 2 [root >= 1 [max >= 1]]]\n", + stderr); + return 1; + } + + // if not restricting the code length, the longest is syms - 1 + if (g.max > syms - 1) + g.max = syms - 1; + + // determine the number of bits in a code_t + int bits = 0; + for (code_t word = 1; word; word <<= 1) + bits++; + + // make sure that the calculation of most will not overflow + if (g.max > bits || (code_t)(syms - 2) >= ((code_t)-1 >> (g.max - 1))) { + fputs("abort: code length too long for internal types\n", stderr); + return 1; + } + + // reject impossible code requests + if ((code_t)(syms - 1) > ((code_t)1 << g.max) - 1) { + fprintf(stderr, "%d symbols cannot be coded in %d bits\n", + syms, g.max); + return 1; + } + + // allocate code vector + g.code = calloc(g.max + 1, sizeof(int)); + assert(g.code != NULL && "out of memory"); + + // determine size of saved results array, checking for overflows, + // allocate and clear the array (set all to zero with calloc()) + if (syms == 2) // iff max == 1 + g.num = NULL; // won't be saving any results + else { + g.size = syms >> 1; + int n = (syms - 1) >> 1; + assert(g.size <= (size_t)-1 / n && "overflow"); + g.size *= n; + n = g.max - 1; + assert(g.size <= (size_t)-1 / n && "overflow"); + g.size *= n; + g.num = calloc(g.size, sizeof(big_t)); + assert(g.num != NULL && "out of memory"); + } + + // count possible codes for all numbers of symbols, add up counts + big_t sum = 0; + for (int n = 2; n <= syms; n++) { + big_t got = count(n, 2, 1); + sum += got; + assert(got != (big_t)-1 && sum >= got && "overflow"); + } + printf("%"PRIbig" total codes for 2 to %d symbols", sum, syms); + if (g.max < syms - 1) + printf(" (%d-bit length limit)\n", g.max); + else + puts(" (no length limit)"); + + // allocate and clear done array for been_here() + if (syms == 2) + g.done = NULL; + else { + g.done = calloc(g.size, sizeof(struct tab)); + assert(g.done != NULL && "out of memory"); + } + + // find and show maximum inflate table usage + if (g.root > g.max) // reduce root to max length + g.root = g.max; + if ((code_t)syms < ((code_t)1 << (g.root + 1))) + enough(syms); + else + fputs("cannot handle minimum code lengths > root", stderr); + + // done + cleanup(); + return 0; +} diff --git a/deps/zlib-1.3.1/examples/fitblk.c b/deps/zlib-1.3.1/examples/fitblk.c new file mode 100644 index 0000000..723dc00 --- /dev/null +++ b/deps/zlib-1.3.1/examples/fitblk.c @@ -0,0 +1,233 @@ +/* fitblk.c: example of fitting compressed output to a specified size + Not copyrighted -- provided to the public domain + Version 1.1 25 November 2004 Mark Adler */ + +/* Version history: + 1.0 24 Nov 2004 First version + 1.1 25 Nov 2004 Change deflateInit2() to deflateInit() + Use fixed-size, stack-allocated raw buffers + Simplify code moving compression to subroutines + Use assert() for internal errors + Add detailed description of approach + */ + +/* Approach to just fitting a requested compressed size: + + fitblk performs three compression passes on a portion of the input + data in order to determine how much of that input will compress to + nearly the requested output block size. The first pass generates + enough deflate blocks to produce output to fill the requested + output size plus a specified excess amount (see the EXCESS define + below). The last deflate block may go quite a bit past that, but + is discarded. The second pass decompresses and recompresses just + the compressed data that fit in the requested plus excess sized + buffer. The deflate process is terminated after that amount of + input, which is less than the amount consumed on the first pass. + The last deflate block of the result will be of a comparable size + to the final product, so that the header for that deflate block and + the compression ratio for that block will be about the same as in + the final product. The third compression pass decompresses the + result of the second step, but only the compressed data up to the + requested size minus an amount to allow the compressed stream to + complete (see the MARGIN define below). That will result in a + final compressed stream whose length is less than or equal to the + requested size. Assuming sufficient input and a requested size + greater than a few hundred bytes, the shortfall will typically be + less than ten bytes. + + If the input is short enough that the first compression completes + before filling the requested output size, then that compressed + stream is return with no recompression. + + EXCESS is chosen to be just greater than the shortfall seen in a + two pass approach similar to the above. That shortfall is due to + the last deflate block compressing more efficiently with a smaller + header on the second pass. EXCESS is set to be large enough so + that there is enough uncompressed data for the second pass to fill + out the requested size, and small enough so that the final deflate + block of the second pass will be close in size to the final deflate + block of the third and final pass. MARGIN is chosen to be just + large enough to assure that the final compression has enough room + to complete in all cases. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include "zlib.h" + +#define local static + +/* print nastygram and leave */ +local void quit(char *why) +{ + fprintf(stderr, "fitblk abort: %s\n", why); + exit(1); +} + +#define RAWLEN 4096 /* intermediate uncompressed buffer size */ + +/* compress from file to def until provided buffer is full or end of + input reached; return last deflate() return value, or Z_ERRNO if + there was read error on the file */ +local int partcompress(FILE *in, z_streamp def) +{ + int ret, flush; + unsigned char raw[RAWLEN]; + + flush = Z_NO_FLUSH; + do { + def->avail_in = fread(raw, 1, RAWLEN, in); + if (ferror(in)) + return Z_ERRNO; + def->next_in = raw; + if (feof(in)) + flush = Z_FINISH; + ret = deflate(def, flush); + assert(ret != Z_STREAM_ERROR); + } while (def->avail_out != 0 && flush == Z_NO_FLUSH); + return ret; +} + +/* recompress from inf's input to def's output; the input for inf and + the output for def are set in those structures before calling; + return last deflate() return value, or Z_MEM_ERROR if inflate() + was not able to allocate enough memory when it needed to */ +local int recompress(z_streamp inf, z_streamp def) +{ + int ret, flush; + unsigned char raw[RAWLEN]; + + flush = Z_NO_FLUSH; + do { + /* decompress */ + inf->avail_out = RAWLEN; + inf->next_out = raw; + ret = inflate(inf, Z_NO_FLUSH); + assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR && + ret != Z_NEED_DICT); + if (ret == Z_MEM_ERROR) + return ret; + + /* compress what was decompressed until done or no room */ + def->avail_in = RAWLEN - inf->avail_out; + def->next_in = raw; + if (inf->avail_out != 0) + flush = Z_FINISH; + ret = deflate(def, flush); + assert(ret != Z_STREAM_ERROR); + } while (ret != Z_STREAM_END && def->avail_out != 0); + return ret; +} + +#define EXCESS 256 /* empirically determined stream overage */ +#define MARGIN 8 /* amount to back off for completion */ + +/* compress from stdin to fixed-size block on stdout */ +int main(int argc, char **argv) +{ + int ret; /* return code */ + unsigned size; /* requested fixed output block size */ + unsigned have; /* bytes written by deflate() call */ + unsigned char *blk; /* intermediate and final stream */ + unsigned char *tmp; /* close to desired size stream */ + z_stream def, inf; /* zlib deflate and inflate states */ + + /* get requested output size */ + if (argc != 2) + quit("need one argument: size of output block"); + ret = strtol(argv[1], argv + 1, 10); + if (argv[1][0] != 0) + quit("argument must be a number"); + if (ret < 8) /* 8 is minimum zlib stream size */ + quit("need positive size of 8 or greater"); + size = (unsigned)ret; + + /* allocate memory for buffers and compression engine */ + blk = malloc(size + EXCESS); + def.zalloc = Z_NULL; + def.zfree = Z_NULL; + def.opaque = Z_NULL; + ret = deflateInit(&def, Z_DEFAULT_COMPRESSION); + if (ret != Z_OK || blk == NULL) + quit("out of memory"); + + /* compress from stdin until output full, or no more input */ + def.avail_out = size + EXCESS; + def.next_out = blk; + ret = partcompress(stdin, &def); + if (ret == Z_ERRNO) + quit("error reading input"); + + /* if it all fit, then size was undersubscribed -- done! */ + if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { + /* write block to stdout */ + have = size + EXCESS - def.avail_out; + if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) + quit("error writing output"); + + /* clean up and print results to stderr */ + ret = deflateEnd(&def); + assert(ret != Z_STREAM_ERROR); + free(blk); + fprintf(stderr, + "%u bytes unused out of %u requested (all input)\n", + size - have, size); + return 0; + } + + /* it didn't all fit -- set up for recompression */ + inf.zalloc = Z_NULL; + inf.zfree = Z_NULL; + inf.opaque = Z_NULL; + inf.avail_in = 0; + inf.next_in = Z_NULL; + ret = inflateInit(&inf); + tmp = malloc(size + EXCESS); + if (ret != Z_OK || tmp == NULL) + quit("out of memory"); + ret = deflateReset(&def); + assert(ret != Z_STREAM_ERROR); + + /* do first recompression close to the right amount */ + inf.avail_in = size + EXCESS; + inf.next_in = blk; + def.avail_out = size + EXCESS; + def.next_out = tmp; + ret = recompress(&inf, &def); + if (ret == Z_MEM_ERROR) + quit("out of memory"); + + /* set up for next recompression */ + ret = inflateReset(&inf); + assert(ret != Z_STREAM_ERROR); + ret = deflateReset(&def); + assert(ret != Z_STREAM_ERROR); + + /* do second and final recompression (third compression) */ + inf.avail_in = size - MARGIN; /* assure stream will complete */ + inf.next_in = tmp; + def.avail_out = size; + def.next_out = blk; + ret = recompress(&inf, &def); + if (ret == Z_MEM_ERROR) + quit("out of memory"); + assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */ + + /* done -- write block to stdout */ + have = size - def.avail_out; + if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) + quit("error writing output"); + + /* clean up and print results to stderr */ + free(tmp); + ret = inflateEnd(&inf); + assert(ret != Z_STREAM_ERROR); + ret = deflateEnd(&def); + assert(ret != Z_STREAM_ERROR); + free(blk); + fprintf(stderr, + "%u bytes unused out of %u requested (%lu input)\n", + size - have, size, def.total_in); + return 0; +} diff --git a/deps/zlib-1.3.1/examples/gun.c b/deps/zlib-1.3.1/examples/gun.c new file mode 100644 index 0000000..bea5497 --- /dev/null +++ b/deps/zlib-1.3.1/examples/gun.c @@ -0,0 +1,702 @@ +/* gun.c -- simple gunzip to give an example of the use of inflateBack() + * Copyright (C) 2003, 2005, 2008, 2010, 2012 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + Version 1.7 12 August 2012 Mark Adler */ + +/* Version history: + 1.0 16 Feb 2003 First version for testing of inflateBack() + 1.1 21 Feb 2005 Decompress concatenated gzip streams + Remove use of "this" variable (C++ keyword) + Fix return value for in() + Improve allocation failure checking + Add typecasting for void * structures + Add -h option for command version and usage + Add a bunch of comments + 1.2 20 Mar 2005 Add Unix compress (LZW) decompression + Copy file attributes from input file to output file + 1.3 12 Jun 2005 Add casts for error messages [Oberhumer] + 1.4 8 Dec 2006 LZW decompression speed improvements + 1.5 9 Feb 2008 Avoid warning in latest version of gcc + 1.6 17 Jan 2010 Avoid signed/unsigned comparison warnings + 1.7 12 Aug 2012 Update for z_const usage in zlib 1.2.8 + */ + +/* + gun [ -t ] [ name ... ] + + decompresses the data in the named gzip files. If no arguments are given, + gun will decompress from stdin to stdout. The names must end in .gz, -gz, + .z, -z, _z, or .Z. The uncompressed data will be written to a file name + with the suffix stripped. On success, the original file is deleted. On + failure, the output file is deleted. For most failures, the command will + continue to process the remaining names on the command line. A memory + allocation failure will abort the command. If -t is specified, then the + listed files or stdin will be tested as gzip files for integrity (without + checking for a proper suffix), no output will be written, and no files + will be deleted. + + Like gzip, gun allows concatenated gzip streams and will decompress them, + writing all of the uncompressed data to the output. Unlike gzip, gun allows + an empty file on input, and will produce no error writing an empty output + file. + + gun will also decompress files made by Unix compress, which uses LZW + compression. These files are automatically detected by virtue of their + magic header bytes. Since the end of Unix compress stream is marked by the + end-of-file, they cannot be concatenated. If a Unix compress stream is + encountered in an input file, it is the last stream in that file. + + Like gunzip and uncompress, the file attributes of the original compressed + file are maintained in the final uncompressed file, to the extent that the + user permissions allow it. + + On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version + 1.2.4) is on the same file, when gun is linked with zlib 1.2.2. Also the + LZW decompression provided by gun is about twice as fast as the standard + Unix uncompress command. + */ + +/* external functions and related types and constants */ +#include <stdio.h> /* fprintf() */ +#include <stdlib.h> /* malloc(), free() */ +#include <string.h> /* strerror(), strcmp(), strlen(), memcpy() */ +#include <errno.h> /* errno */ +#include <fcntl.h> /* open() */ +#include <unistd.h> /* read(), write(), close(), chown(), unlink() */ +#include <sys/types.h> +#include <sys/stat.h> /* stat(), chmod() */ +#include <utime.h> /* utime() */ +#include "zlib.h" /* inflateBackInit(), inflateBack(), */ + /* inflateBackEnd(), crc32() */ + +/* function declaration */ +#define local static + +/* buffer constants */ +#define SIZE 32768U /* input and output buffer sizes */ +#define PIECE 16384 /* limits i/o chunks for 16-bit int case */ + +/* structure for infback() to pass to input function in() -- it maintains the + input file and a buffer of size SIZE */ +struct ind { + int infile; + unsigned char *inbuf; +}; + +/* Load input buffer, assumed to be empty, and return bytes loaded and a + pointer to them. read() is called until the buffer is full, or until it + returns end-of-file or error. Return 0 on error. */ +local unsigned in(void *in_desc, z_const unsigned char **buf) +{ + int ret; + unsigned len; + unsigned char *next; + struct ind *me = (struct ind *)in_desc; + + next = me->inbuf; + *buf = next; + len = 0; + do { + ret = PIECE; + if ((unsigned)ret > SIZE - len) + ret = (int)(SIZE - len); + ret = (int)read(me->infile, next, ret); + if (ret == -1) { + len = 0; + break; + } + next += ret; + len += ret; + } while (ret != 0 && len < SIZE); + return len; +} + +/* structure for infback() to pass to output function out() -- it maintains the + output file, a running CRC-32 check on the output and the total number of + bytes output, both for checking against the gzip trailer. (The length in + the gzip trailer is stored modulo 2^32, so it's ok if a long is 32 bits and + the output is greater than 4 GB.) */ +struct outd { + int outfile; + int check; /* true if checking crc and total */ + unsigned long crc; + unsigned long total; +}; + +/* Write output buffer and update the CRC-32 and total bytes written. write() + is called until all of the output is written or an error is encountered. + On success out() returns 0. For a write failure, out() returns 1. If the + output file descriptor is -1, then nothing is written. + */ +local int out(void *out_desc, unsigned char *buf, unsigned len) +{ + int ret; + struct outd *me = (struct outd *)out_desc; + + if (me->check) { + me->crc = crc32(me->crc, buf, len); + me->total += len; + } + if (me->outfile != -1) + do { + ret = PIECE; + if ((unsigned)ret > len) + ret = (int)len; + ret = (int)write(me->outfile, buf, ret); + if (ret == -1) + return 1; + buf += ret; + len -= ret; + } while (len != 0); + return 0; +} + +/* next input byte macro for use inside lunpipe() and gunpipe() */ +#define NEXT() (have ? 0 : (have = in(indp, &next)), \ + last = have ? (have--, (int)(*next++)) : -1) + +/* memory for gunpipe() and lunpipe() -- + the first 256 entries of prefix[] and suffix[] are never used, could + have offset the index, but it's faster to waste the memory */ +unsigned char inbuf[SIZE]; /* input buffer */ +unsigned char outbuf[SIZE]; /* output buffer */ +unsigned short prefix[65536]; /* index to LZW prefix string */ +unsigned char suffix[65536]; /* one-character LZW suffix */ +unsigned char match[65280 + 2]; /* buffer for reversed match or gzip + 32K sliding window */ + +/* throw out what's left in the current bits byte buffer (this is a vestigial + aspect of the compressed data format derived from an implementation that + made use of a special VAX machine instruction!) */ +#define FLUSHCODE() \ + do { \ + left = 0; \ + rem = 0; \ + if (chunk > have) { \ + chunk -= have; \ + have = 0; \ + if (NEXT() == -1) \ + break; \ + chunk--; \ + if (chunk > have) { \ + chunk = have = 0; \ + break; \ + } \ + } \ + have -= chunk; \ + next += chunk; \ + chunk = 0; \ + } while (0) + +/* Decompress a compress (LZW) file from indp to outfile. The compress magic + header (two bytes) has already been read and verified. There are have bytes + of buffered input at next. strm is used for passing error information back + to gunpipe(). + + lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of + file, read error, or write error (a write error indicated by strm->next_in + not equal to Z_NULL), or Z_DATA_ERROR for invalid input. + */ +local int lunpipe(unsigned have, z_const unsigned char *next, struct ind *indp, + int outfile, z_stream *strm) +{ + int last; /* last byte read by NEXT(), or -1 if EOF */ + unsigned chunk; /* bytes left in current chunk */ + int left; /* bits left in rem */ + unsigned rem; /* unused bits from input */ + int bits; /* current bits per code */ + unsigned code; /* code, table traversal index */ + unsigned mask; /* mask for current bits codes */ + int max; /* maximum bits per code for this stream */ + unsigned flags; /* compress flags, then block compress flag */ + unsigned end; /* last valid entry in prefix/suffix tables */ + unsigned temp; /* current code */ + unsigned prev; /* previous code */ + unsigned final; /* last character written for previous code */ + unsigned stack; /* next position for reversed string */ + unsigned outcnt; /* bytes in output buffer */ + struct outd outd; /* output structure */ + unsigned char *p; + + /* set up output */ + outd.outfile = outfile; + outd.check = 0; + + /* process remainder of compress header -- a flags byte */ + flags = NEXT(); + if (last == -1) + return Z_BUF_ERROR; + if (flags & 0x60) { + strm->msg = (char *)"unknown lzw flags set"; + return Z_DATA_ERROR; + } + max = flags & 0x1f; + if (max < 9 || max > 16) { + strm->msg = (char *)"lzw bits out of range"; + return Z_DATA_ERROR; + } + if (max == 9) /* 9 doesn't really mean 9 */ + max = 10; + flags &= 0x80; /* true if block compress */ + + /* clear table */ + bits = 9; + mask = 0x1ff; + end = flags ? 256 : 255; + + /* set up: get first 9-bit code, which is the first decompressed byte, but + don't create a table entry until the next code */ + if (NEXT() == -1) /* no compressed data is ok */ + return Z_OK; + final = prev = (unsigned)last; /* low 8 bits of code */ + if (NEXT() == -1) /* missing a bit */ + return Z_BUF_ERROR; + if (last & 1) { /* code must be < 256 */ + strm->msg = (char *)"invalid lzw code"; + return Z_DATA_ERROR; + } + rem = (unsigned)last >> 1; /* remaining 7 bits */ + left = 7; + chunk = bits - 2; /* 7 bytes left in this chunk */ + outbuf[0] = (unsigned char)final; /* write first decompressed byte */ + outcnt = 1; + + /* decode codes */ + stack = 0; + for (;;) { + /* if the table will be full after this, increment the code size */ + if (end >= mask && bits < max) { + FLUSHCODE(); + bits++; + mask <<= 1; + mask++; + } + + /* get a code of length bits */ + if (chunk == 0) /* decrement chunk modulo bits */ + chunk = bits; + code = rem; /* low bits of code */ + if (NEXT() == -1) { /* EOF is end of compressed data */ + /* write remaining buffered output */ + if (outcnt && out(&outd, outbuf, outcnt)) { + strm->next_in = outbuf; /* signal write error */ + return Z_BUF_ERROR; + } + return Z_OK; + } + code += (unsigned)last << left; /* middle (or high) bits of code */ + left += 8; + chunk--; + if (bits > left) { /* need more bits */ + if (NEXT() == -1) /* can't end in middle of code */ + return Z_BUF_ERROR; + code += (unsigned)last << left; /* high bits of code */ + left += 8; + chunk--; + } + code &= mask; /* mask to current code length */ + left -= bits; /* number of unused bits */ + rem = (unsigned)last >> (8 - left); /* unused bits from last byte */ + + /* process clear code (256) */ + if (code == 256 && flags) { + FLUSHCODE(); + bits = 9; /* initialize bits and mask */ + mask = 0x1ff; + end = 255; /* empty table */ + continue; /* get next code */ + } + + /* special code to reuse last match */ + temp = code; /* save the current code */ + if (code > end) { + /* Be picky on the allowed code here, and make sure that the code + we drop through (prev) will be a valid index so that random + input does not cause an exception. The code != end + 1 check is + empirically derived, and not checked in the original uncompress + code. If this ever causes a problem, that check could be safely + removed. Leaving this check in greatly improves gun's ability + to detect random or corrupted input after a compress header. + In any case, the prev > end check must be retained. */ + if (code != end + 1 || prev > end) { + strm->msg = (char *)"invalid lzw code"; + return Z_DATA_ERROR; + } + match[stack++] = (unsigned char)final; + code = prev; + } + + /* walk through linked list to generate output in reverse order */ + p = match + stack; + while (code >= 256) { + *p++ = suffix[code]; + code = prefix[code]; + } + stack = p - match; + match[stack++] = (unsigned char)code; + final = code; + + /* link new table entry */ + if (end < mask) { + end++; + prefix[end] = (unsigned short)prev; + suffix[end] = (unsigned char)final; + } + + /* set previous code for next iteration */ + prev = temp; + + /* write output in forward order */ + while (stack > SIZE - outcnt) { + while (outcnt < SIZE) + outbuf[outcnt++] = match[--stack]; + if (out(&outd, outbuf, outcnt)) { + strm->next_in = outbuf; /* signal write error */ + return Z_BUF_ERROR; + } + outcnt = 0; + } + p = match + stack; + do { + outbuf[outcnt++] = *--p; + } while (p > match); + stack = 0; + + /* loop for next code with final and prev as the last match, rem and + left provide the first 0..7 bits of the next code, end is the last + valid table entry */ + } +} + +/* Decompress a gzip file from infile to outfile. strm is assumed to have been + successfully initialized with inflateBackInit(). The input file may consist + of a series of gzip streams, in which case all of them will be decompressed + to the output file. If outfile is -1, then the gzip stream(s) integrity is + checked and nothing is written. + + The return value is a zlib error code: Z_MEM_ERROR if out of memory, + Z_DATA_ERROR if the header or the compressed data is invalid, or if the + trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends + prematurely or a write error occurs, or Z_ERRNO if junk (not a another gzip + stream) follows a valid gzip stream. + */ +local int gunpipe(z_stream *strm, int infile, int outfile) +{ + int ret, first, last; + unsigned have, flags, len; + z_const unsigned char *next = NULL; + struct ind ind, *indp; + struct outd outd; + + /* setup input buffer */ + ind.infile = infile; + ind.inbuf = inbuf; + indp = &ind; + + /* decompress concatenated gzip streams */ + have = 0; /* no input data read in yet */ + first = 1; /* looking for first gzip header */ + strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */ + for (;;) { + /* look for the two magic header bytes for a gzip stream */ + if (NEXT() == -1) { + ret = Z_OK; + break; /* empty gzip stream is ok */ + } + if (last != 31 || (NEXT() != 139 && last != 157)) { + strm->msg = (char *)"incorrect header check"; + ret = first ? Z_DATA_ERROR : Z_ERRNO; + break; /* not a gzip or compress header */ + } + first = 0; /* next non-header is junk */ + + /* process a compress (LZW) file -- can't be concatenated after this */ + if (last == 157) { + ret = lunpipe(have, next, indp, outfile, strm); + break; + } + + /* process remainder of gzip header */ + ret = Z_BUF_ERROR; + if (NEXT() != 8) { /* only deflate method allowed */ + if (last == -1) break; + strm->msg = (char *)"unknown compression method"; + ret = Z_DATA_ERROR; + break; + } + flags = NEXT(); /* header flags */ + NEXT(); /* discard mod time, xflgs, os */ + NEXT(); + NEXT(); + NEXT(); + NEXT(); + NEXT(); + if (last == -1) break; + if (flags & 0xe0) { + strm->msg = (char *)"unknown header flags set"; + ret = Z_DATA_ERROR; + break; + } + if (flags & 4) { /* extra field */ + len = NEXT(); + len += (unsigned)(NEXT()) << 8; + if (last == -1) break; + while (len > have) { + len -= have; + have = 0; + if (NEXT() == -1) break; + len--; + } + if (last == -1) break; + have -= len; + next += len; + } + if (flags & 8) /* file name */ + while (NEXT() != 0 && last != -1) + ; + if (flags & 16) /* comment */ + while (NEXT() != 0 && last != -1) + ; + if (flags & 2) { /* header crc */ + NEXT(); + NEXT(); + } + if (last == -1) break; + + /* set up output */ + outd.outfile = outfile; + outd.check = 1; + outd.crc = crc32(0L, Z_NULL, 0); + outd.total = 0; + + /* decompress data to output */ + strm->next_in = next; + strm->avail_in = have; + ret = inflateBack(strm, in, indp, out, &outd); + if (ret != Z_STREAM_END) break; + next = strm->next_in; + have = strm->avail_in; + strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */ + + /* check trailer */ + ret = Z_BUF_ERROR; + if (NEXT() != (int)(outd.crc & 0xff) || + NEXT() != (int)((outd.crc >> 8) & 0xff) || + NEXT() != (int)((outd.crc >> 16) & 0xff) || + NEXT() != (int)((outd.crc >> 24) & 0xff)) { + /* crc error */ + if (last != -1) { + strm->msg = (char *)"incorrect data check"; + ret = Z_DATA_ERROR; + } + break; + } + if (NEXT() != (int)(outd.total & 0xff) || + NEXT() != (int)((outd.total >> 8) & 0xff) || + NEXT() != (int)((outd.total >> 16) & 0xff) || + NEXT() != (int)((outd.total >> 24) & 0xff)) { + /* length error */ + if (last != -1) { + strm->msg = (char *)"incorrect length check"; + ret = Z_DATA_ERROR; + } + break; + } + + /* go back and look for another gzip stream */ + } + + /* clean up and return */ + return ret; +} + +/* Copy file attributes, from -> to, as best we can. This is best effort, so + no errors are reported. The mode bits, including suid, sgid, and the sticky + bit are copied (if allowed), the owner's user id and group id are copied + (again if allowed), and the access and modify times are copied. */ +local void copymeta(char *from, char *to) +{ + struct stat was; + struct utimbuf when; + + /* get all of from's Unix meta data, return if not a regular file */ + if (stat(from, &was) != 0 || (was.st_mode & S_IFMT) != S_IFREG) + return; + + /* set to's mode bits, ignore errors */ + (void)chmod(to, was.st_mode & 07777); + + /* copy owner's user and group, ignore errors */ + (void)chown(to, was.st_uid, was.st_gid); + + /* copy access and modify times, ignore errors */ + when.actime = was.st_atime; + when.modtime = was.st_mtime; + (void)utime(to, &when); +} + +/* Decompress the file inname to the file outnname, of if test is true, just + decompress without writing and check the gzip trailer for integrity. If + inname is NULL or an empty string, read from stdin. If outname is NULL or + an empty string, write to stdout. strm is a pre-initialized inflateBack + structure. When appropriate, copy the file attributes from inname to + outname. + + gunzip() returns 1 if there is an out-of-memory error or an unexpected + return code from gunpipe(). Otherwise it returns 0. + */ +local int gunzip(z_stream *strm, char *inname, char *outname, int test) +{ + int ret; + int infile, outfile; + + /* open files */ + if (inname == NULL || *inname == 0) { + inname = "-"; + infile = 0; /* stdin */ + } + else { + infile = open(inname, O_RDONLY, 0); + if (infile == -1) { + fprintf(stderr, "gun cannot open %s\n", inname); + return 0; + } + } + if (test) + outfile = -1; + else if (outname == NULL || *outname == 0) { + outname = "-"; + outfile = 1; /* stdout */ + } + else { + outfile = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0666); + if (outfile == -1) { + close(infile); + fprintf(stderr, "gun cannot create %s\n", outname); + return 0; + } + } + errno = 0; + + /* decompress */ + ret = gunpipe(strm, infile, outfile); + if (outfile > 2) close(outfile); + if (infile > 2) close(infile); + + /* interpret result */ + switch (ret) { + case Z_OK: + case Z_ERRNO: + if (infile > 2 && outfile > 2) { + copymeta(inname, outname); /* copy attributes */ + unlink(inname); + } + if (ret == Z_ERRNO) + fprintf(stderr, "gun warning: trailing garbage ignored in %s\n", + inname); + break; + case Z_DATA_ERROR: + if (outfile > 2) unlink(outname); + fprintf(stderr, "gun data error on %s: %s\n", inname, strm->msg); + break; + case Z_MEM_ERROR: + if (outfile > 2) unlink(outname); + fprintf(stderr, "gun out of memory error--aborting\n"); + return 1; + case Z_BUF_ERROR: + if (outfile > 2) unlink(outname); + if (strm->next_in != Z_NULL) { + fprintf(stderr, "gun write error on %s: %s\n", + outname, strerror(errno)); + } + else if (errno) { + fprintf(stderr, "gun read error on %s: %s\n", + inname, strerror(errno)); + } + else { + fprintf(stderr, "gun unexpected end of file on %s\n", + inname); + } + break; + default: + if (outfile > 2) unlink(outname); + fprintf(stderr, "gun internal error--aborting\n"); + return 1; + } + return 0; +} + +/* Process the gun command line arguments. See the command syntax near the + beginning of this source file. */ +int main(int argc, char **argv) +{ + int ret, len, test; + char *outname; + unsigned char *window; + z_stream strm; + + /* initialize inflateBack state for repeated use */ + window = match; /* reuse LZW match buffer */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + ret = inflateBackInit(&strm, 15, window); + if (ret != Z_OK) { + fprintf(stderr, "gun out of memory error--aborting\n"); + return 1; + } + + /* decompress each file to the same name with the suffix removed */ + argc--; + argv++; + test = 0; + if (argc && strcmp(*argv, "-h") == 0) { + fprintf(stderr, "gun 1.6 (17 Jan 2010)\n"); + fprintf(stderr, "Copyright (C) 2003-2010 Mark Adler\n"); + fprintf(stderr, "usage: gun [-t] [file1.gz [file2.Z ...]]\n"); + return 0; + } + if (argc && strcmp(*argv, "-t") == 0) { + test = 1; + argc--; + argv++; + } + if (argc) + do { + if (test) + outname = NULL; + else { + len = (int)strlen(*argv); + if (strcmp(*argv + len - 3, ".gz") == 0 || + strcmp(*argv + len - 3, "-gz") == 0) + len -= 3; + else if (strcmp(*argv + len - 2, ".z") == 0 || + strcmp(*argv + len - 2, "-z") == 0 || + strcmp(*argv + len - 2, "_z") == 0 || + strcmp(*argv + len - 2, ".Z") == 0) + len -= 2; + else { + fprintf(stderr, "gun error: no gz type on %s--skipping\n", + *argv); + continue; + } + outname = malloc(len + 1); + if (outname == NULL) { + fprintf(stderr, "gun out of memory error--aborting\n"); + ret = 1; + break; + } + memcpy(outname, *argv, len); + outname[len] = 0; + } + ret = gunzip(&strm, *argv, outname, test); + if (outname != NULL) free(outname); + if (ret) break; + } while (argv++, --argc); + else + ret = gunzip(&strm, NULL, NULL, test); + + /* clean up */ + inflateBackEnd(&strm); + return ret; +} diff --git a/deps/zlib-1.3.1/examples/gzappend.c b/deps/zlib-1.3.1/examples/gzappend.c new file mode 100644 index 0000000..23e93cf --- /dev/null +++ b/deps/zlib-1.3.1/examples/gzappend.c @@ -0,0 +1,504 @@ +/* gzappend -- command to append to a gzip file + + Copyright (C) 2003, 2012 Mark Adler, all rights reserved + version 1.2, 11 Oct 2012 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler@alumni.caltech.edu + */ + +/* + * Change history: + * + * 1.0 19 Oct 2003 - First version + * 1.1 4 Nov 2003 - Expand and clarify some comments and notes + * - Add version and copyright to help + * - Send help to stdout instead of stderr + * - Add some preemptive typecasts + * - Add L to constants in lseek() calls + * - Remove some debugging information in error messages + * - Use new data_type definition for zlib 1.2.1 + * - Simplify and unify file operations + * - Finish off gzip file in gztack() + * - Use deflatePrime() instead of adding empty blocks + * - Keep gzip file clean on appended file read errors + * - Use in-place rotate instead of auxiliary buffer + * (Why you ask? Because it was fun to write!) + * 1.2 11 Oct 2012 - Fix for proper z_const usage + * - Check for input buffer malloc failure + */ + +/* + gzappend takes a gzip file and appends to it, compressing files from the + command line or data from stdin. The gzip file is written to directly, to + avoid copying that file, in case it's large. Note that this results in the + unfriendly behavior that if gzappend fails, the gzip file is corrupted. + + This program was written to illustrate the use of the new Z_BLOCK option of + zlib 1.2.x's inflate() function. This option returns from inflate() at each + block boundary to facilitate locating and modifying the last block bit at + the start of the final deflate block. Also whether using Z_BLOCK or not, + another required feature of zlib 1.2.x is that inflate() now provides the + number of unused bits in the last input byte used. gzappend will not work + with versions of zlib earlier than 1.2.1. + + gzappend first decompresses the gzip file internally, discarding all but + the last 32K of uncompressed data, and noting the location of the last block + bit and the number of unused bits in the last byte of the compressed data. + The gzip trailer containing the CRC-32 and length of the uncompressed data + is verified. This trailer will be later overwritten. + + Then the last block bit is cleared by seeking back in the file and rewriting + the byte that contains it. Seeking forward, the last byte of the compressed + data is saved along with the number of unused bits to initialize deflate. + + A deflate process is initialized, using the last 32K of the uncompressed + data from the gzip file to initialize the dictionary. If the total + uncompressed data was less than 32K, then all of it is used to initialize + the dictionary. The deflate output bit buffer is also initialized with the + last bits from the original deflate stream. From here on, the data to + append is simply compressed using deflate, and written to the gzip file. + When that is complete, the new CRC-32 and uncompressed length are written + as the trailer of the gzip file. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include "zlib.h" + +#define local static +#define LGCHUNK 14 +#define CHUNK (1U << LGCHUNK) +#define DSIZE 32768U + +/* print an error message and terminate with extreme prejudice */ +local void bye(char *msg1, char *msg2) +{ + fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2); + exit(1); +} + +/* return the greatest common divisor of a and b using Euclid's algorithm, + modified to be fast when one argument much greater than the other, and + coded to avoid unnecessary swapping */ +local unsigned gcd(unsigned a, unsigned b) +{ + unsigned c; + + while (a && b) + if (a > b) { + c = b; + while (a - c >= c) + c <<= 1; + a -= c; + } + else { + c = a; + while (b - c >= c) + c <<= 1; + b -= c; + } + return a + b; +} + +/* rotate list[0..len-1] left by rot positions, in place */ +local void rotate(unsigned char *list, unsigned len, unsigned rot) +{ + unsigned char tmp; + unsigned cycles; + unsigned char *start, *last, *to, *from; + + /* normalize rot and handle degenerate cases */ + if (len < 2) return; + if (rot >= len) rot %= len; + if (rot == 0) return; + + /* pointer to last entry in list */ + last = list + (len - 1); + + /* do simple left shift by one */ + if (rot == 1) { + tmp = *list; + memmove(list, list + 1, len - 1); + *last = tmp; + return; + } + + /* do simple right shift by one */ + if (rot == len - 1) { + tmp = *last; + memmove(list + 1, list, len - 1); + *list = tmp; + return; + } + + /* otherwise do rotate as a set of cycles in place */ + cycles = gcd(len, rot); /* number of cycles */ + do { + start = from = list + cycles; /* start index is arbitrary */ + tmp = *from; /* save entry to be overwritten */ + for (;;) { + to = from; /* next step in cycle */ + from += rot; /* go right rot positions */ + if (from > last) from -= len; /* (pointer better not wrap) */ + if (from == start) break; /* all but one shifted */ + *to = *from; /* shift left */ + } + *to = tmp; /* complete the circle */ + } while (--cycles); +} + +/* structure for gzip file read operations */ +typedef struct { + int fd; /* file descriptor */ + int size; /* 1 << size is bytes in buf */ + unsigned left; /* bytes available at next */ + unsigned char *buf; /* buffer */ + z_const unsigned char *next; /* next byte in buffer */ + char *name; /* file name for error messages */ +} file; + +/* reload buffer */ +local int readin(file *in) +{ + int len; + + len = read(in->fd, in->buf, 1 << in->size); + if (len == -1) bye("error reading ", in->name); + in->left = (unsigned)len; + in->next = in->buf; + return len; +} + +/* read from file in, exit if end-of-file */ +local int readmore(file *in) +{ + if (readin(in) == 0) bye("unexpected end of ", in->name); + return 0; +} + +#define read1(in) (in->left == 0 ? readmore(in) : 0, \ + in->left--, *(in->next)++) + +/* skip over n bytes of in */ +local void skip(file *in, unsigned n) +{ + unsigned bypass; + + if (n > in->left) { + n -= in->left; + bypass = n & ~((1U << in->size) - 1); + if (bypass) { + if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1) + bye("seeking ", in->name); + n -= bypass; + } + readmore(in); + if (n > in->left) + bye("unexpected end of ", in->name); + } + in->left -= n; + in->next += n; +} + +/* read a four-byte unsigned integer, little-endian, from in */ +unsigned long read4(file *in) +{ + unsigned long val; + + val = read1(in); + val += (unsigned)read1(in) << 8; + val += (unsigned long)read1(in) << 16; + val += (unsigned long)read1(in) << 24; + return val; +} + +/* skip over gzip header */ +local void gzheader(file *in) +{ + int flags; + unsigned n; + + if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file"); + if (read1(in) != 8) bye("unknown compression method in", in->name); + flags = read1(in); + if (flags & 0xe0) bye("unknown header flags set in", in->name); + skip(in, 6); + if (flags & 4) { + n = read1(in); + n += (unsigned)(read1(in)) << 8; + skip(in, n); + } + if (flags & 8) while (read1(in) != 0) ; + if (flags & 16) while (read1(in) != 0) ; + if (flags & 2) skip(in, 2); +} + +/* decompress gzip file "name", return strm with a deflate stream ready to + continue compression of the data in the gzip file, and return a file + descriptor pointing to where to write the compressed data -- the deflate + stream is initialized to compress using level "level" */ +local int gzscan(char *name, z_stream *strm, int level) +{ + int ret, lastbit, left, full; + unsigned have; + unsigned long crc, tot; + unsigned char *window; + off_t lastoff, end; + file gz; + + /* open gzip file */ + gz.name = name; + gz.fd = open(name, O_RDWR, 0); + if (gz.fd == -1) bye("cannot open ", name); + gz.buf = malloc(CHUNK); + if (gz.buf == NULL) bye("out of memory", ""); + gz.size = LGCHUNK; + gz.left = 0; + + /* skip gzip header */ + gzheader(&gz); + + /* prepare to decompress */ + window = malloc(DSIZE); + if (window == NULL) bye("out of memory", ""); + strm->zalloc = Z_NULL; + strm->zfree = Z_NULL; + strm->opaque = Z_NULL; + ret = inflateInit2(strm, -15); + if (ret != Z_OK) bye("out of memory", " or library mismatch"); + + /* decompress the deflate stream, saving append information */ + lastbit = 0; + lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left; + left = 0; + strm->avail_in = gz.left; + strm->next_in = gz.next; + crc = crc32(0L, Z_NULL, 0); + have = full = 0; + do { + /* if needed, get more input */ + if (strm->avail_in == 0) { + readmore(&gz); + strm->avail_in = gz.left; + strm->next_in = gz.next; + } + + /* set up output to next available section of sliding window */ + strm->avail_out = DSIZE - have; + strm->next_out = window + have; + + /* inflate and check for errors */ + ret = inflate(strm, Z_BLOCK); + if (ret == Z_STREAM_ERROR) bye("internal stream error!", ""); + if (ret == Z_MEM_ERROR) bye("out of memory", ""); + if (ret == Z_DATA_ERROR) + bye("invalid compressed data--format violated in", name); + + /* update crc and sliding window pointer */ + crc = crc32(crc, window + have, DSIZE - have - strm->avail_out); + if (strm->avail_out) + have = DSIZE - strm->avail_out; + else { + have = 0; + full = 1; + } + + /* process end of block */ + if (strm->data_type & 128) { + if (strm->data_type & 64) + left = strm->data_type & 0x1f; + else { + lastbit = strm->data_type & 0x1f; + lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in; + } + } + } while (ret != Z_STREAM_END); + inflateEnd(strm); + gz.left = strm->avail_in; + gz.next = strm->next_in; + + /* save the location of the end of the compressed data */ + end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left; + + /* check gzip trailer and save total for deflate */ + if (crc != read4(&gz)) + bye("invalid compressed data--crc mismatch in ", name); + tot = strm->total_out; + if ((tot & 0xffffffffUL) != read4(&gz)) + bye("invalid compressed data--length mismatch in", name); + + /* if not at end of file, warn */ + if (gz.left || readin(&gz)) + fprintf(stderr, + "gzappend warning: junk at end of gzip file overwritten\n"); + + /* clear last block bit */ + lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET); + if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name); + *gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7))); + lseek(gz.fd, -1L, SEEK_CUR); + if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name); + + /* if window wrapped, build dictionary from window by rotating */ + if (full) { + rotate(window, DSIZE, have); + have = DSIZE; + } + + /* set up deflate stream with window, crc, total_in, and leftover bits */ + ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); + if (ret != Z_OK) bye("out of memory", ""); + deflateSetDictionary(strm, window, have); + strm->adler = crc; + strm->total_in = tot; + if (left) { + lseek(gz.fd, --end, SEEK_SET); + if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name); + deflatePrime(strm, 8 - left, *gz.buf); + } + lseek(gz.fd, end, SEEK_SET); + + /* clean up and return */ + free(window); + free(gz.buf); + return gz.fd; +} + +/* append file "name" to gzip file gd using deflate stream strm -- if last + is true, then finish off the deflate stream at the end */ +local void gztack(char *name, int gd, z_stream *strm, int last) +{ + int fd, len, ret; + unsigned left; + unsigned char *in, *out; + + /* open file to compress and append */ + fd = 0; + if (name != NULL) { + fd = open(name, O_RDONLY, 0); + if (fd == -1) + fprintf(stderr, "gzappend warning: %s not found, skipping ...\n", + name); + } + + /* allocate buffers */ + in = malloc(CHUNK); + out = malloc(CHUNK); + if (in == NULL || out == NULL) bye("out of memory", ""); + + /* compress input file and append to gzip file */ + do { + /* get more input */ + len = read(fd, in, CHUNK); + if (len == -1) { + fprintf(stderr, + "gzappend warning: error reading %s, skipping rest ...\n", + name); + len = 0; + } + strm->avail_in = (unsigned)len; + strm->next_in = in; + if (len) strm->adler = crc32(strm->adler, in, (unsigned)len); + + /* compress and write all available output */ + do { + strm->avail_out = CHUNK; + strm->next_out = out; + ret = deflate(strm, last && len == 0 ? Z_FINISH : Z_NO_FLUSH); + left = CHUNK - strm->avail_out; + while (left) { + len = write(gd, out + CHUNK - strm->avail_out - left, left); + if (len == -1) bye("writing gzip file", ""); + left -= (unsigned)len; + } + } while (strm->avail_out == 0 && ret != Z_STREAM_END); + } while (len != 0); + + /* write trailer after last entry */ + if (last) { + deflateEnd(strm); + out[0] = (unsigned char)(strm->adler); + out[1] = (unsigned char)(strm->adler >> 8); + out[2] = (unsigned char)(strm->adler >> 16); + out[3] = (unsigned char)(strm->adler >> 24); + out[4] = (unsigned char)(strm->total_in); + out[5] = (unsigned char)(strm->total_in >> 8); + out[6] = (unsigned char)(strm->total_in >> 16); + out[7] = (unsigned char)(strm->total_in >> 24); + len = 8; + do { + ret = write(gd, out + 8 - len, len); + if (ret == -1) bye("writing gzip file", ""); + len -= ret; + } while (len); + close(gd); + } + + /* clean up and return */ + free(out); + free(in); + if (fd > 0) close(fd); +} + +/* process the compression level option if present, scan the gzip file, and + append the specified files, or append the data from stdin if no other file + names are provided on the command line -- the gzip file must be writable + and seekable */ +int main(int argc, char **argv) +{ + int gd, level; + z_stream strm; + + /* ignore command name */ + argc--; argv++; + + /* provide usage if no arguments */ + if (*argv == NULL) { + printf( + "gzappend 1.2 (11 Oct 2012) Copyright (C) 2003, 2012 Mark Adler\n" + ); + printf( + "usage: gzappend [-level] file.gz [ addthis [ andthis ... ]]\n"); + return 0; + } + + /* set compression level */ + level = Z_DEFAULT_COMPRESSION; + if (argv[0][0] == '-') { + if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0) + bye("invalid compression level", ""); + level = argv[0][1] - '0'; + if (*++argv == NULL) bye("no gzip file name after options", ""); + } + + /* prepare to append to gzip file */ + gd = gzscan(*argv++, &strm, level); + + /* append files on command line, or from stdin if none */ + if (*argv == NULL) + gztack(NULL, gd, &strm, 1); + else + do { + gztack(*argv, gd, &strm, argv[1] == NULL); + } while (*++argv != NULL); + return 0; +} diff --git a/deps/zlib-1.3.1/examples/gzjoin.c b/deps/zlib-1.3.1/examples/gzjoin.c new file mode 100644 index 0000000..89e8098 --- /dev/null +++ b/deps/zlib-1.3.1/examples/gzjoin.c @@ -0,0 +1,449 @@ +/* gzjoin -- command to join gzip files into one gzip file + + Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved + version 1.2, 14 Aug 2012 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler@alumni.caltech.edu + */ + +/* + * Change history: + * + * 1.0 11 Dec 2004 - First version + * 1.1 12 Jun 2005 - Changed ssize_t to long for portability + * 1.2 14 Aug 2012 - Clean up for z_const usage + */ + +/* + gzjoin takes one or more gzip files on the command line and writes out a + single gzip file that will uncompress to the concatenation of the + uncompressed data from the individual gzip files. gzjoin does this without + having to recompress any of the data and without having to calculate a new + crc32 for the concatenated uncompressed data. gzjoin does however have to + decompress all of the input data in order to find the bits in the compressed + data that need to be modified to concatenate the streams. + + gzjoin does not do an integrity check on the input gzip files other than + checking the gzip header and decompressing the compressed data. They are + otherwise assumed to be complete and correct. + + Each joint between gzip files removes at least 18 bytes of previous trailer + and subsequent header, and inserts an average of about three bytes to the + compressed data in order to connect the streams. The output gzip file + has a minimal ten-byte gzip header with no file name or modification time. + + This program was written to illustrate the use of the Z_BLOCK option of + inflate() and the crc32_combine() function. gzjoin will not compile with + versions of zlib earlier than 1.2.3. + */ + +#include <stdio.h> /* fputs(), fprintf(), fwrite(), putc() */ +#include <stdlib.h> /* exit(), malloc(), free() */ +#include <fcntl.h> /* open() */ +#include <unistd.h> /* close(), read(), lseek() */ +#include "zlib.h" + /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */ + +#define local static + +/* exit with an error (return a value to allow use in an expression) */ +local int bail(char *why1, char *why2) +{ + fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2); + exit(1); + return 0; +} + +/* -- simple buffered file input with access to the buffer -- */ + +#define CHUNK 32768 /* must be a power of two and fit in unsigned */ + +/* bin buffered input file type */ +typedef struct { + char *name; /* name of file for error messages */ + int fd; /* file descriptor */ + unsigned left; /* bytes remaining at next */ + unsigned char *next; /* next byte to read */ + unsigned char *buf; /* allocated buffer of length CHUNK */ +} bin; + +/* close a buffered file and free allocated memory */ +local void bclose(bin *in) +{ + if (in != NULL) { + if (in->fd != -1) + close(in->fd); + if (in->buf != NULL) + free(in->buf); + free(in); + } +} + +/* open a buffered file for input, return a pointer to type bin, or NULL on + failure */ +local bin *bopen(char *name) +{ + bin *in; + + in = malloc(sizeof(bin)); + if (in == NULL) + return NULL; + in->buf = malloc(CHUNK); + in->fd = open(name, O_RDONLY, 0); + if (in->buf == NULL || in->fd == -1) { + bclose(in); + return NULL; + } + in->left = 0; + in->next = in->buf; + in->name = name; + return in; +} + +/* load buffer from file, return -1 on read error, 0 or 1 on success, with + 1 indicating that end-of-file was reached */ +local int bload(bin *in) +{ + long len; + + if (in == NULL) + return -1; + if (in->left != 0) + return 0; + in->next = in->buf; + do { + len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left); + if (len < 0) + return -1; + in->left += (unsigned)len; + } while (len != 0 && in->left < CHUNK); + return len == 0 ? 1 : 0; +} + +/* get a byte from the file, bail if end of file */ +#define bget(in) (in->left ? 0 : bload(in), \ + in->left ? (in->left--, *(in->next)++) : \ + bail("unexpected end of file on ", in->name)) + +/* get a four-byte little-endian unsigned integer from file */ +local unsigned long bget4(bin *in) +{ + unsigned long val; + + val = bget(in); + val += (unsigned long)(bget(in)) << 8; + val += (unsigned long)(bget(in)) << 16; + val += (unsigned long)(bget(in)) << 24; + return val; +} + +/* skip bytes in file */ +local void bskip(bin *in, unsigned skip) +{ + /* check pointer */ + if (in == NULL) + return; + + /* easy case -- skip bytes in buffer */ + if (skip <= in->left) { + in->left -= skip; + in->next += skip; + return; + } + + /* skip what's in buffer, discard buffer contents */ + skip -= in->left; + in->left = 0; + + /* seek past multiples of CHUNK bytes */ + if (skip > CHUNK) { + unsigned left; + + left = skip & (CHUNK - 1); + if (left == 0) { + /* exact number of chunks: seek all the way minus one byte to check + for end-of-file with a read */ + lseek(in->fd, skip - 1, SEEK_CUR); + if (read(in->fd, in->buf, 1) != 1) + bail("unexpected end of file on ", in->name); + return; + } + + /* skip the integral chunks, update skip with remainder */ + lseek(in->fd, skip - left, SEEK_CUR); + skip = left; + } + + /* read more input and skip remainder */ + bload(in); + if (skip > in->left) + bail("unexpected end of file on ", in->name); + in->left -= skip; + in->next += skip; +} + +/* -- end of buffered input functions -- */ + +/* skip the gzip header from file in */ +local void gzhead(bin *in) +{ + int flags; + + /* verify gzip magic header and compression method */ + if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8) + bail(in->name, " is not a valid gzip file"); + + /* get and verify flags */ + flags = bget(in); + if ((flags & 0xe0) != 0) + bail("unknown reserved bits set in ", in->name); + + /* skip modification time, extra flags, and os */ + bskip(in, 6); + + /* skip extra field if present */ + if (flags & 4) { + unsigned len; + + len = bget(in); + len += (unsigned)(bget(in)) << 8; + bskip(in, len); + } + + /* skip file name if present */ + if (flags & 8) + while (bget(in) != 0) + ; + + /* skip comment if present */ + if (flags & 16) + while (bget(in) != 0) + ; + + /* skip header crc if present */ + if (flags & 2) + bskip(in, 2); +} + +/* write a four-byte little-endian unsigned integer to out */ +local void put4(unsigned long val, FILE *out) +{ + putc(val & 0xff, out); + putc((val >> 8) & 0xff, out); + putc((val >> 16) & 0xff, out); + putc((val >> 24) & 0xff, out); +} + +/* Load up zlib stream from buffered input, bail if end of file */ +local void zpull(z_streamp strm, bin *in) +{ + if (in->left == 0) + bload(in); + if (in->left == 0) + bail("unexpected end of file on ", in->name); + strm->avail_in = in->left; + strm->next_in = in->next; +} + +/* Write header for gzip file to out and initialize trailer. */ +local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out) +{ + fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); + *crc = crc32(0L, Z_NULL, 0); + *tot = 0; +} + +/* Copy the compressed data from name, zeroing the last block bit of the last + block if clr is true, and adding empty blocks as needed to get to a byte + boundary. If clr is false, then the last block becomes the last block of + the output, and the gzip trailer is written. crc and tot maintains the + crc and length (modulo 2^32) of the output for the trailer. The resulting + gzip file is written to out. gzinit() must be called before the first call + of gzcopy() to write the gzip header and to initialize crc and tot. */ +local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot, + FILE *out) +{ + int ret; /* return value from zlib functions */ + int pos; /* where the "last block" bit is in byte */ + int last; /* true if processing the last block */ + bin *in; /* buffered input file */ + unsigned char *start; /* start of compressed data in buffer */ + unsigned char *junk; /* buffer for uncompressed data -- discarded */ + z_off_t len; /* length of uncompressed data (support > 4 GB) */ + z_stream strm; /* zlib inflate stream */ + + /* open gzip file and skip header */ + in = bopen(name); + if (in == NULL) + bail("could not open ", name); + gzhead(in); + + /* allocate buffer for uncompressed data and initialize raw inflate + stream */ + junk = malloc(CHUNK); + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, -15); + if (junk == NULL || ret != Z_OK) + bail("out of memory", ""); + + /* inflate and copy compressed data, clear last-block bit if requested */ + len = 0; + zpull(&strm, in); + start = in->next; + last = start[0] & 1; + if (last && clr) + start[0] &= ~1; + strm.avail_out = 0; + for (;;) { + /* if input used and output done, write used input and get more */ + if (strm.avail_in == 0 && strm.avail_out != 0) { + fwrite(start, 1, strm.next_in - start, out); + start = in->buf; + in->left = 0; + zpull(&strm, in); + } + + /* decompress -- return early when end-of-block reached */ + strm.avail_out = CHUNK; + strm.next_out = junk; + ret = inflate(&strm, Z_BLOCK); + switch (ret) { + case Z_MEM_ERROR: + bail("out of memory", ""); + case Z_DATA_ERROR: + bail("invalid compressed data in ", in->name); + } + + /* update length of uncompressed data */ + len += CHUNK - strm.avail_out; + + /* check for block boundary (only get this when block copied out) */ + if (strm.data_type & 128) { + /* if that was the last block, then done */ + if (last) + break; + + /* number of unused bits in last byte */ + pos = strm.data_type & 7; + + /* find the next last-block bit */ + if (pos != 0) { + /* next last-block bit is in last used byte */ + pos = 0x100 >> pos; + last = strm.next_in[-1] & pos; + if (last && clr) + in->buf[strm.next_in - in->buf - 1] &= ~pos; + } + else { + /* next last-block bit is in next unused byte */ + if (strm.avail_in == 0) { + /* don't have that byte yet -- get it */ + fwrite(start, 1, strm.next_in - start, out); + start = in->buf; + in->left = 0; + zpull(&strm, in); + } + last = strm.next_in[0] & 1; + if (last && clr) + in->buf[strm.next_in - in->buf] &= ~1; + } + } + } + + /* update buffer with unused input */ + in->left = strm.avail_in; + in->next = in->buf + (strm.next_in - in->buf); + + /* copy used input, write empty blocks to get to byte boundary */ + pos = strm.data_type & 7; + fwrite(start, 1, in->next - start - 1, out); + last = in->next[-1]; + if (pos == 0 || !clr) + /* already at byte boundary, or last file: write last byte */ + putc(last, out); + else { + /* append empty blocks to last byte */ + last &= ((0x100 >> pos) - 1); /* assure unused bits are zero */ + if (pos & 1) { + /* odd -- append an empty stored block */ + putc(last, out); + if (pos == 1) + putc(0, out); /* two more bits in block header */ + fwrite("\0\0\xff\xff", 1, 4, out); + } + else { + /* even -- append 1, 2, or 3 empty fixed blocks */ + switch (pos) { + case 6: + putc(last | 8, out); + last = 0; + case 4: + putc(last | 0x20, out); + last = 0; + case 2: + putc(last | 0x80, out); + putc(0, out); + } + } + } + + /* update crc and tot */ + *crc = crc32_combine(*crc, bget4(in), len); + *tot += (unsigned long)len; + + /* clean up */ + inflateEnd(&strm); + free(junk); + bclose(in); + + /* write trailer if this is the last gzip file */ + if (!clr) { + put4(*crc, out); + put4(*tot, out); + } +} + +/* join the gzip files on the command line, write result to stdout */ +int main(int argc, char **argv) +{ + unsigned long crc, tot; /* running crc and total uncompressed length */ + + /* skip command name */ + argc--; + argv++; + + /* show usage if no arguments */ + if (argc == 0) { + fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n", + stderr); + return 0; + } + + /* join gzip files on command line and write to stdout */ + gzinit(&crc, &tot, stdout); + while (argc--) + gzcopy(*argv++, argc, &crc, &tot, stdout); + + /* done */ + return 0; +} diff --git a/deps/zlib-1.3.1/examples/gzlog.c b/deps/zlib-1.3.1/examples/gzlog.c new file mode 100644 index 0000000..da1b02e --- /dev/null +++ b/deps/zlib-1.3.1/examples/gzlog.c @@ -0,0 +1,1061 @@ +/* + * gzlog.c + * Copyright (C) 2004, 2008, 2012, 2016, 2019 Mark Adler, all rights reserved + * For conditions of distribution and use, see copyright notice in gzlog.h + * version 2.3, 25 May 2019 + */ + +/* + gzlog provides a mechanism for frequently appending short strings to a gzip + file that is efficient both in execution time and compression ratio. The + strategy is to write the short strings in an uncompressed form to the end of + the gzip file, only compressing when the amount of uncompressed data has + reached a given threshold. + + gzlog also provides protection against interruptions in the process due to + system crashes. The status of the operation is recorded in an extra field + in the gzip file, and is only updated once the gzip file is brought to a + valid state. The last data to be appended or compressed is saved in an + auxiliary file, so that if the operation is interrupted, it can be completed + the next time an append operation is attempted. + + gzlog maintains another auxiliary file with the last 32K of data from the + compressed portion, which is preloaded for the compression of the subsequent + data. This minimizes the impact to the compression ratio of appending. + */ + +/* + Operations Concept: + + Files (log name "foo"): + foo.gz -- gzip file with the complete log + foo.add -- last message to append or last data to compress + foo.dict -- dictionary of the last 32K of data for next compression + foo.temp -- temporary dictionary file for compression after this one + foo.lock -- lock file for reading and writing the other files + foo.repairs -- log file for log file recovery operations (not compressed) + + gzip file structure: + - fixed-length (no file name) header with extra field (see below) + - compressed data ending initially with empty stored block + - uncompressed data filling out originally empty stored block and + subsequent stored blocks as needed (16K max each) + - gzip trailer + - no junk at end (no other gzip streams) + + When appending data, the information in the first three items above plus the + foo.add file are sufficient to recover an interrupted append operation. The + extra field has the necessary information to restore the start of the last + stored block and determine where to append the data in the foo.add file, as + well as the crc and length of the gzip data before the append operation. + + The foo.add file is created before the gzip file is marked for append, and + deleted after the gzip file is marked as complete. So if the append + operation is interrupted, the data to add will still be there. If due to + some external force, the foo.add file gets deleted between when the append + operation was interrupted and when recovery is attempted, the gzip file will + still be restored, but without the appended data. + + When compressing data, the information in the first two items above plus the + foo.add file are sufficient to recover an interrupted compress operation. + The extra field has the necessary information to find the end of the + compressed data, and contains both the crc and length of just the compressed + data and of the complete set of data including the contents of the foo.add + file. + + Again, the foo.add file is maintained during the compress operation in case + of an interruption. If in the unlikely event the foo.add file with the data + to be compressed is missing due to some external force, a gzip file with + just the previous compressed data will be reconstructed. In this case, all + of the data that was to be compressed is lost (approximately one megabyte). + This will not occur if all that happened was an interruption of the compress + operation. + + The third state that is marked is the replacement of the old dictionary with + the new dictionary after a compress operation. Once compression is + complete, the gzip file is marked as being in the replace state. This + completes the gzip file, so an interrupt after being so marked does not + result in recompression. Then the dictionary file is replaced, and the gzip + file is marked as completed. This state prevents the possibility of + restarting compression with the wrong dictionary file. + + All three operations are wrapped by a lock/unlock procedure. In order to + gain exclusive access to the log files, first a foo.lock file must be + exclusively created. When all operations are complete, the lock is + released by deleting the foo.lock file. If when attempting to create the + lock file, it already exists and the modify time of the lock file is more + than five minutes old (set by the PATIENCE define below), then the old + lock file is considered stale and deleted, and the exclusive creation of + the lock file is retried. To assure that there are no false assessments + of the staleness of the lock file, the operations periodically touch the + lock file to update the modified date. + + Following is the definition of the extra field with all of the information + required to enable the above append and compress operations and their + recovery if interrupted. Multi-byte values are stored little endian + (consistent with the gzip format). File pointers are eight bytes long. + The crc's and lengths for the gzip trailer are four bytes long. (Note that + the length at the end of a gzip file is used for error checking only, and + for large files is actually the length modulo 2^32.) The stored block + length is two bytes long. The gzip extra field two-byte identification is + "ap" for append. It is assumed that writing the extra field to the file is + an "atomic" operation. That is, either all of the extra field is written + to the file, or none of it is, if the operation is interrupted right at the + point of updating the extra field. This is a reasonable assumption, since + the extra field is within the first 52 bytes of the file, which is smaller + than any expected block size for a mass storage device (usually 512 bytes or + larger). + + Extra field (35 bytes): + - Pointer to first stored block length -- this points to the two-byte length + of the first stored block, which is followed by the two-byte, one's + complement of that length. The stored block length is preceded by the + three-bit header of the stored block, which is the actual start of the + stored block in the deflate format. See the bit offset field below. + - Pointer to the last stored block length. This is the same as above, but + for the last stored block of the uncompressed data in the gzip file. + Initially this is the same as the first stored block length pointer. + When the stored block gets to 16K (see the MAX_STORE define), then a new + stored block as added, at which point the last stored block length pointer + is different from the first stored block length pointer. When they are + different, the first bit of the last stored block header is eight bits, or + one byte back from the block length. + - Compressed data crc and length. This is the crc and length of the data + that is in the compressed portion of the deflate stream. These are used + only in the event that the foo.add file containing the data to compress is + lost after a compress operation is interrupted. + - Total data crc and length. This is the crc and length of all of the data + stored in the gzip file, compressed and uncompressed. It is used to + reconstruct the gzip trailer when compressing, as well as when recovering + interrupted operations. + - Final stored block length. This is used to quickly find where to append, + and allows the restoration of the original final stored block state when + an append operation is interrupted. + - First stored block start as the number of bits back from the final stored + block first length byte. This value is in the range of 3..10, and is + stored as the low three bits of the final byte of the extra field after + subtracting three (0..7). This allows the last-block bit of the stored + block header to be updated when a new stored block is added, for the case + when the first stored block and the last stored block are the same. (When + they are different, the numbers of bits back is known to be eight.) This + also allows for new compressed data to be appended to the old compressed + data in the compress operation, overwriting the previous first stored + block, or for the compressed data to be terminated and a valid gzip file + reconstructed on the off chance that a compression operation was + interrupted and the data to compress in the foo.add file was deleted. + - The operation in process. This is the next two bits in the last byte (the + bits under the mask 0x18). The are interpreted as 0: nothing in process, + 1: append in process, 2: compress in process, 3: replace in process. + - The top three bits of the last byte in the extra field are reserved and + are currently set to zero. + + Main procedure: + - Exclusively create the foo.lock file using the O_CREAT and O_EXCL modes of + the system open() call. If the modify time of an existing lock file is + more than PATIENCE seconds old, then the lock file is deleted and the + exclusive create is retried. + - Load the extra field from the foo.gz file, and see if an operation was in + progress but not completed. If so, apply the recovery procedure below. + - Perform the append procedure with the provided data. + - If the uncompressed data in the foo.gz file is 1MB or more, apply the + compress procedure. + - Delete the foo.lock file. + + Append procedure: + - Put what to append in the foo.add file so that the operation can be + restarted if this procedure is interrupted. + - Mark the foo.gz extra field with the append operation in progress. + + Restore the original last-block bit and stored block length of the last + stored block from the information in the extra field, in case a previous + append operation was interrupted. + - Append the provided data to the last stored block, creating new stored + blocks as needed and updating the stored blocks last-block bits and + lengths. + - Update the crc and length with the new data, and write the gzip trailer. + - Write over the extra field (with a single write operation) with the new + pointers, lengths, and crc's, and mark the gzip file as not in process. + Though there is still a foo.add file, it will be ignored since nothing + is in process. If a foo.add file is leftover from a previously + completed operation, it is truncated when writing new data to it. + - Delete the foo.add file. + + Compress and replace procedures: + - Read all of the uncompressed data in the stored blocks in foo.gz and write + it to foo.add. Also write foo.temp with the last 32K of that data to + provide a dictionary for the next invocation of this procedure. + - Rewrite the extra field marking foo.gz with a compression in process. + * If there is no data provided to compress (due to a missing foo.add file + when recovering), reconstruct and truncate the foo.gz file to contain + only the previous compressed data and proceed to the step after the next + one. Otherwise ... + - Compress the data with the dictionary in foo.dict, and write to the + foo.gz file starting at the bit immediately following the last previously + compressed block. If there is no foo.dict, proceed anyway with the + compression at slightly reduced efficiency. (For the foo.dict file to be + missing requires some external failure beyond simply the interruption of + a compress operation.) During this process, the foo.lock file is + periodically touched to assure that that file is not considered stale by + another process before we're done. The deflation is terminated with a + non-last empty static block (10 bits long), that is then located and + written over by a last-bit-set empty stored block. + - Append the crc and length of the data in the gzip file (previously + calculated during the append operations). + - Write over the extra field with the updated stored block offsets, bits + back, crc's, and lengths, and mark foo.gz as in process for a replacement + of the dictionary. + @ Delete the foo.add file. + - Replace foo.dict with foo.temp. + - Write over the extra field, marking foo.gz as complete. + + Recovery procedure: + - If not a replace recovery, read in the foo.add file, and provide that data + to the appropriate recovery below. If there is no foo.add file, provide + a zero data length to the recovery. In that case, the append recovery + restores the foo.gz to the previous compressed + uncompressed data state. + For the compress recovery, a missing foo.add file results in foo.gz being + restored to the previous compressed-only data state. + - Append recovery: + - Pick up append at + step above + - Compress recovery: + - Pick up compress at * step above + - Replace recovery: + - Pick up compress at @ step above + - Log the repair with a date stamp in foo.repairs + */ + +#include <sys/types.h> +#include <stdio.h> /* rename, fopen, fprintf, fclose */ +#include <stdlib.h> /* malloc, free */ +#include <string.h> /* strlen, strrchr, strcpy, strncpy, strcmp */ +#include <fcntl.h> /* open */ +#include <unistd.h> /* lseek, read, write, close, unlink, sleep, */ + /* ftruncate, fsync */ +#include <errno.h> /* errno */ +#include <time.h> /* time, ctime */ +#include <sys/stat.h> /* stat */ +#include <sys/time.h> /* utimes */ +#include "zlib.h" /* crc32 */ + +#include "gzlog.h" /* header for external access */ + +#define local static +typedef unsigned int uint; +typedef unsigned long ulong; + +/* Macro for debugging to deterministically force recovery operations */ +#ifdef GZLOG_DEBUG + #include <setjmp.h> /* longjmp */ + jmp_buf gzlog_jump; /* where to go back to */ + int gzlog_bail = 0; /* which point to bail at (1..8) */ + int gzlog_count = -1; /* number of times through to wait */ +# define BAIL(n) do { if (n == gzlog_bail && gzlog_count-- == 0) \ + longjmp(gzlog_jump, gzlog_bail); } while (0) +#else +# define BAIL(n) +#endif + +/* how old the lock file can be in seconds before considering it stale */ +#define PATIENCE 300 + +/* maximum stored block size in Kbytes -- must be in 1..63 */ +#define MAX_STORE 16 + +/* number of stored Kbytes to trigger compression (must be >= 32 to allow + dictionary construction, and <= 204 * MAX_STORE, in order for >> 10 to + discard the stored block headers contribution of five bytes each) */ +#define TRIGGER 1024 + +/* size of a deflate dictionary (this cannot be changed) */ +#define DICT 32768U + +/* values for the operation (2 bits) */ +#define NO_OP 0 +#define APPEND_OP 1 +#define COMPRESS_OP 2 +#define REPLACE_OP 3 + +/* macros to extract little-endian integers from an unsigned byte buffer */ +#define PULL2(p) ((p)[0]+((uint)((p)[1])<<8)) +#define PULL4(p) (PULL2(p)+((ulong)PULL2(p+2)<<16)) +#define PULL8(p) (PULL4(p)+((off_t)PULL4(p+4)<<32)) + +/* macros to store integers into a byte buffer in little-endian order */ +#define PUT2(p,a) do {(p)[0]=a;(p)[1]=(a)>>8;} while(0) +#define PUT4(p,a) do {PUT2(p,a);PUT2(p+2,a>>16);} while(0) +#define PUT8(p,a) do {PUT4(p,a);PUT4(p+4,a>>32);} while(0) + +/* internal structure for log information */ +#define LOGID "\106\035\172" /* should be three non-zero characters */ +struct log { + char id[4]; /* contains LOGID to detect inadvertent overwrites */ + int fd; /* file descriptor for .gz file, opened read/write */ + char *path; /* allocated path, e.g. "/var/log/foo" or "foo" */ + char *end; /* end of path, for appending suffices such as ".gz" */ + off_t first; /* offset of first stored block first length byte */ + int back; /* location of first block id in bits back from first */ + uint stored; /* bytes currently in last stored block */ + off_t last; /* offset of last stored block first length byte */ + ulong ccrc; /* crc of compressed data */ + ulong clen; /* length (modulo 2^32) of compressed data */ + ulong tcrc; /* crc of total data */ + ulong tlen; /* length (modulo 2^32) of total data */ + time_t lock; /* last modify time of our lock file */ +}; + +/* gzip header for gzlog */ +local unsigned char log_gzhead[] = { + 0x1f, 0x8b, /* magic gzip id */ + 8, /* compression method is deflate */ + 4, /* there is an extra field (no file name) */ + 0, 0, 0, 0, /* no modification time provided */ + 0, 0xff, /* no extra flags, no OS specified */ + 39, 0, 'a', 'p', 35, 0 /* extra field with "ap" subfield */ + /* 35 is EXTRA, 39 is EXTRA + 4 */ +}; + +#define HEAD sizeof(log_gzhead) /* should be 16 */ + +/* initial gzip extra field content (52 == HEAD + EXTRA + 1) */ +local unsigned char log_gzext[] = { + 52, 0, 0, 0, 0, 0, 0, 0, /* offset of first stored block length */ + 52, 0, 0, 0, 0, 0, 0, 0, /* offset of last stored block length */ + 0, 0, 0, 0, 0, 0, 0, 0, /* compressed data crc and length */ + 0, 0, 0, 0, 0, 0, 0, 0, /* total data crc and length */ + 0, 0, /* final stored block data length */ + 5 /* op is NO_OP, last bit 8 bits back */ +}; + +#define EXTRA sizeof(log_gzext) /* should be 35 */ + +/* initial gzip data and trailer */ +local unsigned char log_gzbody[] = { + 1, 0, 0, 0xff, 0xff, /* empty stored block (last) */ + 0, 0, 0, 0, /* crc */ + 0, 0, 0, 0 /* uncompressed length */ +}; + +#define BODY sizeof(log_gzbody) + +/* Exclusively create foo.lock in order to negotiate exclusive access to the + foo.* files. If the modify time of an existing lock file is greater than + PATIENCE seconds in the past, then consider the lock file to have been + abandoned, delete it, and try the exclusive create again. Save the lock + file modify time for verification of ownership. Return 0 on success, or -1 + on failure, usually due to an access restriction or invalid path. Note that + if stat() or unlink() fails, it may be due to another process noticing the + abandoned lock file a smidge sooner and deleting it, so those are not + flagged as an error. */ +local int log_lock(struct log *log) +{ + int fd; + struct stat st; + + strcpy(log->end, ".lock"); + while ((fd = open(log->path, O_CREAT | O_EXCL, 0644)) < 0) { + if (errno != EEXIST) + return -1; + if (stat(log->path, &st) == 0 && time(NULL) - st.st_mtime > PATIENCE) { + unlink(log->path); + continue; + } + sleep(2); /* relinquish the CPU for two seconds while waiting */ + } + close(fd); + if (stat(log->path, &st) == 0) + log->lock = st.st_mtime; + return 0; +} + +/* Update the modify time of the lock file to now, in order to prevent another + task from thinking that the lock is stale. Save the lock file modify time + for verification of ownership. */ +local void log_touch(struct log *log) +{ + struct stat st; + + strcpy(log->end, ".lock"); + utimes(log->path, NULL); + if (stat(log->path, &st) == 0) + log->lock = st.st_mtime; +} + +/* Check the log file modify time against what is expected. Return true if + this is not our lock. If it is our lock, touch it to keep it. */ +local int log_check(struct log *log) +{ + struct stat st; + + strcpy(log->end, ".lock"); + if (stat(log->path, &st) || st.st_mtime != log->lock) + return 1; + log_touch(log); + return 0; +} + +/* Unlock a previously acquired lock, but only if it's ours. */ +local void log_unlock(struct log *log) +{ + if (log_check(log)) + return; + strcpy(log->end, ".lock"); + unlink(log->path); + log->lock = 0; +} + +/* Check the gzip header and read in the extra field, filling in the values in + the log structure. Return op on success or -1 if the gzip header was not as + expected. op is the current operation in progress last written to the extra + field. This assumes that the gzip file has already been opened, with the + file descriptor log->fd. */ +local int log_head(struct log *log) +{ + int op; + unsigned char buf[HEAD + EXTRA]; + + if (lseek(log->fd, 0, SEEK_SET) < 0 || + read(log->fd, buf, HEAD + EXTRA) != HEAD + EXTRA || + memcmp(buf, log_gzhead, HEAD)) { + return -1; + } + log->first = PULL8(buf + HEAD); + log->last = PULL8(buf + HEAD + 8); + log->ccrc = PULL4(buf + HEAD + 16); + log->clen = PULL4(buf + HEAD + 20); + log->tcrc = PULL4(buf + HEAD + 24); + log->tlen = PULL4(buf + HEAD + 28); + log->stored = PULL2(buf + HEAD + 32); + log->back = 3 + (buf[HEAD + 34] & 7); + op = (buf[HEAD + 34] >> 3) & 3; + return op; +} + +/* Write over the extra field contents, marking the operation as op. Use fsync + to assure that the device is written to, and in the requested order. This + operation, and only this operation, is assumed to be atomic in order to + assure that the log is recoverable in the event of an interruption at any + point in the process. Return -1 if the write to foo.gz failed. */ +local int log_mark(struct log *log, int op) +{ + int ret; + unsigned char ext[EXTRA]; + + PUT8(ext, log->first); + PUT8(ext + 8, log->last); + PUT4(ext + 16, log->ccrc); + PUT4(ext + 20, log->clen); + PUT4(ext + 24, log->tcrc); + PUT4(ext + 28, log->tlen); + PUT2(ext + 32, log->stored); + ext[34] = log->back - 3 + (op << 3); + fsync(log->fd); + ret = lseek(log->fd, HEAD, SEEK_SET) < 0 || + write(log->fd, ext, EXTRA) != EXTRA ? -1 : 0; + fsync(log->fd); + return ret; +} + +/* Rewrite the last block header bits and subsequent zero bits to get to a byte + boundary, setting the last block bit if last is true, and then write the + remainder of the stored block header (length and one's complement). Leave + the file pointer after the end of the last stored block data. Return -1 if + there is a read or write failure on the foo.gz file */ +local int log_last(struct log *log, int last) +{ + int back, len, mask; + unsigned char buf[6]; + + /* determine the locations of the bytes and bits to modify */ + back = log->last == log->first ? log->back : 8; + len = back > 8 ? 2 : 1; /* bytes back from log->last */ + mask = 0x80 >> ((back - 1) & 7); /* mask for block last-bit */ + + /* get the byte to modify (one or two back) into buf[0] -- don't need to + read the byte if the last-bit is eight bits back, since in that case + the entire byte will be modified */ + buf[0] = 0; + if (back != 8 && (lseek(log->fd, log->last - len, SEEK_SET) < 0 || + read(log->fd, buf, 1) != 1)) + return -1; + + /* change the last-bit of the last stored block as requested -- note + that all bits above the last-bit are set to zero, per the type bits + of a stored block being 00 and per the convention that the bits to + bring the stream to a byte boundary are also zeros */ + buf[1] = 0; + buf[2 - len] = (*buf & (mask - 1)) + (last ? mask : 0); + + /* write the modified stored block header and lengths, move the file + pointer to after the last stored block data */ + PUT2(buf + 2, log->stored); + PUT2(buf + 4, log->stored ^ 0xffff); + return lseek(log->fd, log->last - len, SEEK_SET) < 0 || + write(log->fd, buf + 2 - len, len + 4) != len + 4 || + lseek(log->fd, log->stored, SEEK_CUR) < 0 ? -1 : 0; +} + +/* Append len bytes from data to the locked and open log file. len may be zero + if recovering and no .add file was found. In that case, the previous state + of the foo.gz file is restored. The data is appended uncompressed in + deflate stored blocks. Return -1 if there was an error reading or writing + the foo.gz file. */ +local int log_append(struct log *log, unsigned char *data, size_t len) +{ + uint put; + off_t end; + unsigned char buf[8]; + + /* set the last block last-bit and length, in case recovering an + interrupted append, then position the file pointer to append to the + block */ + if (log_last(log, 1)) + return -1; + + /* append, adding stored blocks and updating the offset of the last stored + block as needed, and update the total crc and length */ + while (len) { + /* append as much as we can to the last block */ + put = (MAX_STORE << 10) - log->stored; + if (put > len) + put = (uint)len; + if (put) { + if (write(log->fd, data, put) != put) + return -1; + BAIL(1); + log->tcrc = crc32(log->tcrc, data, put); + log->tlen += put; + log->stored += put; + data += put; + len -= put; + } + + /* if we need to, add a new empty stored block */ + if (len) { + /* mark current block as not last */ + if (log_last(log, 0)) + return -1; + + /* point to new, empty stored block */ + log->last += 4 + log->stored + 1; + log->stored = 0; + } + + /* mark last block as last, update its length */ + if (log_last(log, 1)) + return -1; + BAIL(2); + } + + /* write the new crc and length trailer, and truncate just in case (could + be recovering from partial append with a missing foo.add file) */ + PUT4(buf, log->tcrc); + PUT4(buf + 4, log->tlen); + if (write(log->fd, buf, 8) != 8 || + (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end)) + return -1; + + /* write the extra field, marking the log file as done, delete .add file */ + if (log_mark(log, NO_OP)) + return -1; + strcpy(log->end, ".add"); + unlink(log->path); /* ignore error, since may not exist */ + return 0; +} + +/* Replace the foo.dict file with the foo.temp file. Also delete the foo.add + file, since the compress operation may have been interrupted before that was + done. Returns 1 if memory could not be allocated, or -1 if reading or + writing foo.gz fails, or if the rename fails for some reason other than + foo.temp not existing. foo.temp not existing is a permitted error, since + the replace operation may have been interrupted after the rename is done, + but before foo.gz is marked as complete. */ +local int log_replace(struct log *log) +{ + int ret; + char *dest; + + /* delete foo.add file */ + strcpy(log->end, ".add"); + unlink(log->path); /* ignore error, since may not exist */ + BAIL(3); + + /* rename foo.name to foo.dict, replacing foo.dict if it exists */ + strcpy(log->end, ".dict"); + dest = malloc(strlen(log->path) + 1); + if (dest == NULL) + return -2; + strcpy(dest, log->path); + strcpy(log->end, ".temp"); + ret = rename(log->path, dest); + free(dest); + if (ret && errno != ENOENT) + return -1; + BAIL(4); + + /* mark the foo.gz file as done */ + return log_mark(log, NO_OP); +} + +/* Compress the len bytes at data and append the compressed data to the + foo.gz deflate data immediately after the previous compressed data. This + overwrites the previous uncompressed data, which was stored in foo.add + and is the data provided in data[0..len-1]. If this operation is + interrupted, it picks up at the start of this routine, with the foo.add + file read in again. If there is no data to compress (len == 0), then we + simply terminate the foo.gz file after the previously compressed data, + appending a final empty stored block and the gzip trailer. Return -1 if + reading or writing the log.gz file failed, or -2 if there was a memory + allocation failure. */ +local int log_compress(struct log *log, unsigned char *data, size_t len) +{ + int fd; + uint got, max; + ssize_t dict; + off_t end; + z_stream strm; + unsigned char buf[DICT]; + + /* compress and append compressed data */ + if (len) { + /* set up for deflate, allocating memory */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + if (deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -15, 8, + Z_DEFAULT_STRATEGY) != Z_OK) + return -2; + + /* read in dictionary (last 32K of data that was compressed) */ + strcpy(log->end, ".dict"); + fd = open(log->path, O_RDONLY, 0); + if (fd >= 0) { + dict = read(fd, buf, DICT); + close(fd); + if (dict < 0) { + deflateEnd(&strm); + return -1; + } + if (dict) + deflateSetDictionary(&strm, buf, (uint)dict); + } + log_touch(log); + + /* prime deflate with last bits of previous block, position write + pointer to write those bits and overwrite what follows */ + if (lseek(log->fd, log->first - (log->back > 8 ? 2 : 1), + SEEK_SET) < 0 || + read(log->fd, buf, 1) != 1 || lseek(log->fd, -1, SEEK_CUR) < 0) { + deflateEnd(&strm); + return -1; + } + deflatePrime(&strm, (8 - log->back) & 7, *buf); + + /* compress, finishing with a partial non-last empty static block */ + strm.next_in = data; + max = (((uint)0 - 1) >> 1) + 1; /* in case int smaller than size_t */ + do { + strm.avail_in = len > max ? max : (uint)len; + len -= strm.avail_in; + do { + strm.avail_out = DICT; + strm.next_out = buf; + deflate(&strm, len ? Z_NO_FLUSH : Z_PARTIAL_FLUSH); + got = DICT - strm.avail_out; + if (got && write(log->fd, buf, got) != got) { + deflateEnd(&strm); + return -1; + } + log_touch(log); + } while (strm.avail_out == 0); + } while (len); + deflateEnd(&strm); + BAIL(5); + + /* find start of empty static block -- scanning backwards the first one + bit is the second bit of the block, if the last byte is zero, then + we know the byte before that has a one in the top bit, since an + empty static block is ten bits long */ + if ((log->first = lseek(log->fd, -1, SEEK_CUR)) < 0 || + read(log->fd, buf, 1) != 1) + return -1; + log->first++; + if (*buf) { + log->back = 1; + while ((*buf & ((uint)1 << (8 - log->back++))) == 0) + ; /* guaranteed to terminate, since *buf != 0 */ + } + else + log->back = 10; + + /* update compressed crc and length */ + log->ccrc = log->tcrc; + log->clen = log->tlen; + } + else { + /* no data to compress -- fix up existing gzip stream */ + log->tcrc = log->ccrc; + log->tlen = log->clen; + } + + /* complete and truncate gzip stream */ + log->last = log->first; + log->stored = 0; + PUT4(buf, log->tcrc); + PUT4(buf + 4, log->tlen); + if (log_last(log, 1) || write(log->fd, buf, 8) != 8 || + (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end)) + return -1; + BAIL(6); + + /* mark as being in the replace operation */ + if (log_mark(log, REPLACE_OP)) + return -1; + + /* execute the replace operation and mark the file as done */ + return log_replace(log); +} + +/* log a repair record to the .repairs file */ +local void log_log(struct log *log, int op, char *record) +{ + time_t now; + FILE *rec; + + now = time(NULL); + strcpy(log->end, ".repairs"); + rec = fopen(log->path, "a"); + if (rec == NULL) + return; + fprintf(rec, "%.24s %s recovery: %s\n", ctime(&now), op == APPEND_OP ? + "append" : (op == COMPRESS_OP ? "compress" : "replace"), record); + fclose(rec); + return; +} + +/* Recover the interrupted operation op. First read foo.add for recovering an + append or compress operation. Return -1 if there was an error reading or + writing foo.gz or reading an existing foo.add, or -2 if there was a memory + allocation failure. */ +local int log_recover(struct log *log, int op) +{ + int fd, ret = 0; + unsigned char *data = NULL; + size_t len = 0; + struct stat st; + + /* log recovery */ + log_log(log, op, "start"); + + /* load foo.add file if expected and present */ + if (op == APPEND_OP || op == COMPRESS_OP) { + strcpy(log->end, ".add"); + if (stat(log->path, &st) == 0 && st.st_size) { + len = (size_t)(st.st_size); + if ((off_t)len != st.st_size || + (data = malloc(st.st_size)) == NULL) { + log_log(log, op, "allocation failure"); + return -2; + } + if ((fd = open(log->path, O_RDONLY, 0)) < 0) { + free(data); + log_log(log, op, ".add file read failure"); + return -1; + } + ret = (size_t)read(fd, data, len) != len; + close(fd); + if (ret) { + free(data); + log_log(log, op, ".add file read failure"); + return -1; + } + log_log(log, op, "loaded .add file"); + } + else + log_log(log, op, "missing .add file!"); + } + + /* recover the interrupted operation */ + switch (op) { + case APPEND_OP: + ret = log_append(log, data, len); + break; + case COMPRESS_OP: + ret = log_compress(log, data, len); + break; + case REPLACE_OP: + ret = log_replace(log); + } + + /* log status */ + log_log(log, op, ret ? "failure" : "complete"); + + /* clean up */ + if (data != NULL) + free(data); + return ret; +} + +/* Close the foo.gz file (if open) and release the lock. */ +local void log_close(struct log *log) +{ + if (log->fd >= 0) + close(log->fd); + log->fd = -1; + log_unlock(log); +} + +/* Open foo.gz, verify the header, and load the extra field contents, after + first creating the foo.lock file to gain exclusive access to the foo.* + files. If foo.gz does not exist or is empty, then write the initial header, + extra, and body content of an empty foo.gz log file. If there is an error + creating the lock file due to access restrictions, or an error reading or + writing the foo.gz file, or if the foo.gz file is not a proper log file for + this object (e.g. not a gzip file or does not contain the expected extra + field), then return true. If there is an error, the lock is released. + Otherwise, the lock is left in place. */ +local int log_open(struct log *log) +{ + int op; + + /* release open file resource if left over -- can occur if lock lost + between gzlog_open() and gzlog_write() */ + if (log->fd >= 0) + close(log->fd); + log->fd = -1; + + /* negotiate exclusive access */ + if (log_lock(log) < 0) + return -1; + + /* open the log file, foo.gz */ + strcpy(log->end, ".gz"); + log->fd = open(log->path, O_RDWR | O_CREAT, 0644); + if (log->fd < 0) { + log_close(log); + return -1; + } + + /* if new, initialize foo.gz with an empty log, delete old dictionary */ + if (lseek(log->fd, 0, SEEK_END) == 0) { + if (write(log->fd, log_gzhead, HEAD) != HEAD || + write(log->fd, log_gzext, EXTRA) != EXTRA || + write(log->fd, log_gzbody, BODY) != BODY) { + log_close(log); + return -1; + } + strcpy(log->end, ".dict"); + unlink(log->path); + } + + /* verify log file and load extra field information */ + if ((op = log_head(log)) < 0) { + log_close(log); + return -1; + } + + /* check for interrupted process and if so, recover */ + if (op != NO_OP && log_recover(log, op)) { + log_close(log); + return -1; + } + + /* touch the lock file to prevent another process from grabbing it */ + log_touch(log); + return 0; +} + +/* See gzlog.h for the description of the external methods below */ +gzlog *gzlog_open(char *path) +{ + size_t n; + struct log *log; + + /* check arguments */ + if (path == NULL || *path == 0) + return NULL; + + /* allocate and initialize log structure */ + log = malloc(sizeof(struct log)); + if (log == NULL) + return NULL; + strcpy(log->id, LOGID); + log->fd = -1; + + /* save path and end of path for name construction */ + n = strlen(path); + log->path = malloc(n + 9); /* allow for ".repairs" */ + if (log->path == NULL) { + free(log); + return NULL; + } + strcpy(log->path, path); + log->end = log->path + n; + + /* gain exclusive access and verify log file -- may perform a + recovery operation if needed */ + if (log_open(log)) { + free(log->path); + free(log); + return NULL; + } + + /* return pointer to log structure */ + return log; +} + +/* gzlog_compress() return values: + 0: all good + -1: file i/o error (usually access issue) + -2: memory allocation failure + -3: invalid log pointer argument */ +int gzlog_compress(gzlog *logd) +{ + int fd, ret; + uint block; + size_t len, next; + unsigned char *data, buf[5]; + struct log *log = logd; + + /* check arguments */ + if (log == NULL || strcmp(log->id, LOGID)) + return -3; + + /* see if we lost the lock -- if so get it again and reload the extra + field information (it probably changed), recover last operation if + necessary */ + if (log_check(log) && log_open(log)) + return -1; + + /* create space for uncompressed data */ + len = ((size_t)(log->last - log->first) & ~(((size_t)1 << 10) - 1)) + + log->stored; + if ((data = malloc(len)) == NULL) + return -2; + + /* do statement here is just a cheap trick for error handling */ + do { + /* read in the uncompressed data */ + if (lseek(log->fd, log->first - 1, SEEK_SET) < 0) + break; + next = 0; + while (next < len) { + if (read(log->fd, buf, 5) != 5) + break; + block = PULL2(buf + 1); + if (next + block > len || + read(log->fd, (char *)data + next, block) != block) + break; + next += block; + } + if (lseek(log->fd, 0, SEEK_CUR) != log->last + 4 + log->stored) + break; + log_touch(log); + + /* write the uncompressed data to the .add file */ + strcpy(log->end, ".add"); + fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) + break; + ret = (size_t)write(fd, data, len) != len; + if (ret | close(fd)) + break; + log_touch(log); + + /* write the dictionary for the next compress to the .temp file */ + strcpy(log->end, ".temp"); + fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) + break; + next = DICT > len ? len : DICT; + ret = (size_t)write(fd, (char *)data + len - next, next) != next; + if (ret | close(fd)) + break; + log_touch(log); + + /* roll back to compressed data, mark the compress in progress */ + log->last = log->first; + log->stored = 0; + if (log_mark(log, COMPRESS_OP)) + break; + BAIL(7); + + /* compress and append the data (clears mark) */ + ret = log_compress(log, data, len); + free(data); + return ret; + } while (0); + + /* broke out of do above on i/o error */ + free(data); + return -1; +} + +/* gzlog_write() return values: + 0: all good + -1: file i/o error (usually access issue) + -2: memory allocation failure + -3: invalid log pointer argument */ +int gzlog_write(gzlog *logd, void *data, size_t len) +{ + int fd, ret; + struct log *log = logd; + + /* check arguments */ + if (log == NULL || strcmp(log->id, LOGID)) + return -3; + if (data == NULL || len <= 0) + return 0; + + /* see if we lost the lock -- if so get it again and reload the extra + field information (it probably changed), recover last operation if + necessary */ + if (log_check(log) && log_open(log)) + return -1; + + /* create and write .add file */ + strcpy(log->end, ".add"); + fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) + return -1; + ret = (size_t)write(fd, data, len) != len; + if (ret | close(fd)) + return -1; + log_touch(log); + + /* mark log file with append in progress */ + if (log_mark(log, APPEND_OP)) + return -1; + BAIL(8); + + /* append data (clears mark) */ + if (log_append(log, data, len)) + return -1; + + /* check to see if it's time to compress -- if not, then done */ + if (((log->last - log->first) >> 10) + (log->stored >> 10) < TRIGGER) + return 0; + + /* time to compress */ + return gzlog_compress(log); +} + +/* gzlog_close() return values: + 0: ok + -3: invalid log pointer argument */ +int gzlog_close(gzlog *logd) +{ + struct log *log = logd; + + /* check arguments */ + if (log == NULL || strcmp(log->id, LOGID)) + return -3; + + /* close the log file and release the lock */ + log_close(log); + + /* free structure and return */ + if (log->path != NULL) + free(log->path); + strcpy(log->id, "bad"); + free(log); + return 0; +} diff --git a/deps/zlib-1.3.1/examples/gzlog.h b/deps/zlib-1.3.1/examples/gzlog.h new file mode 100644 index 0000000..4f05109 --- /dev/null +++ b/deps/zlib-1.3.1/examples/gzlog.h @@ -0,0 +1,91 @@ +/* gzlog.h + Copyright (C) 2004, 2008, 2012 Mark Adler, all rights reserved + version 2.2, 14 Aug 2012 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler@alumni.caltech.edu + */ + +/* Version History: + 1.0 26 Nov 2004 First version + 2.0 25 Apr 2008 Complete redesign for recovery of interrupted operations + Interface changed slightly in that now path is a prefix + Compression now occurs as needed during gzlog_write() + gzlog_write() now always leaves the log file as valid gzip + 2.1 8 Jul 2012 Fix argument checks in gzlog_compress() and gzlog_write() + 2.2 14 Aug 2012 Clean up signed comparisons + */ + +/* + The gzlog object allows writing short messages to a gzipped log file, + opening the log file locked for small bursts, and then closing it. The log + object works by appending stored (uncompressed) data to the gzip file until + 1 MB has been accumulated. At that time, the stored data is compressed, and + replaces the uncompressed data in the file. The log file is truncated to + its new size at that time. After each write operation, the log file is a + valid gzip file that can decompressed to recover what was written. + + The gzlog operations can be interrupted at any point due to an application or + system crash, and the log file will be recovered the next time the log is + opened with gzlog_open(). + */ + +#ifndef GZLOG_H +#define GZLOG_H + +/* gzlog object type */ +typedef void gzlog; + +/* Open a gzlog object, creating the log file if it does not exist. Return + NULL on error. Note that gzlog_open() could take a while to complete if it + has to wait to verify that a lock is stale (possibly for five minutes), or + if there is significant contention with other instantiations of this object + when locking the resource. path is the prefix of the file names created by + this object. If path is "foo", then the log file will be "foo.gz", and + other auxiliary files will be created and destroyed during the process: + "foo.dict" for a compression dictionary, "foo.temp" for a temporary (next) + dictionary, "foo.add" for data being added or compressed, "foo.lock" for the + lock file, and "foo.repairs" to log recovery operations performed due to + interrupted gzlog operations. A gzlog_open() followed by a gzlog_close() + will recover a previously interrupted operation, if any. */ +gzlog *gzlog_open(char *path); + +/* Write to a gzlog object. Return zero on success, -1 if there is a file i/o + error on any of the gzlog files (this should not happen if gzlog_open() + succeeded, unless the device has run out of space or leftover auxiliary + files have permissions or ownership that prevent their use), -2 if there is + a memory allocation failure, or -3 if the log argument is invalid (e.g. if + it was not created by gzlog_open()). This function will write data to the + file uncompressed, until 1 MB has been accumulated, at which time that data + will be compressed. The log file will be a valid gzip file upon successful + return. */ +int gzlog_write(gzlog *log, void *data, size_t len); + +/* Force compression of any uncompressed data in the log. This should be used + sparingly, if at all. The main application would be when a log file will + not be appended to again. If this is used to compress frequently while + appending, it will both significantly increase the execution time and + reduce the compression ratio. The return codes are the same as for + gzlog_write(). */ +int gzlog_compress(gzlog *log); + +/* Close a gzlog object. Return zero on success, -3 if the log argument is + invalid. The log object is freed, and so cannot be referenced again. */ +int gzlog_close(gzlog *log); + +#endif diff --git a/deps/zlib-1.3.1/examples/gznorm.c b/deps/zlib-1.3.1/examples/gznorm.c new file mode 100644 index 0000000..68e0a0f --- /dev/null +++ b/deps/zlib-1.3.1/examples/gznorm.c @@ -0,0 +1,470 @@ +/* gznorm.c -- normalize a gzip stream + * Copyright (C) 2018 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * Version 1.0 7 Oct 2018 Mark Adler */ + +// gznorm takes a gzip stream, potentially containing multiple members, and +// converts it to a gzip stream with a single member. In addition the gzip +// header is normalized, removing the file name and time stamp, and setting the +// other header contents (XFL, OS) to fixed values. gznorm does not recompress +// the data, so it is fast, but no advantage is gained from the history that +// could be available across member boundaries. + +#include <stdio.h> // fread, fwrite, putc, fflush, ferror, fprintf, + // vsnprintf, stdout, stderr, NULL, FILE +#include <stdlib.h> // malloc, free +#include <string.h> // strerror +#include <errno.h> // errno +#include <stdarg.h> // va_list, va_start, va_end +#include "zlib.h" // inflateInit2, inflate, inflateReset, inflateEnd, + // z_stream, z_off_t, crc32_combine, Z_NULL, Z_BLOCK, + // Z_OK, Z_STREAM_END, Z_BUF_ERROR, Z_DATA_ERROR, + // Z_MEM_ERROR + +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) +# include <fcntl.h> +# include <io.h> +# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +#else +# define SET_BINARY_MODE(file) +#endif + +#define local static + +// printf to an allocated string. Return the string, or NULL if the printf or +// allocation fails. +local char *aprintf(char *fmt, ...) { + // Get the length of the result of the printf. + va_list args; + va_start(args, fmt); + int len = vsnprintf(NULL, 0, fmt, args); + va_end(args); + if (len < 0) + return NULL; + + // Allocate the required space and printf to it. + char *str = malloc(len + 1); + if (str == NULL) + return NULL; + va_start(args, fmt); + vsnprintf(str, len + 1, fmt, args); + va_end(args); + return str; +} + +// Return with an error, putting an allocated error message in *err. Doing an +// inflateEnd() on an already ended state, or one with state set to Z_NULL, is +// permitted. +#define BYE(...) \ + do { \ + inflateEnd(&strm); \ + *err = aprintf(__VA_ARGS__); \ + return 1; \ + } while (0) + +// Chunk size for buffered reads and for decompression. Twice this many bytes +// will be allocated on the stack by gzip_normalize(). Must fit in an unsigned. +#define CHUNK 16384 + +// Read a gzip stream from in and write an equivalent normalized gzip stream to +// out. If given no input, an empty gzip stream will be written. If successful, +// 0 is returned, and *err is set to NULL. On error, 1 is returned, where the +// details of the error are returned in *err, a pointer to an allocated string. +// +// The input may be a stream with multiple gzip members, which is converted to +// a single gzip member on the output. Each gzip member is decompressed at the +// level of deflate blocks. This enables clearing the last-block bit, shifting +// the compressed data to concatenate to the previous member's compressed data, +// which can end at an arbitrary bit boundary, and identifying stored blocks in +// order to resynchronize those to byte boundaries. The deflate compressed data +// is terminated with a 10-bit empty fixed block. If any members on the input +// end with a 10-bit empty fixed block, then that block is excised from the +// stream. This avoids appending empty fixed blocks for every normalization, +// and assures that gzip_normalize applied a second time will not change the +// input. The pad bits after stored block headers and after the final deflate +// block are all forced to zeros. +local int gzip_normalize(FILE *in, FILE *out, char **err) { + // initialize the inflate engine to process a gzip member + z_stream strm; + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + if (inflateInit2(&strm, 15 + 16) != Z_OK) + BYE("out of memory"); + + // State while processing the input gzip stream. + enum { // BETWEEN -> HEAD -> BLOCK -> TAIL -> BETWEEN -> ... + BETWEEN, // between gzip members (must end in this state) + HEAD, // reading a gzip header + BLOCK, // reading deflate blocks + TAIL // reading a gzip trailer + } state = BETWEEN; // current component being processed + unsigned long crc = 0; // accumulated CRC of uncompressed data + unsigned long len = 0; // accumulated length of uncompressed data + unsigned long buf = 0; // deflate stream bit buffer of num bits + int num = 0; // number of bits in buf (at bottom) + + // Write a canonical gzip header (no mod time, file name, comment, extra + // block, or extra flags, and OS is marked as unknown). + fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); + + // Process the gzip stream from in until reaching the end of the input, + // encountering invalid input, or experiencing an i/o error. + int more; // true if not at the end of the input + do { + // State inside this loop. + unsigned char *put; // next input buffer location to process + int prev; // number of bits from previous block in + // the bit buffer, or -1 if not at the + // start of a block + unsigned long long memb; // uncompressed length of member + size_t tail; // number of trailer bytes read (0..8) + unsigned long part; // accumulated trailer component + + // Get the next chunk of input from in. + unsigned char dat[CHUNK]; + strm.avail_in = fread(dat, 1, CHUNK, in); + if (strm.avail_in == 0) + break; + more = strm.avail_in == CHUNK; + strm.next_in = put = dat; + + // Run that chunk of input through the inflate engine to exhaustion. + do { + // At this point it is assured that strm.avail_in > 0. + + // Inflate until the end of a gzip component (header, deflate + // block, trailer) is reached, or until all of the chunk is + // consumed. The resulting decompressed data is discarded, though + // the total size of the decompressed data in each member is + // tracked, for the calculation of the total CRC. + do { + // inflate and handle any errors + unsigned char scrap[CHUNK]; + strm.avail_out = CHUNK; + strm.next_out = scrap; + int ret = inflate(&strm, Z_BLOCK); + if (ret == Z_MEM_ERROR) + BYE("out of memory"); + if (ret == Z_DATA_ERROR) + BYE("input invalid: %s", strm.msg); + if (ret != Z_OK && ret != Z_BUF_ERROR && ret != Z_STREAM_END) + BYE("internal error"); + + // Update the number of uncompressed bytes generated in this + // member. The actual count (not modulo 2^32) is required to + // correctly compute the total CRC. + unsigned got = CHUNK - strm.avail_out; + memb += got; + if (memb < got) + BYE("overflow error"); + + // Continue to process this chunk until it is consumed, or + // until the end of a component (header, deflate block, or + // trailer) is reached. + } while (strm.avail_out == 0 && (strm.data_type & 0x80) == 0); + + // Since strm.avail_in was > 0 for the inflate call, some input was + // just consumed. It is therefore assured that put < strm.next_in. + + // Disposition the consumed component or part of a component. + switch (state) { + case BETWEEN: + state = HEAD; + // Fall through to HEAD when some or all of the header is + // processed. + + case HEAD: + // Discard the header. + if (strm.data_type & 0x80) { + // End of header reached -- deflate blocks follow. + put = strm.next_in; + prev = num; + memb = 0; + state = BLOCK; + } + break; + + case BLOCK: + // Copy the deflate stream to the output, but with the + // last-block-bit cleared. Re-synchronize stored block + // headers to the output byte boundaries. The bytes at + // put..strm.next_in-1 is the compressed data that has been + // processed and is ready to be copied to the output. + + // At this point, it is assured that new compressed data is + // available, i.e., put < strm.next_in. If prev is -1, then + // that compressed data starts in the middle of a deflate + // block. If prev is not -1, then the bits in the bit + // buffer, possibly combined with the bits in *put, contain + // the three-bit header of the new deflate block. In that + // case, prev is the number of bits from the previous block + // that remain in the bit buffer. Since num is the number + // of bits in the bit buffer, we have that num - prev is + // the number of bits from the new block currently in the + // bit buffer. + + // If strm.data_type & 0xc0 is 0x80, then the last byte of + // the available compressed data includes the last bits of + // the end of a deflate block. In that case, that last byte + // also has strm.data_type & 0x1f bits of the next deflate + // block, in the range 0..7. If strm.data_type & 0xc0 is + // 0xc0, then the last byte of the compressed data is the + // end of the deflate stream, followed by strm.data_type & + // 0x1f pad bits, also in the range 0..7. + + // Set bits to the number of bits not yet consumed from the + // last byte. If we are at the end of the block, bits is + // either the number of bits in the last byte belonging to + // the next block, or the number of pad bits after the + // final block. In either of those cases, bits is in the + // range 0..7. + ; // (required due to C syntax oddity) + int bits = strm.data_type & 0x1f; + + if (prev != -1) { + // We are at the start of a new block. Clear the last + // block bit, and check for special cases. If it is a + // stored block, then emit the header and pad to the + // next byte boundary. If it is a final, empty fixed + // block, then excise it. + + // Some or all of the three header bits for this block + // may already be in the bit buffer. Load any remaining + // header bits into the bit buffer. + if (num - prev < 3) { + buf += (unsigned long)*put++ << num; + num += 8; + } + + // Set last to have a 1 in the position of the last + // block bit in the bit buffer. + unsigned long last = (unsigned long)1 << prev; + + if (((buf >> prev) & 7) == 3) { + // This is a final fixed block. Load at least ten + // bits from this block, including the header, into + // the bit buffer. We already have at least three, + // so at most one more byte needs to be loaded. + if (num - prev < 10) { + if (put == strm.next_in) + // Need to go get and process more input. + // We'll end up back here to finish this. + break; + buf += (unsigned long)*put++ << num; + num += 8; + } + if (((buf >> prev) & 0x3ff) == 3) { + // That final fixed block is empty. Delete it + // to avoid adding an empty block every time a + // gzip stream is normalized. + num = prev; + buf &= last - 1; // zero the pad bits + } + } + else if (((buf >> prev) & 6) == 0) { + // This is a stored block. Flush to the next + // byte boundary after the three-bit header. + num = (prev + 10) & ~7; + buf &= last - 1; // zero the pad bits + } + + // Clear the last block bit. + buf &= ~last; + + // Write out complete bytes in the bit buffer. + while (num >= 8) { + putc(buf, out); + buf >>= 8; + num -= 8; + } + + // If no more bytes left to process, then we have + // consumed the byte that had bits from the next block. + if (put == strm.next_in) + bits = 0; + } + + // We are done handling the deflate block header. Now copy + // all or almost all of the remaining compressed data that + // has been processed so far. Don't copy one byte at the + // end if it contains bits from the next deflate block or + // pad bits at the end of a deflate block. + + // mix is 1 if we are at the end of a deflate block, and if + // some of the bits in the last byte follow this block. mix + // is 0 if we are in the middle of a deflate block, if the + // deflate block ended on a byte boundary, or if all of the + // compressed data processed so far has been consumed. + int mix = (strm.data_type & 0x80) && bits; + + // Copy all of the processed compressed data to the output, + // except for the last byte if it contains bits from the + // next deflate block or pad bits at the end of the deflate + // stream. Copy the data after shifting in num bits from + // buf in front of it, leaving num bits from the end of the + // compressed data in buf when done. + unsigned char *end = strm.next_in - mix; + if (put < end) { + if (num) + // Insert num bits from buf before the data being + // copied. + do { + buf += (unsigned)(*put++) << num; + putc(buf, out); + buf >>= 8; + } while (put < end); + else { + // No shifting needed -- write directly. + fwrite(put, 1, end - put, out); + put = end; + } + } + + // Process the last processed byte if it wasn't written. + if (mix) { + // Load the last byte into the bit buffer. + buf += (unsigned)(*put++) << num; + num += 8; + + if (strm.data_type & 0x40) { + // We are at the end of the deflate stream and + // there are bits pad bits. Discard the pad bits + // and write a byte to the output, if available. + // Leave the num bits left over in buf to prepend + // to the next deflate stream. + num -= bits; + if (num >= 8) { + putc(buf, out); + num -= 8; + buf >>= 8; + } + + // Force the pad bits in the bit buffer to zeros. + buf &= ((unsigned long)1 << num) - 1; + + // Don't need to set prev here since going to TAIL. + } + else + // At the end of an internal deflate block. Leave + // the last byte in the bit buffer to examine on + // the next entry to BLOCK, when more bits from the + // next block will be available. + prev = num - bits; // number of bits in buffer + // from current block + } + + // Don't have a byte left over, so we are in the middle of + // a deflate block, or the deflate block ended on a byte + // boundary. Set prev appropriately for the next entry into + // BLOCK. + else if (strm.data_type & 0x80) + // The block ended on a byte boundary, so no header + // bits are in the bit buffer. + prev = num; + else + // In the middle of a deflate block, so no header here. + prev = -1; + + // Check for the end of the deflate stream. + if ((strm.data_type & 0xc0) == 0xc0) { + // That ends the deflate stream on the input side, the + // pad bits were discarded, and any remaining bits from + // the last block in the stream are saved in the bit + // buffer to prepend to the next stream. Process the + // gzip trailer next. + tail = 0; + part = 0; + state = TAIL; + } + break; + + case TAIL: + // Accumulate available trailer bytes to update the total + // CRC and the total uncompressed length. + do { + part = (part >> 8) + ((unsigned long)(*put++) << 24); + tail++; + if (tail == 4) { + // Update the total CRC. + z_off_t len2 = memb; + if (len2 < 0 || (unsigned long long)len2 != memb) + BYE("overflow error"); + crc = crc ? crc32_combine(crc, part, len2) : part; + part = 0; + } + else if (tail == 8) { + // Update the total uncompressed length. (It's ok + // if this sum is done modulo 2^32.) + len += part; + + // At the end of a member. Set up to inflate an + // immediately following gzip member. (If we made + // it this far, then the trailer was valid.) + if (inflateReset(&strm) != Z_OK) + BYE("internal error"); + state = BETWEEN; + break; + } + } while (put < strm.next_in); + break; + } + + // Process the input buffer until completely consumed. + } while (strm.avail_in > 0); + + // Process input until end of file, invalid input, or i/o error. + } while (more); + + // Done with the inflate engine. + inflateEnd(&strm); + + // Verify the validity of the input. + if (state != BETWEEN) + BYE("input invalid: incomplete gzip stream"); + + // Write the remaining deflate stream bits, followed by a terminating + // deflate fixed block. + buf += (unsigned long)3 << num; + putc(buf, out); + putc(buf >> 8, out); + if (num > 6) + putc(0, out); + + // Write the gzip trailer, which is the CRC and the uncompressed length + // modulo 2^32, both in little-endian order. + putc(crc, out); + putc(crc >> 8, out); + putc(crc >> 16, out); + putc(crc >> 24, out); + putc(len, out); + putc(len >> 8, out); + putc(len >> 16, out); + putc(len >> 24, out); + fflush(out); + + // Check for any i/o errors. + if (ferror(in) || ferror(out)) + BYE("i/o error: %s", strerror(errno)); + + // All good! + *err = NULL; + return 0; +} + +// Normalize the gzip stream on stdin, writing the result to stdout. +int main(void) { + // Avoid end-of-line conversions on evil operating systems. + SET_BINARY_MODE(stdin); + SET_BINARY_MODE(stdout); + + // Normalize from stdin to stdout, returning 1 on error, 0 if ok. + char *err; + int ret = gzip_normalize(stdin, stdout, &err); + if (ret) + fprintf(stderr, "gznorm error: %s\n", err); + free(err); + return ret; +} diff --git a/deps/zlib-1.3.1/examples/zlib_how.html b/deps/zlib-1.3.1/examples/zlib_how.html new file mode 100644 index 0000000..43271b9 --- /dev/null +++ b/deps/zlib-1.3.1/examples/zlib_how.html @@ -0,0 +1,549 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" + "http://www.w3.org/TR/html4/loose.dtd"> +<html> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> +<title>zlib Usage Example</title> +<!-- Copyright (c) 2004-2023 Mark Adler. --> +</head> +<body bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#00A000"> +<h2 align="center"> zlib Usage Example </h2> +We often get questions about how the <tt>deflate()</tt> and <tt>inflate()</tt> functions should be used. +Users wonder when they should provide more input, when they should use more output, +what to do with a <tt>Z_BUF_ERROR</tt>, how to make sure the process terminates properly, and +so on. So for those who have read <tt>zlib.h</tt> (a few times), and +would like further edification, below is an annotated example in C of simple routines to compress and decompress +from an input file to an output file using <tt>deflate()</tt> and <tt>inflate()</tt> respectively. The +annotations are interspersed between lines of the code. So please read between the lines. +We hope this helps explain some of the intricacies of <em>zlib</em>. +<p> +Without further ado, here is the program <a href="zpipe.c"><tt>zpipe.c</tt></a>: +<pre><b> +/* zpipe.c: example of proper use of zlib's inflate() and deflate() + Not copyrighted -- provided to the public domain + Version 1.4 11 December 2005 Mark Adler */ + +/* Version history: + 1.0 30 Oct 2004 First version + 1.1 8 Nov 2004 Add void casting for unused return values + Use switch statement for inflate() return values + 1.2 9 Nov 2004 Add assertions to document zlib guarantees + 1.3 6 Apr 2005 Remove incorrect assertion in inf() + 1.4 11 Dec 2005 Add hack to avoid MSDOS end-of-line conversions + Avoid some compiler warnings for input and output buffers + */ +</b></pre><!-- --> +We now include the header files for the required definitions. From +<tt>stdio.h</tt> we use <tt>fopen()</tt>, <tt>fread()</tt>, <tt>fwrite()</tt>, +<tt>feof()</tt>, <tt>ferror()</tt>, and <tt>fclose()</tt> for file i/o, and +<tt>fputs()</tt> for error messages. From <tt>string.h</tt> we use +<tt>strcmp()</tt> for command line argument processing. +From <tt>assert.h</tt> we use the <tt>assert()</tt> macro. +From <tt>zlib.h</tt> +we use the basic compression functions <tt>deflateInit()</tt>, +<tt>deflate()</tt>, and <tt>deflateEnd()</tt>, and the basic decompression +functions <tt>inflateInit()</tt>, <tt>inflate()</tt>, and +<tt>inflateEnd()</tt>. +<pre><b> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include "zlib.h" +</b></pre><!-- --> +This is an ugly hack required to avoid corruption of the input and output data on +Windows/MS-DOS systems. Without this, those systems would assume that the input and output +files are text, and try to convert the end-of-line characters from one standard to +another. That would corrupt binary data, and in particular would render the compressed data unusable. +This sets the input and output to binary which suppresses the end-of-line conversions. +<tt>SET_BINARY_MODE()</tt> will be used later on <tt>stdin</tt> and <tt>stdout</tt>, at the beginning of <tt>main()</tt>. +<pre><b> +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) +# include <fcntl.h> +# include <io.h> +# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +#else +# define SET_BINARY_MODE(file) +#endif +</b></pre><!-- --> +<tt>CHUNK</tt> is simply the buffer size for feeding data to and pulling data +from the <em>zlib</em> routines. Larger buffer sizes would be more efficient, +especially for <tt>inflate()</tt>. If the memory is available, buffers sizes +on the order of 128K or 256K bytes should be used. +<pre><b> +#define CHUNK 16384 +</b></pre><!-- --> +The <tt>def()</tt> routine compresses data from an input file to an output file. The output data +will be in the <em>zlib</em> format, which is different from the <em>gzip</em> or <em>zip</em> +formats. The <em>zlib</em> format has a very small header of only two bytes to identify it as +a <em>zlib</em> stream and to provide decoding information, and a four-byte trailer with a fast +check value to verify the integrity of the uncompressed data after decoding. +<pre><b> +/* Compress from file source to file dest until EOF on source. + def() returns Z_OK on success, Z_MEM_ERROR if memory could not be + allocated for processing, Z_STREAM_ERROR if an invalid compression + level is supplied, Z_VERSION_ERROR if the version of zlib.h and the + version of the library linked do not match, or Z_ERRNO if there is + an error reading or writing the files. */ +int def(FILE *source, FILE *dest, int level) +{ +</b></pre> +Here are the local variables for <tt>def()</tt>. <tt>ret</tt> will be used for <em>zlib</em> +return codes. <tt>flush</tt> will keep track of the current flushing state for <tt>deflate()</tt>, +which is either no flushing, or flush to completion after the end of the input file is reached. +<tt>have</tt> is the amount of data returned from <tt>deflate()</tt>. The <tt>strm</tt> structure +is used to pass information to and from the <em>zlib</em> routines, and to maintain the +<tt>deflate()</tt> state. <tt>in</tt> and <tt>out</tt> are the input and output buffers for +<tt>deflate()</tt>. +<pre><b> + int ret, flush; + unsigned have; + z_stream strm; + unsigned char in[CHUNK]; + unsigned char out[CHUNK]; +</b></pre><!-- --> +The first thing we do is to initialize the <em>zlib</em> state for compression using +<tt>deflateInit()</tt>. This must be done before the first use of <tt>deflate()</tt>. +The <tt>zalloc</tt>, <tt>zfree</tt>, and <tt>opaque</tt> fields in the <tt>strm</tt> +structure must be initialized before calling <tt>deflateInit()</tt>. Here they are +set to the <em>zlib</em> constant <tt>Z_NULL</tt> to request that <em>zlib</em> use +the default memory allocation routines. An application may also choose to provide +custom memory allocation routines here. <tt>deflateInit()</tt> will allocate on the +order of 256K bytes for the internal state. +(See <a href="zlib_tech.html"><em>zlib Technical Details</em></a>.) +<p> +<tt>deflateInit()</tt> is called with a pointer to the structure to be initialized and +the compression level, which is an integer in the range of -1 to 9. Lower compression +levels result in faster execution, but less compression. Higher levels result in +greater compression, but slower execution. The <em>zlib</em> constant Z_DEFAULT_COMPRESSION, +equal to -1, +provides a good compromise between compression and speed and is equivalent to level 6. +Level 0 actually does no compression at all, and in fact expands the data slightly to produce +the <em>zlib</em> format (it is not a byte-for-byte copy of the input). +More advanced applications of <em>zlib</em> +may use <tt>deflateInit2()</tt> here instead. Such an application may want to reduce how +much memory will be used, at some price in compression. Or it may need to request a +<em>gzip</em> header and trailer instead of a <em>zlib</em> header and trailer, or raw +encoding with no header or trailer at all. +<p> +We must check the return value of <tt>deflateInit()</tt> against the <em>zlib</em> constant +<tt>Z_OK</tt> to make sure that it was able to +allocate memory for the internal state, and that the provided arguments were valid. +<tt>deflateInit()</tt> will also check that the version of <em>zlib</em> that the <tt>zlib.h</tt> +file came from matches the version of <em>zlib</em> actually linked with the program. This +is especially important for environments in which <em>zlib</em> is a shared library. +<p> +Note that an application can initialize multiple, independent <em>zlib</em> streams, which can +operate in parallel. The state information maintained in the structure allows the <em>zlib</em> +routines to be reentrant. +<pre><b> + /* allocate deflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + ret = deflateInit(&strm, level); + if (ret != Z_OK) + return ret; +</b></pre><!-- --> +With the pleasantries out of the way, now we can get down to business. The outer <tt>do</tt>-loop +reads all of the input file and exits at the bottom of the loop once end-of-file is reached. +This loop contains the only call of <tt>deflate()</tt>. So we must make sure that all of the +input data has been processed and that all of the output data has been generated and consumed +before we fall out of the loop at the bottom. +<pre><b> + /* compress until end of file */ + do { +</b></pre> +We start off by reading data from the input file. The number of bytes read is put directly +into <tt>avail_in</tt>, and a pointer to those bytes is put into <tt>next_in</tt>. We also +check to see if end-of-file on the input has been reached using feof(). +If we are at the end of file, then <tt>flush</tt> is set to the +<em>zlib</em> constant <tt>Z_FINISH</tt>, which is later passed to <tt>deflate()</tt> to +indicate that this is the last chunk of input data to compress. +If we are not yet at the end of the input, then the <em>zlib</em> +constant <tt>Z_NO_FLUSH</tt> will be passed to <tt>deflate</tt> to indicate that we are still +in the middle of the uncompressed data. +<p> +If there is an error in reading from the input file, the process is aborted with +<tt>deflateEnd()</tt> being called to free the allocated <em>zlib</em> state before returning +the error. We wouldn't want a memory leak, now would we? <tt>deflateEnd()</tt> can be called +at any time after the state has been initialized. Once that's done, <tt>deflateInit()</tt> (or +<tt>deflateInit2()</tt>) would have to be called to start a new compression process. There is +no point here in checking the <tt>deflateEnd()</tt> return code. The deallocation can't fail. +<pre><b> + strm.avail_in = fread(in, 1, CHUNK, source); + if (ferror(source)) { + (void)deflateEnd(&strm); + return Z_ERRNO; + } + flush = feof(source) ? Z_FINISH : Z_NO_FLUSH; + strm.next_in = in; +</b></pre><!-- --> +The inner <tt>do</tt>-loop passes our chunk of input data to <tt>deflate()</tt>, and then +keeps calling <tt>deflate()</tt> until it is done producing output. Once there is no more +new output, <tt>deflate()</tt> is guaranteed to have consumed all of the input, i.e., +<tt>avail_in</tt> will be zero. +<pre><b> + /* run deflate() on input until output buffer not full, finish + compression if all of source has been read in */ + do { +</b></pre> +Output space is provided to <tt>deflate()</tt> by setting <tt>avail_out</tt> to the number +of available output bytes and <tt>next_out</tt> to a pointer to that space. +<pre><b> + strm.avail_out = CHUNK; + strm.next_out = out; +</b></pre> +Now we call the compression engine itself, <tt>deflate()</tt>. It takes as many of the +<tt>avail_in</tt> bytes at <tt>next_in</tt> as it can process, and writes as many as +<tt>avail_out</tt> bytes to <tt>next_out</tt>. Those counters and pointers are then +updated past the input data consumed and the output data written. It is the amount of +output space available that may limit how much input is consumed. +Hence the inner loop to make sure that +all of the input is consumed by providing more output space each time. Since <tt>avail_in</tt> +and <tt>next_in</tt> are updated by <tt>deflate()</tt>, we don't have to mess with those +between <tt>deflate()</tt> calls until it's all used up. +<p> +The parameters to <tt>deflate()</tt> are a pointer to the <tt>strm</tt> structure containing +the input and output information and the internal compression engine state, and a parameter +indicating whether and how to flush data to the output. Normally <tt>deflate</tt> will consume +several K bytes of input data before producing any output (except for the header), in order +to accumulate statistics on the data for optimum compression. It will then put out a burst of +compressed data, and proceed to consume more input before the next burst. Eventually, +<tt>deflate()</tt> +must be told to terminate the stream, complete the compression with provided input data, and +write out the trailer check value. <tt>deflate()</tt> will continue to compress normally as long +as the flush parameter is <tt>Z_NO_FLUSH</tt>. Once the <tt>Z_FINISH</tt> parameter is provided, +<tt>deflate()</tt> will begin to complete the compressed output stream. However depending on how +much output space is provided, <tt>deflate()</tt> may have to be called several times until it +has provided the complete compressed stream, even after it has consumed all of the input. The flush +parameter must continue to be <tt>Z_FINISH</tt> for those subsequent calls. +<p> +There are other values of the flush parameter that are used in more advanced applications. You can +force <tt>deflate()</tt> to produce a burst of output that encodes all of the input data provided +so far, even if it wouldn't have otherwise, for example to control data latency on a link with +compressed data. You can also ask that <tt>deflate()</tt> do that as well as erase any history up to +that point so that what follows can be decompressed independently, for example for random access +applications. Both requests will degrade compression by an amount depending on how often such +requests are made. +<p> +<tt>deflate()</tt> has a return value that can indicate errors, yet we do not check it here. Why +not? Well, it turns out that <tt>deflate()</tt> can do no wrong here. Let's go through +<tt>deflate()</tt>'s return values and dispense with them one by one. The possible values are +<tt>Z_OK</tt>, <tt>Z_STREAM_END</tt>, <tt>Z_STREAM_ERROR</tt>, or <tt>Z_BUF_ERROR</tt>. <tt>Z_OK</tt> +is, well, ok. <tt>Z_STREAM_END</tt> is also ok and will be returned for the last call of +<tt>deflate()</tt>. This is already guaranteed by calling <tt>deflate()</tt> with <tt>Z_FINISH</tt> +until it has no more output. <tt>Z_STREAM_ERROR</tt> is only possible if the stream is not +initialized properly, but we did initialize it properly. There is no harm in checking for +<tt>Z_STREAM_ERROR</tt> here, for example to check for the possibility that some +other part of the application inadvertently clobbered the memory containing the <em>zlib</em> state. +<tt>Z_BUF_ERROR</tt> will be explained further below, but +suffice it to say that this is simply an indication that <tt>deflate()</tt> could not consume +more input or produce more output. <tt>deflate()</tt> can be called again with more output space +or more available input, which it will be in this code. +<pre><b> + ret = deflate(&strm, flush); /* no bad return value */ + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ +</b></pre> +Now we compute how much output <tt>deflate()</tt> provided on the last call, which is the +difference between how much space was provided before the call, and how much output space +is still available after the call. Then that data, if any, is written to the output file. +We can then reuse the output buffer for the next call of <tt>deflate()</tt>. Again if there +is a file i/o error, we call <tt>deflateEnd()</tt> before returning to avoid a memory leak. +<pre><b> + have = CHUNK - strm.avail_out; + if (fwrite(out, 1, have, dest) != have || ferror(dest)) { + (void)deflateEnd(&strm); + return Z_ERRNO; + } +</b></pre> +The inner <tt>do</tt>-loop is repeated until the last <tt>deflate()</tt> call fails to fill the +provided output buffer. Then we know that <tt>deflate()</tt> has done as much as it can with +the provided input, and that all of that input has been consumed. We can then fall out of this +loop and reuse the input buffer. +<p> +The way we tell that <tt>deflate()</tt> has no more output is by seeing that it did not fill +the output buffer, leaving <tt>avail_out</tt> greater than zero. However suppose that +<tt>deflate()</tt> has no more output, but just so happened to exactly fill the output buffer! +<tt>avail_out</tt> is zero, and we can't tell that <tt>deflate()</tt> has done all it can. +As far as we know, <tt>deflate()</tt> +has more output for us. So we call it again. But now <tt>deflate()</tt> produces no output +at all, and <tt>avail_out</tt> remains unchanged as <tt>CHUNK</tt>. That <tt>deflate()</tt> call +wasn't able to do anything, either consume input or produce output, and so it returns +<tt>Z_BUF_ERROR</tt>. (See, I told you I'd cover this later.) However this is not a problem at +all. Now we finally have the desired indication that <tt>deflate()</tt> is really done, +and so we drop out of the inner loop to provide more input to <tt>deflate()</tt>. +<p> +With <tt>flush</tt> set to <tt>Z_FINISH</tt>, this final set of <tt>deflate()</tt> calls will +complete the output stream. Once that is done, subsequent calls of <tt>deflate()</tt> would return +<tt>Z_STREAM_ERROR</tt> if the flush parameter is not <tt>Z_FINISH</tt>, and do no more processing +until the state is reinitialized. +<p> +Some applications of <em>zlib</em> have two loops that call <tt>deflate()</tt> +instead of the single inner loop we have here. The first loop would call +without flushing and feed all of the data to <tt>deflate()</tt>. The second loop would call +<tt>deflate()</tt> with no more +data and the <tt>Z_FINISH</tt> parameter to complete the process. As you can see from this +example, that can be avoided by simply keeping track of the current flush state. +<pre><b> + } while (strm.avail_out == 0); + assert(strm.avail_in == 0); /* all input will be used */ +</b></pre><!-- --> +Now we check to see if we have already processed all of the input file. That information was +saved in the <tt>flush</tt> variable, so we see if that was set to <tt>Z_FINISH</tt>. If so, +then we're done and we fall out of the outer loop. We're guaranteed to get <tt>Z_STREAM_END</tt> +from the last <tt>deflate()</tt> call, since we ran it until the last chunk of input was +consumed and all of the output was generated. +<pre><b> + /* done when last data in file processed */ + } while (flush != Z_FINISH); + assert(ret == Z_STREAM_END); /* stream will be complete */ +</b></pre><!-- --> +The process is complete, but we still need to deallocate the state to avoid a memory leak +(or rather more like a memory hemorrhage if you didn't do this). Then +finally we can return with a happy return value. +<pre><b> + /* clean up and return */ + (void)deflateEnd(&strm); + return Z_OK; +} +</b></pre><!-- --> +Now we do the same thing for decompression in the <tt>inf()</tt> routine. <tt>inf()</tt> +decompresses what is hopefully a valid <em>zlib</em> stream from the input file and writes the +uncompressed data to the output file. Much of the discussion above for <tt>def()</tt> +applies to <tt>inf()</tt> as well, so the discussion here will focus on the differences between +the two. +<pre><b> +/* Decompress from file source to file dest until stream ends or EOF. + inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be + allocated for processing, Z_DATA_ERROR if the deflate data is + invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and + the version of the library linked do not match, or Z_ERRNO if there + is an error reading or writing the files. */ +int inf(FILE *source, FILE *dest) +{ +</b></pre> +The local variables have the same functionality as they do for <tt>def()</tt>. The +only difference is that there is no <tt>flush</tt> variable, since <tt>inflate()</tt> +can tell from the <em>zlib</em> stream itself when the stream is complete. +<pre><b> + int ret; + unsigned have; + z_stream strm; + unsigned char in[CHUNK]; + unsigned char out[CHUNK]; +</b></pre><!-- --> +The initialization of the state is the same, except that there is no compression level, +of course, and two more elements of the structure are initialized. <tt>avail_in</tt> +and <tt>next_in</tt> must be initialized before calling <tt>inflateInit()</tt>. This +is because the application has the option to provide the start of the zlib stream in +order for <tt>inflateInit()</tt> to have access to information about the compression +method to aid in memory allocation. In the current implementation of <em>zlib</em> +(up through versions 1.2.x), the method-dependent memory allocations are deferred to the first call of +<tt>inflate()</tt> anyway. However those fields must be initialized since later versions +of <em>zlib</em> that provide more compression methods may take advantage of this interface. +In any case, no decompression is performed by <tt>inflateInit()</tt>, so the +<tt>avail_out</tt> and <tt>next_out</tt> fields do not need to be initialized before calling. +<p> +Here <tt>avail_in</tt> is set to zero and <tt>next_in</tt> is set to <tt>Z_NULL</tt> to +indicate that no input data is being provided. +<pre><b> + /* allocate inflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit(&strm); + if (ret != Z_OK) + return ret; +</b></pre><!-- --> +The outer <tt>do</tt>-loop decompresses input until <tt>inflate()</tt> indicates +that it has reached the end of the compressed data and has produced all of the uncompressed +output. This is in contrast to <tt>def()</tt> which processes all of the input file. +If end-of-file is reached before the compressed data self-terminates, then the compressed +data is incomplete and an error is returned. +<pre><b> + /* decompress until deflate stream ends or end of file */ + do { +</b></pre> +We read input data and set the <tt>strm</tt> structure accordingly. If we've reached the +end of the input file, then we leave the outer loop and report an error, since the +compressed data is incomplete. Note that we may read more data than is eventually consumed +by <tt>inflate()</tt>, if the input file continues past the <em>zlib</em> stream. +For applications where <em>zlib</em> streams are embedded in other data, this routine would +need to be modified to return the unused data, or at least indicate how much of the input +data was not used, so the application would know where to pick up after the <em>zlib</em> stream. +<pre><b> + strm.avail_in = fread(in, 1, CHUNK, source); + if (ferror(source)) { + (void)inflateEnd(&strm); + return Z_ERRNO; + } + if (strm.avail_in == 0) + break; + strm.next_in = in; +</b></pre><!-- --> +The inner <tt>do</tt>-loop has the same function it did in <tt>def()</tt>, which is to +keep calling <tt>inflate()</tt> until has generated all of the output it can with the +provided input. +<pre><b> + /* run inflate() on input until output buffer not full */ + do { +</b></pre> +Just like in <tt>def()</tt>, the same output space is provided for each call of <tt>inflate()</tt>. +<pre><b> + strm.avail_out = CHUNK; + strm.next_out = out; +</b></pre> +Now we run the decompression engine itself. There is no need to adjust the flush parameter, since +the <em>zlib</em> format is self-terminating. The main difference here is that there are +return values that we need to pay attention to. <tt>Z_DATA_ERROR</tt> +indicates that <tt>inflate()</tt> detected an error in the <em>zlib</em> compressed data format, +which means that either the data is not a <em>zlib</em> stream to begin with, or that the data was +corrupted somewhere along the way since it was compressed. The other error to be processed is +<tt>Z_MEM_ERROR</tt>, which can occur since memory allocation is deferred until <tt>inflate()</tt> +needs it, unlike <tt>deflate()</tt>, whose memory is allocated at the start by <tt>deflateInit()</tt>. +<p> +Advanced applications may use +<tt>deflateSetDictionary()</tt> to prime <tt>deflate()</tt> with a set of likely data to improve the +first 32K or so of compression. This is noted in the <em>zlib</em> header, so <tt>inflate()</tt> +requests that that dictionary be provided before it can start to decompress. Without the dictionary, +correct decompression is not possible. For this routine, we have no idea what the dictionary is, +so the <tt>Z_NEED_DICT</tt> indication is converted to a <tt>Z_DATA_ERROR</tt>. +<p> +<tt>inflate()</tt> can also return <tt>Z_STREAM_ERROR</tt>, which should not be possible here, +but could be checked for as noted above for <tt>def()</tt>. <tt>Z_BUF_ERROR</tt> does not need to be +checked for here, for the same reasons noted for <tt>def()</tt>. <tt>Z_STREAM_END</tt> will be +checked for later. +<pre><b> + ret = inflate(&strm, Z_NO_FLUSH); + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ + switch (ret) { + case Z_NEED_DICT: + ret = Z_DATA_ERROR; /* and fall through */ + case Z_DATA_ERROR: + case Z_MEM_ERROR: + (void)inflateEnd(&strm); + return ret; + } +</b></pre> +The output of <tt>inflate()</tt> is handled identically to that of <tt>deflate()</tt>. +<pre><b> + have = CHUNK - strm.avail_out; + if (fwrite(out, 1, have, dest) != have || ferror(dest)) { + (void)inflateEnd(&strm); + return Z_ERRNO; + } +</b></pre> +The inner <tt>do</tt>-loop ends when <tt>inflate()</tt> has no more output as indicated +by not filling the output buffer, just as for <tt>deflate()</tt>. In this case, we cannot +assert that <tt>strm.avail_in</tt> will be zero, since the deflate stream may end before the file +does. +<pre><b> + } while (strm.avail_out == 0); +</b></pre><!-- --> +The outer <tt>do</tt>-loop ends when <tt>inflate()</tt> reports that it has reached the +end of the input <em>zlib</em> stream, has completed the decompression and integrity +check, and has provided all of the output. This is indicated by the <tt>inflate()</tt> +return value <tt>Z_STREAM_END</tt>. The inner loop is guaranteed to leave <tt>ret</tt> +equal to <tt>Z_STREAM_END</tt> if the last chunk of the input file read contained the end +of the <em>zlib</em> stream. So if the return value is not <tt>Z_STREAM_END</tt>, the +loop continues to read more input. +<pre><b> + /* done when inflate() says it's done */ + } while (ret != Z_STREAM_END); +</b></pre><!-- --> +At this point, decompression successfully completed, or we broke out of the loop due to no +more data being available from the input file. If the last <tt>inflate()</tt> return value +is not <tt>Z_STREAM_END</tt>, then the <em>zlib</em> stream was incomplete and a data error +is returned. Otherwise, we return with a happy return value. Of course, <tt>inflateEnd()</tt> +is called first to avoid a memory leak. +<pre><b> + /* clean up and return */ + (void)inflateEnd(&strm); + return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR; +} +</b></pre><!-- --> +That ends the routines that directly use <em>zlib</em>. The following routines make this +a command-line program by running data through the above routines from <tt>stdin</tt> to +<tt>stdout</tt>, and handling any errors reported by <tt>def()</tt> or <tt>inf()</tt>. +<p> +<tt>zerr()</tt> is used to interpret the possible error codes from <tt>def()</tt> +and <tt>inf()</tt>, as detailed in their comments above, and print out an error message. +Note that these are only a subset of the possible return values from <tt>deflate()</tt> +and <tt>inflate()</tt>. +<pre><b> +/* report a zlib or i/o error */ +void zerr(int ret) +{ + fputs("zpipe: ", stderr); + switch (ret) { + case Z_ERRNO: + if (ferror(stdin)) + fputs("error reading stdin\n", stderr); + if (ferror(stdout)) + fputs("error writing stdout\n", stderr); + break; + case Z_STREAM_ERROR: + fputs("invalid compression level\n", stderr); + break; + case Z_DATA_ERROR: + fputs("invalid or incomplete deflate data\n", stderr); + break; + case Z_MEM_ERROR: + fputs("out of memory\n", stderr); + break; + case Z_VERSION_ERROR: + fputs("zlib version mismatch!\n", stderr); + } +} +</b></pre><!-- --> +Here is the <tt>main()</tt> routine used to test <tt>def()</tt> and <tt>inf()</tt>. The +<tt>zpipe</tt> command is simply a compression pipe from <tt>stdin</tt> to <tt>stdout</tt>, if +no arguments are given, or it is a decompression pipe if <tt>zpipe -d</tt> is used. If any other +arguments are provided, no compression or decompression is performed. Instead a usage +message is displayed. Examples are <tt>zpipe < foo.txt > foo.txt.z</tt> to compress, and +<tt>zpipe -d < foo.txt.z > foo.txt</tt> to decompress. +<pre><b> +/* compress or decompress from stdin to stdout */ +int main(int argc, char **argv) +{ + int ret; + + /* avoid end-of-line conversions */ + SET_BINARY_MODE(stdin); + SET_BINARY_MODE(stdout); + + /* do compression if no arguments */ + if (argc == 1) { + ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION); + if (ret != Z_OK) + zerr(ret); + return ret; + } + + /* do decompression if -d specified */ + else if (argc == 2 && strcmp(argv[1], "-d") == 0) { + ret = inf(stdin, stdout); + if (ret != Z_OK) + zerr(ret); + return ret; + } + + /* otherwise, report usage */ + else { + fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr); + return 1; + } +} +</b></pre> +<hr> +<i>Last modified 24 January 2023<br> +Copyright © 2004-2023 Mark Adler</i><br> +<a rel="license" href="http://creativecommons.org/licenses/by-nd/4.0/"> +<img alt="Creative Commons License" style="border-width:0" +src="https://i.creativecommons.org/l/by-nd/4.0/88x31.png"></a> +<a rel="license" href="http://creativecommons.org/licenses/by-nd/4.0/"> +Creative Commons Attribution-NoDerivatives 4.0 International License</a>. +</body> +</html> diff --git a/deps/zlib-1.3.1/examples/zpipe.c b/deps/zlib-1.3.1/examples/zpipe.c new file mode 100644 index 0000000..83535d1 --- /dev/null +++ b/deps/zlib-1.3.1/examples/zpipe.c @@ -0,0 +1,205 @@ +/* zpipe.c: example of proper use of zlib's inflate() and deflate() + Not copyrighted -- provided to the public domain + Version 1.4 11 December 2005 Mark Adler */ + +/* Version history: + 1.0 30 Oct 2004 First version + 1.1 8 Nov 2004 Add void casting for unused return values + Use switch statement for inflate() return values + 1.2 9 Nov 2004 Add assertions to document zlib guarantees + 1.3 6 Apr 2005 Remove incorrect assertion in inf() + 1.4 11 Dec 2005 Add hack to avoid MSDOS end-of-line conversions + Avoid some compiler warnings for input and output buffers + */ + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include "zlib.h" + +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) +# include <fcntl.h> +# include <io.h> +# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +#else +# define SET_BINARY_MODE(file) +#endif + +#define CHUNK 16384 + +/* Compress from file source to file dest until EOF on source. + def() returns Z_OK on success, Z_MEM_ERROR if memory could not be + allocated for processing, Z_STREAM_ERROR if an invalid compression + level is supplied, Z_VERSION_ERROR if the version of zlib.h and the + version of the library linked do not match, or Z_ERRNO if there is + an error reading or writing the files. */ +int def(FILE *source, FILE *dest, int level) +{ + int ret, flush; + unsigned have; + z_stream strm; + unsigned char in[CHUNK]; + unsigned char out[CHUNK]; + + /* allocate deflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + ret = deflateInit(&strm, level); + if (ret != Z_OK) + return ret; + + /* compress until end of file */ + do { + strm.avail_in = fread(in, 1, CHUNK, source); + if (ferror(source)) { + (void)deflateEnd(&strm); + return Z_ERRNO; + } + flush = feof(source) ? Z_FINISH : Z_NO_FLUSH; + strm.next_in = in; + + /* run deflate() on input until output buffer not full, finish + compression if all of source has been read in */ + do { + strm.avail_out = CHUNK; + strm.next_out = out; + ret = deflate(&strm, flush); /* no bad return value */ + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ + have = CHUNK - strm.avail_out; + if (fwrite(out, 1, have, dest) != have || ferror(dest)) { + (void)deflateEnd(&strm); + return Z_ERRNO; + } + } while (strm.avail_out == 0); + assert(strm.avail_in == 0); /* all input will be used */ + + /* done when last data in file processed */ + } while (flush != Z_FINISH); + assert(ret == Z_STREAM_END); /* stream will be complete */ + + /* clean up and return */ + (void)deflateEnd(&strm); + return Z_OK; +} + +/* Decompress from file source to file dest until stream ends or EOF. + inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be + allocated for processing, Z_DATA_ERROR if the deflate data is + invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and + the version of the library linked do not match, or Z_ERRNO if there + is an error reading or writing the files. */ +int inf(FILE *source, FILE *dest) +{ + int ret; + unsigned have; + z_stream strm; + unsigned char in[CHUNK]; + unsigned char out[CHUNK]; + + /* allocate inflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit(&strm); + if (ret != Z_OK) + return ret; + + /* decompress until deflate stream ends or end of file */ + do { + strm.avail_in = fread(in, 1, CHUNK, source); + if (ferror(source)) { + (void)inflateEnd(&strm); + return Z_ERRNO; + } + if (strm.avail_in == 0) + break; + strm.next_in = in; + + /* run inflate() on input until output buffer not full */ + do { + strm.avail_out = CHUNK; + strm.next_out = out; + ret = inflate(&strm, Z_NO_FLUSH); + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ + switch (ret) { + case Z_NEED_DICT: + ret = Z_DATA_ERROR; /* and fall through */ + case Z_DATA_ERROR: + case Z_MEM_ERROR: + (void)inflateEnd(&strm); + return ret; + } + have = CHUNK - strm.avail_out; + if (fwrite(out, 1, have, dest) != have || ferror(dest)) { + (void)inflateEnd(&strm); + return Z_ERRNO; + } + } while (strm.avail_out == 0); + + /* done when inflate() says it's done */ + } while (ret != Z_STREAM_END); + + /* clean up and return */ + (void)inflateEnd(&strm); + return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR; +} + +/* report a zlib or i/o error */ +void zerr(int ret) +{ + fputs("zpipe: ", stderr); + switch (ret) { + case Z_ERRNO: + if (ferror(stdin)) + fputs("error reading stdin\n", stderr); + if (ferror(stdout)) + fputs("error writing stdout\n", stderr); + break; + case Z_STREAM_ERROR: + fputs("invalid compression level\n", stderr); + break; + case Z_DATA_ERROR: + fputs("invalid or incomplete deflate data\n", stderr); + break; + case Z_MEM_ERROR: + fputs("out of memory\n", stderr); + break; + case Z_VERSION_ERROR: + fputs("zlib version mismatch!\n", stderr); + } +} + +/* compress or decompress from stdin to stdout */ +int main(int argc, char **argv) +{ + int ret; + + /* avoid end-of-line conversions */ + SET_BINARY_MODE(stdin); + SET_BINARY_MODE(stdout); + + /* do compression if no arguments */ + if (argc == 1) { + ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION); + if (ret != Z_OK) + zerr(ret); + return ret; + } + + /* do decompression if -d specified */ + else if (argc == 2 && strcmp(argv[1], "-d") == 0) { + ret = inf(stdin, stdout); + if (ret != Z_OK) + zerr(ret); + return ret; + } + + /* otherwise, report usage */ + else { + fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr); + return 1; + } +} diff --git a/deps/zlib-1.3.1/examples/zran.c b/deps/zlib-1.3.1/examples/zran.c new file mode 100644 index 0000000..d313595 --- /dev/null +++ b/deps/zlib-1.3.1/examples/zran.c @@ -0,0 +1,533 @@ +/* zran.c -- example of deflate stream indexing and random access + * Copyright (C) 2005, 2012, 2018, 2023 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * Version 1.4 13 Apr 2023 Mark Adler */ + +/* Version History: + 1.0 29 May 2005 First version + 1.1 29 Sep 2012 Fix memory reallocation error + 1.2 14 Oct 2018 Handle gzip streams with multiple members + Add a header file to facilitate usage in applications + 1.3 18 Feb 2023 Permit raw deflate streams as well as zlib and gzip + Permit crossing gzip member boundaries when extracting + Support a size_t size when extracting (was an int) + Do a binary search over the index for an access point + Expose the access point type to enable save and load + 1.4 13 Apr 2023 Add a NOPRIME define to not use inflatePrime() + */ + +// Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary() +// for random access of a compressed file. A file containing a raw deflate +// stream is provided on the command line. The compressed stream is decoded in +// its entirety, and an index built with access points about every SPAN bytes +// in the uncompressed output. The compressed file is left open, and can then +// be read randomly, having to decompress on the average SPAN/2 uncompressed +// bytes before getting to the desired block of data. +// +// An access point can be created at the start of any deflate block, by saving +// the starting file offset and bit of that block, and the 32K bytes of +// uncompressed data that precede that block. Also the uncompressed offset of +// that block is saved to provide a reference for locating a desired starting +// point in the uncompressed stream. deflate_index_build() decompresses the +// input raw deflate stream a block at a time, and at the end of each block +// decides if enough uncompressed data has gone by to justify the creation of a +// new access point. If so, that point is saved in a data structure that grows +// as needed to accommodate the points. +// +// To use the index, an offset in the uncompressed data is provided, for which +// the latest access point at or preceding that offset is located in the index. +// The input file is positioned to the specified location in the index, and if +// necessary the first few bits of the compressed data is read from the file. +// inflate is initialized with those bits and the 32K of uncompressed data, and +// decompression then proceeds until the desired offset in the file is reached. +// Then decompression continues to read the requested uncompressed data from +// the file. +// +// There is some fair bit of overhead to starting inflation for the random +// access, mainly copying the 32K byte dictionary. If small pieces of the file +// are being accessed, it would make sense to implement a cache to hold some +// lookahead to avoid many calls to deflate_index_extract() for small lengths. +// +// Another way to build an index would be to use inflateCopy(). That would not +// be constrained to have access points at block boundaries, but would require +// more memory per access point, and could not be saved to a file due to the +// use of pointers in the state. The approach here allows for storage of the +// index in a file. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include "zlib.h" +#include "zran.h" + +#define WINSIZE 32768U // sliding window size +#define CHUNK 16384 // file input buffer size + +// See comments in zran.h. +void deflate_index_free(struct deflate_index *index) { + if (index != NULL) { + free(index->list); + free(index); + } +} + +// Add an access point to the list. If out of memory, deallocate the existing +// list and return NULL. index->mode is temporarily the allocated number of +// access points, until it is time for deflate_index_build() to return. Then +// index->mode is set to the mode of inflation. +static struct deflate_index *add_point(struct deflate_index *index, int bits, + off_t in, off_t out, unsigned left, + unsigned char *window) { + if (index == NULL) { + // The list is empty. Create it, starting with eight access points. + index = malloc(sizeof(struct deflate_index)); + if (index == NULL) + return NULL; + index->have = 0; + index->mode = 8; + index->list = malloc(sizeof(point_t) * index->mode); + if (index->list == NULL) { + free(index); + return NULL; + } + } + + else if (index->have == index->mode) { + // The list is full. Make it bigger. + index->mode <<= 1; + point_t *next = realloc(index->list, sizeof(point_t) * index->mode); + if (next == NULL) { + deflate_index_free(index); + return NULL; + } + index->list = next; + } + + // Fill in the access point and increment how many we have. + point_t *next = (point_t *)(index->list) + index->have++; + if (index->have < 0) { + // Overflowed the int! + deflate_index_free(index); + return NULL; + } + next->out = out; + next->in = in; + next->bits = bits; + if (left) + memcpy(next->window, window + WINSIZE - left, left); + if (left < WINSIZE) + memcpy(next->window + left, window, WINSIZE - left); + + // Return the index, which may have been newly allocated or destroyed. + return index; +} + +// Decompression modes. These are the inflateInit2() windowBits parameter. +#define RAW -15 +#define ZLIB 15 +#define GZIP 31 + +// See comments in zran.h. +int deflate_index_build(FILE *in, off_t span, struct deflate_index **built) { + // Set up inflation state. + z_stream strm = {0}; // inflate engine (gets fired up later) + unsigned char buf[CHUNK]; // input buffer + unsigned char win[WINSIZE] = {0}; // output sliding window + off_t totin = 0; // total bytes read from input + off_t totout = 0; // total bytes uncompressed + int mode = 0; // mode: RAW, ZLIB, or GZIP (0 => not set yet) + + // Decompress from in, generating access points along the way. + int ret; // the return value from zlib, or Z_ERRNO + off_t last; // last access point uncompressed offset + struct deflate_index *index = NULL; // list of access points + do { + // Assure available input, at least until reaching EOF. + if (strm.avail_in == 0) { + strm.avail_in = fread(buf, 1, sizeof(buf), in); + totin += strm.avail_in; + strm.next_in = buf; + if (strm.avail_in < sizeof(buf) && ferror(in)) { + ret = Z_ERRNO; + break; + } + + if (mode == 0) { + // At the start of the input -- determine the type. Assume raw + // if it is neither zlib nor gzip. This could in theory result + // in a false positive for zlib, but in practice the fill bits + // after a stored block are always zeros, so a raw stream won't + // start with an 8 in the low nybble. + mode = strm.avail_in == 0 ? RAW : // empty -- will fail + (strm.next_in[0] & 0xf) == 8 ? ZLIB : + strm.next_in[0] == 0x1f ? GZIP : + /* else */ RAW; + ret = inflateInit2(&strm, mode); + if (ret != Z_OK) + break; + } + } + + // Assure available output. This rotates the output through, for use as + // a sliding window on the uncompressed data. + if (strm.avail_out == 0) { + strm.avail_out = sizeof(win); + strm.next_out = win; + } + + if (mode == RAW && index == NULL) + // We skip the inflate() call at the start of raw deflate data in + // order generate an access point there. Set data_type to imitate + // the end of a header. + strm.data_type = 0x80; + else { + // Inflate and update the number of uncompressed bytes. + unsigned before = strm.avail_out; + ret = inflate(&strm, Z_BLOCK); + totout += before - strm.avail_out; + } + + if ((strm.data_type & 0xc0) == 0x80 && + (index == NULL || totout - last >= span)) { + // We are at the end of a header or a non-last deflate block, so we + // can add an access point here. Furthermore, we are either at the + // very start for the first access point, or there has been span or + // more uncompressed bytes since the last access point, so we want + // to add an access point here. + index = add_point(index, strm.data_type & 7, totin - strm.avail_in, + totout, strm.avail_out, win); + if (index == NULL) { + ret = Z_MEM_ERROR; + break; + } + last = totout; + } + + if (ret == Z_STREAM_END && mode == GZIP && + (strm.avail_in || ungetc(getc(in), in) != EOF)) + // There is more input after the end of a gzip member. Reset the + // inflate state to read another gzip member. On success, this will + // set ret to Z_OK to continue decompressing. + ret = inflateReset2(&strm, GZIP); + + // Keep going until Z_STREAM_END or error. If the compressed data ends + // prematurely without a file read error, Z_BUF_ERROR is returned. + } while (ret == Z_OK); + inflateEnd(&strm); + + if (ret != Z_STREAM_END) { + // An error was encountered. Discard the index and return a negative + // error code. + deflate_index_free(index); + return ret == Z_NEED_DICT ? Z_DATA_ERROR : ret; + } + + // Shrink the index to only the occupied access points and return it. + index->mode = mode; + index->length = totout; + point_t *list = realloc(index->list, sizeof(point_t) * index->have); + if (list == NULL) { + // Seems like a realloc() to make something smaller should always work, + // but just in case. + deflate_index_free(index); + return Z_MEM_ERROR; + } + index->list = list; + *built = index; + return index->have; +} + +#ifdef NOPRIME +// Support zlib versions before 1.2.3 (July 2005), or incomplete zlib clones +// that do not have inflatePrime(). + +# define INFLATEPRIME inflatePreface + +// Append the low bits bits of value to in[] at bit position *have, updating +// *have. value must be zero above its low bits bits. bits must be positive. +// This assumes that any bits above the *have bits in the last byte are zeros. +// That assumption is preserved on return, as any bits above *have + bits in +// the last byte written will be set to zeros. +static inline void append_bits(unsigned value, int bits, + unsigned char *in, int *have) { + in += *have >> 3; // where the first bits from value will go + int k = *have & 7; // the number of bits already there + *have += bits; + if (k) + *in |= value << k; // write value above the low k bits + else + *in = value; + k = 8 - k; // the number of bits just appended + while (bits > k) { + value >>= k; // drop the bits appended + bits -= k; + k = 8; // now at a byte boundary + *++in = value; + } +} + +// Insert enough bits in the form of empty deflate blocks in front of the +// low bits bits of value, in order to bring the sequence to a byte boundary. +// Then feed that to inflate(). This does what inflatePrime() does, except that +// a negative value of bits is not supported. bits must be in 0..16. If the +// arguments are invalid, Z_STREAM_ERROR is returned. Otherwise the return +// value from inflate() is returned. +static int inflatePreface(z_stream *strm, int bits, int value) { + // Check input. + if (strm == Z_NULL || bits < 0 || bits > 16) + return Z_STREAM_ERROR; + if (bits == 0) + return Z_OK; + value &= (2 << (bits - 1)) - 1; + + // An empty dynamic block with an odd number of bits (95). The high bit of + // the last byte is unused. + static const unsigned char dyn[] = { + 4, 0xe0, 0x81, 8, 0, 0, 0, 0, 0x20, 0xa8, 0xab, 0x1f + }; + const int dynlen = 95; // number of bits in the block + + // Build an input buffer for inflate that is a multiple of eight bits in + // length, and that ends with the low bits bits of value. + unsigned char in[(dynlen + 3 * 10 + 16 + 7) / 8]; + int have = 0; + if (bits & 1) { + // Insert an empty dynamic block to get to an odd number of bits, so + // when bits bits from value are appended, we are at an even number of + // bits. + memcpy(in, dyn, sizeof(dyn)); + have = dynlen; + } + while ((have + bits) & 7) + // Insert empty fixed blocks until appending bits bits would put us on + // a byte boundary. This will insert at most three fixed blocks. + append_bits(2, 10, in, &have); + + // Append the bits bits from value, which takes us to a byte boundary. + append_bits(value, bits, in, &have); + + // Deliver the input to inflate(). There is no output space provided, but + // inflate() can't get stuck waiting on output not ingesting all of the + // provided input. The reason is that there will be at most 16 bits of + // input from value after the empty deflate blocks (which themselves + // generate no output). At least ten bits are needed to generate the first + // output byte from a fixed block. The last two bytes of the buffer have to + // be ingested in order to get ten bits, which is the most that value can + // occupy. + strm->avail_in = have >> 3; + strm->next_in = in; + strm->avail_out = 0; + strm->next_out = in; // not used, but can't be NULL + return inflate(strm, Z_NO_FLUSH); +} + +#else +# define INFLATEPRIME inflatePrime +#endif + +// See comments in zran.h. +ptrdiff_t deflate_index_extract(FILE *in, struct deflate_index *index, + off_t offset, unsigned char *buf, size_t len) { + // Do a quick sanity check on the index. + if (index == NULL || index->have < 1 || index->list[0].out != 0) + return Z_STREAM_ERROR; + + // If nothing to extract, return zero bytes extracted. + if (len == 0 || offset < 0 || offset >= index->length) + return 0; + + // Find the access point closest to but not after offset. + int lo = -1, hi = index->have; + point_t *point = index->list; + while (hi - lo > 1) { + int mid = (lo + hi) >> 1; + if (offset < point[mid].out) + hi = mid; + else + lo = mid; + } + point += lo; + + // Initialize the input file and prime the inflate engine to start there. + int ret = fseeko(in, point->in - (point->bits ? 1 : 0), SEEK_SET); + if (ret == -1) + return Z_ERRNO; + int ch = 0; + if (point->bits && (ch = getc(in)) == EOF) + return ferror(in) ? Z_ERRNO : Z_BUF_ERROR; + z_stream strm = {0}; + ret = inflateInit2(&strm, RAW); + if (ret != Z_OK) + return ret; + if (point->bits) + INFLATEPRIME(&strm, point->bits, ch >> (8 - point->bits)); + inflateSetDictionary(&strm, point->window, WINSIZE); + + // Skip uncompressed bytes until offset reached, then satisfy request. + unsigned char input[CHUNK]; + unsigned char discard[WINSIZE]; + offset -= point->out; // number of bytes to skip to get to offset + size_t left = len; // number of bytes left to read after offset + do { + if (offset) { + // Discard up to offset uncompressed bytes. + strm.avail_out = offset < WINSIZE ? (unsigned)offset : WINSIZE; + strm.next_out = discard; + } + else { + // Uncompress up to left bytes into buf. + strm.avail_out = left < UINT_MAX ? (unsigned)left : UINT_MAX; + strm.next_out = buf + len - left; + } + + // Uncompress, setting got to the number of bytes uncompressed. + if (strm.avail_in == 0) { + // Assure available input. + strm.avail_in = fread(input, 1, CHUNK, in); + if (strm.avail_in < CHUNK && ferror(in)) { + ret = Z_ERRNO; + break; + } + strm.next_in = input; + } + unsigned got = strm.avail_out; + ret = inflate(&strm, Z_NO_FLUSH); + got -= strm.avail_out; + + // Update the appropriate count. + if (offset) + offset -= got; + else + left -= got; + + // If we're at the end of a gzip member and there's more to read, + // continue to the next gzip member. + if (ret == Z_STREAM_END && index->mode == GZIP) { + // Discard the gzip trailer. + unsigned drop = 8; // length of gzip trailer + if (strm.avail_in >= drop) { + strm.avail_in -= drop; + strm.next_in += drop; + } + else { + // Read and discard the remainder of the gzip trailer. + drop -= strm.avail_in; + strm.avail_in = 0; + do { + if (getc(in) == EOF) + // The input does not have a complete trailer. + return ferror(in) ? Z_ERRNO : Z_BUF_ERROR; + } while (--drop); + } + + if (strm.avail_in || ungetc(getc(in), in) != EOF) { + // There's more after the gzip trailer. Use inflate to skip the + // gzip header and resume the raw inflate there. + inflateReset2(&strm, GZIP); + do { + if (strm.avail_in == 0) { + strm.avail_in = fread(input, 1, CHUNK, in); + if (strm.avail_in < CHUNK && ferror(in)) { + ret = Z_ERRNO; + break; + } + strm.next_in = input; + } + strm.avail_out = WINSIZE; + strm.next_out = discard; + ret = inflate(&strm, Z_BLOCK); // stop at end of header + } while (ret == Z_OK && (strm.data_type & 0x80) == 0); + if (ret != Z_OK) + break; + inflateReset2(&strm, RAW); + } + } + + // Continue until we have the requested data, the deflate data has + // ended, or an error is encountered. + } while (ret == Z_OK && left); + inflateEnd(&strm); + + // Return the number of uncompressed bytes read into buf, or the error. + return ret == Z_OK || ret == Z_STREAM_END ? len - left : ret; +} + +#ifdef TEST + +#define SPAN 1048576L // desired distance between access points +#define LEN 16384 // number of bytes to extract + +// Demonstrate the use of deflate_index_build() and deflate_index_extract() by +// processing the file provided on the command line, and extracting LEN bytes +// from 2/3rds of the way through the uncompressed output, writing that to +// stdout. An offset can be provided as the second argument, in which case the +// data is extracted from there instead. +int main(int argc, char **argv) { + // Open the input file. + if (argc < 2 || argc > 3) { + fprintf(stderr, "usage: zran file.raw [offset]\n"); + return 1; + } + FILE *in = fopen(argv[1], "rb"); + if (in == NULL) { + fprintf(stderr, "zran: could not open %s for reading\n", argv[1]); + return 1; + } + + // Get optional offset. + off_t offset = -1; + if (argc == 3) { + char *end; + offset = strtoll(argv[2], &end, 10); + if (*end || offset < 0) { + fprintf(stderr, "zran: %s is not a valid offset\n", argv[2]); + return 1; + } + } + + // Build index. + struct deflate_index *index = NULL; + int len = deflate_index_build(in, SPAN, &index); + if (len < 0) { + fclose(in); + switch (len) { + case Z_MEM_ERROR: + fprintf(stderr, "zran: out of memory\n"); + break; + case Z_BUF_ERROR: + fprintf(stderr, "zran: %s ended prematurely\n", argv[1]); + break; + case Z_DATA_ERROR: + fprintf(stderr, "zran: compressed data error in %s\n", argv[1]); + break; + case Z_ERRNO: + fprintf(stderr, "zran: read error on %s\n", argv[1]); + break; + default: + fprintf(stderr, "zran: error %d while building index\n", len); + } + return 1; + } + fprintf(stderr, "zran: built index with %d access points\n", len); + + // Use index by reading some bytes from an arbitrary offset. + unsigned char buf[LEN]; + if (offset == -1) + offset = ((index->length + 1) << 1) / 3; + ptrdiff_t got = deflate_index_extract(in, index, offset, buf, LEN); + if (got < 0) + fprintf(stderr, "zran: extraction failed: %s error\n", + got == Z_MEM_ERROR ? "out of memory" : "input corrupted"); + else { + fwrite(buf, 1, got, stdout); + fprintf(stderr, "zran: extracted %ld bytes at %lld\n", got, offset); + } + + // Clean up and exit. + deflate_index_free(index); + fclose(in); + return 0; +} + +#endif diff --git a/deps/zlib-1.3.1/examples/zran.h b/deps/zlib-1.3.1/examples/zran.h new file mode 100644 index 0000000..ebf780d --- /dev/null +++ b/deps/zlib-1.3.1/examples/zran.h @@ -0,0 +1,51 @@ +/* zran.h -- example of deflated stream indexing and random access + * Copyright (C) 2005, 2012, 2018, 2023 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * Version 1.3 18 Feb 2023 Mark Adler */ + +#include <stdio.h> +#include "zlib.h" + +// Access point. +typedef struct point { + off_t out; // offset in uncompressed data + off_t in; // offset in compressed file of first full byte + int bits; // 0, or number of bits (1-7) from byte at in-1 + unsigned char window[32768]; // preceding 32K of uncompressed data +} point_t; + +// Access point list. +struct deflate_index { + int have; // number of access points in list + int mode; // -15 for raw, 15 for zlib, or 31 for gzip + off_t length; // total length of uncompressed data + point_t *list; // allocated list of access points +}; + +// Make one pass through a zlib, gzip, or raw deflate compressed stream and +// build an index, with access points about every span bytes of uncompressed +// output. gzip files with multiple members are fully indexed. span should be +// chosen to balance the speed of random access against the memory requirements +// of the list, which is about 32K bytes per access point. The return value is +// the number of access points on success (>= 1), Z_MEM_ERROR for out of +// memory, Z_BUF_ERROR for a premature end of input, Z_DATA_ERROR for a format +// or verification error in the input file, or Z_ERRNO for a file read error. +// On success, *built points to the resulting index. +int deflate_index_build(FILE *in, off_t span, struct deflate_index **built); + +// Use the index to read len bytes from offset into buf. Return the number of +// bytes read or a negative error code. If data is requested past the end of +// the uncompressed data, then deflate_index_extract() will return a value less +// than len, indicating how much was actually read into buf. If given a valid +// index, this function should not return an error unless the file was modified +// somehow since the index was generated, given that deflate_index_build() had +// validated all of the input. If nevertheless there is a failure, Z_BUF_ERROR +// is returned if the compressed data ends prematurely, Z_DATA_ERROR if the +// deflate compressed data is not valid, Z_MEM_ERROR if out of memory, +// Z_STREAM_ERROR if the index is not valid, or Z_ERRNO if there is an error +// reading or seeking on the input file. +ptrdiff_t deflate_index_extract(FILE *in, struct deflate_index *index, + off_t offset, unsigned char *buf, size_t len); + +// Deallocate an index built by deflate_index_build(). +void deflate_index_free(struct deflate_index *index); |