summaryrefslogtreecommitdiff
path: root/tools/src/tinf/tinflate.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/src/tinf/tinflate.c')
-rw-r--r--tools/src/tinf/tinflate.c648
1 files changed, 648 insertions, 0 deletions
diff --git a/tools/src/tinf/tinflate.c b/tools/src/tinf/tinflate.c
new file mode 100644
index 0000000..c82526c
--- /dev/null
+++ b/tools/src/tinf/tinflate.c
@@ -0,0 +1,648 @@
+/*
+ * tinflate - tiny inflate
+ *
+ * Copyright (c) 2003-2019 Joergen Ibsen
+ *
+ * This version of tinfzlib was modified for use with m1n1.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must
+ * not claim that you wrote the original software. If you use this
+ * software in a product, an acknowledgment in the product
+ * documentation would be appreciated but is not required.
+ *
+ * 2. Altered source versions must be plainly marked as such, and must
+ * not be misrepresented as being the original software.
+ *
+ * 3. This notice may not be removed or altered from any source
+ * distribution.
+ */
+
+#include "tinf.h"
+
+#include <assert.h>
+#include <limits.h>
+
+#if defined(UINT_MAX) && (UINT_MAX) < 0xFFFFFFFFUL
+# error "tinf requires unsigned int to be at least 32-bit"
+#endif
+
+/* -- Internal data structures -- */
+
+struct tinf_tree {
+ unsigned short counts[16]; /* Number of codes with a given length */
+ unsigned short symbols[288]; /* Symbols sorted by code */
+ int max_sym;
+};
+
+struct tinf_data {
+ const unsigned char *source;
+ const unsigned char *source_end;
+ unsigned int tag;
+ int bitcount;
+ int overflow;
+
+ unsigned char *dest_start;
+ unsigned char *dest;
+ unsigned char *dest_end;
+
+ struct tinf_tree ltree; /* Literal/length tree */
+ struct tinf_tree dtree; /* Distance tree */
+};
+
+/* -- Utility functions -- */
+
+static unsigned int read_le16(const unsigned char *p)
+{
+ return ((unsigned int) p[0])
+ | ((unsigned int) p[1] << 8);
+}
+
+/* Build fixed Huffman trees */
+static void tinf_build_fixed_trees(struct tinf_tree *lt, struct tinf_tree *dt)
+{
+ int i;
+
+ /* Build fixed literal/length tree */
+ for (i = 0; i < 16; ++i) {
+ lt->counts[i] = 0;
+ }
+
+ lt->counts[7] = 24;
+ lt->counts[8] = 152;
+ lt->counts[9] = 112;
+
+ for (i = 0; i < 24; ++i) {
+ lt->symbols[i] = 256 + i;
+ }
+ for (i = 0; i < 144; ++i) {
+ lt->symbols[24 + i] = i;
+ }
+ for (i = 0; i < 8; ++i) {
+ lt->symbols[24 + 144 + i] = 280 + i;
+ }
+ for (i = 0; i < 112; ++i) {
+ lt->symbols[24 + 144 + 8 + i] = 144 + i;
+ }
+
+ lt->max_sym = 285;
+
+ /* Build fixed distance tree */
+ for (i = 0; i < 16; ++i) {
+ dt->counts[i] = 0;
+ }
+
+ dt->counts[5] = 32;
+
+ for (i = 0; i < 32; ++i) {
+ dt->symbols[i] = i;
+ }
+
+ dt->max_sym = 29;
+}
+
+/* Given an array of code lengths, build a tree */
+static int tinf_build_tree(struct tinf_tree *t, const unsigned char *lengths,
+ unsigned int num)
+{
+ unsigned short offs[16];
+ unsigned int i, num_codes, available;
+
+ assert(num <= 288);
+
+ for (i = 0; i < 16; ++i) {
+ t->counts[i] = 0;
+ }
+
+ t->max_sym = -1;
+
+ /* Count number of codes for each non-zero length */
+ for (i = 0; i < num; ++i) {
+ assert(lengths[i] <= 15);
+
+ if (lengths[i]) {
+ t->max_sym = i;
+ t->counts[lengths[i]]++;
+ }
+ }
+
+ /* Compute offset table for distribution sort */
+ for (available = 1, num_codes = 0, i = 0; i < 16; ++i) {
+ unsigned int used = t->counts[i];
+
+ /* Check length contains no more codes than available */
+ if (used > available) {
+ return TINF_DATA_ERROR;
+ }
+ available = 2 * (available - used);
+
+ offs[i] = num_codes;
+ num_codes += used;
+ }
+
+ /*
+ * Check all codes were used, or for the special case of only one
+ * code that it has length 1
+ */
+ if ((num_codes > 1 && available > 0)
+ || (num_codes == 1 && t->counts[1] != 1)) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* Fill in symbols sorted by code */
+ for (i = 0; i < num; ++i) {
+ if (lengths[i]) {
+ t->symbols[offs[lengths[i]]++] = i;
+ }
+ }
+
+ /*
+ * For the special case of only one code (which will be 0) add a
+ * code 1 which results in a symbol that is too large
+ */
+ if (num_codes == 1) {
+ t->counts[1] = 2;
+ t->symbols[1] = t->max_sym + 1;
+ }
+
+ return TINF_OK;
+}
+
+/* -- Decode functions -- */
+
+static void tinf_refill(struct tinf_data *d, int num)
+{
+ assert(num >= 0 && num <= 32);
+
+ /* Read bytes until at least num bits available */
+ while (d->bitcount < num) {
+ if (d->source != d->source_end) {
+ d->tag |= (unsigned int) *d->source++ << d->bitcount;
+ }
+ else {
+ d->overflow = 1;
+ }
+ d->bitcount += 8;
+ }
+
+ assert(d->bitcount <= 32);
+}
+
+static unsigned int tinf_getbits_no_refill(struct tinf_data *d, int num)
+{
+ unsigned int bits;
+
+ assert(num >= 0 && num <= d->bitcount);
+
+ /* Get bits from tag */
+ bits = d->tag & ((1UL << num) - 1);
+
+ /* Remove bits from tag */
+ d->tag >>= num;
+ d->bitcount -= num;
+
+ return bits;
+}
+
+/* Get num bits from source stream */
+static unsigned int tinf_getbits(struct tinf_data *d, int num)
+{
+ tinf_refill(d, num);
+ return tinf_getbits_no_refill(d, num);
+}
+
+/* Read a num bit value from stream and add base */
+static unsigned int tinf_getbits_base(struct tinf_data *d, int num, int base)
+{
+ return base + (num ? tinf_getbits(d, num) : 0);
+}
+
+/* Given a data stream and a tree, decode a symbol */
+static int tinf_decode_symbol(struct tinf_data *d, const struct tinf_tree *t)
+{
+ int base = 0, offs = 0;
+ int len;
+
+ /*
+ * Get more bits while code index is above number of codes
+ *
+ * Rather than the actual code, we are computing the position of the
+ * code in the sorted order of codes, which is the index of the
+ * corresponding symbol.
+ *
+ * Conceptually, for each code length (level in the tree), there are
+ * counts[len] leaves on the left and internal nodes on the right.
+ * The index we have decoded so far is base + offs, and if that
+ * falls within the leaves we are done. Otherwise we adjust the range
+ * of offs and add one more bit to it.
+ */
+ for (len = 1; ; ++len) {
+ offs = 2 * offs + tinf_getbits(d, 1);
+
+ assert(len <= 15);
+
+ if (offs < t->counts[len]) {
+ break;
+ }
+
+ base += t->counts[len];
+ offs -= t->counts[len];
+ }
+
+ assert(base + offs >= 0 && base + offs < 288);
+
+ return t->symbols[base + offs];
+}
+
+/* Given a data stream, decode dynamic trees from it */
+static int tinf_decode_trees(struct tinf_data *d, struct tinf_tree *lt,
+ struct tinf_tree *dt)
+{
+ unsigned char lengths[288 + 32];
+
+ /* Special ordering of code length codes */
+ static const unsigned char clcidx[19] = {
+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5,
+ 11, 4, 12, 3, 13, 2, 14, 1, 15
+ };
+
+ unsigned int hlit, hdist, hclen;
+ unsigned int i, num, length;
+ int res;
+
+ /* Get 5 bits HLIT (257-286) */
+ hlit = tinf_getbits_base(d, 5, 257);
+
+ /* Get 5 bits HDIST (1-32) */
+ hdist = tinf_getbits_base(d, 5, 1);
+
+ /* Get 4 bits HCLEN (4-19) */
+ hclen = tinf_getbits_base(d, 4, 4);
+
+ /*
+ * The RFC limits the range of HLIT to 286, but lists HDIST as range
+ * 1-32, even though distance codes 30 and 31 have no meaning. While
+ * we could allow the full range of HLIT and HDIST to make it possible
+ * to decode the fixed trees with this function, we consider it an
+ * error here.
+ *
+ * See also: https://github.com/madler/zlib/issues/82
+ */
+ if (hlit > 286 || hdist > 30) {
+ return TINF_DATA_ERROR;
+ }
+
+ for (i = 0; i < 19; ++i) {
+ lengths[i] = 0;
+ }
+
+ /* Read code lengths for code length alphabet */
+ for (i = 0; i < hclen; ++i) {
+ /* Get 3 bits code length (0-7) */
+ unsigned int clen = tinf_getbits(d, 3);
+
+ lengths[clcidx[i]] = clen;
+ }
+
+ /* Build code length tree (in literal/length tree to save space) */
+ res = tinf_build_tree(lt, lengths, 19);
+
+ if (res != TINF_OK) {
+ return res;
+ }
+
+ /* Check code length tree is not empty */
+ if (lt->max_sym == -1) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* Decode code lengths for the dynamic trees */
+ for (num = 0; num < hlit + hdist; ) {
+ int sym = tinf_decode_symbol(d, lt);
+
+ if (sym > lt->max_sym) {
+ return TINF_DATA_ERROR;
+ }
+
+ switch (sym) {
+ case 16:
+ /* Copy previous code length 3-6 times (read 2 bits) */
+ if (num == 0) {
+ return TINF_DATA_ERROR;
+ }
+ sym = lengths[num - 1];
+ length = tinf_getbits_base(d, 2, 3);
+ break;
+ case 17:
+ /* Repeat code length 0 for 3-10 times (read 3 bits) */
+ sym = 0;
+ length = tinf_getbits_base(d, 3, 3);
+ break;
+ case 18:
+ /* Repeat code length 0 for 11-138 times (read 7 bits) */
+ sym = 0;
+ length = tinf_getbits_base(d, 7, 11);
+ break;
+ default:
+ /* Values 0-15 represent the actual code lengths */
+ length = 1;
+ break;
+ }
+
+ if (length > hlit + hdist - num) {
+ return TINF_DATA_ERROR;
+ }
+
+ while (length--) {
+ lengths[num++] = sym;
+ }
+ }
+
+ /* Check EOB symbol is present */
+ if (lengths[256] == 0) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* Build dynamic trees */
+ res = tinf_build_tree(lt, lengths, hlit);
+
+ if (res != TINF_OK) {
+ return res;
+ }
+
+ res = tinf_build_tree(dt, lengths + hlit, hdist);
+
+ if (res != TINF_OK) {
+ return res;
+ }
+
+ return TINF_OK;
+}
+
+/* -- Block inflate functions -- */
+
+/* Given a stream and two trees, inflate a block of data */
+static int tinf_inflate_block_data(struct tinf_data *d, struct tinf_tree *lt,
+ struct tinf_tree *dt)
+{
+ /* Extra bits and base tables for length codes */
+ static const unsigned char length_bits[30] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
+ 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 4, 4, 5, 5, 5, 5, 0, 127
+ };
+
+ static const unsigned short length_base[30] = {
+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
+ 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
+ 67, 83, 99, 115, 131, 163, 195, 227, 258, 0
+ };
+
+ /* Extra bits and base tables for distance codes */
+ static const unsigned char dist_bits[30] = {
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
+ 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
+ 9, 9, 10, 10, 11, 11, 12, 12, 13, 13
+ };
+
+ static const unsigned short dist_base[30] = {
+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25,
+ 33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
+ 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
+ };
+
+ for (;;) {
+ int sym = tinf_decode_symbol(d, lt);
+
+ /* Check for overflow in bit reader */
+ if (d->overflow) {
+ return TINF_DATA_ERROR;
+ }
+
+ if (sym < 256) {
+ if (d->dest == d->dest_end) {
+ return TINF_BUF_ERROR;
+ }
+ *d->dest++ = sym;
+ }
+ else {
+ int length, dist, offs;
+ int i;
+
+ /* Check for end of block */
+ if (sym == 256) {
+ return TINF_OK;
+ }
+
+ /* Check sym is within range and distance tree is not empty */
+ if (sym > lt->max_sym || sym - 257 > 28 || dt->max_sym == -1) {
+ return TINF_DATA_ERROR;
+ }
+
+ sym -= 257;
+
+ /* Possibly get more bits from length code */
+ length = tinf_getbits_base(d, length_bits[sym],
+ length_base[sym]);
+
+ dist = tinf_decode_symbol(d, dt);
+
+ /* Check dist is within range */
+ if (dist > dt->max_sym || dist > 29) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* Possibly get more bits from distance code */
+ offs = tinf_getbits_base(d, dist_bits[dist],
+ dist_base[dist]);
+
+ if (offs > d->dest - d->dest_start) {
+ return TINF_DATA_ERROR;
+ }
+
+ if (d->dest_end - d->dest < length) {
+ return TINF_BUF_ERROR;
+ }
+
+ /* Copy match */
+ for (i = 0; i < length; ++i) {
+ d->dest[i] = d->dest[i - offs];
+ }
+
+ d->dest += length;
+ }
+ }
+}
+
+/* Inflate an uncompressed block of data */
+static int tinf_inflate_uncompressed_block(struct tinf_data *d)
+{
+ unsigned int length, invlength;
+
+ if (d->source_end && d->source_end - d->source < 4) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* Get length */
+ length = read_le16(d->source);
+
+ /* Get one's complement of length */
+ invlength = read_le16(d->source + 2);
+
+ /* Check length */
+ if (length != (~invlength & 0x0000FFFF)) {
+ return TINF_DATA_ERROR;
+ }
+
+ d->source += 4;
+
+ if (d->source_end && d->source_end - d->source < length) {
+ return TINF_DATA_ERROR;
+ }
+
+ if (d->dest_end - d->dest < length) {
+ return TINF_BUF_ERROR;
+ }
+
+ /* Copy block */
+ while (length--) {
+ *d->dest++ = *d->source++;
+ }
+
+ /* Make sure we start next block on a byte boundary */
+ d->tag = 0;
+ d->bitcount = 0;
+
+ return TINF_OK;
+}
+
+/* Inflate a block of data compressed with fixed Huffman trees */
+static int tinf_inflate_fixed_block(struct tinf_data *d)
+{
+ /* Build fixed Huffman trees */
+ tinf_build_fixed_trees(&d->ltree, &d->dtree);
+
+ /* Decode block using fixed trees */
+ return tinf_inflate_block_data(d, &d->ltree, &d->dtree);
+}
+
+/* Inflate a block of data compressed with dynamic Huffman trees */
+static int tinf_inflate_dynamic_block(struct tinf_data *d)
+{
+ /* Decode trees from stream */
+ int res = tinf_decode_trees(d, &d->ltree, &d->dtree);
+
+ if (res != TINF_OK) {
+ return res;
+ }
+
+ /* Decode block using decoded trees */
+ return tinf_inflate_block_data(d, &d->ltree, &d->dtree);
+}
+
+/* -- Public functions -- */
+
+/* Initialize global (static) data */
+void tinf_init(void)
+{
+ return;
+}
+
+/* Inflate stream from source to dest */
+int tinf_uncompress(void *dest, unsigned int *destLen,
+ const void *source, unsigned int *sourceLen)
+{
+ struct tinf_data d;
+ int bfinal;
+
+ /* Initialise data */
+ d.source = (const unsigned char *) source;
+ if (sourceLen && *sourceLen)
+ d.source_end = d.source + *sourceLen;
+ else
+ d.source_end = 0;
+ d.tag = 0;
+ d.bitcount = 0;
+ d.overflow = 0;
+
+ d.dest = (unsigned char *) dest;
+ d.dest_start = d.dest;
+ d.dest_end = d.dest + *destLen;
+
+ do {
+ unsigned int btype;
+ int res;
+
+ /* Read final block flag */
+ bfinal = tinf_getbits(&d, 1);
+
+ /* Read block type (2 bits) */
+ btype = tinf_getbits(&d, 2);
+
+ /* Decompress block */
+ switch (btype) {
+ case 0:
+ /* Decompress uncompressed block */
+ res = tinf_inflate_uncompressed_block(&d);
+ break;
+ case 1:
+ /* Decompress block with fixed Huffman trees */
+ res = tinf_inflate_fixed_block(&d);
+ break;
+ case 2:
+ /* Decompress block with dynamic Huffman trees */
+ res = tinf_inflate_dynamic_block(&d);
+ break;
+ default:
+ res = TINF_DATA_ERROR;
+ break;
+ }
+
+ if (res != TINF_OK) {
+ return res;
+ }
+ } while (!bfinal);
+
+ /* Check for overflow in bit reader */
+ if (d.overflow) {
+ return TINF_DATA_ERROR;
+ }
+
+ if (sourceLen) {
+ unsigned int slen = d.source - (const unsigned char *)source;
+ if (!*sourceLen)
+ *sourceLen = slen;
+ else if (*sourceLen != slen)
+ return TINF_DATA_ERROR;
+ }
+
+ *destLen = d.dest - d.dest_start;
+ return TINF_OK;
+}
+
+/* clang -g -O1 -fsanitize=fuzzer,address -DTINF_FUZZING tinflate.c */
+#if defined(TINF_FUZZING)
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+unsigned char depacked[64 * 1024];
+
+extern int
+LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
+{
+ if (size > UINT_MAX / 2) { return 0; }
+ unsigned int destLen = sizeof(depacked);
+ tinf_uncompress(depacked, &destLen, data, size);
+ return 0;
+}
+#endif