flate

deflate implementation
git clone git://git.suckless.org/flate
Log | Files | Refs | README

commit a832945746253cae75977ce7daa5a14ad34a3885
Author: nsz <nszabolcs@gmail.com>
Date:   Mon, 20 Apr 2009 11:54:33 +0200

raw inflate
Diffstat:
Makefile | 14++++++++++++++
README | 13+++++++++++++
inflate.c | 378+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 405 insertions(+), 0 deletions(-)

diff --git a/Makefile b/Makefile @@ -0,0 +1,14 @@ +CFLAGS=-g -Wall -ansi -pedantic +#CFLAGS=-Os -Wall -ansi -pedantic +LDFLAGS= +SRC=inflate.c +OBJ=${SRC:.c=.o} + +all: inflate +inflate: inflate.o + ${CC} -o $@ $^ ${LDFLAGS} +${OBJ}: Makefile +.c.o: + ${CC} -c ${CFLAGS} $< +clean: + rm -f ${OBJ} inflate diff --git a/README b/README @@ -0,0 +1,13 @@ +minimal deflate implementation + +source: + rfc1951 from ietf + tinf from ibsensoftware + puff from zlib + libflate from plan 9 + +build: + make + +features: + decode raw deflate compressed data diff --git a/inflate.c b/inflate.c @@ -0,0 +1,378 @@ +/* +TODO: + error check (src len, dst len) + fillbits(s, n) -> faster getbits() + clever io + p9 hufftable + int types +*/ + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; + +enum { + HuffBits = 16, /* maximum bits in a encoded code */ + Nlit = 256, /* number of lit codes */ + Nlen = 29, /* number of len codes */ + Nlitlen = Nlit + Nlen + 3, /* number of litlen codes + block end + 2 unused */ + Ndist = 30, /* number of offset codes */ + Nclen = 19, /* number of codelen codes */ + LenShift = 10 /* code = len<<LenShift|code */ +/* LitlenBits = 7,*/ /* number of bits in litlen decode table */ +/* DistBits = 6,*/ /* number of bits in offset decode table */ +/* ClenBits = 6*/ /* number of bits in code len decode table */ +}; + +typedef struct { + ushort count[HuffBits]; /* code bit len -> count */ + ushort symbol[Nlitlen]; /* litlen code -> symbol */ +} HuffTree; + +typedef struct { + const uchar *src; + uint bits; + uint nbits; + + uchar *dst; + uint *dstlen; + + HuffTree ltree; /* dynamic lit/len tree */ + HuffTree dtree; /* dynamic distance tree */ +} FlateStream; + + +static HuffTree ltree; /* fixed lit/len tree */ +static HuffTree dtree; /* fixed distance tree */ + +/* extra bits and base tables for length codes */ +static uchar lenbits[Nlen] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, + 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 0 +}; + +static ushort lenbase[Nlen]; + +/* extra bits and base tables for distance codes */ +static uchar distbits[Ndist] = { + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 +}; + +static ushort distbase[Ndist]; + +/* ordering of code lengths */ +static const uchar clenorder[Nclen] = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 +}; + +static void init_base_tables(void) { + int i, base; + + for (base = 3, i = 0; i < Nlen; i++) { + lenbase[i] = base; + base += 1 << lenbits[i]; + } + lenbase[Nlen-1]--; /* deflate bug */ + + for (base = 1, i = 0; i < Ndist; i++) { + distbase[i] = base; + base += 1 << distbits[i]; + } +} + +static void init_fixed_trees(void) { + int i; + + ltree.count[7] = 24; + ltree.count[8] = 152; + ltree.count[9] = 112; + for (i = 0; i < 24; i++) + ltree.symbol[i] = 256 + i; + for (i = 0; i < 144; ++i) + ltree.symbol[24 + i] = i; + for (i = 0; i < 8; ++i) + ltree.symbol[24 + 144 + i] = 280 + i; + for (i = 0; i < 112; ++i) + ltree.symbol[24 + 144 + 8 + i] = 144 + i; + + dtree.count[5] = Ndist; + for (i = 0; i < Ndist; ++i) + dtree.symbol[i] = i; +} + +/* given an array of code lengths, build a huffman code tree */ +static void build_tree(HuffTree *t, const uchar *lens, uint n) { + int offs[HuffBits]; + int i, sum; + + for (i = 0; i < HuffBits; i++) + t->count[i] = 0; + + /* count code lengths and calc first code for each length */ + for (i = 0; i < n; i++) + t->count[lens[i]]++; + t->count[0] = 0; + for (sum = 0, i = 0; i < HuffBits; i++) { + offs[i] = sum; + sum += t->count[i]; + } + + /* create code->symbol translation table (symbols sorted by code) */ + for (i = 0; i < n; i++) + if (lens[i]) + t->symbol[offs[lens[i]]++] = i; +} + +/* + * decode functions + */ + +/* get one bit from s->src stream */ +static uint getbit(FlateStream *s) { + uint bit; + + if (!s->nbits--) { + s->bits = *s->src++; + s->nbits = 7; + } + bit = s->bits & 1; + s->bits >>= 1; + return bit; +} + +/* get n bits from s->src stream */ +static uint getbits(FlateStream *s, int n) { + uint bits = 0; + int i; + + if (n == 0) + return 0; + for (i = 0; i < n; i++) + bits |= getbit(s) << i; + return bits; +} + +/* decode a symbol from stream with tree */ +static uint decode_symbol(FlateStream *s, HuffTree *t) { + int sum = 0, cur = 0, len = 0; + + /* get bits while code value is above sum */ + do { + cur <<= 1; + cur |= getbit(s); + len++; + sum += t->count[len]; + cur -= t->count[len]; + } while (cur >= 0); + + return t->symbol[sum + cur]; +} + +/* decode dynamic trees from stream */ +static void decode_trees(FlateStream *s, HuffTree *lt, HuffTree *dt) { + HuffTree ctree; + uchar lens[Nlitlen+Ndist]; + uint nlit, ndist, nclen; + uint i; + + nlit = 257 + getbits(s, 5); + ndist = 1 + getbits(s, 5); + nclen = 4 + getbits(s, 4); + /*if (nlit > Nlitlen || ndist > Ndist) */ + + /* build code length tree */ + for (i = 0; i < Nclen; i++) + lens[i] = 0; + for (i = 0; i < nclen; i++) + lens[clenorder[i]] = getbits(s, 3); + build_tree(&ctree, lens, Nclen); + + /* decode code lengths for the dynamic trees */ + for (i = 0; i < nlit + ndist; ) { + uint sym = decode_symbol(s, &ctree); + uint len; + uchar c; + + if (sym < 16) { + lens[i++] = sym; + } else if (sym == 16) { + /* copy previous code length 3-6 times */ + c = lens[i - 1]; + for (len = 3 + getbits(s, 2); len; len--) + lens[i++] = c; + } else if (sym == 17) { + /* repeat 0 for 3-10 times */ + for (len = 3 + getbits(s, 3); len; len--) + lens[i++] = 0; + } else if (sym == 18) { + /* repeat 0 for 11-138 times */ + for (len = 11 + getbits(s, 7); len; len--) + lens[i++] = 0; + } else { + /* error */ + } + } + + /* build dynamic trees */ + build_tree(lt, lens, nlit); + build_tree(dt, lens + nlit, ndist); +} + +/* decode a block of data from stream with trees */ +static int decode_block(FlateStream *s, HuffTree *lt, HuffTree *dt) { + uchar *start = s->dst; + uint sym; + + for (;;) { + sym = decode_symbol(s, lt); + + if (sym == 256) { + *s->dstlen += s->dst - start; + return 0; + } + if (sym < 256) + *s->dst++ = sym; + else { + int len, dist; + int i; + + sym -= 257; + if (sym >= Nlen) + /* error */; + len = lenbase[sym] + getbits(s, lenbits[sym]); + sym = decode_symbol(s, dt); + if (sym >= Ndist) + /* error */; + dist = distbase[sym] + getbits(s, distbits[sym]); + + /* copy match */ + for (i = 0; i < len; i++) + s->dst[i] = s->dst[i - dist]; + s->dst += len; + } + } +} + +/* inflate an uncompressed block of data */ +static int inflate_uncompressed_block(FlateStream *s) { + uint len, invlen; + + len = (s->src[1] << 8) | s->src[0]; + invlen = (s->src[3] << 8) | s->src[2]; + s->src += 4; + + if (len != (~invlen & 0x0000ffff)) + /* error */; + + /* start next block on a byte boundary */ + s->nbits = 0; + + *s->dstlen += len; + while (len--) + *s->dst++ = *s->src++; + return 0; +} + +/* inflate a block of data compressed with fixed huffman trees */ +static int inflate_fixed_block(FlateStream *s) { + return decode_block(s, &ltree, &dtree); +} + +/* inflate a block of data compressed with dynamic huffman trees */ +static int inflate_dynamic_block(FlateStream *s) { + decode_trees(s, &s->ltree, &s->dtree); + return decode_block(s, &s->ltree, &s->dtree); +} + +/* + * extern functions + */ + +/* initialize global (static) data */ +void inflate_init(void) { + init_fixed_trees(); + init_base_tables(); +} + +/* inflate stream from src to dst */ +int inflate(void *dst, uint *dstlen, const void *src, uint srclen) { + FlateStream s; + uint bfinal; + + s.src = src; + s.nbits = 0; + s.dst = dst; + s.dstlen = dstlen; + *dstlen = 0; + do { + uint btype; + int res; + + /* read final block flag */ + bfinal = getbit(&s); + + /* read block type */ + btype = getbits(&s, 2); + + /* decompress block */ + switch (btype) { + case 0: + res = inflate_uncompressed_block(&s); + break; + case 1: + res = inflate_fixed_block(&s); + break; + case 2: + res = inflate_dynamic_block(&s); + break; + default: + return -1; + } + if (res != 0) + return -1; + } while (!bfinal); + return 0; +} + + +#include <stdio.h> +#include <stdlib.h> + +unsigned char *readall(char *name, uint *len) { + ulong size = 1 << 20; + uchar *buf; + FILE *in; + + in = fopen(name, "r"); + if (in == NULL) + return NULL; + buf = malloc(size); + *len = fread(buf, 1, size, in); + fclose(in); + return buf; +} + +int main(int argc, char **argv) { + int ret; + uchar *src; + uint srclen, dstlen=1<<16; + uchar *dst; + + if (argc < 2) + return -1; + src = readall(argv[1], &srclen); + inflate_init(); + ret = inflate(dst = malloc(dstlen), &dstlen, src, srclen); + if (ret) + puts("inflate: error\n"); + else + printf("inflate: uncompressed %u bytes\n", dstlen); + puts((char *)dst); + free(dst); + free(src); + return ret; +}