flate

deflate implementation
git clone git://git.suckless.org/flate
Log | Files | Refs | README

commit 4843dbb8d8c3204eafcd979d45e4181afd9cf858
parent 8f660689897a22f13b6b83c3f797c4392d381086
Author: nsz <nszabolcs@gmail.com>
Date:   Tue, 21 Apr 2009 14:47:12 +0200

unrolled decode_symbol 20% faster
Diffstat:
Makefile | 8++++----
inflate.c | 65++++++++++++++++++++++++++++++++++++++++++++++++-----------------
2 files changed, 52 insertions(+), 21 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,5 +1,5 @@ -CFLAGS=-g -Wall -ansi -pedantic -#CFLAGS=-Os -Wall -ansi -pedantic +#CFLAGS=-g -Wall -ansi -pedantic +CFLAGS=-Os -Wall -ansi -pedantic LDFLAGS= SRC=inflate.c OBJ=${SRC:.c=.o} @@ -15,11 +15,11 @@ clean: gcov: inflate.c - gcc -fprofile-arcs -ftest-coverage -pg ${CFLAGS} $< + gcc -fprofile-arcs -ftest-coverage -pg -g -Wall $< ./a.out a.dat > /dev/null gcov -b $< > /dev/null gprof a.out > $<.gprof - gcc ${CFLAGS} $< + gcc -g -Wall $< valgrind -v --leak-check=yes ./a.out a.dat > /dev/null 2> a.valgrind grep ERROR a.valgrind grep alloc a.valgrind diff --git a/inflate.c b/inflate.c @@ -4,9 +4,7 @@ TODO: clever io p9 hufftable int types - speedup: - fillbits(s, n) -> faster getbits() - loop unroll in decode_symbol + fillbits(s, n) ? */ typedef unsigned char uchar; @@ -100,7 +98,7 @@ static void init_fixed_trees(void) { dtree.symbol[i] = i; } -/* given an array of code lengths, build a huffman code tree */ +/* build huffman code tree from code lengths */ static void build_tree(HuffTree *t, const uchar *lens, uint n) { int offs[HuffBits]; int i, sum; @@ -150,19 +148,52 @@ static uint getbits(FlateStream *s, int n) { /* decode a symbol from stream with tree */ static uint decode_symbol(FlateStream *s, HuffTree *t) { - int sum = 0, cur = 0, len = 0; - - /* get bits while code value is above sum */ - do { - len++; - if (len >= HuffBits) - /* error */; - sum += t->count[len]; + int sum = 0, cur = 0; + ushort *count = t->count + 1; + uint nbits = s->nbits; + uint bits = s->bits; + + /* loop unroll, nbits, bits: approx 20% speedup */ + while (nbits--) { + cur |= bits & 1; + bits >>= 1; + sum += *count; + cur -= *count; + if (cur < 0) + goto found; cur <<= 1; - cur |= getbit(s); - cur -= t->count[len]; - } while (cur >= 0); - + count++; + } + bits = *s->src++; + nbits = 8; + while (nbits--) { + cur |= bits & 1; + bits >>= 1; + sum += *count; + cur -= *count; + if (cur < 0) + goto found; + cur <<= 1; + count++; + } + bits = *s->src++; + nbits = 8; + while (nbits--) { + cur |= bits & 1; + bits >>= 1; + sum += *count; + cur -= *count; + if (cur < 0) + goto found_check; + cur <<= 1; + count++; + } +found_check: + if (count >= t->count + HuffBits) + /* error */; +found: + s->bits = bits; + s->nbits = nbits; return t->symbol[sum + cur]; } @@ -210,7 +241,7 @@ static void decode_trees(FlateStream *s, HuffTree *lt, HuffTree *dt) { } else /* error */; } - /* build dynamic trees */ + /* build dynamic huffman trees */ build_tree(lt, lens, nlit); build_tree(dt, lens + nlit, ndist); }