commit 4843dbb8d8c3204eafcd979d45e4181afd9cf858
parent 8f660689897a22f13b6b83c3f797c4392d381086
Author: nsz <nszabolcs@gmail.com>
Date: Tue, 21 Apr 2009 14:47:12 +0200
unrolled decode_symbol 20% faster
Diffstat:
Makefile | | | 8 | ++++---- |
inflate.c | | | 65 | ++++++++++++++++++++++++++++++++++++++++++++++++----------------- |
2 files changed, 52 insertions(+), 21 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,5 +1,5 @@
-CFLAGS=-g -Wall -ansi -pedantic
-#CFLAGS=-Os -Wall -ansi -pedantic
+#CFLAGS=-g -Wall -ansi -pedantic
+CFLAGS=-Os -Wall -ansi -pedantic
LDFLAGS=
SRC=inflate.c
OBJ=${SRC:.c=.o}
@@ -15,11 +15,11 @@ clean:
gcov: inflate.c
- gcc -fprofile-arcs -ftest-coverage -pg ${CFLAGS} $<
+ gcc -fprofile-arcs -ftest-coverage -pg -g -Wall $<
./a.out a.dat > /dev/null
gcov -b $< > /dev/null
gprof a.out > $<.gprof
- gcc ${CFLAGS} $<
+ gcc -g -Wall $<
valgrind -v --leak-check=yes ./a.out a.dat > /dev/null 2> a.valgrind
grep ERROR a.valgrind
grep alloc a.valgrind
diff --git a/inflate.c b/inflate.c
@@ -4,9 +4,7 @@ TODO:
clever io
p9 hufftable
int types
- speedup:
- fillbits(s, n) -> faster getbits()
- loop unroll in decode_symbol
+ fillbits(s, n) ?
*/
typedef unsigned char uchar;
@@ -100,7 +98,7 @@ static void init_fixed_trees(void) {
dtree.symbol[i] = i;
}
-/* given an array of code lengths, build a huffman code tree */
+/* build huffman code tree from code lengths */
static void build_tree(HuffTree *t, const uchar *lens, uint n) {
int offs[HuffBits];
int i, sum;
@@ -150,19 +148,52 @@ static uint getbits(FlateStream *s, int n) {
/* decode a symbol from stream with tree */
static uint decode_symbol(FlateStream *s, HuffTree *t) {
- int sum = 0, cur = 0, len = 0;
-
- /* get bits while code value is above sum */
- do {
- len++;
- if (len >= HuffBits)
- /* error */;
- sum += t->count[len];
+ int sum = 0, cur = 0;
+ ushort *count = t->count + 1;
+ uint nbits = s->nbits;
+ uint bits = s->bits;
+
+ /* loop unroll, nbits, bits: approx 20% speedup */
+ while (nbits--) {
+ cur |= bits & 1;
+ bits >>= 1;
+ sum += *count;
+ cur -= *count;
+ if (cur < 0)
+ goto found;
cur <<= 1;
- cur |= getbit(s);
- cur -= t->count[len];
- } while (cur >= 0);
-
+ count++;
+ }
+ bits = *s->src++;
+ nbits = 8;
+ while (nbits--) {
+ cur |= bits & 1;
+ bits >>= 1;
+ sum += *count;
+ cur -= *count;
+ if (cur < 0)
+ goto found;
+ cur <<= 1;
+ count++;
+ }
+ bits = *s->src++;
+ nbits = 8;
+ while (nbits--) {
+ cur |= bits & 1;
+ bits >>= 1;
+ sum += *count;
+ cur -= *count;
+ if (cur < 0)
+ goto found_check;
+ cur <<= 1;
+ count++;
+ }
+found_check:
+ if (count >= t->count + HuffBits)
+ /* error */;
+found:
+ s->bits = bits;
+ s->nbits = nbits;
return t->symbol[sum + cur];
}
@@ -210,7 +241,7 @@ static void decode_trees(FlateStream *s, HuffTree *lt, HuffTree *dt) {
} else
/* error */;
}
- /* build dynamic trees */
+ /* build dynamic huffman trees */
build_tree(lt, lens, nlit);
build_tree(dt, lens + nlit, ndist);
}