commit afad3e61ac3e644dbe7e21ce0b60c8c9c6d8aaa1
parent 0a6feb60f9b8d1f591f15692fee108f043ed2fdf
Author: nsz <nszabolcs@gmail.com>
Date: Wed, 13 May 2009 00:38:59 +0200
decode_block and decode_symbol optimizations
Diffstat:
Makefile | | | 8 | ++++---- |
inflate.c | | | 94 | ++++++++++++++++++++++++++++++++++++++++++++----------------------------------- |
2 files changed, 57 insertions(+), 45 deletions(-)
diff --git a/Makefile b/Makefile
@@ -22,7 +22,7 @@ gcov: inflate.c
gcov -b $< > /dev/null
gprof a.out > $<.gprof
gcc -g -Wall $<
- cat d.dat | valgrind -v --leak-check=yes ./a.out > /dev/null 2> a.valgrind
- grep ERROR a.valgrind
- grep alloc a.valgrind
- rm a.out a.valgrind *.gcno *.gcda gmon.out
+# cat d.dat | valgrind -v --leak-check=yes ./a.out > /dev/null 2> a.valgrind
+# grep ERROR a.valgrind
+# grep alloc a.valgrind
+ rm -f a.out a.valgrind *.gcno *.gcda gmon.out
diff --git a/inflate.c b/inflate.c
@@ -267,21 +267,30 @@ static uint getbits(Stream *s, int n) {
/* decode a symbol from stream with huff code */
static uint decode_symbol(Stream *s, Huff *huff) {
- int sum, cur;
- ushort *count;
uint huffbits = huff->nbits;
uint streambits = s->nbits;
uint bits = s->bits;
uint mask = (1 << huffbits) - 1;
Entry entry;
- /* avail src should be checked outside */
- while (streambits < huffbits) {
- /* TODO: decode_symbol is performace bottleneck, do it faster */
- if (!checksrc(s))
- return 0;
- bits |= (*s->src++ << streambits);
- streambits += 8;
+ /* get enough bite efficiently */
+ if (streambits < huffbits) {
+ if (s->src + 2 < s->srcend) {
+ /* we assume huffbits <= 9 */
+ bits |= *s->src++ << streambits;
+ streambits += 8;
+ bits |= *s->src++ << streambits;
+ streambits += 8;
+ bits |= *s->src++ << streambits;
+ streambits += 8;
+ } else
+ /* rare case, we assume EOB length >= huffbits */
+ do {
+ if (!checksrc(s))
+ return 0;
+ bits |= *s->src++ << streambits;
+ streambits += 8;
+ } while (streambits < huffbits);
}
entry = huff->table[bits & mask];
if (entry.len > 0) {
@@ -292,27 +301,30 @@ static uint decode_symbol(Stream *s, Huff *huff) {
s->error = FlateCorrupted;
return 0;
}
-
- /* code is longer than huffbits: bitwise decode the rest */
s->bits = bits >> huffbits;
s->nbits = streambits - huffbits;
- cur = entry.sym;
- sum = huff->sum;
- count = huff->count + huffbits + 1;
- for (;;) {
- cur |= getbit(s);
- sum += *count;
- cur -= *count;
- if (cur < 0)
- break;
- cur <<= 1;
- count++;
- if (count == huff->count + CodeBits) {
- s->error = FlateCorrupted;
- return 0;
+ /* code is longer than huffbits: bitwise decode the rest */
+ {
+ int cur = entry.sym;
+ int sum = huff->sum;
+ /* TODO: count[0..huffbits] is never needed */
+ ushort *count = huff->count + huffbits + 1;
+
+ for (;;) {
+ cur |= getbit(s);
+ sum += *count;
+ cur -= *count;
+ if (cur < 0)
+ break;
+ cur <<= 1;
+ count++;
+ if (count == huff->count + CodeBits) {
+ s->error = FlateCorrupted;
+ return 0;
+ }
}
+ return huff->symbol[sum + cur];
}
- return huff->symbol[sum + cur];
}
/* decode dynamic huffman code trees from stream */
@@ -377,19 +389,16 @@ static void decode_huffs(Stream *s) {
/* decode a block of data from stream with trees */
static void decode_block(Stream *s, Huff *lhuff, Huff *dhuff) {
- uint sym;
-
for (;;) {
- sym = decode_symbol(s, lhuff);
+ uint sym = decode_symbol(s, lhuff);
+
if (s->error != FlateOk)
return;
- if (sym == 256)
- return;
if (sym < 256) {
s->win[s->pos++] = sym;
if (!checkpos(s))
return;
- } else {
+ } else if (sym > 256) {
uint len, dist;
sym -= 257;
@@ -412,33 +421,36 @@ static void decode_block(Stream *s, Huff *lhuff, Huff *dhuff) {
if (s->pos + len <= WinSize) {
uint pos = s->pos;
- while (len >= 4) {
+ /* lenbase[sym] >= 3 */
+ do {
s->win[pos] = s->win[(pos - dist) & WinMask];
pos++;
s->win[pos] = s->win[(pos - dist) & WinMask];
pos++;
s->win[pos] = s->win[(pos - dist) & WinMask];
pos++;
+ len -= 3;
+ } while (len >= 3);
+ if (len--) {
s->win[pos] = s->win[(pos - dist) & WinMask];
pos++;
- len -= 4;
- }
- while (len--) {
- s->win[pos] = s->win[(pos - dist) & WinMask];
- pos++;
+ if (len) {
+ s->win[pos] = s->win[(pos - dist) & WinMask];
+ pos++;
+ }
}
s->pos = pos;
if (!checkpos(s))
return;
- } else {
+ } else
while (len--) {
s->win[s->pos] = s->win[(s->pos - dist) & WinMask];
s->pos++;
if (!checkpos(s))
return;
}
- }
- }
+ } else /* EOB: sym == 256 */
+ return;
}
}