flate

deflate implementation
git clone git://git.suckless.org/flate
Log | Files | Refs | README

commit afad3e61ac3e644dbe7e21ce0b60c8c9c6d8aaa1
parent 0a6feb60f9b8d1f591f15692fee108f043ed2fdf
Author: nsz <nszabolcs@gmail.com>
Date:   Wed, 13 May 2009 00:38:59 +0200

decode_block and decode_symbol optimizations
Diffstat:
Makefile | 8++++----
inflate.c | 94++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
2 files changed, 57 insertions(+), 45 deletions(-)

diff --git a/Makefile b/Makefile @@ -22,7 +22,7 @@ gcov: inflate.c gcov -b $< > /dev/null gprof a.out > $<.gprof gcc -g -Wall $< - cat d.dat | valgrind -v --leak-check=yes ./a.out > /dev/null 2> a.valgrind - grep ERROR a.valgrind - grep alloc a.valgrind - rm a.out a.valgrind *.gcno *.gcda gmon.out +# cat d.dat | valgrind -v --leak-check=yes ./a.out > /dev/null 2> a.valgrind +# grep ERROR a.valgrind +# grep alloc a.valgrind + rm -f a.out a.valgrind *.gcno *.gcda gmon.out diff --git a/inflate.c b/inflate.c @@ -267,21 +267,30 @@ static uint getbits(Stream *s, int n) { /* decode a symbol from stream with huff code */ static uint decode_symbol(Stream *s, Huff *huff) { - int sum, cur; - ushort *count; uint huffbits = huff->nbits; uint streambits = s->nbits; uint bits = s->bits; uint mask = (1 << huffbits) - 1; Entry entry; - /* avail src should be checked outside */ - while (streambits < huffbits) { - /* TODO: decode_symbol is performace bottleneck, do it faster */ - if (!checksrc(s)) - return 0; - bits |= (*s->src++ << streambits); - streambits += 8; + /* get enough bite efficiently */ + if (streambits < huffbits) { + if (s->src + 2 < s->srcend) { + /* we assume huffbits <= 9 */ + bits |= *s->src++ << streambits; + streambits += 8; + bits |= *s->src++ << streambits; + streambits += 8; + bits |= *s->src++ << streambits; + streambits += 8; + } else + /* rare case, we assume EOB length >= huffbits */ + do { + if (!checksrc(s)) + return 0; + bits |= *s->src++ << streambits; + streambits += 8; + } while (streambits < huffbits); } entry = huff->table[bits & mask]; if (entry.len > 0) { @@ -292,27 +301,30 @@ static uint decode_symbol(Stream *s, Huff *huff) { s->error = FlateCorrupted; return 0; } - - /* code is longer than huffbits: bitwise decode the rest */ s->bits = bits >> huffbits; s->nbits = streambits - huffbits; - cur = entry.sym; - sum = huff->sum; - count = huff->count + huffbits + 1; - for (;;) { - cur |= getbit(s); - sum += *count; - cur -= *count; - if (cur < 0) - break; - cur <<= 1; - count++; - if (count == huff->count + CodeBits) { - s->error = FlateCorrupted; - return 0; + /* code is longer than huffbits: bitwise decode the rest */ + { + int cur = entry.sym; + int sum = huff->sum; + /* TODO: count[0..huffbits] is never needed */ + ushort *count = huff->count + huffbits + 1; + + for (;;) { + cur |= getbit(s); + sum += *count; + cur -= *count; + if (cur < 0) + break; + cur <<= 1; + count++; + if (count == huff->count + CodeBits) { + s->error = FlateCorrupted; + return 0; + } } + return huff->symbol[sum + cur]; } - return huff->symbol[sum + cur]; } /* decode dynamic huffman code trees from stream */ @@ -377,19 +389,16 @@ static void decode_huffs(Stream *s) { /* decode a block of data from stream with trees */ static void decode_block(Stream *s, Huff *lhuff, Huff *dhuff) { - uint sym; - for (;;) { - sym = decode_symbol(s, lhuff); + uint sym = decode_symbol(s, lhuff); + if (s->error != FlateOk) return; - if (sym == 256) - return; if (sym < 256) { s->win[s->pos++] = sym; if (!checkpos(s)) return; - } else { + } else if (sym > 256) { uint len, dist; sym -= 257; @@ -412,33 +421,36 @@ static void decode_block(Stream *s, Huff *lhuff, Huff *dhuff) { if (s->pos + len <= WinSize) { uint pos = s->pos; - while (len >= 4) { + /* lenbase[sym] >= 3 */ + do { s->win[pos] = s->win[(pos - dist) & WinMask]; pos++; s->win[pos] = s->win[(pos - dist) & WinMask]; pos++; s->win[pos] = s->win[(pos - dist) & WinMask]; pos++; + len -= 3; + } while (len >= 3); + if (len--) { s->win[pos] = s->win[(pos - dist) & WinMask]; pos++; - len -= 4; - } - while (len--) { - s->win[pos] = s->win[(pos - dist) & WinMask]; - pos++; + if (len) { + s->win[pos] = s->win[(pos - dist) & WinMask]; + pos++; + } } s->pos = pos; if (!checkpos(s)) return; - } else { + } else while (len--) { s->win[s->pos] = s->win[(s->pos - dist) & WinMask]; s->pos++; if (!checkpos(s)) return; } - } - } + } else /* EOB: sym == 256 */ + return; } }