flate

deflate implementation
git clone git://git.suckless.org/flate
Log | Files | Refs | README

commit c10b847118d0735b2f65b002110a781ac2e8f0a5
parent b598e8d90d3680b89058f1bdc63ff598461091af
Author: nsz <nszabolcs@gmail.com>
Date:   Sun, 23 Aug 2009 04:56:25 +0200

-avail +endpos +skip +fillwin mods (wip)
Diffstat:
Makefile | 4++--
crc.c | 2+-
deflate.c | 189++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
sflate.c | 6+++---
4 files changed, 127 insertions(+), 74 deletions(-)

diff --git a/Makefile b/Makefile @@ -30,8 +30,8 @@ profinf: inflate.c deflate.c sflate.c crc.c adler.c # grep alloc a.valgrind rm -f a.out a.valgrind *.gcno *.gcda gmon.out -profdef: inflate.c deflate.c sflate.c crc.c adler.c - gcc -O1 -fprofile-arcs -ftest-coverage -pg -g -Wall $+ +profdef: deflate.c inflate.c sflate.c crc.c adler.c + gcc -O0 -fprofile-arcs -ftest-coverage -pg -g -Wall $+ ./a.out <a.u >/dev/null gcov -b $+ >/dev/null gprof a.out >$<.gprof diff --git a/crc.c b/crc.c @@ -3,7 +3,7 @@ uint crctab[256]; void crc32init(void) { - static const uint poly = 0xEDB88320; + static const uint poly = 0xedb88320; int i,j; for (i = 0; i < 256; ++i) { diff --git a/deflate.c b/deflate.c @@ -2,6 +2,7 @@ #include <string.h> #include "flate.h" +#include <stdio.h> enum { CodeBits = 16, /* max number of bits in a code + 1 */ EOB = 256, /* end of block symbol */ @@ -18,12 +19,12 @@ enum { HashBits = 13, HashSize = 1 << HashBits, /* hash table size */ BigDist = 1 << 12, /* max match distance for short match length */ - MaxDist = WinSize - MaxMatch + 1, /* worst case available history */ - StartPos = WinSize - MaxMatch + 1, /* block start position */ - EndPos = 2*WinSize - MaxMatch + 1, /* block end position */ - DstSize = WinSize + MaxMatch + 6, /* worst case compressed block size */ + BlockSize = (1 << 15) - 234, /* --- */ + MaxDist = WinSize - MaxMatch, + WinBufSize = 2*WinSize + MaxMatch, + DstSize = BlockSize + MaxMatch + 6, /* worst case compressed block size */ LzSize = 1 << 13, /* lz buffer size */ - LzEnd = LzSize - 2, /* guard that lzbuf does not overflow in the next iteration */ + LzEnd = LzSize - 2, /* guard lzbuf not to overflow in the next iteration */ LzLitFlag = 1 << 15 /* marks literal run length in lz buffer */ }; @@ -42,7 +43,8 @@ typedef struct { int eof; /* end of input */ int pos; /* position in input win */ int startpos; /* block start pos in input win */ - int avail; /* available in input win */ + int endpos; /* end of available bytes in win */ + int skip; /* skipped hash chain updates (until next iter) */ Match prevm; /* previous (deferred) match */ uchar *src; /* input data (not yet in win) */ uchar *srcend; @@ -50,7 +52,7 @@ typedef struct { uchar *dstbegin; /* start position of unflushed data in dstbuf */ uint bits; int nbits; - uchar win[2*WinSize]; /* input window */ + uchar win[WinBufSize]; /* input window */ ushort head[HashSize]; /* position of hash chain heads */ ushort chain[WinSize]; /* hash chain */ LzCode lzbuf[LzSize]; /* literal run length, match len, match dist */ @@ -326,7 +328,7 @@ static void deflate_block(State *s) { uchar *p; int dynsize, fixsize, uncsize; int blocklen = s->pos - s->startpos; -/* int dyntree; */ + int dyntree; /* calc dynamic codes */ hufflens(llen, s->lfreq, Nlitlen, CodeBits-1); @@ -357,7 +359,7 @@ static void deflate_block(State *s) { if (c == 18) dynsize += 7; } -/* dyntree = dynsize - 3; */ + dyntree = dynsize - 3; for (lz = s->lzbuf, p = s->win + s->startpos; lz != s->lz; lz++) if (lz->bits & LzLitFlag) for (n = lz->n; n > 0; n--, p++) { @@ -380,7 +382,7 @@ static void deflate_block(State *s) { dynsize += llen[EOB]; /* emit block */ - putbits(s, s->eof, 1); + putbits(s, s->eof && s->pos == s->endpos, 1); if (dynsize < fixsize && dynsize < uncsize) { /* dynamic code */ putbits(s, 2, 2); @@ -415,9 +417,9 @@ static void deflate_block(State *s) { s->dst += blocklen; } /* -fprintf(stderr, "blen:%d [%d,%d] lzlen:%d dynlen:%d (tree:%d rate:%.3f) fixlen:%d (rate:%.3f) unclen:%d (rate:%.3f) avail:%d\n", +fprintf(stderr, "blen:%d [%d,%d] lzlen:%d dynlen:%d (tree:%d rate:%.3f) fixlen:%d (rate:%.3f) unclen:%d (rate:%.3f)\n", blocklen, s->startpos, s->pos, s->lz - s->lzbuf, dynsize, dyntree, dynsize/(float)blocklen, - fixsize, fixsize/(float)blocklen, uncsize, uncsize/(float)blocklen, s->avail); + fixsize, fixsize/(float)blocklen, uncsize, uncsize/(float)blocklen); */ } @@ -451,6 +453,7 @@ static void flushlit(State *s) { static void recordmatch(State *s, Match m) { int n; +/*fprintf(stderr, "m %d %d\n", m.len, m.dist);*/ flushlit(s); n = bisect(lenbase, Nlen, m.len); s->lz->n = n; @@ -466,6 +469,7 @@ static void recordmatch(State *s, Match m) { /* update literal run length */ static void recordlit(State *s, int c) { +/*fprintf(stderr, "l %c\n", c);*/ s->nlit++; s->lfreq[c]++; } @@ -477,46 +481,60 @@ static int gethash(uchar *p) { /* update hash chain at the current position */ static int updatechain(State *s) { - int h, i; - - if (s->avail < MinMatch) - return 0; - h = gethash(s->win + s->pos); - i = s->head[h]; - s->head[h] = s->pos; - if (i >= s->pos || s->pos - i > MaxDist) - i = 0; - s->chain[s->pos % WinSize] = i; - return i; + int hash, next = 0, p = s->pos, i = p; + + if (s->endpos - p < MinMatch) + p = s->endpos - MinMatch; + for (i -= s->skip; i <= p; i++) { + hash = gethash(s->win + i); + next = s->head[hash]; + s->head[hash] = i; + if (next >= i || i - next >= MaxDist) + next = 0; + s->chain[i % WinSize] = next; + } + s->skip = 0; + return next; } -/* find longest match, mpos is the next in the hash chain */ -static Match getmatch(State *s, int mpos) { +/* find longest match, next position in the hash chain is given */ +static Match getmatch(State *s, int next) { Match m = {0, MinMatch-1}; int len; - int maxlen = s->avail < MaxMatch ? s->avail : MaxMatch; int limit = s->pos - MaxDist; int chainlen = MaxChainLen; uchar *q; uchar *p = s->win + s->pos; - uchar *end = p + maxlen; + uchar *end = p + MaxMatch; do { - q = s->win + mpos; + q = s->win + next; +/*fprintf(stderr,"match: next:%d pos:%d limit:%d\n", next, s->pos, limit);*/ /* next match should be at least m.len+1 long */ if (q[m.len] != p[m.len] || q[m.len-1] != p[m.len-1] || q[0] != p[0]) continue; + /* loop unroll: MaxMatch % 10 == 8 */ +/* while (*++q == *++p && *++q == *++p + && *++q == *++p && *++q == *++p + && *++q == *++p && *++q == *++p + && *++q == *++p && ++p != end && *++q == *p + && *++q == *++p && *++q == *++p); +*/ while (++p != end && *++q == *p); - len = maxlen - (end - p); + len = MaxMatch - (end - p); p -= len; +/*fprintf(stderr,"match: len:%d dist:%d\n", len, s->pos - next);*/ if (len > m.len) { - m.dist = s->pos - mpos; + m.dist = s->pos - next; m.len = len; - if (len == maxlen) + if (len == MaxMatch) + return m; + if (s->pos + len >= s->endpos) { /* TODO: overflow */ + m.len = s->endpos - s->pos; return m; + } } - /* >= limit can be allowed except when limit == 0 */ - } while ((mpos = s->chain[mpos % WinSize]) > limit && --chainlen); + } while ((next = s->chain[next % WinSize]) > limit && --chainlen); if (m.len < MinMatch || (m.len == MinMatch && m.dist > BigDist)) m.len = 0; return m; @@ -533,27 +551,42 @@ static void startblock(State *s) { s->lfreq[EOB]++; } -/* if block should be ended then end it and shift input window */ -static int endblock(State *s) { +static int shiftwin(State *s) { int n; - if (s->pos >= EndPos || s->lz - s->lzbuf >= LzEnd || (s->eof && s->avail == 0)) { + if (s->startpos < WinSize) + return 0; + memmove(s->win, s->win + WinSize, WinBufSize - WinSize); + for (n = 0; n < HashSize; n++) + s->head[n] = s->head[n] > WinSize ? s->head[n] - WinSize : 0; + for (n = 0; n < WinSize; n++) { + s->chain[n] = s->chain[n] > WinSize ? s->chain[n] - WinSize : 0; +/* n++; + s->chain[n] = s->chain[n] > WinSize ? s->chain[n] - WinSize : 0; + n++; + s->chain[n] = s->chain[n] > WinSize ? s->chain[n] - WinSize : 0; + n++; + s->chain[n] = s->chain[n] > WinSize ? s->chain[n] - WinSize : 0; +*/ } + s->pos -= WinSize; + s->startpos -= WinSize; + s->endpos -= WinSize; + return 1; +} + +/* if block should be ended then end it and shift input window */ +static int endblock(State *s) { + if ((s->pos >= 2*WinSize && !shiftwin(s)) || + s->pos - s->startpos >= BlockSize || + s->lz - s->lzbuf >= LzEnd || + (s->eof && s->pos == s->endpos)) { /* deflate block */ flushlit(s); if (s->prevm.len) s->pos--; deflate_block(s); - if (s->eof) { + if (s->eof && s->pos == s->endpos) putbits(s, 0, 7); - return 1; - } - /* shift input window */ - memcpy(s->win, s->win + WinSize, WinSize); - for (n = 0; n < HashSize; n++) - s->head[n] = s->head[n] > WinSize ? s->head[n] - WinSize : 0; - for (n = 0; n < WinSize; n++) - s->chain[n] = s->chain[n] > WinSize ? s->chain[n] - WinSize : 0; - s->pos -= WinSize; return 1; } return 0; @@ -563,32 +596,44 @@ static int endblock(State *s) { static int fillwin(State *s) { int n, k; - /* s->avail < MaxMatch && s->pos < EndPos */ - if (!s->eof) { +/* todo: lzend */ + if (s->endpos < WinBufSize && !s->eof) { k = s->srcend - s->src; - n = 2*WinSize - s->pos - s->avail; /* s->avail + n >= MaxMatch */ + n = WinBufSize - s->endpos; if (k > n) k = n; - memcpy(s->win + s->pos + s->avail, s->src, k); + memcpy(s->win + s->endpos, s->src, k); s->src += k; - s->avail += k; - if (s->avail < MaxMatch) /* s->srcend == s->src */ + s->endpos += k; +/* +fprintf(stderr,"fill: pos:%d k:%d n:%d end:%d start:%d\n", s->pos, k, n, s->endpos, s->startpos); +*/ + if (s->endpos < WinBufSize) return 0; } return 1; } +static int calcguard(State *s) { + int p = s->endpos - MaxMatch; + int q = s->startpos + BlockSize; + + return p < q ? p : q; +} + /* deflate compress from s->src into s->dstbuf */ static int deflate_state(State *s) { Match m; - int head; + int next; + int guard; + LzCode *lzend; if (s->state == FlateIn) s->eof = s->src == s->srcend; else if (s->state == FlateOut) { if (s->dstbegin < s->dst) return (s->state = FlateOut); - if (s->eof) + if (s->eof && s->pos == s->endpos) return (s->state = FlateOk); startblock(s); if (s->prevm.len) @@ -596,32 +641,40 @@ static int deflate_state(State *s) { } else return s->state; + lzend = s->lzbuf + LzSize - 2; + guard = calcguard(s); +/*fprintf(stderr,"guard:%d pos:%d\n", guard, s->pos);*/ for (;;) { - if (s->avail < MaxMatch) { + if (s->pos >= guard || s->lz >= lzend) { +/*fprintf(stderr,"guard:%d pos:%d len:%d end:%d start:%d\n", guard, s->pos, s->pos - s->startpos, s->endpos, s->startpos);*/ if (endblock(s)) return (s->state = FlateOut); if (!fillwin(s)) return (s->state = FlateIn); + guard = calcguard(s); } - head = updatechain(s); - if (head) - m = getmatch(s, head); - if (head && m.len > s->prevm.len) { +/* +if (s->lz - s->lzbuf > LzEnd - 3) +fprintf(stderr, "near lzend: %d, len:%d\n", s->lz-s->lzbuf, s->pos - s->startpos); +*/ + next = updatechain(s); + if (next) +{ + m = getmatch(s, next); +/*fprintf(stderr, "pos:%d len:%d next:%d match:%d,%d\n", s->pos, s->pos - s->startpos, next, m.len, m.dist);*/ +} + if (next && m.len > s->prevm.len) { if (s->prevm.len) recordlit(s, s->win[s->pos-1]); s->prevm = m; } else if (s->prevm.len) { recordmatch(s, s->prevm); - s->prevm.len -= 2; - do { - s->pos++; - s->avail--; - updatechain(s); - } while (--s->prevm.len); + s->skip = s->prevm.len - 2; + s->prevm.len = 0; + s->pos += s->skip; } else recordlit(s, s->win[s->pos]); s->pos++; - s->avail--; } } @@ -650,9 +703,9 @@ static State *alloc_state(void) { s->state = FlateOut; s->src = s->srcend = 0; s->dst = s->dstbegin = s->dstbuf; - s->pos = s->startpos = StartPos; + s->pos = s->startpos = s->endpos = WinSize; s->eof = 0; - s->avail = 0; + s->skip = 0; s->prevm.len = 0; return s; } diff --git a/sflate.c b/sflate.c @@ -162,7 +162,7 @@ static uint dummysum(uchar *p, int n, uint sum) { int compress_stream(FILE *in, FILE *out) { FlateStream s; int k, n; - enum {Nin = 1<<13, Nout = 1<<15}; + enum {Nin = 1<<15, Nout = 1<<15}; s.in = malloc(Nin); s.out = malloc(Nout); @@ -218,7 +218,7 @@ int decompress_stream(FILE *in, FILE *out) { FlateStream s; uchar *begin; int k, n; - enum {Nin = 1<<13, Nout = 1<<15}; + enum {Nin = 1<<15, Nout = 1<<15}; s.in = begin = malloc(Nin); s.out = malloc(Nout); @@ -349,7 +349,7 @@ int main(int argc, char *argv[]) { break; } if (verbose == 'v') - fprintf(stderr, "in:%d out:%d checksum: 0x%08x (header:%d compressed len:%d footer:%d extra input bytes:%s)\n", + fprintf(stderr, "in:%d out:%d checksum: 0x%08x (header:%d data:%d footer:%d extra input:%s)\n", nin, nout, sum, headerlen, (comp == 'c' ? nout : nin) - headerlen - footerlen - extralen, footerlen, extralen ? "yes" : "no"); if (n != FlateOk)