flate

deflate implementation
git clone git://git.suckless.org/flate
Log | Files | Refs | README

commit 1e706566a0e7dacd026a988cc2f659e5a6657919
parent 21159561f3409a46a0949d92fc131f18784271b5
Author: nsz <nszabolcs@gmail.com>
Date:   Wed,  8 Jul 2009 07:50:28 +0200

deflate_simple simplification wip
Diffstat:
deflate_simple.c | 378++++++++++++++++++++++++++++++++-----------------------------------------------
1 file changed, 155 insertions(+), 223 deletions(-)

diff --git a/deflate_simple.c b/deflate_simple.c @@ -86,91 +86,6 @@ static uchar clenorder[Nclen] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; -static short gethash(uchar *p) { - return (257 * p[0] + 263 * p[1] + 269 * p[2]) % HashSize; -} - -static Match getmatch(State *s, uchar *src, int len) { - int i, j; - int pos; - int dist; - int lastdist = 0; - Match m = {0, 0}; - - if (len >= MinMatch) { - pos = s->head[gethash(src)]; - for (i = 0; i < MaxChainStep; i++) { - dist = (WinSize + s->pos - pos) % WinSize; - if (dist <= lastdist) - break; - lastdist = dist; - for (j = 0; j < len; j++) - if (src[j] != src[j - dist]) - break; - if (j > m.len) { - m.dist = dist; - m.len = j; - } - pos = s->chain[pos]; - } - if (m.len < MinMatch) - m.len = 0; - } - return m; -} - -static void emitlit(State *s, ushort lit); -static void emitmatch(State *s, Match m); - -static void lzcompress(State *s, uchar *src, int len) { - int step; - int hash; - Match m; - Match prevm = {0, 0}; - uchar prevc = '\0'; - - while (len > 0) { - m = getmatch(s, src, len); - if (m.len > 0) - if (prevm.len > 0) { - if (m.len > prevm.len + 1) { - emitlit(s, prevc); - prevm = m; - prevc = *src; - step = 1; - } else { - emitmatch(s, prevm); - step = prevm.len - 1; - prevm.len = 0; - } - } else { - prevm = m; - prevc = *src; - step = 1; - } - else - if (prevm.len > 0) { - emitmatch(s, prevm); - step = prevm.len - 1; - prevm.len = 0; - } else { - emitlit(s, *src); - step = 1; - } - while (step > 0) { - if (len >= MinMatch) { - hash = gethash(src); - s->chain[s->pos] = s->head[hash]; - s->head[hash] = s->pos; - s->pos = (s->pos + 1) % WinSize; - } - src++; - len--; - step--; - } - } -} - static uint revcode(uint c, int n) { int i; uint r = 0; @@ -209,173 +124,127 @@ static void huffcodes(ushort *codes, const uchar *lens, int n) { codes[i] = 0; } -/* min-heap for hufflen() */ +/* min-heap for hufflens() */ -typedef struct { - int k; - int v; -} HeapItem; - -static int heapparent(int n) {return (n - 1)/2;} -static int heapleft(int n) {return 2 * n + 1;} -static int heapright(int n) {return 2 * n + 2;} - -static void heappush(HeapItem *heap, int len, int k, int v) { - int n, p; - HeapItem tmp; - - heap[len].k = k; - heap[len].v = v; - n = len; - while (n > 0) { - p = heapparent(n); - if (heap[n].k < heap[p].k) { - tmp = heap[n]; - heap[n] = heap[p]; - heap[p] = tmp; - n = p; +static int heapparent(int n) {return (n - 2)/4 * 2;} +static int heapchild(int n) {return 2 * n + 2;} + +static int heappush(int *heap, int len, int w, int n) { + int p, c, tmp; + + c = len; + heap[len++] = n; + heap[len++] = w; + while (c > 0) { + p = heapparent(c); + if (heap[c+1] < heap[p+1]) { + tmp = heap[c]; heap[c] = heap[p]; heap[p] = tmp; + tmp = heap[c+1]; heap[c+1] = heap[p+1]; heap[p+1] = tmp; + c = p; } else break; } + return len; } -static void heappop(HeapItem *heap, int len, int *k, int *v) { - int n, lc, rc, c; - HeapItem tmp; +static int heappop(int *heap, int len, int *w, int *n) { + int p, c, tmp; - *k = heap[0].k; - *v = heap[0].v; + *n = heap[0]; + *w = heap[1]; + heap[1] = heap[--len]; heap[0] = heap[--len]; - n = 0; + p = 0; for (;;) { - lc = heapleft(n); - rc = heapright(n); - if (lc >= len) + c = heapchild(p); + if (c >= len) break; - else if (rc >= len || heap[lc].k < heap[rc].k) - c = lc; - else - c = rc; - if (heap[n].k > heap[c].k) { - tmp = heap[n]; - heap[n] = heap[c]; - heap[c] = tmp; + if (c+2 < len && heap[c+3] < heap[c+1]) + c += 2; + if (heap[p+1] > heap[c+1]) { + tmp = heap[p]; heap[p] = heap[c]; heap[c] = tmp; + tmp = heap[p+1]; heap[p+1] = heap[c+1]; heap[c+1] = tmp; } else break; - n = c; + p = c; } + return len; } /* symbol frequencies -> code lengths (limited to 255) */ -static void hufflens(uchar *lens, const int *freqs, int nsym) { +static void hufflens(uchar *lens, int *freqs, int nsym, int limit) { + /* 2 <= nsym <= Nlitlen, log(nsym) <= limit <= CodeBits-1 */ int parent[2*Nlitlen-1]; - int clen[2*Nlitlen-1]; - HeapItem heap[Nlitlen]; - int n, len; + int count[CodeBits]; + int heap[2*Nlitlen]; + int n, len, top, overflow; int i, j; - int fi, fj; + int wi, wj; - if (nsym > Nlitlen) - abort(); + for (n = 0; n < limit+1; n++) + count[n] = 0; for (len = n = 0; n < nsym; n++) if (freqs[n] > 0) - heappush(heap, len++, freqs[n], n); + len = heappush(heap, len, freqs[n], n); + else + lens[n] = 0; + /* deflate: fewer than two symbols: add new */ + for (n = 0; len < 4; n++) + if (freqs[n] == 0) + len = heappush(heap, len, ++freqs[n], n); /* build code tree */ - while (len > 1) { - heappop(heap, len--, &fi, &i); - heappop(heap, len--, &fj, &j); + top = len; + for (n = nsym; len > 2; n++) { + len = heappop(heap, len, &wi, &i); + len = heappop(heap, len, &wj, &j); parent[i] = n; parent[j] = n; - heappush(heap, len++, fi + fj, n++); + len = heappush(heap, len, wi + wj, n); + /* keep an ordered list of nodes at the end */ + heap[len+1] = i; + heap[len] = j; } - /* calc code length */ - clen[--n] = 0; - while (n--) - if (n >= nsym || freqs[n]) - clen[n] = 1 + clen[parent[n]]; - else - clen[n] = 0; - /* limit code length to 255 */ - for (n = 0; n < nsym; n++) - lens[n] = clen[n] > 255 ? 255 : clen[n]; -} - -/* F_n if n > 0 */ -static int fib(int n) { - int a, b, tmp; - - for (a = 0, b = 1; n > 1; n--) { - tmp = a; - a = b; - b += tmp; - } - return b; -} - -/* symbol frequencies -> (limited) code lengths */ -static void hufflenslimit(uchar *lens, int *freqs, int nsym, int limit) { - uint minfreq, sumfreq; - int i, adjust, maxprob, count; - - /* TODO: fewer than two symbols: add new */ - if (nsym < 2) - abort(); - count = 0; - for (i = 0; i < nsym; i++) - if (freqs[i] > 0) - count++; - if (count < 2) - for (i = 0; count > 0 && i < nsym; i++) - if (freqs[i] == 0) { - freqs[i] = 1; - count--; /* what? */ + /* calc code lengths (deflate: with limit) */ + overflow = 0; + parent[--n] = 0; + for (i = 2; i < top; i++) { + n = heap[i]; + if (n >= nsym) { + /* overwrite parent index with length */ + parent[n] = parent[parent[n]] + 1; + if (parent[n] > limit) + overflow++; + } else { + lens[n] = parent[parent[n]] + 1; + if (lens[n] > limit) { + lens[n] = limit; + overflow++; } - /* try building the codes without limit */ - hufflens(lens, freqs, nsym); - for (i = 0; i < nsym; i++) - if (lens[i] > limit) - break; - if (i == nsym) + count[lens[n]]++; + } + } + if (overflow == 0) return; - /* - * code length can be N bits or more only if the probability of a - * symbol is less than the reciprocal of the (N+2)th fibonacci number - * - * if hufflens() returned a long code, we add a constant to all the - * symbol frequencies and call hufflens() again - */ - maxprob = fib(limit + 3); - sumfreq = 0; - minfreq = (uint)-1; - for (i = 0; i < nsym; i++) - if (freqs[i]) { - if (minfreq > freqs[i]) - minfreq = freqs[i]; - sumfreq += freqs[i]; + /* modify code tree to fix overflow (from zlib) */ + while (overflow > 0) { + for (n = limit-1; count[n] == 0; n--); + count[n]--; + count[n+1] += 2; + count[limit]--; + overflow -= 2; + } + for (len = limit; len > 0; len--) + for (i = count[len]; i > 0;) { + n = heap[--top]; + if (n < nsym) { + lens[n] = len; + i--; + } } - if (minfreq > sumfreq / maxprob) - abort(); - /* - * adjust is the smallest integer such that - * (sumfreq + count * adjust)/(minfreq + adjust) < maxprob - */ - i = maxprob - count; - adjust = (sumfreq - minfreq * maxprob + i - 1) / i; - for (i = 0; i < nsym; i++) - if (freqs[i]) - freqs[i] += adjust; - /* rebuild the hufftree */ - hufflens(lens, freqs, nsym); - for (i = 0; i < nsym; i++) - if (lens[i] > limit) - abort(); } /* adding < 16 bits to dst */ static void outbits(State *s, uint bits, int nbits) { -/* -fprintf(stderr, "b:%3d n:%3d\n", bits, nbits); -*/ s->bits |= bits << s->nbits; s->nbits += nbits; while (s->nbits >= 8) { @@ -484,8 +353,8 @@ static void block(State *s, int dynlen, int fixlen) { else if (sym.type == SymDist) dfreq[sym.n]++; } - hufflenslimit(llen, lfreq, Nlitlen, CodeBits-1); - hufflenslimit(dlen, dfreq, Ndist, CodeBits-1); + hufflens(llen, lfreq, Nlitlen, CodeBits-1); + hufflens(dlen, dfreq, Ndist, CodeBits-1); huffcodes(lcode, llen, Nlitlen); huffcodes(dcode, dlen, Ndist); for (nlit = Nlitlen; nlit > Nlit && llen[nlit-1] == 0; nlit--); @@ -498,7 +367,7 @@ static void block(State *s, int dynlen, int fixlen) { if (sym.type == SymClen) cfreq[sym.n]++; } - hufflenslimit(clen, cfreq, Nclen, 7); + hufflens(clen, cfreq, Nclen, 7); huffcodes(ccode, clen, Nclen); for (nclen = Nclen; nclen > 4 && clen[clenorder[nclen-1]] == 0; nclen--); @@ -743,6 +612,65 @@ count++; } } +static short gethash(uchar *p) { + return (257 * p[0] + 263 * p[1] + 269 * p[2]) % HashSize; +} + +static Match getmatch(State *s, uchar *src, int len) { + int i, j; + int pos; + int dist; + int lastdist = 0; + Match m = {0, 0}; + + if (len < MinMatch) + return m; + pos = s->head[gethash(src)]; + for (i = 0; i < MaxChainStep; i++) { + dist = (WinSize + s->pos - pos) % WinSize; + if (dist <= lastdist) + break; + lastdist = dist; + for (j = 0; j < len; j++) + if (src[j] != src[j - dist]) + break; + if (j > m.len) { + m.dist = dist; + m.len = j; + } + pos = s->chain[pos]; + } + return m; +} + +static void lzcompress(State *s, uchar *src, int len) { + int step; + int hash; + Match m; + + while (len > 0) { + m = getmatch(s, src, len); + if (m.len >= MinMatch) { + emitmatch(s, m); + step = m.len; + } else { + emitlit(s, *src); + step = 1; + } + while (step > 0) { + if (len >= MinMatch) { + hash = gethash(src); + s->chain[s->pos] = s->head[hash]; + s->head[hash] = s->pos; + s->pos = (s->pos + 1) % WinSize; + } + src++; + len--; + step--; + } + } +} + static void lzinit(State *s) { int i; @@ -796,11 +724,15 @@ int main() { uchar *dst; int len; + src = malloc(Siz); dst = malloc(Siz); len = fread(src, 1, Siz, stdin); len = deflate(src, len, dst); fwrite(dst, 1, len, stdout); fprintf(stderr, "compressed: %d\n", len); + free(dst); + free(src); + return 0; }