flate

deflate implementation
git clone git://git.suckless.org/flate
Log | Files | Refs | README

commit 2dd25d387a3157e6e09d56db17ddc2a27b6d757e
parent cb1e87d4b17b038d4b65e0f9bc6ec3f4353d1394
Author: nsz <nszabolcs@gmail.com>
Date:   Sun, 26 Jul 2009 18:48:13 +0200

deflate implementation (forgot to add it earlier..)
Diffstat:
deflate.c | 676+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 676 insertions(+), 0 deletions(-)

diff --git a/deflate.c b/deflate.c @@ -0,0 +1,676 @@ +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +/* +TODO: + hufflen: parent+heap only + match: check backwards + block end guess with lcount, dcount + bisect len/distbase -> lookup table + xcode can be calculated into array xfreq + record good prev match + buffer window: ushort pos: let it overflow + rolling hash + save prevm instead roll back on block boundary + read until avail > MaxMatch + if read size == 0 then no more reads + +spec case tests: + empty block + huff overflow + all zero (rle) + dist = 32K +*/ + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; + +enum { + WinSize = 1 << 15, + RbufSize = 1 << 14, + RunlenFlag = 1 << 15, + MinMatch = 3, + MaxMatch = 258, + BigDist = 1 << 12, + MaxDist = WinSize - MaxMatch, + MaxChainLen = 256, + HashBits = 13, + HashSize = 1 << HashBits, + + CodeBits = 16, /* max number of bits in a code + 1 */ + EOB = 256, /* end of block symbol */ + Nlit = 256, /* number of lit codes */ + Nlen = 29, /* number of len codes */ + Nlitlen = Nlit + Nlen + 3, /* litlen codes + block end + 2 unused */ + Ndist = 30, /* number of distance codes */ + Nclen = 19 /* number of code length codes */ +}; + +typedef struct { + ushort dist; + ushort len; +} Match; + +typedef struct { + ushort n; + ushort bits; +} Runlen; + +/* TODO: alloc buffers on heap */ +typedef struct { + int pos; + int avail; + int blockstart; + int lastblock; + uchar *src; /* input (block start) */ + uchar *srcend; + uchar *dst; /* compressed output */ + uchar *dstend; + uint bits; + int nbits; + int lfreq[Nlitlen]; + int dfreq[Ndist]; + ushort head[HashSize]; /* position of hash chain heads */ + ushort chain[WinSize]; /* hash chains next */ + uchar win[2*WinSize]; + Runlen rbuf[RbufSize]; /* literal run length, match len, match dist */ + Runlen *rend; /* current pos in rbuf */ + int run; /* literal count */ +} State; + +static uchar fixllen[Nlitlen]; /* fixed lit/len huffman code tree */ +static ushort fixlcode[Nlitlen]; +static uchar fixdlen[Ndist]; /* fixed distance huffman code tree */ +static ushort fixdcode[Ndist]; + +/* base offset and extra bits tables */ +static uchar lenbits[Nlen] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 +}; +static ushort lenbase[Nlen] = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258 +}; +static uchar distbits[Ndist] = { + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 +}; +static ushort distbase[Ndist] = { + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 +}; + +/* ordering of code lengths */ +static uchar clenorder[Nclen] = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 +}; + + +static uint revcode(uint c, int n) { + int i; + uint r = 0; + + for (i = 0; i < n; i++) { + r = (r << 1) | (c & 1); + c >>= 1; + } + return r; +} + +/* build huffman code tree from code lengths */ +static void huffcodes(ushort *codes, const uchar *lens, int n) { + int c[CodeBits]; + int i, code, count; + + /* count code lengths and calc first code for each length */ + for (i = 0; i < CodeBits; i++) + c[i] = 0; + for (i = 0; i < n; i++) + c[lens[i]]++; + for (code = 0, i = 1; i < CodeBits; i++) { + count = c[i]; + c[i] = code; + code += count; + if (code > (1 << i)) + abort(); /* over-subscribed */ + code <<= 1; + } + if (code < (1 << i)) + /* incomplete */; + + for (i = 0; i < n; i++) + if (lens[i]) + codes[i] = revcode(c[lens[i]]++, lens[i]); + else + codes[i] = 0; +} + +static int heapparent(int n) {return (n - 2)/4 * 2;} +static int heapchild(int n) {return 2 * n + 2;} + +static int heappush(int *heap, int len, int w, int n) { + int p, c, tmp; + + c = len; + heap[len++] = n; + heap[len++] = w; + while (c > 0) { + p = heapparent(c); + if (heap[c+1] < heap[p+1]) { + tmp = heap[c]; heap[c] = heap[p]; heap[p] = tmp; + tmp = heap[c+1]; heap[c+1] = heap[p+1]; heap[p+1] = tmp; + c = p; + } else + break; + } + return len; +} + +static int heappop(int *heap, int len, int *w, int *n) { + int p, c, tmp; + + *n = heap[0]; + *w = heap[1]; + heap[1] = heap[--len]; + heap[0] = heap[--len]; + p = 0; + for (;;) { + c = heapchild(p); + if (c >= len) + break; + if (c+2 < len && heap[c+3] < heap[c+1]) + c += 2; + if (heap[p+1] > heap[c+1]) { + tmp = heap[p]; heap[p] = heap[c]; heap[c] = tmp; + tmp = heap[p+1]; heap[p+1] = heap[c+1]; heap[c+1] = tmp; + } else + break; + p = c; + } + return len; +} + +/* symbol frequencies -> code lengths (limited to 255) */ +static void hufflens(uchar *lens, int *freqs, int nsym, int limit) { + /* 2 <= nsym <= Nlitlen, log(nsym) <= limit <= CodeBits-1 */ + int parent[2*Nlitlen-1]; + int count[CodeBits]; + int heap[2*Nlitlen]; + int n, len, top, overflow; + int i, j; + int wi, wj; + + for (n = 0; n < limit+1; n++) + count[n] = 0; + for (len = n = 0; n < nsym; n++) + if (freqs[n] > 0) + len = heappush(heap, len, freqs[n], n); + else + lens[n] = 0; + /* deflate: fewer than two symbols: add new */ + for (n = 0; len < 4; n++) + if (freqs[n] == 0) + len = heappush(heap, len, ++freqs[n], n); + /* build code tree */ + top = len; + for (n = nsym; len > 2; n++) { + len = heappop(heap, len, &wi, &i); + len = heappop(heap, len, &wj, &j); + parent[i] = n; + parent[j] = n; + len = heappush(heap, len, wi + wj, n); + /* keep an ordered list of nodes at the end */ + heap[len+1] = i; + heap[len] = j; + } + /* calc code lengths (deflate: with limit) */ + overflow = 0; + parent[--n] = 0; + for (i = 2; i < top; i++) { + n = heap[i]; + if (n >= nsym) { + /* overwrite parent index with length */ + parent[n] = parent[parent[n]] + 1; + if (parent[n] > limit) + overflow++; + } else { + lens[n] = parent[parent[n]] + 1; + if (lens[n] > limit) { + lens[n] = limit; + overflow++; + } + count[lens[n]]++; + } + } + if (overflow == 0) + return; + /* modify code tree to fix overflow (from zlib) */ + while (overflow > 0) { + for (n = limit-1; count[n] == 0; n--); + count[n]--; + count[n+1] += 2; + count[limit]--; + overflow -= 2; + } + for (len = limit; len > 0; len--) + for (i = count[len]; i > 0;) { + n = heap[--top]; + if (n < nsym) { + lens[n] = len; + i--; + } + } +} + +/* output n (<= 16) bits */ +static void putbits(State *s, uint bits, int n) { + s->bits |= bits << s->nbits; + s->nbits += n; + while (s->nbits >= 8) { + *s->dst++ = s->bits & 0xff; + s->bits >>= 8; + s->nbits -= 8; + } +} + +static int clencodes(int *codes, int *extra, uchar *llen, int nlit, uchar *dlen, int ndist) { + int i, c, r, rr; + int n = 0; + + for (i = 0; i < nlit; i++) + codes[i] = llen[i]; + for (i = 0; i < ndist; i++) + codes[nlit + i] = dlen[i]; + for (i = 0; i < nlit + ndist;) { + /* run length encoding */ + c = codes[i]; + for (r = 1; i + r < nlit + ndist && codes[i + r] == c; r++); + i += r; + if (c == 0) { + while (r >= 11) { + rr = r > 138 ? 138 : r; + codes[n] = 18; + extra[n++] = rr - 11; + r -= rr; + } + if (r >= 3) { + codes[n] = 17; + extra[n++] = r - 3; + r = 0; + } + } + while (r--) { + codes[n++] = c; + while (r >= 3) { + rr = r > 6 ? 6 : r; + codes[n] = 16; + extra[n++] = rr - 3; + r -= rr; + } + } + } + return n; +} + +static void putblock(State *s, ushort *lcode, uchar *llen, ushort *dcode, uchar *dlen) { + int n; + Runlen *r; + uchar *pc; + + for (r = s->rbuf, pc = s->win + s->blockstart; r != s->rend; r++) + if (r->bits & RunlenFlag) + for (n = r->n; n > 0; n--, pc++) + putbits(s, lcode[*pc], llen[*pc]); + else { + pc += lenbase[r->n] + r->bits; + putbits(s, lcode[Nlit + r->n + 1], llen[Nlit + r->n + 1]); + putbits(s, r->bits, lenbits[r->n]); + r++; + putbits(s, dcode[r->n], dlen[r->n]); + putbits(s, r->bits, distbits[r->n]); + } + putbits(s, lcode[EOB], llen[EOB]); +} + +static void deflate_block(State *s, int blocklen) { + int cfreq[Nclen]; + uchar llen[Nlitlen], dlen[Ndist], clen[Nclen]; + ushort lcode[Nlitlen], dcode[Ndist], ccode[Nclen]; + int codes[Nlitlen + Ndist], extra[Nlitlen + Ndist]; + int i, c, n, ncodes; + int nlit, ndist, nclen; + int dynsize, fixsize, uncsize; + Runlen *r; + uchar *pc; +int dyntree; + + /* calc dynamic codes */ + hufflens(llen, s->lfreq, Nlitlen, CodeBits-1); + hufflens(dlen, s->dfreq, Ndist, CodeBits-1); + huffcodes(lcode, llen, Nlitlen); + huffcodes(dcode, dlen, Ndist); + for (nlit = Nlitlen; nlit > Nlit && llen[nlit-1] == 0; nlit--); + for (ndist = Ndist; ndist > 1 && dlen[ndist-1] == 0; ndist--); + ncodes = clencodes(codes, extra, llen, nlit, dlen, ndist); + memset(cfreq, 0, sizeof(cfreq)); + for (i = 0; i < ncodes; i++) + cfreq[codes[i]]++; + hufflens(clen, cfreq, Nclen, 7); + huffcodes(ccode, clen, Nclen); + for (nclen = Nclen; nclen > 4 && clen[clenorder[nclen-1]] == 0; nclen--); + + /* calc size */ + uncsize = 3 + 16 + 8 * blocklen + (16 - 3 - s->nbits) % 8; /* byte aligned */ + fixsize = 3; + dynsize = 3 + 5 + 5 + 4 + 3 * nclen; + for (i = 0; i < ncodes; i++) { + c = codes[i]; + dynsize += clen[c]; + if (c == 16) + dynsize += 2; + if (c == 17) + dynsize += 3; + if (c == 18) + dynsize += 7; + } +dyntree = dynsize - 3; + for (r = s->rbuf, pc = s->win + s->blockstart; r != s->rend; r++) + if (r->bits & RunlenFlag) + for (n = r->n; n > 0; n--, pc++) { + fixsize += fixllen[*pc]; + dynsize += llen[*pc]; + } + else { + pc += lenbase[r->n] + r->bits; + fixsize += fixllen[Nlit + r->n + 1]; + dynsize += llen[Nlit + r->n + 1]; + fixsize += lenbits[r->n]; + dynsize += lenbits[r->n]; + r++; + fixsize += fixdlen[r->n]; + dynsize += dlen[r->n]; + fixsize += distbits[r->n]; + dynsize += distbits[r->n]; + } + fixsize += fixllen[EOB]; + dynsize += llen[EOB]; + + /* emit block */ + putbits(s, s->lastblock, 1); + if (dynsize < fixsize && dynsize < uncsize) { + /* dynamic code */ + putbits(s, 2, 2); + putbits(s, nlit - 257, 5); + putbits(s, ndist - 1, 5); + putbits(s, nclen - 4, 4); + for (i = 0; i < nclen; i++) + putbits(s, clen[clenorder[i]], 3); + for (i = 0; i < ncodes; i++) { + c = codes[i]; + putbits(s, ccode[c], clen[c]); + if (c == 16) + putbits(s, extra[i], 2); + if (c == 17) + putbits(s, extra[i], 3); + if (c == 18) + putbits(s, extra[i], 7); + } + putblock(s, lcode, llen, dcode, dlen); + } else if (fixsize < uncsize) { + /* fixed code */ + putbits(s, 1, 2); + putblock(s, fixlcode, fixllen, fixdcode, fixdlen); + } else { + /* uncompressed */ + putbits(s, 0, 2); + putbits(s, 0, 7); /* align to byte boundary */ + s->nbits = 0; + putbits(s, blocklen, 16); + putbits(s, ~blocklen & 0xffff, 16); + memcpy(s->dst, s->win + s->blockstart, blocklen); + s->dst += blocklen; + } +fprintf(stderr, "blocklen: %d lzlen: %d dynsize: %d (tree: %d, rate: %.3f) fixsize: %d (rate: %.3f) uncsize: %d (rate: %.3f)\n", blocklen, s->rend - s->rbuf, + dynsize, dyntree, dynsize/(float)blocklen, fixsize, fixsize/(float)blocklen, uncsize, uncsize/(float)blocklen); +} + +static int bisect(ushort *base, int len, int n) { + int lo = 0; + int hi = len; + int k; + + while (lo < hi) { + k = (lo + hi) / 2; + if (n < base[k]) + hi = k; + else + lo = k + 1; + } + return lo - 1; +} + +static void flushlit(State *s) { + if (s->run) { + s->rend->bits = RunlenFlag; + s->rend->n = s->run; + s->rend++; + s->run = 0; + } +} + +static void recordmatch(State *s, Match m) { + int n; + + flushlit(s); +/*fprintf(stderr, "len:%u dist:%u\n", m.len, m.dist);*/ + n = bisect(lenbase, Nlen, m.len); + s->rend->n = n; + s->rend->bits = m.len - lenbase[n]; + s->rend++; + s->lfreq[Nlit + n + 1]++; + n = bisect(distbase, Ndist, m.dist); + s->rend->n = n; + s->rend->bits = m.dist - distbase[n]; + s->rend++; + s->dfreq[n]++; +} + +static void recordlit(State *s, int c) { +/*fprintf(stderr, "lit:%c\n", c);*/ + s->run++; + s->lfreq[c]++; +} + +/* multiplicative hash */ +static uint gethash(uchar *p) { + return (0x9e3779b1 * ((p[0]<<16) + (p[1]<<8) + p[2]) >> (32 - HashBits)) % HashSize; +} + +static ushort addtochain(State *s, ushort pos) { + ushort i, h; + + if (s->avail < MinMatch) /* TODO */ + return 0; + h = gethash(s->win + pos); /* TODO: rolling hash */ + i = s->head[h]; + s->head[h] = pos; + s->chain[pos % WinSize] = i; + return i; +} + +static Match getmatch(State *s, ushort pos, ushort mpos) { + Match m = {0, MinMatch-1}; + uchar *q; + uchar *p = s->win + pos; + uchar *end = p + MaxMatch; + ushort len; + ushort maxlen = s->avail < MaxMatch ? s->avail : MaxMatch; + ushort limit = pos - MaxDist; + ushort chainlen = MaxChainLen; +/* +uint h; +h = gethash(src); +if (h != gethash(src - dist)) +fprintf(stderr, "i %d, dist %d delta %d curpos %d dsrc %d h: %d head %d\n", i, dist, c.next, (ushort)(long)src, src - s->src, h, s->head[h]); +*/ + do { + q = s->win + mpos; + /* at least m.len+1 long */ + if (q[m.len] != p[m.len] || q[0] != p[0]) + continue; + /* loop unroll: MaxMatch % 10 == 8 */ + while (*++q == *++p && *++q == *++p + && *++q == *++p && *++q == *++p + && *++q == *++p && *++q == *++p + && *++q == *++p && ++p != end && *++q == *p + && *++q == *++p && *++q == *++p); + len = MaxMatch - (end - p); + p -= len; + if (len > m.len) { + m.dist = pos - mpos; + m.len = len; + if (len == maxlen) + return m; + } + } while ((mpos = s->chain[mpos % WinSize]) > limit && --chainlen); + if (m.len < MinMatch || (m.len == MinMatch && m.dist > BigDist)) + m.len = 0; + return m; +} + +static void deflate_init(State *s) { + int i; + + memset(s->chain, 0, sizeof(s->chain)); + memset(s->head, 0, sizeof(s->head)); + s->bits = s->nbits = s->lastblock = 0; + for (i = 0; i < 144; i++) + fixllen[i] = 8; + for (; i < 256; i++) + fixllen[i] = 9; + for (; i < 280; i++) + fixllen[i] = 7; + for (; i < Nlitlen; i++) + fixllen[i] = 8; + for (i = 0; i < Ndist; i++) + fixdlen[i] = 5; + huffcodes(fixlcode, fixllen, Nlitlen); + huffcodes(fixdcode, fixdlen, Ndist); +} + +static void newblock(State *s) { + s->rend = s->rbuf; + s->run = 0; + memset(s->lfreq, 0, sizeof(s->lfreq)); + memset(s->dfreq, 0, sizeof(s->dfreq)); + s->lfreq[EOB]++; +} + +static void fillwin(State *s) { + int n, k; + + if (s->pos > WinSize + MaxDist) { + /* shift win */ + memcpy(s->win, s->win + WinSize, WinSize); + for (n = HashSize; n--;) + s->head[n] = s->head[n] > WinSize ? s->head[n] - WinSize : 0; + for (n = WinSize; n--;) + s->chain[n] = s->chain[n] > WinSize ? s->chain[n] - WinSize : 0; + s->blockstart -= WinSize; + s->pos -= WinSize; + } + k = 2*WinSize - s->pos - s->avail; + n = s->srcend - s->src; + if (n > k) + n = k; + if (n == 0) + return; + memcpy(s->win + s->pos + s->avail, s->src, n); + s->src += n; + s->avail += n; +} + +/* TODO: pos overflow */ +int deflate(uchar *src, int len, uchar *dst) { + State s; + Match m; + Match prevm = {0, 0}; + int pos; + int head; + + deflate_init(&s); + newblock(&s); + s.src = src; + s.srcend = src + len; + s.dst = dst; + s.dstend = dst + len + len/4 + 10; /* TODO */ + pos = s.pos = s.blockstart = WinSize; + s.avail = 0; + do { + if (s.avail < MaxMatch || s.rend - s.rbuf >= RbufSize - 2) { +/*fprintf(stderr, "pos:%d bstart:%d avail:%d rsiz:%d\n", pos, s.blockstart, s.avail, s.rend - s.rbuf);*/ + if (pos - s.blockstart > MaxDist || s.rend - s.rbuf >= RbufSize - 2) { + flushlit(&s); + if (prevm.len) { + pos--; + s.avail++; + prevm.len = 0; + } + deflate_block(&s, pos - s.blockstart); + newblock(&s); + s.blockstart = pos; + } + s.pos = pos; + fillwin(&s); + pos = s.pos; + } + head = addtochain(&s, pos); + if (head >= pos || pos - head >= MaxDist) + head = 0; + if (head) + m = getmatch(&s, pos, head); + if (head && m.len > prevm.len) { + if (prevm.len) + recordlit(&s, s.win[pos-1]); + prevm = m; + } else if (prevm.len) { + recordmatch(&s, prevm); + prevm.len -= 2; + do { + pos++; + s.avail--; + addtochain(&s, pos); + } while (--prevm.len); + } else + recordlit(&s, s.win[pos]); + pos++; + s.avail--; + + } while (s.avail); + flushlit(&s); + s.lastblock = 1; + deflate_block(&s, pos - s.blockstart); + putbits(&s, 0, 7); + return s.dst - dst; +} + + +#include <stdlib.h> +#include <stdio.h> + +enum {Siz = 1<<24}; + +int main() { + uchar *src; + uchar *dst; + int len; + + src = malloc(Siz); + dst = malloc(Siz); + len = fread(src, 1, Siz, stdin); + len = deflate(src, len, dst); + fwrite(dst, 1, len, stdout); + fprintf(stderr, "compressed: %d\n", len); + free(dst); + free(src); + return 0; +}