commit 2dd25d387a3157e6e09d56db17ddc2a27b6d757e
parent cb1e87d4b17b038d4b65e0f9bc6ec3f4353d1394
Author: nsz <nszabolcs@gmail.com>
Date: Sun, 26 Jul 2009 18:48:13 +0200
deflate implementation (forgot to add it earlier..)
Diffstat:
deflate.c | | | 676 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
1 file changed, 676 insertions(+), 0 deletions(-)
diff --git a/deflate.c b/deflate.c
@@ -0,0 +1,676 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+/*
+TODO:
+ hufflen: parent+heap only
+ match: check backwards
+ block end guess with lcount, dcount
+ bisect len/distbase -> lookup table
+ xcode can be calculated into array xfreq
+ record good prev match
+ buffer window: ushort pos: let it overflow
+ rolling hash
+ save prevm instead roll back on block boundary
+ read until avail > MaxMatch
+ if read size == 0 then no more reads
+
+spec case tests:
+ empty block
+ huff overflow
+ all zero (rle)
+ dist = 32K
+*/
+
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+
+enum {
+ WinSize = 1 << 15,
+ RbufSize = 1 << 14,
+ RunlenFlag = 1 << 15,
+ MinMatch = 3,
+ MaxMatch = 258,
+ BigDist = 1 << 12,
+ MaxDist = WinSize - MaxMatch,
+ MaxChainLen = 256,
+ HashBits = 13,
+ HashSize = 1 << HashBits,
+
+ CodeBits = 16, /* max number of bits in a code + 1 */
+ EOB = 256, /* end of block symbol */
+ Nlit = 256, /* number of lit codes */
+ Nlen = 29, /* number of len codes */
+ Nlitlen = Nlit + Nlen + 3, /* litlen codes + block end + 2 unused */
+ Ndist = 30, /* number of distance codes */
+ Nclen = 19 /* number of code length codes */
+};
+
+typedef struct {
+ ushort dist;
+ ushort len;
+} Match;
+
+typedef struct {
+ ushort n;
+ ushort bits;
+} Runlen;
+
+/* TODO: alloc buffers on heap */
+typedef struct {
+ int pos;
+ int avail;
+ int blockstart;
+ int lastblock;
+ uchar *src; /* input (block start) */
+ uchar *srcend;
+ uchar *dst; /* compressed output */
+ uchar *dstend;
+ uint bits;
+ int nbits;
+ int lfreq[Nlitlen];
+ int dfreq[Ndist];
+ ushort head[HashSize]; /* position of hash chain heads */
+ ushort chain[WinSize]; /* hash chains next */
+ uchar win[2*WinSize];
+ Runlen rbuf[RbufSize]; /* literal run length, match len, match dist */
+ Runlen *rend; /* current pos in rbuf */
+ int run; /* literal count */
+} State;
+
+static uchar fixllen[Nlitlen]; /* fixed lit/len huffman code tree */
+static ushort fixlcode[Nlitlen];
+static uchar fixdlen[Ndist]; /* fixed distance huffman code tree */
+static ushort fixdcode[Ndist];
+
+/* base offset and extra bits tables */
+static uchar lenbits[Nlen] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
+};
+static ushort lenbase[Nlen] = {
+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258
+};
+static uchar distbits[Ndist] = {
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13
+};
+static ushort distbase[Ndist] = {
+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
+};
+
+/* ordering of code lengths */
+static uchar clenorder[Nclen] = {
+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
+};
+
+
+static uint revcode(uint c, int n) {
+ int i;
+ uint r = 0;
+
+ for (i = 0; i < n; i++) {
+ r = (r << 1) | (c & 1);
+ c >>= 1;
+ }
+ return r;
+}
+
+/* build huffman code tree from code lengths */
+static void huffcodes(ushort *codes, const uchar *lens, int n) {
+ int c[CodeBits];
+ int i, code, count;
+
+ /* count code lengths and calc first code for each length */
+ for (i = 0; i < CodeBits; i++)
+ c[i] = 0;
+ for (i = 0; i < n; i++)
+ c[lens[i]]++;
+ for (code = 0, i = 1; i < CodeBits; i++) {
+ count = c[i];
+ c[i] = code;
+ code += count;
+ if (code > (1 << i))
+ abort(); /* over-subscribed */
+ code <<= 1;
+ }
+ if (code < (1 << i))
+ /* incomplete */;
+
+ for (i = 0; i < n; i++)
+ if (lens[i])
+ codes[i] = revcode(c[lens[i]]++, lens[i]);
+ else
+ codes[i] = 0;
+}
+
+static int heapparent(int n) {return (n - 2)/4 * 2;}
+static int heapchild(int n) {return 2 * n + 2;}
+
+static int heappush(int *heap, int len, int w, int n) {
+ int p, c, tmp;
+
+ c = len;
+ heap[len++] = n;
+ heap[len++] = w;
+ while (c > 0) {
+ p = heapparent(c);
+ if (heap[c+1] < heap[p+1]) {
+ tmp = heap[c]; heap[c] = heap[p]; heap[p] = tmp;
+ tmp = heap[c+1]; heap[c+1] = heap[p+1]; heap[p+1] = tmp;
+ c = p;
+ } else
+ break;
+ }
+ return len;
+}
+
+static int heappop(int *heap, int len, int *w, int *n) {
+ int p, c, tmp;
+
+ *n = heap[0];
+ *w = heap[1];
+ heap[1] = heap[--len];
+ heap[0] = heap[--len];
+ p = 0;
+ for (;;) {
+ c = heapchild(p);
+ if (c >= len)
+ break;
+ if (c+2 < len && heap[c+3] < heap[c+1])
+ c += 2;
+ if (heap[p+1] > heap[c+1]) {
+ tmp = heap[p]; heap[p] = heap[c]; heap[c] = tmp;
+ tmp = heap[p+1]; heap[p+1] = heap[c+1]; heap[c+1] = tmp;
+ } else
+ break;
+ p = c;
+ }
+ return len;
+}
+
+/* symbol frequencies -> code lengths (limited to 255) */
+static void hufflens(uchar *lens, int *freqs, int nsym, int limit) {
+ /* 2 <= nsym <= Nlitlen, log(nsym) <= limit <= CodeBits-1 */
+ int parent[2*Nlitlen-1];
+ int count[CodeBits];
+ int heap[2*Nlitlen];
+ int n, len, top, overflow;
+ int i, j;
+ int wi, wj;
+
+ for (n = 0; n < limit+1; n++)
+ count[n] = 0;
+ for (len = n = 0; n < nsym; n++)
+ if (freqs[n] > 0)
+ len = heappush(heap, len, freqs[n], n);
+ else
+ lens[n] = 0;
+ /* deflate: fewer than two symbols: add new */
+ for (n = 0; len < 4; n++)
+ if (freqs[n] == 0)
+ len = heappush(heap, len, ++freqs[n], n);
+ /* build code tree */
+ top = len;
+ for (n = nsym; len > 2; n++) {
+ len = heappop(heap, len, &wi, &i);
+ len = heappop(heap, len, &wj, &j);
+ parent[i] = n;
+ parent[j] = n;
+ len = heappush(heap, len, wi + wj, n);
+ /* keep an ordered list of nodes at the end */
+ heap[len+1] = i;
+ heap[len] = j;
+ }
+ /* calc code lengths (deflate: with limit) */
+ overflow = 0;
+ parent[--n] = 0;
+ for (i = 2; i < top; i++) {
+ n = heap[i];
+ if (n >= nsym) {
+ /* overwrite parent index with length */
+ parent[n] = parent[parent[n]] + 1;
+ if (parent[n] > limit)
+ overflow++;
+ } else {
+ lens[n] = parent[parent[n]] + 1;
+ if (lens[n] > limit) {
+ lens[n] = limit;
+ overflow++;
+ }
+ count[lens[n]]++;
+ }
+ }
+ if (overflow == 0)
+ return;
+ /* modify code tree to fix overflow (from zlib) */
+ while (overflow > 0) {
+ for (n = limit-1; count[n] == 0; n--);
+ count[n]--;
+ count[n+1] += 2;
+ count[limit]--;
+ overflow -= 2;
+ }
+ for (len = limit; len > 0; len--)
+ for (i = count[len]; i > 0;) {
+ n = heap[--top];
+ if (n < nsym) {
+ lens[n] = len;
+ i--;
+ }
+ }
+}
+
+/* output n (<= 16) bits */
+static void putbits(State *s, uint bits, int n) {
+ s->bits |= bits << s->nbits;
+ s->nbits += n;
+ while (s->nbits >= 8) {
+ *s->dst++ = s->bits & 0xff;
+ s->bits >>= 8;
+ s->nbits -= 8;
+ }
+}
+
+static int clencodes(int *codes, int *extra, uchar *llen, int nlit, uchar *dlen, int ndist) {
+ int i, c, r, rr;
+ int n = 0;
+
+ for (i = 0; i < nlit; i++)
+ codes[i] = llen[i];
+ for (i = 0; i < ndist; i++)
+ codes[nlit + i] = dlen[i];
+ for (i = 0; i < nlit + ndist;) {
+ /* run length encoding */
+ c = codes[i];
+ for (r = 1; i + r < nlit + ndist && codes[i + r] == c; r++);
+ i += r;
+ if (c == 0) {
+ while (r >= 11) {
+ rr = r > 138 ? 138 : r;
+ codes[n] = 18;
+ extra[n++] = rr - 11;
+ r -= rr;
+ }
+ if (r >= 3) {
+ codes[n] = 17;
+ extra[n++] = r - 3;
+ r = 0;
+ }
+ }
+ while (r--) {
+ codes[n++] = c;
+ while (r >= 3) {
+ rr = r > 6 ? 6 : r;
+ codes[n] = 16;
+ extra[n++] = rr - 3;
+ r -= rr;
+ }
+ }
+ }
+ return n;
+}
+
+static void putblock(State *s, ushort *lcode, uchar *llen, ushort *dcode, uchar *dlen) {
+ int n;
+ Runlen *r;
+ uchar *pc;
+
+ for (r = s->rbuf, pc = s->win + s->blockstart; r != s->rend; r++)
+ if (r->bits & RunlenFlag)
+ for (n = r->n; n > 0; n--, pc++)
+ putbits(s, lcode[*pc], llen[*pc]);
+ else {
+ pc += lenbase[r->n] + r->bits;
+ putbits(s, lcode[Nlit + r->n + 1], llen[Nlit + r->n + 1]);
+ putbits(s, r->bits, lenbits[r->n]);
+ r++;
+ putbits(s, dcode[r->n], dlen[r->n]);
+ putbits(s, r->bits, distbits[r->n]);
+ }
+ putbits(s, lcode[EOB], llen[EOB]);
+}
+
+static void deflate_block(State *s, int blocklen) {
+ int cfreq[Nclen];
+ uchar llen[Nlitlen], dlen[Ndist], clen[Nclen];
+ ushort lcode[Nlitlen], dcode[Ndist], ccode[Nclen];
+ int codes[Nlitlen + Ndist], extra[Nlitlen + Ndist];
+ int i, c, n, ncodes;
+ int nlit, ndist, nclen;
+ int dynsize, fixsize, uncsize;
+ Runlen *r;
+ uchar *pc;
+int dyntree;
+
+ /* calc dynamic codes */
+ hufflens(llen, s->lfreq, Nlitlen, CodeBits-1);
+ hufflens(dlen, s->dfreq, Ndist, CodeBits-1);
+ huffcodes(lcode, llen, Nlitlen);
+ huffcodes(dcode, dlen, Ndist);
+ for (nlit = Nlitlen; nlit > Nlit && llen[nlit-1] == 0; nlit--);
+ for (ndist = Ndist; ndist > 1 && dlen[ndist-1] == 0; ndist--);
+ ncodes = clencodes(codes, extra, llen, nlit, dlen, ndist);
+ memset(cfreq, 0, sizeof(cfreq));
+ for (i = 0; i < ncodes; i++)
+ cfreq[codes[i]]++;
+ hufflens(clen, cfreq, Nclen, 7);
+ huffcodes(ccode, clen, Nclen);
+ for (nclen = Nclen; nclen > 4 && clen[clenorder[nclen-1]] == 0; nclen--);
+
+ /* calc size */
+ uncsize = 3 + 16 + 8 * blocklen + (16 - 3 - s->nbits) % 8; /* byte aligned */
+ fixsize = 3;
+ dynsize = 3 + 5 + 5 + 4 + 3 * nclen;
+ for (i = 0; i < ncodes; i++) {
+ c = codes[i];
+ dynsize += clen[c];
+ if (c == 16)
+ dynsize += 2;
+ if (c == 17)
+ dynsize += 3;
+ if (c == 18)
+ dynsize += 7;
+ }
+dyntree = dynsize - 3;
+ for (r = s->rbuf, pc = s->win + s->blockstart; r != s->rend; r++)
+ if (r->bits & RunlenFlag)
+ for (n = r->n; n > 0; n--, pc++) {
+ fixsize += fixllen[*pc];
+ dynsize += llen[*pc];
+ }
+ else {
+ pc += lenbase[r->n] + r->bits;
+ fixsize += fixllen[Nlit + r->n + 1];
+ dynsize += llen[Nlit + r->n + 1];
+ fixsize += lenbits[r->n];
+ dynsize += lenbits[r->n];
+ r++;
+ fixsize += fixdlen[r->n];
+ dynsize += dlen[r->n];
+ fixsize += distbits[r->n];
+ dynsize += distbits[r->n];
+ }
+ fixsize += fixllen[EOB];
+ dynsize += llen[EOB];
+
+ /* emit block */
+ putbits(s, s->lastblock, 1);
+ if (dynsize < fixsize && dynsize < uncsize) {
+ /* dynamic code */
+ putbits(s, 2, 2);
+ putbits(s, nlit - 257, 5);
+ putbits(s, ndist - 1, 5);
+ putbits(s, nclen - 4, 4);
+ for (i = 0; i < nclen; i++)
+ putbits(s, clen[clenorder[i]], 3);
+ for (i = 0; i < ncodes; i++) {
+ c = codes[i];
+ putbits(s, ccode[c], clen[c]);
+ if (c == 16)
+ putbits(s, extra[i], 2);
+ if (c == 17)
+ putbits(s, extra[i], 3);
+ if (c == 18)
+ putbits(s, extra[i], 7);
+ }
+ putblock(s, lcode, llen, dcode, dlen);
+ } else if (fixsize < uncsize) {
+ /* fixed code */
+ putbits(s, 1, 2);
+ putblock(s, fixlcode, fixllen, fixdcode, fixdlen);
+ } else {
+ /* uncompressed */
+ putbits(s, 0, 2);
+ putbits(s, 0, 7); /* align to byte boundary */
+ s->nbits = 0;
+ putbits(s, blocklen, 16);
+ putbits(s, ~blocklen & 0xffff, 16);
+ memcpy(s->dst, s->win + s->blockstart, blocklen);
+ s->dst += blocklen;
+ }
+fprintf(stderr, "blocklen: %d lzlen: %d dynsize: %d (tree: %d, rate: %.3f) fixsize: %d (rate: %.3f) uncsize: %d (rate: %.3f)\n", blocklen, s->rend - s->rbuf,
+ dynsize, dyntree, dynsize/(float)blocklen, fixsize, fixsize/(float)blocklen, uncsize, uncsize/(float)blocklen);
+}
+
+static int bisect(ushort *base, int len, int n) {
+ int lo = 0;
+ int hi = len;
+ int k;
+
+ while (lo < hi) {
+ k = (lo + hi) / 2;
+ if (n < base[k])
+ hi = k;
+ else
+ lo = k + 1;
+ }
+ return lo - 1;
+}
+
+static void flushlit(State *s) {
+ if (s->run) {
+ s->rend->bits = RunlenFlag;
+ s->rend->n = s->run;
+ s->rend++;
+ s->run = 0;
+ }
+}
+
+static void recordmatch(State *s, Match m) {
+ int n;
+
+ flushlit(s);
+/*fprintf(stderr, "len:%u dist:%u\n", m.len, m.dist);*/
+ n = bisect(lenbase, Nlen, m.len);
+ s->rend->n = n;
+ s->rend->bits = m.len - lenbase[n];
+ s->rend++;
+ s->lfreq[Nlit + n + 1]++;
+ n = bisect(distbase, Ndist, m.dist);
+ s->rend->n = n;
+ s->rend->bits = m.dist - distbase[n];
+ s->rend++;
+ s->dfreq[n]++;
+}
+
+static void recordlit(State *s, int c) {
+/*fprintf(stderr, "lit:%c\n", c);*/
+ s->run++;
+ s->lfreq[c]++;
+}
+
+/* multiplicative hash */
+static uint gethash(uchar *p) {
+ return (0x9e3779b1 * ((p[0]<<16) + (p[1]<<8) + p[2]) >> (32 - HashBits)) % HashSize;
+}
+
+static ushort addtochain(State *s, ushort pos) {
+ ushort i, h;
+
+ if (s->avail < MinMatch) /* TODO */
+ return 0;
+ h = gethash(s->win + pos); /* TODO: rolling hash */
+ i = s->head[h];
+ s->head[h] = pos;
+ s->chain[pos % WinSize] = i;
+ return i;
+}
+
+static Match getmatch(State *s, ushort pos, ushort mpos) {
+ Match m = {0, MinMatch-1};
+ uchar *q;
+ uchar *p = s->win + pos;
+ uchar *end = p + MaxMatch;
+ ushort len;
+ ushort maxlen = s->avail < MaxMatch ? s->avail : MaxMatch;
+ ushort limit = pos - MaxDist;
+ ushort chainlen = MaxChainLen;
+/*
+uint h;
+h = gethash(src);
+if (h != gethash(src - dist))
+fprintf(stderr, "i %d, dist %d delta %d curpos %d dsrc %d h: %d head %d\n", i, dist, c.next, (ushort)(long)src, src - s->src, h, s->head[h]);
+*/
+ do {
+ q = s->win + mpos;
+ /* at least m.len+1 long */
+ if (q[m.len] != p[m.len] || q[0] != p[0])
+ continue;
+ /* loop unroll: MaxMatch % 10 == 8 */
+ while (*++q == *++p && *++q == *++p
+ && *++q == *++p && *++q == *++p
+ && *++q == *++p && *++q == *++p
+ && *++q == *++p && ++p != end && *++q == *p
+ && *++q == *++p && *++q == *++p);
+ len = MaxMatch - (end - p);
+ p -= len;
+ if (len > m.len) {
+ m.dist = pos - mpos;
+ m.len = len;
+ if (len == maxlen)
+ return m;
+ }
+ } while ((mpos = s->chain[mpos % WinSize]) > limit && --chainlen);
+ if (m.len < MinMatch || (m.len == MinMatch && m.dist > BigDist))
+ m.len = 0;
+ return m;
+}
+
+static void deflate_init(State *s) {
+ int i;
+
+ memset(s->chain, 0, sizeof(s->chain));
+ memset(s->head, 0, sizeof(s->head));
+ s->bits = s->nbits = s->lastblock = 0;
+ for (i = 0; i < 144; i++)
+ fixllen[i] = 8;
+ for (; i < 256; i++)
+ fixllen[i] = 9;
+ for (; i < 280; i++)
+ fixllen[i] = 7;
+ for (; i < Nlitlen; i++)
+ fixllen[i] = 8;
+ for (i = 0; i < Ndist; i++)
+ fixdlen[i] = 5;
+ huffcodes(fixlcode, fixllen, Nlitlen);
+ huffcodes(fixdcode, fixdlen, Ndist);
+}
+
+static void newblock(State *s) {
+ s->rend = s->rbuf;
+ s->run = 0;
+ memset(s->lfreq, 0, sizeof(s->lfreq));
+ memset(s->dfreq, 0, sizeof(s->dfreq));
+ s->lfreq[EOB]++;
+}
+
+static void fillwin(State *s) {
+ int n, k;
+
+ if (s->pos > WinSize + MaxDist) {
+ /* shift win */
+ memcpy(s->win, s->win + WinSize, WinSize);
+ for (n = HashSize; n--;)
+ s->head[n] = s->head[n] > WinSize ? s->head[n] - WinSize : 0;
+ for (n = WinSize; n--;)
+ s->chain[n] = s->chain[n] > WinSize ? s->chain[n] - WinSize : 0;
+ s->blockstart -= WinSize;
+ s->pos -= WinSize;
+ }
+ k = 2*WinSize - s->pos - s->avail;
+ n = s->srcend - s->src;
+ if (n > k)
+ n = k;
+ if (n == 0)
+ return;
+ memcpy(s->win + s->pos + s->avail, s->src, n);
+ s->src += n;
+ s->avail += n;
+}
+
+/* TODO: pos overflow */
+int deflate(uchar *src, int len, uchar *dst) {
+ State s;
+ Match m;
+ Match prevm = {0, 0};
+ int pos;
+ int head;
+
+ deflate_init(&s);
+ newblock(&s);
+ s.src = src;
+ s.srcend = src + len;
+ s.dst = dst;
+ s.dstend = dst + len + len/4 + 10; /* TODO */
+ pos = s.pos = s.blockstart = WinSize;
+ s.avail = 0;
+ do {
+ if (s.avail < MaxMatch || s.rend - s.rbuf >= RbufSize - 2) {
+/*fprintf(stderr, "pos:%d bstart:%d avail:%d rsiz:%d\n", pos, s.blockstart, s.avail, s.rend - s.rbuf);*/
+ if (pos - s.blockstart > MaxDist || s.rend - s.rbuf >= RbufSize - 2) {
+ flushlit(&s);
+ if (prevm.len) {
+ pos--;
+ s.avail++;
+ prevm.len = 0;
+ }
+ deflate_block(&s, pos - s.blockstart);
+ newblock(&s);
+ s.blockstart = pos;
+ }
+ s.pos = pos;
+ fillwin(&s);
+ pos = s.pos;
+ }
+ head = addtochain(&s, pos);
+ if (head >= pos || pos - head >= MaxDist)
+ head = 0;
+ if (head)
+ m = getmatch(&s, pos, head);
+ if (head && m.len > prevm.len) {
+ if (prevm.len)
+ recordlit(&s, s.win[pos-1]);
+ prevm = m;
+ } else if (prevm.len) {
+ recordmatch(&s, prevm);
+ prevm.len -= 2;
+ do {
+ pos++;
+ s.avail--;
+ addtochain(&s, pos);
+ } while (--prevm.len);
+ } else
+ recordlit(&s, s.win[pos]);
+ pos++;
+ s.avail--;
+
+ } while (s.avail);
+ flushlit(&s);
+ s.lastblock = 1;
+ deflate_block(&s, pos - s.blockstart);
+ putbits(&s, 0, 7);
+ return s.dst - dst;
+}
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+enum {Siz = 1<<24};
+
+int main() {
+ uchar *src;
+ uchar *dst;
+ int len;
+
+ src = malloc(Siz);
+ dst = malloc(Siz);
+ len = fread(src, 1, Siz, stdin);
+ len = deflate(src, len, dst);
+ fwrite(dst, 1, len, stdout);
+ fprintf(stderr, "compressed: %d\n", len);
+ free(dst);
+ free(src);
+ return 0;
+}