flate

deflate implementation
git clone git://git.suckless.org/flate
Log | Files | Refs | README

commit 56b46cef14ad54fd2cdc9be296c36530078a0992
parent 2dee35176d25380361c699d97299b108ae2120e3
Author: nsz@tpx <unknown>
Date:   Mon, 10 Aug 2009 15:04:59 +0200

rm deflate_simple
Diffstat:
Makefile | 10++++------
deflate_simple.c | 578------------------------------------------------------------------------------
2 files changed, 4 insertions(+), 584 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,8 +1,8 @@ -CFLAGS=-g -Wall -ansi -pedantic -#CFLAGS=-O3 -Wall -ansi -pedantic +#CFLAGS=-g -Wall -ansi -pedantic +CFLAGS=-Os -Wall -ansi -pedantic LDFLAGS= -SRC=inflate.c inflate_simple.c inflate_example.c inflate_callback.c \ - deflate.c deflate_simple.c deflate_example.c +SRC=inflate.c inflate_example.c inflate_simple.c inflate_callback.c \ + deflate.c deflate_example.c OBJ=${SRC:.c=.o} EXE=inflate inflate_simple deflate deflate_simple @@ -15,8 +15,6 @@ inflate_callback: inflate_callback.o ${CC} -o $@ $^ ${LDFLAGS} deflate: deflate.o deflate_example.o ${CC} -o $@ $^ ${LDFLAGS} -deflate_simple: deflate_simple.o - ${CC} -o $@ $^ ${LDFLAGS} ${OBJ}: Makefile flate.h .c.o: ${CC} -c ${CFLAGS} $< diff --git a/deflate_simple.c b/deflate_simple.c @@ -1,578 +0,0 @@ -#include <string.h> - -typedef unsigned char uchar; -typedef unsigned short ushort; -typedef unsigned int uint; - -enum { - MinMatch = 3, - MaxMatch = 258, - BigDist = 1 << 12, - MaxChainStep = 128, - HashBits = 13, - HashSize = 1 << HashBits, - WinSize = 1 << 15, - BlockLen = 1 << 15, - RbufSize = BlockLen * 3/4 + 2 * MaxMatch, - RunlenFlag = 1 << 30, - - CodeBits = 16, /* max number of bits in a code + 1 */ - EOB = 256, /* end of block symbol */ - Nlit = 256, /* number of lit codes */ - Nlen = 29, /* number of len codes */ - Nlitlen = Nlit + Nlen + 3, /* litlen codes + block end + 2 unused */ - Ndist = 30, /* number of distance codes */ - Nclen = 19 /* number of code length codes */ -}; - -typedef struct { - ushort dist; - ushort len; -} Match; - -typedef struct { - ushort head[HashSize]; /* position of hash chain heads */ - ushort chain[WinSize]; /* hash chains */ - int pos; /* position in hash chain */ - uchar *src; /* input (block start) */ - uchar *dst; /* compressed output */ - uint bits; - int nbits; - int rbuf[RbufSize]; /* literal run length and (len,dist) */ - int *rend; /* current pos in rbuf */ - int run; /* literal count */ - int lfreq[Nlitlen]; - int dfreq[Ndist]; - int lastblock; -} State; - -static uchar fixllen[Nlitlen]; /* fixed lit/len huffman code tree */ -static ushort fixlcode[Nlitlen]; -static uchar fixdlen[Ndist]; /* fixed distance huffman code tree */ -static ushort fixdcode[Ndist]; - -/* base offset and extra bits tables */ -static uchar lenbits[Nlen] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 -}; -static ushort lenbase[Nlen] = { - 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258 -}; -static uchar distbits[Ndist] = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 -}; -static ushort distbase[Ndist] = { - 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 -}; - -/* ordering of code lengths */ -static uchar clenorder[Nclen] = { - 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 -}; - - -static uint revcode(uint c, int n) { - int i; - uint r = 0; - - for (i = 0; i < n; i++) { - r = (r << 1) | (c & 1); - c >>= 1; - } - return r; -} - -/* build huffman code tree from code lengths */ -static void huffcodes(ushort *codes, const uchar *lens, int n) { - int c[CodeBits]; - int i, code, count; - - /* count code lengths and calc first code for each length */ - for (i = 0; i < CodeBits; i++) - c[i] = 0; - for (i = 0; i < n; i++) - c[lens[i]]++; - for (code = 0, i = 1; i < CodeBits; i++) { - count = c[i]; - c[i] = code; - code += count; - code <<= 1; - } - for (i = 0; i < n; i++) - if (lens[i]) - codes[i] = revcode(c[lens[i]]++, lens[i]); - else - codes[i] = 0; -} - -static int heapparent(int n) {return (n - 2)/4 * 2;} -static int heapchild(int n) {return 2 * n + 2;} - -static int heappush(int *heap, int len, int w, int n) { - int p, c, tmp; - - c = len; - heap[len++] = n; - heap[len++] = w; - while (c > 0) { - p = heapparent(c); - if (heap[c+1] < heap[p+1]) { - tmp = heap[c]; heap[c] = heap[p]; heap[p] = tmp; - tmp = heap[c+1]; heap[c+1] = heap[p+1]; heap[p+1] = tmp; - c = p; - } else - break; - } - return len; -} - -static int heappop(int *heap, int len, int *w, int *n) { - int p, c, tmp; - - *n = heap[0]; - *w = heap[1]; - heap[1] = heap[--len]; - heap[0] = heap[--len]; - p = 0; - for (;;) { - c = heapchild(p); - if (c >= len) - break; - if (c+2 < len && heap[c+3] < heap[c+1]) - c += 2; - if (heap[p+1] > heap[c+1]) { - tmp = heap[p]; heap[p] = heap[c]; heap[c] = tmp; - tmp = heap[p+1]; heap[p+1] = heap[c+1]; heap[c+1] = tmp; - } else - break; - p = c; - } - return len; -} - -/* symbol frequencies -> code lengths (limited to 255) */ -static void hufflens(uchar *lens, int *freqs, int nsym, int limit) { - /* 2 <= nsym <= Nlitlen, log(nsym) <= limit <= CodeBits-1 */ - int parent[2*Nlitlen-1]; - int count[CodeBits]; - int heap[2*Nlitlen]; - int n, len, top, overflow; - int i, j; - int wi, wj; - - for (n = 0; n < limit+1; n++) - count[n] = 0; - for (len = n = 0; n < nsym; n++) - if (freqs[n] > 0) - len = heappush(heap, len, freqs[n], n); - else - lens[n] = 0; - /* deflate: fewer than two symbols: add new */ - for (n = 0; len < 4; n++) - if (freqs[n] == 0) - len = heappush(heap, len, ++freqs[n], n); - /* build code tree */ - top = len; - for (n = nsym; len > 2; n++) { - len = heappop(heap, len, &wi, &i); - len = heappop(heap, len, &wj, &j); - parent[i] = n; - parent[j] = n; - len = heappush(heap, len, wi + wj, n); - /* keep an ordered list of nodes at the end */ - heap[len+1] = i; - heap[len] = j; - } - /* calc code lengths (deflate: with limit) */ - overflow = 0; - parent[--n] = 0; - for (i = 2; i < top; i++) { - n = heap[i]; - if (n >= nsym) { - /* overwrite parent index with length */ - parent[n] = parent[parent[n]] + 1; - if (parent[n] > limit) - overflow++; - } else { - lens[n] = parent[parent[n]] + 1; - if (lens[n] > limit) { - lens[n] = limit; - overflow++; - } - count[lens[n]]++; - } - } - if (overflow == 0) - return; - /* modify code tree to fix overflow (from zlib) */ - while (overflow > 0) { - for (n = limit-1; count[n] == 0; n--); - count[n]--; - count[n+1] += 2; - count[limit]--; - overflow -= 2; - } - for (len = limit; len > 0; len--) - for (i = count[len]; i > 0;) { - n = heap[--top]; - if (n < nsym) { - lens[n] = len; - i--; - } - } -} - -/* output n (<= 16) bits */ -static void putbits(State *s, uint bits, int n) { - s->bits |= bits << s->nbits; - s->nbits += n; - while (s->nbits >= 8) { - *s->dst++ = s->bits & 0xff; - s->bits >>= 8; - s->nbits -= 8; - } -} - -static int clencodes(int *codes, int *extra, uchar *llen, int nlit, uchar *dlen, int ndist) { - int i, c, r, rr; - int n = 0; - - for (i = 0; i < nlit; i++) - codes[i] = llen[i]; - for (i = 0; i < ndist; i++) - codes[nlit + i] = dlen[i]; - for (i = 0; i < nlit + ndist;) { - /* run length encoding */ - c = codes[i]; - for (r = 1; i + r < nlit + ndist && codes[i + r] == c; r++); - i += r; - if (c == 0) { - while (r >= 11) { - rr = r > 138 ? 138 : r; - codes[n] = 18; - extra[n++] = rr - 11; - r -= rr; - } - if (r >= 3) { - codes[n] = 17; - extra[n++] = r - 3; - r = 0; - } - } - while (r--) { - codes[n++] = c; - while (r >= 3) { - rr = r > 6 ? 6 : r; - codes[n] = 16; - extra[n++] = rr - 3; - r -= rr; - } - } - } - return n; -} - -static void putblock(State *s, ushort *lcode, uchar *llen, ushort *dcode, uchar *dlen) { - int n, k; - int *pn; - uchar *pc; - - for (pn = s->rbuf, pc = s->src; pn != s->rend;) { - n = *pn++; - if (n & RunlenFlag) { - n %= RunlenFlag; - while (n--) { - putbits(s, lcode[*pc], llen[*pc]); - pc++; - } - } else { - pc += lenbase[n >> 16] + (n & 0xffff); - k = n >> 16; - putbits(s, lcode[Nlit + k + 1], llen[Nlit + k + 1]); - putbits(s, n & 0xffff, lenbits[k]); - n = *pn++; - k = n >> 16; - putbits(s, dcode[k], dlen[k]); - putbits(s, n & 0xffff, distbits[k]); - } - } - putbits(s, lcode[EOB], llen[EOB]); -} - -static void deflate_block(State *s, int blocklen) { - int cfreq[Nclen]; - uchar llen[Nlitlen], dlen[Ndist], clen[Nclen]; - ushort lcode[Nlitlen], dcode[Ndist], ccode[Nclen]; - int codes[Nlitlen + Ndist], extra[Nlitlen + Ndist]; - int i, c, n, ncodes; - int nlit, ndist, nclen; - int dynsize, fixsize, uncsize; - int *pn; - uchar *pc; - - /* calc dynamic codes */ - hufflens(llen, s->lfreq, Nlitlen, CodeBits-1); - hufflens(dlen, s->dfreq, Ndist, CodeBits-1); - huffcodes(lcode, llen, Nlitlen); - huffcodes(dcode, dlen, Ndist); - for (nlit = Nlitlen; nlit > Nlit && llen[nlit-1] == 0; nlit--); - for (ndist = Ndist; ndist > 1 && dlen[ndist-1] == 0; ndist--); - ncodes = clencodes(codes, extra, llen, nlit, dlen, ndist); - memset(cfreq, 0, sizeof(cfreq)); - for (i = 0; i < ncodes; i++) - cfreq[codes[i]]++; - hufflens(clen, cfreq, Nclen, 7); - huffcodes(ccode, clen, Nclen); - for (nclen = Nclen; nclen > 4 && clen[clenorder[nclen-1]] == 0; nclen--); - - /* calc size */ - uncsize = 3 + 16 + 8 * blocklen + (16 - 3 - s->nbits) % 8; /* byte aligned */ - fixsize = 3; - dynsize = 3 + 5 + 5 + 4 + 3 * nclen; - for (i = 0; i < ncodes; i++) { - c = codes[i]; - dynsize += clen[c]; - if (c == 16) - dynsize += 2; - if (c == 17) - dynsize += 3; - if (c == 18) - dynsize += 7; - } - for (pn = s->rbuf, pc = s->src; pn != s->rend;) { - n = *pn++; - if (n & RunlenFlag) { - n %= RunlenFlag; - while (n--) { - fixsize += fixllen[*pc]; - dynsize += llen[*pc++]; - } - } else { - pc += lenbase[n >> 16] + (n & 0xffff); - n >>= 16; - fixsize += fixllen[Nlit + n + 1]; - dynsize += llen[Nlit + n + 1]; - fixsize += lenbits[n]; - dynsize += lenbits[n]; - n = *pn++ >> 16; - fixsize += fixdlen[n]; - dynsize += dlen[n]; - fixsize += distbits[n]; - dynsize += distbits[n]; - } - } - fixsize += fixllen[EOB]; - dynsize += llen[EOB]; - - /* emit block */ - putbits(s, s->lastblock, 1); - if (dynsize < fixsize && dynsize < uncsize) { - /* dynamic code */ - putbits(s, 2, 2); - putbits(s, nlit - 257, 5); - putbits(s, ndist - 1, 5); - putbits(s, nclen - 4, 4); - for (i = 0; i < nclen; i++) - putbits(s, clen[clenorder[i]], 3); - for (i = 0; i < ncodes; i++) { - c = codes[i]; - putbits(s, ccode[c], clen[c]); - if (c == 16) - putbits(s, extra[i], 2); - if (c == 17) - putbits(s, extra[i], 3); - if (c == 18) - putbits(s, extra[i], 7); - } - putblock(s, lcode, llen, dcode, dlen); - } else if (fixsize < uncsize) { - /* fixed code */ - putbits(s, 1, 2); - putblock(s, fixlcode, fixllen, fixdcode, fixdlen); - } else { - /* uncompressed */ - putbits(s, 0, 2); - putbits(s, 0, 7); /* align to byte boundary */ - s->nbits = 0; - putbits(s, blocklen, 16); - putbits(s, ~blocklen & 0xffff, 16); - memcpy(s->dst, s->src, blocklen); - s->dst += blocklen; - } -} - -static int bisect(ushort *base, int len, int n) { - int lo = 0; - int hi = len; - int k; - - while (lo < hi) { - k = (lo + hi) / 2; - if (n < base[k]) - hi = k; - else - lo = k + 1; - } - return lo - 1; -} - -static void recordmatch(State *s, Match m) { - int n; - - if (s->run) { - *s->rend++ = s->run | RunlenFlag; - s->run = 0; - } - n = bisect(lenbase, Nlen, m.len); - *s->rend++ = n << 16 | (m.len - lenbase[n]); - s->lfreq[Nlit + n + 1]++; - n = bisect(distbase, Ndist, m.dist); - *s->rend++ = n << 16 | (m.dist - distbase[n]); - s->dfreq[n]++; -} - -static void recordlit(State *s, int c) { - s->run++; - s->lfreq[c]++; -} - -/* multiplicative hash */ -static uint gethash(uchar *p) { - return 0x9e3779b1 * ((p[0]<<16) + (p[1]<<8) + p[2]) >> (32 - HashBits); -} - -static Match getmatch(State *s, uchar *src, int len) { - int i, j; - int pos; - int dist; - int lastdist = 0; - Match m = {0, 0}; - - if (len < MinMatch) - return m; - if (len > MaxMatch) - len = MaxMatch; - pos = s->head[gethash(src)]; - for (i = 0; i < MaxChainStep; i++) { - dist = (WinSize + s->pos - pos) % WinSize; - if (dist <= lastdist) - break; - lastdist = dist; - for (j = 0; j < len; j++) - if (src[j] != src[j - dist]) - break; - if (j > m.len) { - m.dist = dist; - m.len = j; - if (j == len) - return m; - } - pos = s->chain[pos]; - } - if (m.len < MinMatch || (m.len == MinMatch && m.dist > BigDist)) - m.len = 0; - return m; -} - -static void deflate_init(State *s) { - int i; - - memset(s->chain, 0, sizeof(s->chain)); - memset(s->head, 0, sizeof(s->head)); - s->pos = s->bits = s->nbits = s->lastblock = 0; - for (i = 0; i < 144; i++) - fixllen[i] = 8; - for (; i < 256; i++) - fixllen[i] = 9; - for (; i < 280; i++) - fixllen[i] = 7; - for (; i < Nlitlen; i++) - fixllen[i] = 8; - for (i = 0; i < Ndist; i++) - fixdlen[i] = 5; - huffcodes(fixlcode, fixllen, Nlitlen); - huffcodes(fixdcode, fixdlen, Ndist); -} - -static void newblock(State *s) { - s->rend = s->rbuf; - s->run = 0; - memset(s->lfreq, 0, sizeof(s->lfreq)); - memset(s->dfreq, 0, sizeof(s->dfreq)); - s->lfreq[EOB]++; -} - -int deflate(uchar *src, int len, uchar *dst) { - State s; - int step; - uint hash; - Match m; - Match prevm = {0, 0}; - - deflate_init(&s); - newblock(&s); - s.dst = dst; - s.src = src; - while (len > 0) { - m = getmatch(&s, src, len); - if (m.len > prevm.len) { - if (prevm.len) - recordlit(&s, *(src-1)); - step = 1; - prevm = m; - } else if (prevm.len) { - recordmatch(&s, prevm); - step = prevm.len - 1; - prevm.len = 0; - } else { - recordlit(&s, *src); - step = 1; - } - while (step > 0) { - if (len >= MinMatch) { - hash = gethash(src); - s.chain[s.pos] = s.head[hash]; - s.head[hash] = s.pos; - s.pos = (s.pos + 1) % WinSize; - } - src++; - len--; - step--; - } - if (prevm.len == 0 && src - s.src > BlockLen) { - if (s.run) - *s.rend++ = s.run | RunlenFlag; - deflate_block(&s, src - s.src); - newblock(&s); - s.src = src; - } - } - if (s.run) - *s.rend++ = s.run | RunlenFlag; - s.lastblock = 1; - deflate_block(&s, src - s.src); - putbits(&s, 0, 7); - return s.dst - dst; -} - - -#include <stdlib.h> -#include <stdio.h> - -enum {Siz = 1<<24}; - -int main() { - uchar *src; - uchar *dst; - int len; - - src = malloc(Siz); - dst = malloc(Siz); - len = fread(src, 1, Siz, stdin); - len = deflate(src, len, dst); - fwrite(dst, 1, len, stdout); - fprintf(stderr, "compressed: %d\n", len); - free(dst); - free(src); - return 0; -}