flate

deflate implementation
git clone git://git.suckless.org/flate
Log | Files | Refs | README

commit 312c496743eb1cc762ffa956e212c4919ac23bf3
parent b64c42f158913ffd87ecea868f0d8eb23c87b8b4
Author: nsz@tpx <unknown>
Date:   Wed,  5 Aug 2009 21:08:28 +0200

deflate with dstbuf (needs cleanup)
Diffstat:
Makefile | 8++++----
deflate.c | 256++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
deflate.h | 32++++++++++++++++++++++++++++++++
deflate_example.c | 70++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 274 insertions(+), 92 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,8 +1,8 @@ -#CFLAGS=-g -Wall -ansi -pedantic -CFLAGS=-O3 -Wall -ansi -pedantic +CFLAGS=-g -Wall -ansi -pedantic +#CFLAGS=-O3 -Wall -ansi -pedantic LDFLAGS= SRC=inflate.c inflate_simple.c inflate_example.c inflate_callback.c \ - deflate.c deflate_simple.c + deflate.c deflate_simple.c deflate_example.c OBJ=${SRC:.c=.o} EXE=inflate inflate_simple deflate deflate_simple @@ -13,7 +13,7 @@ inflate_simple: inflate_simple.o ${CC} -o $@ $^ ${LDFLAGS} inflate_callback: inflate_callback.o ${CC} -o $@ $^ ${LDFLAGS} -deflate: deflate.o +deflate: deflate.o deflate_example.o ${CC} -o $@ $^ ${LDFLAGS} deflate_simple: deflate_simple.o ${CC} -o $@ $^ ${LDFLAGS} diff --git a/deflate.c b/deflate.c @@ -1,6 +1,7 @@ #include <stdlib.h> #include <string.h> #include <stdio.h> +#include "deflate.h" /* TODO: @@ -18,21 +19,22 @@ TODO: runlen -> lz (rename) bounds on the compressend size zlib trick: if same freq then shorter code goes to the one with smaller subtree + alloc bufs on heap? + overlap freq/code, rbuf/dstwin ? + fix match (loopunroll near end) spec case tests: empty block huff overflow all zero (rle) dist = 32K + test with small bufsize (1,2..17 bytes) */ -typedef unsigned char uchar; -typedef unsigned short ushort; -typedef unsigned int uint; - enum { WinSize = 1 << 15, - RbufSize = 1 << 14, + DstSize = 2*WinSize + 8, + RbufSize = 1 << 13, RunlenFlag = 1 << 15, MinMatch = 3, MaxMatch = 258, @@ -51,6 +53,12 @@ enum { Nclen = 19 /* number of code length codes */ }; +/* state */ +enum { + FillWin, + Block +}; + typedef struct { ushort dist; ushort len; @@ -63,14 +71,16 @@ typedef struct { /* TODO: alloc buffers on heap */ typedef struct { + int state; int pos; int avail; int blockstart; int lastblock; + Match m; uchar *src; /* input (block start) */ uchar *srcend; uchar *dst; /* compressed output */ - uchar *dstend; + uchar *dstpos; uint bits; int nbits; int lfreq[Nlitlen]; @@ -81,6 +91,8 @@ typedef struct { Runlen rbuf[RbufSize]; /* literal run length, match len, match dist */ Runlen *rend; /* current pos in rbuf */ int run; /* literal count */ + + uchar dstbuf[DstSize]; } State; static uchar fixllen[Nlitlen]; /* fixed lit/len huffman code tree */ @@ -275,7 +287,7 @@ static void putbits(State *s, uint bits, int n) { } } -static int clencodes(int *codes, int *extra, uchar *llen, int nlit, uchar *dlen, int ndist) { +static int clencodes(uchar *codes, uchar *extra, uchar *llen, int nlit, uchar *dlen, int ndist) { int i, c, r, rr; int n = 0; @@ -336,9 +348,9 @@ static void putblock(State *s, ushort *lcode, uchar *llen, ushort *dcode, uchar static void deflate_block(State *s, int blocklen) { int cfreq[Nclen]; + uchar codes[Nlitlen + Ndist], extra[Nlitlen + Ndist]; uchar llen[Nlitlen], dlen[Ndist], clen[Nclen]; ushort lcode[Nlitlen], dcode[Ndist], ccode[Nclen]; - int codes[Nlitlen + Ndist], extra[Nlitlen + Ndist]; int i, c, n, ncodes; int nlit, ndist, nclen; int dynsize, fixsize, uncsize; @@ -502,31 +514,30 @@ static ushort addtochain(State *s, ushort pos) { static Match getmatch(State *s, ushort pos, ushort mpos) { Match m = {0, MinMatch-1}; - uchar *q; - uchar *p = s->win + pos; - uchar *end = p + MaxMatch; ushort len; ushort maxlen = s->avail < MaxMatch ? s->avail : MaxMatch; ushort limit = pos - MaxDist; ushort chainlen = MaxChainLen; -/* -uint h; -h = gethash(src); -if (h != gethash(src - dist)) -fprintf(stderr, "i %d, dist %d delta %d curpos %d dsrc %d h: %d head %d\n", i, dist, c.next, (ushort)(long)src, src - s->src, h, s->head[h]); -*/ + uchar *q; + uchar *p = s->win + pos; + uchar *end = p + maxlen; + do { q = s->win + mpos; /* at least m.len+1 long */ if (q[m.len] != p[m.len] || q[0] != p[0]) continue; /* loop unroll: MaxMatch % 10 == 8 */ +/* TODO: if maxlen == MaxMatch while (*++q == *++p && *++q == *++p && *++q == *++p && *++q == *++p && *++q == *++p && *++q == *++p && *++q == *++p && ++p != end && *++q == *p && *++q == *++p && *++q == *++p); len = MaxMatch - (end - p); +*/ + while (++p != end && *++q == *p); + len = maxlen - (end - p); p -= len; if (len > m.len) { m.dist = pos - mpos; @@ -540,27 +551,8 @@ fprintf(stderr, "i %d, dist %d delta %d curpos %d dsrc %d h: %d head %d\n", i, d return m; } -static void deflate_init(State *s) { - int i; - - memset(s->chain, 0, sizeof(s->chain)); - memset(s->head, 0, sizeof(s->head)); - s->bits = s->nbits = s->lastblock = 0; - for (i = 0; i < 144; i++) - fixllen[i] = 8; - for (; i < 256; i++) - fixllen[i] = 9; - for (; i < 280; i++) - fixllen[i] = 7; - for (; i < Nlitlen; i++) - fixllen[i] = 8; - for (i = 0; i < Ndist; i++) - fixdlen[i] = 5; - huffcodes(fixlcode, fixllen, Nlitlen); - huffcodes(fixdcode, fixdlen, Ndist); -} - static void newblock(State *s) { + s->dst = s->dstpos = s->dstbuf; s->rend = s->rbuf; s->run = 0; memset(s->lfreq, 0, sizeof(s->lfreq)); @@ -568,7 +560,7 @@ static void newblock(State *s) { s->lfreq[EOB]++; } -static void fillwin(State *s) { +static int fillwin(State *s) { int n, k; if (s->pos > WinSize + MaxDist) { @@ -586,94 +578,182 @@ static void fillwin(State *s) { if (n > k) n = k; if (n == 0) - return; + return FlateNeedInput; memcpy(s->win + s->pos + s->avail, s->src, n); s->src += n; s->avail += n; + return FlateOk; } /* TODO: pos overflow */ -int deflate(uchar *src, int len, uchar *dst) { - State s; +static int deflate_state(State *s) { Match m; Match prevm = {0, 0}; - int pos; + int pos = s->pos; int head; - deflate_init(&s); - newblock(&s); - s.src = src; - s.srcend = src + len; - s.dst = dst; - s.dstend = dst + len + len/4 + 10; /* TODO */ - pos = s.pos = s.blockstart = WinSize; - s.avail = 0; + + if (s->state == Block) { + if (s->lastblock && s->dstpos == s->dstbuf) /* todo.. */ + return FlateOk; + else + goto block; + } + if (s->state == FillWin) + goto filled; do { - if (s.avail < MaxMatch || s.rend - s.rbuf >= RbufSize - 2) { -/*fprintf(stderr, "pos:%d bstart:%d avail:%d rsiz:%d\n", pos, s.blockstart, s.avail, s.rend - s.rbuf);*/ - if (pos - s.blockstart > MaxDist || s.rend - s.rbuf >= RbufSize - 2) { - flushlit(&s); + /* messy.. */ + if (s->avail < MaxMatch || s->rend - s->rbuf >= RbufSize - 2) { + if (pos - s->blockstart > MaxDist || s->rend - s->rbuf >= RbufSize - 2) { + flushlit(s); if (prevm.len) { pos--; - s.avail++; + s->avail++; prevm.len = 0; } - deflate_block(&s, pos - s.blockstart); - newblock(&s); - s.blockstart = pos; +fprintf(stderr, "avail %d pos %d lastpos %d srcavail %d\n", s->avail, pos, s->pos, s->srcend - s->src); + s->state = Block; + s->pos = pos; + s->m = prevm; + deflate_block(s, pos - s->blockstart); + return FlateHasOutput; +block: + if (s->dstpos != s->dstbuf) + return FlateHasOutput; + newblock(s); + pos = s->pos; + prevm = s->m; + s->blockstart = pos; } - s.pos = pos; - fillwin(&s); - pos = s.pos; + s->pos = pos; + s->m = prevm; + s->state = FillWin; + if (fillwin(s)) + return FlateNeedInput; +/* TODO: .. */ +filled: + if (fillwin(s)) + /* tralala */; +fprintf(stderr, "avail %d pos %d srcavail %d\n", s->avail, s->pos, s->srcend - s->src); + pos = s->pos; + prevm = s->m; } - head = addtochain(&s, pos); + head = addtochain(s, pos); if (head >= pos || pos - head >= MaxDist) head = 0; if (head) - m = getmatch(&s, pos, head); + m = getmatch(s, pos, head); if (head && m.len > prevm.len) { if (prevm.len) - recordlit(&s, s.win[pos-1]); + recordlit(s, s->win[pos-1]); prevm = m; } else if (prevm.len) { - recordmatch(&s, prevm); + recordmatch(s, prevm); prevm.len -= 2; do { pos++; - s.avail--; - addtochain(&s, pos); + s->avail--; + addtochain(s, pos); } while (--prevm.len); } else - recordlit(&s, s.win[pos]); + recordlit(s, s->win[pos]); pos++; - s.avail--; - - } while (s.avail); - flushlit(&s); - s.lastblock = 1; - deflate_block(&s, pos - s.blockstart); - putbits(&s, 0, 7); - return s.dst - dst; + s->avail--; + } while (s->avail); + flushlit(s); + s->lastblock = 1; + s->state = Block; + deflate_block(s, pos - s->blockstart); + putbits(s, 0, 7); + return FlateHasOutput; +} + +static State *alloc_state(void) { + State *s = malloc(sizeof(State)); + int i; + + if (!s) + return s; + memset(s->chain, 0, sizeof(s->chain)); + memset(s->head, 0, sizeof(s->head)); + s->bits = s->nbits = s->lastblock = 0; + for (i = 0; i < 144; i++) + fixllen[i] = 8; + for (; i < 256; i++) + fixllen[i] = 9; + for (; i < 280; i++) + fixllen[i] = 7; + for (; i < Nlitlen; i++) + fixllen[i] = 8; + for (i = 0; i < Ndist; i++) + fixdlen[i] = 5; + huffcodes(fixlcode, fixllen, Nlitlen); + huffcodes(fixdcode, fixdlen, Ndist); + + s->dstpos = s->dstbuf; + s->pos = s->blockstart = WinSize; + s->avail = 0; + return s; } +int deflate(FlateStream *stream) { + State *s = stream->state; + int n, k; + + if (stream->err) { + free(s); + stream->state = 0; + return FlateError; + } + if (!s) { + s = stream->state = alloc_state(); + if (!s) + return stream->err = "no mem.", FlateError; + s->state = Block; + } + if (stream->nin) { + s->src = stream->in; + s->srcend = s->src + stream->nin; + stream->nin = 0; + } + n = deflate_state(s); + if (n == FlateHasOutput) { + k = s->dst - s->dstpos; + if (k < stream->nout) + stream->nout = k; + memcpy(stream->out, s->dstpos, stream->nout); + s->dstpos += stream->nout; + if (s->dstpos == s->dst) + s->dstpos = s->dstbuf; + } + if (n == FlateOk) { + free(s); + stream->state = 0; + } + return n; +} +/* #include <stdlib.h> #include <stdio.h> enum {Siz = 1<<24}; int main() { - uchar *src; - uchar *dst; - int len; - - src = malloc(Siz); - dst = malloc(Siz); - len = fread(src, 1, Siz, stdin); - len = deflate(src, len, dst); - fwrite(dst, 1, len, stdout); - fprintf(stderr, "compressed: %d\n", len); - free(dst); - free(src); + FlateStream s; + int n; + + s.in = malloc(Siz); + s.nin = fread(s.in, 1, Siz, stdin); + s.out = malloc(Siz); + s.nout = Siz; + s.err = 0; + s.state = 0; + n = deflate(&s); + fwrite(s.out, 1, s.nout, stdout); + fprintf(stderr, "compressed: %d return: %d\n", s.nout, n); + free(s.in); + free(s.out); return 0; } +*/ diff --git a/deflate.h b/deflate.h @@ -0,0 +1,32 @@ +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; + +/* TODO: + flatein flateout flateerr + inflate_reset, inflate_free? + */ + +/* return values */ +enum { + FlateOk = 0, + FlateError = -1, + FlateNeedInput = -2, + FlateHasOutput = -3 +}; + +typedef struct { + uchar *in; + int nin; + uchar *out; + int nout; + char *err; + void *state; +} FlateStream; + +/* state and err should be initialized to 0, to free internal state call s->err = ""; inflate(s) */ +/* decode from in to out, return if input is needed or output is available */ +int deflate(FlateStream *s); + +/* callback interface: r(buf, n, rdata) reads at most n bytes into buf */ +int deflate_callback(int (*r)(void *, int, void *), void *rdata, int (*w)(void *, int, void *), void *wdata); diff --git a/deflate_example.c b/deflate_example.c @@ -0,0 +1,70 @@ +#include <stdlib.h> +#include <stdio.h> +#include "deflate.h" + +int compress_stream(FILE *in, FILE *out) { + FlateStream s; + int n, nin, nout; + enum {Nin = 1<<12, Nout = 1<<15}; + + s.in = malloc(Nin); + s.out = malloc(Nout); + s.nout = Nout; + s.err = 0; + s.state = 0; + nin = nout = 0; + + for (n = FlateNeedInput; ; n = deflate(&s)) + switch (n) { + case FlateOk: + case FlateError: + fprintf(stderr, "in: %d out: %d err: %s\n", nin, nout, s.err); + free(s.in); + free(s.out); + return n; + case FlateNeedInput: + s.nin = fread(s.in, 1, Nin, in); + nin += s.nin; + break; + case FlateHasOutput: + if (s.nout != fwrite(s.out, 1, s.nout, out)) + s.err = "write error."; + nout += s.nout; + s.nout = Nout; + break; + } +} + +int compress_mem(void *src, int srclen, void *dst, int dstlen) { + FlateStream s; + int n; + + s.in = src; + s.nin = srclen; + s.out = dst; + s.nout = dstlen; + s.err = 0; + s.state = 0; + + while ((n = deflate(&s)) == FlateHasOutput) { + dstlen -= s.nout; + s.out += s.nout; + s.nout = dstlen; + if (dstlen < 0) + s.err = "not enough output space."; + } + if (n == FlateNeedInput) { + s.err = "input is not terminated."; + n = deflate(&s); + } + if (n == FlateOk) { + fprintf(stderr, "in: %d out: %d\n", srclen, (char *)s.out - (char *)dst); + return (char *)s.out - (char *)dst; + } + fprintf(stderr, "in: %d out: %d err: %s\n", srclen, (char *)s.out - (char *)dst, s.err); + return -1; +} + +int main(void) { + return compress_stream(stdin, stdout); +}