flate

deflate implementation
git clone git://git.suckless.org/flate
Log | Files | Refs | README

commit 2dee35176d25380361c699d97299b108ae2120e3
parent 1428f6315a1c5918bd4fabd2daaa1eb0145a11a7
Author: nsz@tpx <unknown>
Date:   Mon, 10 Aug 2009 14:56:14 +0200

+callback, s/runlen/lz/, todo update
Diffstat:
Makefile | 6+++---
deflate.c | 148+++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
deflate_example.c | 39+++++++++++++++++++++++++++++++++++++++
inflate.c | 7++++---
inflate_example.c | 41++++++++++++++++++++++++++++++++++++++++-
5 files changed, 182 insertions(+), 59 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,5 +1,5 @@ -#CFLAGS=-g -Wall -ansi -pedantic -CFLAGS=-O3 -Wall -ansi -pedantic +CFLAGS=-g -Wall -ansi -pedantic +#CFLAGS=-O3 -Wall -ansi -pedantic LDFLAGS= SRC=inflate.c inflate_simple.c inflate_example.c inflate_callback.c \ deflate.c deflate_simple.c deflate_example.c @@ -38,7 +38,7 @@ profinf: inflate_example.c inflate.c profdef: deflate_example.c deflate.c gcc -O3 -fprofile-arcs -ftest-coverage -pg -g -Wall $+ - ./a.out <g.tar >/dev/null + ./a.out <q.tar >/dev/null gcov -b $+ >/dev/null gprof a.out >$<.gprof rm -f a.out *.gcno *.gcda gmon.out diff --git a/deflate.c b/deflate.c @@ -10,12 +10,14 @@ TODO: block end guess with lfreq, dfreq bisect len/distbase -> lookup table (rolling hash) - . runlen -> lz (rename) bounds on the compressend size (zlib huffcode trick: if same freq then shorter code goes to the one with smaller subtree) overlap freq/code, rbuf/dstwin ? . better dstsize (blocklen <= 32k) . input from in+nin instead of src+srcend + . setting s->state.. + . ushort vs int + . memcpy vs memset spec case tests: empty block @@ -35,8 +37,8 @@ enum { MaxDist = 1 << 15, /* max match distance */ WinSize = 2*MaxDist + MaxMatch, /* input window size */ DstSize = WinSize + 8, /* output window size (worst case compressed block size) */ - RbufSize = 1 << 13, /* lz buffer size */ - RunlenFlag = 1 << 15, /* marks literal run length in lz buffer */ + LzSize = 1 << 13, /* lz buffer size */ + LzLitFlag = 1 << 15, /* marks literal run length in lz buffer */ CodeBits = 16, /* max number of bits in a code + 1 */ EOB = 256, /* end of block symbol */ @@ -55,7 +57,7 @@ typedef struct { typedef struct { ushort n; ushort bits; -} Runlen; +} LzCode; typedef struct { int state; /* prev return value */ @@ -73,9 +75,9 @@ typedef struct { uchar win[WinSize]; /* input window */ ushort head[HashSize]; /* position of hash chain heads */ ushort chain[MaxDist]; /* hash chain */ - Runlen rbuf[RbufSize]; /* literal run length, match len, match dist */ - Runlen *rend; /* current pos in rbuf */ - int run; /* literal count */ + LzCode lzbuf[LzSize]; /* literal run length, match len, match dist */ + LzCode *lz; /* current pos in lzbuf */ + int run; /* literal run length */ int lfreq[Nlitlen]; int dfreq[Ndist]; uchar dstbuf[DstSize]; @@ -314,20 +316,20 @@ static int clencodes(uchar *codes, uchar *extra, uchar *llen, int nlit, uchar *d static void putblock(State *s, ushort *lcode, uchar *llen, ushort *dcode, uchar *dlen) { int n; - Runlen *r; + LzCode *lz; uchar *pc; - for (r = s->rbuf, pc = s->win + s->startpos; r != s->rend; r++) - if (r->bits & RunlenFlag) - for (n = r->n; n > 0; n--, pc++) + for (lz = s->lzbuf, pc = s->win + s->startpos; lz != s->lz; lz++) + if (lz->bits & LzLitFlag) + for (n = lz->n; n > 0; n--, pc++) putbits(s, lcode[*pc], llen[*pc]); else { - pc += lenbase[r->n] + r->bits; - putbits(s, lcode[Nlit + r->n + 1], llen[Nlit + r->n + 1]); - putbits(s, r->bits, lenbits[r->n]); - r++; - putbits(s, dcode[r->n], dlen[r->n]); - putbits(s, r->bits, distbits[r->n]); + pc += lenbase[lz->n] + lz->bits; + putbits(s, lcode[Nlit + lz->n + 1], llen[Nlit + lz->n + 1]); + putbits(s, lz->bits, lenbits[lz->n]); + lz++; + putbits(s, dcode[lz->n], dlen[lz->n]); + putbits(s, lz->bits, distbits[lz->n]); } putbits(s, lcode[EOB], llen[EOB]); } @@ -339,7 +341,7 @@ static void deflate_block(State *s) { ushort lcode[Nlitlen], dcode[Ndist], ccode[Nclen]; int i, c, n, ncodes; int nlit, ndist, nclen; - Runlen *r; + LzCode *lz; uchar *pc; int dynsize, fixsize, uncsize; int blocklen = s->pos - s->startpos; @@ -375,23 +377,23 @@ int dyntree; dynsize += 7; } dyntree = dynsize - 3; - for (r = s->rbuf, pc = s->win + s->startpos; r != s->rend; r++) - if (r->bits & RunlenFlag) - for (n = r->n; n > 0; n--, pc++) { + for (lz = s->lzbuf, pc = s->win + s->startpos; lz != s->lz; lz++) + if (lz->bits & LzLitFlag) + for (n = lz->n; n > 0; n--, pc++) { fixsize += fixllen[*pc]; dynsize += llen[*pc]; } else { - pc += lenbase[r->n] + r->bits; - fixsize += fixllen[Nlit + r->n + 1]; - dynsize += llen[Nlit + r->n + 1]; - fixsize += lenbits[r->n]; - dynsize += lenbits[r->n]; - r++; - fixsize += fixdlen[r->n]; - dynsize += dlen[r->n]; - fixsize += distbits[r->n]; - dynsize += distbits[r->n]; + pc += lenbase[lz->n] + lz->bits; + fixsize += fixllen[Nlit + lz->n + 1]; + dynsize += llen[Nlit + lz->n + 1]; + fixsize += lenbits[lz->n]; + dynsize += lenbits[lz->n]; + lz++; + fixsize += fixdlen[lz->n]; + dynsize += dlen[lz->n]; + fixsize += distbits[lz->n]; + dynsize += distbits[lz->n]; } fixsize += fixllen[EOB]; dynsize += llen[EOB]; @@ -431,7 +433,7 @@ dyntree = dynsize - 3; memcpy(s->dst, s->win + s->startpos, blocklen); s->dst += blocklen; } -fprintf(stderr, "blocklen: %d lzlen: %d dynsize: %d (tree: %d, rate: %.3f) fixsize: %d (rate: %.3f) uncsize: %d (rate: %.3f)\n", blocklen, s->rend - s->rbuf, +fprintf(stderr, "blocklen: %d lzlen: %d dynsize: %d (tree: %d, rate: %.3f) fixsize: %d (rate: %.3f) uncsize: %d (rate: %.3f)\n", blocklen, s->lz - s->lzbuf, dynsize, dyntree, dynsize/(float)blocklen, fixsize, fixsize/(float)blocklen, uncsize, uncsize/(float)blocklen); } @@ -452,9 +454,9 @@ static int bisect(ushort *base, int len, int n) { static void flushlit(State *s) { if (s->run) { - s->rend->bits = RunlenFlag; - s->rend->n = s->run; - s->rend++; + s->lz->bits = LzLitFlag; + s->lz->n = s->run; + s->lz++; s->run = 0; } } @@ -464,14 +466,14 @@ static void recordmatch(State *s, Match m) { flushlit(s); n = bisect(lenbase, Nlen, m.len); - s->rend->n = n; - s->rend->bits = m.len - lenbase[n]; - s->rend++; + s->lz->n = n; + s->lz->bits = m.len - lenbase[n]; + s->lz++; s->lfreq[Nlit + n + 1]++; n = bisect(distbase, Ndist, m.dist); - s->rend->n = n; - s->rend->bits = m.dist - distbase[n]; - s->rend++; + s->lz->n = n; + s->lz->bits = m.dist - distbase[n]; + s->lz++; s->dfreq[n]++; } @@ -481,19 +483,19 @@ static void recordlit(State *s, int c) { } /* multiplicative hash */ -static uint gethash(uchar *p) { +static ushort gethash(uchar *p) { return (0x9e3779b1 * ((p[0]<<16) + (p[1]<<8) + p[2]) >> (32 - HashBits)) % HashSize; } -static ushort addtochain(State *s, ushort pos) { +static ushort addtochain(State *s) { ushort i, h; if (s->avail < MinMatch) /* TODO */ return 0; - h = gethash(s->win + pos); + h = gethash(s->win + s->pos); i = s->head[h]; - s->head[h] = pos; - s->chain[pos % MaxDist] = i; + s->head[h] = s->pos; + s->chain[s->pos % MaxDist] = i; return i; } @@ -538,7 +540,7 @@ static Match getmatch(State *s, int mpos) { static void newblock(State *s) { s->startpos = s->pos; s->dst = s->dstbegin = s->dstbuf; - s->rend = s->rbuf; + s->lz = s->lzbuf; s->run = 0; memset(s->lfreq, 0, sizeof(s->lfreq)); memset(s->dfreq, 0, sizeof(s->dfreq)); @@ -550,7 +552,8 @@ static int fillwin(State *s) { if (s->pos >= 2*MaxDist) { /* shift win */ - memcpy(s->win, s->win + MaxDist, MaxDist + MaxMatch); + memcpy(s->win, s->win + MaxDist, MaxDist); + memcpy(s->win + MaxDist, s->win + 2*MaxDist, MaxMatch); for (n = HashSize; n--;) s->head[n] = s->head[n] > MaxDist ? s->head[n] - MaxDist : 0; for (n = MaxDist; n--;) @@ -592,7 +595,7 @@ static int deflate_state(State *s) { else return s->state; for (;;) { - head = addtochain(s, s->pos); + head = addtochain(s); if (head >= s->pos || s->pos - head >= MaxDist) head = 0; if (head) @@ -608,7 +611,7 @@ static int deflate_state(State *s) { do { s->pos++; s->avail--; - addtochain(s, s->pos); + addtochain(s); } while (--s->prevm.len); } else recordlit(s, s->win[s->pos]); @@ -621,8 +624,8 @@ static int deflate_state(State *s) { putbits(s, 0, 7); return FlateOut; } - if (s->pos - s->startpos >= MaxDist || s->rend - s->rbuf > RbufSize - 3) { -fprintf(stderr, "start %d pos %d avail %d srcavail %d rlen %d\n", s->startpos, s->pos, s->avail, s->srcend - s->src, s->rend - s->rbuf); + if (s->pos - s->startpos >= MaxDist || s->lz - s->lzbuf > LzSize - 3) { +fprintf(stderr, "start %d pos %d avail %d srcavail %d rlen %d\n", s->startpos, s->pos, s->avail, s->srcend - s->src, s->lz - s->lzbuf); flushlit(s); if (s->prevm.len) s->pos--; @@ -658,6 +661,7 @@ static State *alloc_state(void) { s->dst = s->dstbegin = s->dstbuf; s->pos = s->startpos = MaxDist; s->avail = 0; + s->prevm.len = 0; return s; } @@ -695,3 +699,43 @@ int deflate(FlateStream *stream) { } return n; } + +int deflate_callback(int (*r)(void *, int, void *), void *rdata, int (*w)(void *, int, void *), void *wdata) { + State *s; + int len, n; + enum {SrcSize = 1 << 12}; + + s = alloc_state(); + if (!s) + return FlateErr; + s->src = s->srcend = malloc(SrcSize); + if (!s->src) { + free(s); + return FlateErr; + } + s->state = FlateOut; + for (;;) { + n = s->state = deflate_state(s); + switch (n) { + case FlateIn: + len = r(s->src, SrcSize, rdata); + if (len > 0) + s->srcend = s->src + len; + else + s->state = FlateErr; + break; + case FlateOut: + len = w(s->dstbegin, s->dst - s->dstbegin, wdata); + if (len == s->dst - s->dstbegin) + s->dstbegin = s->dst; + else + s->state = FlateErr; + break; + case FlateOk: + case FlateErr: + free(s->src); + free(s); + return n; + } + } +} diff --git a/deflate_example.c b/deflate_example.c @@ -2,6 +2,7 @@ #include <stdio.h> #include "flate.h" +/* compress stream using stream struct interface */ int compress_stream(FILE *in, FILE *out) { FlateStream s; int n, nin, nout; @@ -35,6 +36,44 @@ int compress_stream(FILE *in, FILE *out) { } } +/* compress stream using callback interface */ +struct data { + FILE *f; + uint n; +}; + +int r(void *p, int siz, void *data) { + struct data *d = data; + uint n; + + n = fread(p, 1, siz, d->f); + d->n += n; + return n; +} + +int w(void *p, int siz, void *data) { + struct data *d = data; + uint n; + + n = fwrite(p, 1, siz, d->f); + d->n += n; + return n; +} + +int compress_callback(FILE *in, FILE *out) { + struct data rr; + struct data ww; + int err; + + rr.f = in; + ww.f = out; + rr.n = ww.n = 0; + err = deflate_callback(r, &rr, w, &ww); + fprintf(stderr, "in: %d out: %d err: %d\n", rr.n, ww.n, err); + return err; +} + +/* in memory compression */ int compress_mem(void *src, int srclen, void *dst, int dstlen) { FlateStream s; int n; diff --git a/inflate.c b/inflate.c @@ -648,7 +648,7 @@ int inflate_callback(int (*r)(void *, int, void *), void *rdata, int (*w)(void * if (!s) return FlateErr; s->src = s->srcend = src = malloc(SrcSize); - if (!src) { + if (!s->src) { free(s); return FlateErr; } @@ -656,9 +656,10 @@ int inflate_callback(int (*r)(void *, int, void *), void *rdata, int (*w)(void * for (;;) switch (n) { case FlateIn: - len = r(src, SrcSize, rdata); + s->src = src; + len = r(s->src, SrcSize, rdata); if (len > 0) { - s->srcend = src + len; + s->srcend = s->src + len; n = inflate_state(s); } else n = FlateErr; diff --git a/inflate_example.c b/inflate_example.c @@ -2,6 +2,7 @@ #include <stdio.h> #include "flate.h" +/* decompress stream using stream struct interface */ int decompress_stream(FILE *in, FILE *out) { FlateStream s; int n, nin, nout; @@ -37,6 +38,44 @@ int decompress_stream(FILE *in, FILE *out) { } } +/* decompress stream using callback interface */ +struct data { + FILE *f; + uint n; +}; + +int r(void *p, int siz, void *data) { + struct data *d = data; + uint n; + + n = fread(p, 1, siz, d->f); + d->n += n; + return n; +} + +int w(void *p, int siz, void *data) { + struct data *d = data; + uint n; + + n = fwrite(p, 1, siz, d->f); + d->n += n; + return n; +} + +int decompress_callback(FILE *in, FILE *out) { + struct data rr; + struct data ww; + int err; + + rr.f = in; + ww.f = out; + rr.n = ww.n = 0; + err = inflate_callback(r, &rr, w, &ww); + fprintf(stderr, "in: %d out: %d err: %d\n", rr.n, ww.n, err); + return err; +} + +/* in memory decompression */ int decompress_mem(void *src, int srclen, void *dst, int dstlen) { FlateStream s; int n; @@ -68,5 +107,5 @@ int decompress_mem(void *src, int srclen, void *dst, int dstlen) { } int main(void) { - return decompress_stream(stdin, stdout); + return decompress_callback(stdin, stdout); }