commit 2dee35176d25380361c699d97299b108ae2120e3
parent 1428f6315a1c5918bd4fabd2daaa1eb0145a11a7
Author: nsz@tpx <unknown>
Date: Mon, 10 Aug 2009 14:56:14 +0200
+callback, s/runlen/lz/, todo update
Diffstat:
5 files changed, 182 insertions(+), 59 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,5 +1,5 @@
-#CFLAGS=-g -Wall -ansi -pedantic
-CFLAGS=-O3 -Wall -ansi -pedantic
+CFLAGS=-g -Wall -ansi -pedantic
+#CFLAGS=-O3 -Wall -ansi -pedantic
LDFLAGS=
SRC=inflate.c inflate_simple.c inflate_example.c inflate_callback.c \
deflate.c deflate_simple.c deflate_example.c
@@ -38,7 +38,7 @@ profinf: inflate_example.c inflate.c
profdef: deflate_example.c deflate.c
gcc -O3 -fprofile-arcs -ftest-coverage -pg -g -Wall $+
- ./a.out <g.tar >/dev/null
+ ./a.out <q.tar >/dev/null
gcov -b $+ >/dev/null
gprof a.out >$<.gprof
rm -f a.out *.gcno *.gcda gmon.out
diff --git a/deflate.c b/deflate.c
@@ -10,12 +10,14 @@ TODO:
block end guess with lfreq, dfreq
bisect len/distbase -> lookup table
(rolling hash)
- . runlen -> lz (rename)
bounds on the compressend size
(zlib huffcode trick: if same freq then shorter code goes to the one with smaller subtree)
overlap freq/code, rbuf/dstwin ?
. better dstsize (blocklen <= 32k)
. input from in+nin instead of src+srcend
+ . setting s->state..
+ . ushort vs int
+ . memcpy vs memset
spec case tests:
empty block
@@ -35,8 +37,8 @@ enum {
MaxDist = 1 << 15, /* max match distance */
WinSize = 2*MaxDist + MaxMatch, /* input window size */
DstSize = WinSize + 8, /* output window size (worst case compressed block size) */
- RbufSize = 1 << 13, /* lz buffer size */
- RunlenFlag = 1 << 15, /* marks literal run length in lz buffer */
+ LzSize = 1 << 13, /* lz buffer size */
+ LzLitFlag = 1 << 15, /* marks literal run length in lz buffer */
CodeBits = 16, /* max number of bits in a code + 1 */
EOB = 256, /* end of block symbol */
@@ -55,7 +57,7 @@ typedef struct {
typedef struct {
ushort n;
ushort bits;
-} Runlen;
+} LzCode;
typedef struct {
int state; /* prev return value */
@@ -73,9 +75,9 @@ typedef struct {
uchar win[WinSize]; /* input window */
ushort head[HashSize]; /* position of hash chain heads */
ushort chain[MaxDist]; /* hash chain */
- Runlen rbuf[RbufSize]; /* literal run length, match len, match dist */
- Runlen *rend; /* current pos in rbuf */
- int run; /* literal count */
+ LzCode lzbuf[LzSize]; /* literal run length, match len, match dist */
+ LzCode *lz; /* current pos in lzbuf */
+ int run; /* literal run length */
int lfreq[Nlitlen];
int dfreq[Ndist];
uchar dstbuf[DstSize];
@@ -314,20 +316,20 @@ static int clencodes(uchar *codes, uchar *extra, uchar *llen, int nlit, uchar *d
static void putblock(State *s, ushort *lcode, uchar *llen, ushort *dcode, uchar *dlen) {
int n;
- Runlen *r;
+ LzCode *lz;
uchar *pc;
- for (r = s->rbuf, pc = s->win + s->startpos; r != s->rend; r++)
- if (r->bits & RunlenFlag)
- for (n = r->n; n > 0; n--, pc++)
+ for (lz = s->lzbuf, pc = s->win + s->startpos; lz != s->lz; lz++)
+ if (lz->bits & LzLitFlag)
+ for (n = lz->n; n > 0; n--, pc++)
putbits(s, lcode[*pc], llen[*pc]);
else {
- pc += lenbase[r->n] + r->bits;
- putbits(s, lcode[Nlit + r->n + 1], llen[Nlit + r->n + 1]);
- putbits(s, r->bits, lenbits[r->n]);
- r++;
- putbits(s, dcode[r->n], dlen[r->n]);
- putbits(s, r->bits, distbits[r->n]);
+ pc += lenbase[lz->n] + lz->bits;
+ putbits(s, lcode[Nlit + lz->n + 1], llen[Nlit + lz->n + 1]);
+ putbits(s, lz->bits, lenbits[lz->n]);
+ lz++;
+ putbits(s, dcode[lz->n], dlen[lz->n]);
+ putbits(s, lz->bits, distbits[lz->n]);
}
putbits(s, lcode[EOB], llen[EOB]);
}
@@ -339,7 +341,7 @@ static void deflate_block(State *s) {
ushort lcode[Nlitlen], dcode[Ndist], ccode[Nclen];
int i, c, n, ncodes;
int nlit, ndist, nclen;
- Runlen *r;
+ LzCode *lz;
uchar *pc;
int dynsize, fixsize, uncsize;
int blocklen = s->pos - s->startpos;
@@ -375,23 +377,23 @@ int dyntree;
dynsize += 7;
}
dyntree = dynsize - 3;
- for (r = s->rbuf, pc = s->win + s->startpos; r != s->rend; r++)
- if (r->bits & RunlenFlag)
- for (n = r->n; n > 0; n--, pc++) {
+ for (lz = s->lzbuf, pc = s->win + s->startpos; lz != s->lz; lz++)
+ if (lz->bits & LzLitFlag)
+ for (n = lz->n; n > 0; n--, pc++) {
fixsize += fixllen[*pc];
dynsize += llen[*pc];
}
else {
- pc += lenbase[r->n] + r->bits;
- fixsize += fixllen[Nlit + r->n + 1];
- dynsize += llen[Nlit + r->n + 1];
- fixsize += lenbits[r->n];
- dynsize += lenbits[r->n];
- r++;
- fixsize += fixdlen[r->n];
- dynsize += dlen[r->n];
- fixsize += distbits[r->n];
- dynsize += distbits[r->n];
+ pc += lenbase[lz->n] + lz->bits;
+ fixsize += fixllen[Nlit + lz->n + 1];
+ dynsize += llen[Nlit + lz->n + 1];
+ fixsize += lenbits[lz->n];
+ dynsize += lenbits[lz->n];
+ lz++;
+ fixsize += fixdlen[lz->n];
+ dynsize += dlen[lz->n];
+ fixsize += distbits[lz->n];
+ dynsize += distbits[lz->n];
}
fixsize += fixllen[EOB];
dynsize += llen[EOB];
@@ -431,7 +433,7 @@ dyntree = dynsize - 3;
memcpy(s->dst, s->win + s->startpos, blocklen);
s->dst += blocklen;
}
-fprintf(stderr, "blocklen: %d lzlen: %d dynsize: %d (tree: %d, rate: %.3f) fixsize: %d (rate: %.3f) uncsize: %d (rate: %.3f)\n", blocklen, s->rend - s->rbuf,
+fprintf(stderr, "blocklen: %d lzlen: %d dynsize: %d (tree: %d, rate: %.3f) fixsize: %d (rate: %.3f) uncsize: %d (rate: %.3f)\n", blocklen, s->lz - s->lzbuf,
dynsize, dyntree, dynsize/(float)blocklen, fixsize, fixsize/(float)blocklen, uncsize, uncsize/(float)blocklen);
}
@@ -452,9 +454,9 @@ static int bisect(ushort *base, int len, int n) {
static void flushlit(State *s) {
if (s->run) {
- s->rend->bits = RunlenFlag;
- s->rend->n = s->run;
- s->rend++;
+ s->lz->bits = LzLitFlag;
+ s->lz->n = s->run;
+ s->lz++;
s->run = 0;
}
}
@@ -464,14 +466,14 @@ static void recordmatch(State *s, Match m) {
flushlit(s);
n = bisect(lenbase, Nlen, m.len);
- s->rend->n = n;
- s->rend->bits = m.len - lenbase[n];
- s->rend++;
+ s->lz->n = n;
+ s->lz->bits = m.len - lenbase[n];
+ s->lz++;
s->lfreq[Nlit + n + 1]++;
n = bisect(distbase, Ndist, m.dist);
- s->rend->n = n;
- s->rend->bits = m.dist - distbase[n];
- s->rend++;
+ s->lz->n = n;
+ s->lz->bits = m.dist - distbase[n];
+ s->lz++;
s->dfreq[n]++;
}
@@ -481,19 +483,19 @@ static void recordlit(State *s, int c) {
}
/* multiplicative hash */
-static uint gethash(uchar *p) {
+static ushort gethash(uchar *p) {
return (0x9e3779b1 * ((p[0]<<16) + (p[1]<<8) + p[2]) >> (32 - HashBits)) % HashSize;
}
-static ushort addtochain(State *s, ushort pos) {
+static ushort addtochain(State *s) {
ushort i, h;
if (s->avail < MinMatch) /* TODO */
return 0;
- h = gethash(s->win + pos);
+ h = gethash(s->win + s->pos);
i = s->head[h];
- s->head[h] = pos;
- s->chain[pos % MaxDist] = i;
+ s->head[h] = s->pos;
+ s->chain[s->pos % MaxDist] = i;
return i;
}
@@ -538,7 +540,7 @@ static Match getmatch(State *s, int mpos) {
static void newblock(State *s) {
s->startpos = s->pos;
s->dst = s->dstbegin = s->dstbuf;
- s->rend = s->rbuf;
+ s->lz = s->lzbuf;
s->run = 0;
memset(s->lfreq, 0, sizeof(s->lfreq));
memset(s->dfreq, 0, sizeof(s->dfreq));
@@ -550,7 +552,8 @@ static int fillwin(State *s) {
if (s->pos >= 2*MaxDist) {
/* shift win */
- memcpy(s->win, s->win + MaxDist, MaxDist + MaxMatch);
+ memcpy(s->win, s->win + MaxDist, MaxDist);
+ memcpy(s->win + MaxDist, s->win + 2*MaxDist, MaxMatch);
for (n = HashSize; n--;)
s->head[n] = s->head[n] > MaxDist ? s->head[n] - MaxDist : 0;
for (n = MaxDist; n--;)
@@ -592,7 +595,7 @@ static int deflate_state(State *s) {
else
return s->state;
for (;;) {
- head = addtochain(s, s->pos);
+ head = addtochain(s);
if (head >= s->pos || s->pos - head >= MaxDist)
head = 0;
if (head)
@@ -608,7 +611,7 @@ static int deflate_state(State *s) {
do {
s->pos++;
s->avail--;
- addtochain(s, s->pos);
+ addtochain(s);
} while (--s->prevm.len);
} else
recordlit(s, s->win[s->pos]);
@@ -621,8 +624,8 @@ static int deflate_state(State *s) {
putbits(s, 0, 7);
return FlateOut;
}
- if (s->pos - s->startpos >= MaxDist || s->rend - s->rbuf > RbufSize - 3) {
-fprintf(stderr, "start %d pos %d avail %d srcavail %d rlen %d\n", s->startpos, s->pos, s->avail, s->srcend - s->src, s->rend - s->rbuf);
+ if (s->pos - s->startpos >= MaxDist || s->lz - s->lzbuf > LzSize - 3) {
+fprintf(stderr, "start %d pos %d avail %d srcavail %d rlen %d\n", s->startpos, s->pos, s->avail, s->srcend - s->src, s->lz - s->lzbuf);
flushlit(s);
if (s->prevm.len)
s->pos--;
@@ -658,6 +661,7 @@ static State *alloc_state(void) {
s->dst = s->dstbegin = s->dstbuf;
s->pos = s->startpos = MaxDist;
s->avail = 0;
+ s->prevm.len = 0;
return s;
}
@@ -695,3 +699,43 @@ int deflate(FlateStream *stream) {
}
return n;
}
+
+int deflate_callback(int (*r)(void *, int, void *), void *rdata, int (*w)(void *, int, void *), void *wdata) {
+ State *s;
+ int len, n;
+ enum {SrcSize = 1 << 12};
+
+ s = alloc_state();
+ if (!s)
+ return FlateErr;
+ s->src = s->srcend = malloc(SrcSize);
+ if (!s->src) {
+ free(s);
+ return FlateErr;
+ }
+ s->state = FlateOut;
+ for (;;) {
+ n = s->state = deflate_state(s);
+ switch (n) {
+ case FlateIn:
+ len = r(s->src, SrcSize, rdata);
+ if (len > 0)
+ s->srcend = s->src + len;
+ else
+ s->state = FlateErr;
+ break;
+ case FlateOut:
+ len = w(s->dstbegin, s->dst - s->dstbegin, wdata);
+ if (len == s->dst - s->dstbegin)
+ s->dstbegin = s->dst;
+ else
+ s->state = FlateErr;
+ break;
+ case FlateOk:
+ case FlateErr:
+ free(s->src);
+ free(s);
+ return n;
+ }
+ }
+}
diff --git a/deflate_example.c b/deflate_example.c
@@ -2,6 +2,7 @@
#include <stdio.h>
#include "flate.h"
+/* compress stream using stream struct interface */
int compress_stream(FILE *in, FILE *out) {
FlateStream s;
int n, nin, nout;
@@ -35,6 +36,44 @@ int compress_stream(FILE *in, FILE *out) {
}
}
+/* compress stream using callback interface */
+struct data {
+ FILE *f;
+ uint n;
+};
+
+int r(void *p, int siz, void *data) {
+ struct data *d = data;
+ uint n;
+
+ n = fread(p, 1, siz, d->f);
+ d->n += n;
+ return n;
+}
+
+int w(void *p, int siz, void *data) {
+ struct data *d = data;
+ uint n;
+
+ n = fwrite(p, 1, siz, d->f);
+ d->n += n;
+ return n;
+}
+
+int compress_callback(FILE *in, FILE *out) {
+ struct data rr;
+ struct data ww;
+ int err;
+
+ rr.f = in;
+ ww.f = out;
+ rr.n = ww.n = 0;
+ err = deflate_callback(r, &rr, w, &ww);
+ fprintf(stderr, "in: %d out: %d err: %d\n", rr.n, ww.n, err);
+ return err;
+}
+
+/* in memory compression */
int compress_mem(void *src, int srclen, void *dst, int dstlen) {
FlateStream s;
int n;
diff --git a/inflate.c b/inflate.c
@@ -648,7 +648,7 @@ int inflate_callback(int (*r)(void *, int, void *), void *rdata, int (*w)(void *
if (!s)
return FlateErr;
s->src = s->srcend = src = malloc(SrcSize);
- if (!src) {
+ if (!s->src) {
free(s);
return FlateErr;
}
@@ -656,9 +656,10 @@ int inflate_callback(int (*r)(void *, int, void *), void *rdata, int (*w)(void *
for (;;)
switch (n) {
case FlateIn:
- len = r(src, SrcSize, rdata);
+ s->src = src;
+ len = r(s->src, SrcSize, rdata);
if (len > 0) {
- s->srcend = src + len;
+ s->srcend = s->src + len;
n = inflate_state(s);
} else
n = FlateErr;
diff --git a/inflate_example.c b/inflate_example.c
@@ -2,6 +2,7 @@
#include <stdio.h>
#include "flate.h"
+/* decompress stream using stream struct interface */
int decompress_stream(FILE *in, FILE *out) {
FlateStream s;
int n, nin, nout;
@@ -37,6 +38,44 @@ int decompress_stream(FILE *in, FILE *out) {
}
}
+/* decompress stream using callback interface */
+struct data {
+ FILE *f;
+ uint n;
+};
+
+int r(void *p, int siz, void *data) {
+ struct data *d = data;
+ uint n;
+
+ n = fread(p, 1, siz, d->f);
+ d->n += n;
+ return n;
+}
+
+int w(void *p, int siz, void *data) {
+ struct data *d = data;
+ uint n;
+
+ n = fwrite(p, 1, siz, d->f);
+ d->n += n;
+ return n;
+}
+
+int decompress_callback(FILE *in, FILE *out) {
+ struct data rr;
+ struct data ww;
+ int err;
+
+ rr.f = in;
+ ww.f = out;
+ rr.n = ww.n = 0;
+ err = inflate_callback(r, &rr, w, &ww);
+ fprintf(stderr, "in: %d out: %d err: %d\n", rr.n, ww.n, err);
+ return err;
+}
+
+/* in memory decompression */
int decompress_mem(void *src, int srclen, void *dst, int dstlen) {
FlateStream s;
int n;
@@ -68,5 +107,5 @@ int decompress_mem(void *src, int srclen, void *dst, int dstlen) {
}
int main(void) {
- return decompress_stream(stdin, stdout);
+ return decompress_callback(stdin, stdout);
}