commit 27c7698383f93c1358c170b6201f3b1d032a483d
parent 11d9b69b27faa3b4cdc3dff28364871582c8ee53
Author: nsz <nszabolcs@gmail.com>
Date: Sat, 18 Jul 2009 14:18:32 +0200
renames, code clean up
Diffstat:
deflate_simple.c | | | 260 | ++++++++++++++++++++++++++++++++++++++++--------------------------------------- |
1 file changed, 131 insertions(+), 129 deletions(-)
diff --git a/deflate_simple.c b/deflate_simple.c
@@ -7,6 +7,10 @@ TODO:
hufflen: parent+heap only
match: check backwards
block end: lcount, dcount
+ bisect len/dist base -> lookup table
+ don't store lz extra nbits: lenbits[lensym]
+ xcode can be calculated into array xfreq
+ stop after good match
*/
typedef unsigned char uchar;
@@ -16,15 +20,17 @@ typedef unsigned int uint;
enum {
MinMatch = 3,
MaxMatch = 258,
+ BigDist = 1 << 12,
MaxChainStep = 64,
- HashSize = 2039,
+ HashBits = 12,
+ HashSize = 1 << HashBits,
WinSize = 1 << 15,
+ BlockLen = 1 << 15,
+ RunlenSize = (BlockLen + 2 * MaxMatch) * 3/4,
+ RunlenFlag = 1 << 30,
- LzRunlen = 1<<30
-};
-
-enum {
CodeBits = 16, /* max number of bits in a code + 1 */
+ EOB = 256, /* end of block symbol */
Nlit = 256, /* number of lit codes */
Nlen = 29, /* number of len codes */
Nlitlen = Nlit + Nlen + 3, /* litlen codes + block end + 2 unused */
@@ -41,30 +47,22 @@ typedef struct {
ushort head[HashSize]; /* position of hash chain heads */
ushort chain[WinSize]; /* hash chains */
int pos; /* position in hash chain */
-
- uchar *src;
- int lzbuf[WinSize]; /* literal runlength and (len,dist) sequence */
- int *lzp;
- int runlen;
-
- int lfreq[Nlitlen];
- int dfreq[Ndist];
-/*
- int lcount;
- int dcount;
-*/
-
+ uchar *src; /* input (block start) */
uchar *dst; /* compressed output */
uint bits;
int nbits;
-
+ int rbuf[RunlenSize]; /* literal run length and (len,dist) */
+ int *rend; /* current pos in rbuf */
+ int run; /* literal count */
+ int lfreq[Nlitlen];
+ int dfreq[Ndist];
int lastblock;
} State;
-static uchar fixed_llen[Nlitlen]; /* fixed lit/len huffman code tree */
-static ushort fixed_lcode[Nlitlen];
-static uchar fixed_dlen[Ndist]; /* fixed distance huffman code tree */
-static ushort fixed_dcode[Ndist];
+static uchar fixllen[Nlitlen]; /* fixed lit/len huffman code tree */
+static ushort fixlcode[Nlitlen];
+static uchar fixdlen[Ndist]; /* fixed distance huffman code tree */
+static ushort fixdcode[Ndist];
/* base offset and extra bits tables */
static uchar lenbits[Nlen] = {
@@ -294,30 +292,30 @@ static int clencodes(int *codes, int *extra, uchar *llen, int nlit, uchar *dlen,
}
static void putblock(State *s, ushort *lcode, uchar *llen, ushort *dcode, uchar *dlen) {
- int c, lit;
- int *p;
- uchar *plit;
-
- for (p = s->lzbuf, plit = s->src; p != s->lzp;) {
- c = *p++;
- if (c >= LzRunlen) {
- c %= LzRunlen;
- while (c--) {
- putbits(s, lcode[*plit], llen[*plit]);
- plit++;
+ int n, k;
+ int *pn;
+ uchar *pc;
+
+ for (pn = s->rbuf, pc = s->src; pn != s->rend;) {
+ n = *pn++;
+ if (n & RunlenFlag) {
+ n %= RunlenFlag;
+ while (n--) {
+ putbits(s, lcode[*pc], llen[*pc]);
+ pc++;
}
} else {
- lit = (c & 0xff) + Nlit + 1;
- putbits(s, lcode[lit], llen[lit]); /* TODO: lenbits[] */
- putbits(s, c >> 16, (c >> 8) & 0xff);
- plit += lenbase[c & 0xff] + (c >> 16);
- c = *p++;
- putbits(s, dcode[c & 0xff], dlen[c & 0xff]);
- putbits(s, c >> 16, (c >> 8) & 0xff);
+ pc += lenbase[n >> 16] + (n & 0xffff);
+ k = n >> 16;
+ putbits(s, lcode[Nlit + k + 1], llen[Nlit + k + 1]);
+ putbits(s, n & 0xffff, lenbits[k]);
+ n = *pn++;
+ k = n >> 16;
+ putbits(s, dcode[k], dlen[k]);
+ putbits(s, n & 0xffff, distbits[k]);
}
}
- /* EOB */
- putbits(s, lcode[256], llen[256]);
+ putbits(s, lcode[EOB], llen[EOB]);
}
static void deflate_block(State *s, int blocklen) {
@@ -325,33 +323,33 @@ static void deflate_block(State *s, int blocklen) {
uchar llen[Nlitlen], dlen[Ndist], clen[Nclen];
ushort lcode[Nlitlen], dcode[Ndist], ccode[Nclen];
int codes[Nlitlen + Ndist], extra[Nlitlen + Ndist];
- int i, c, nc;
+ int i, c, n, ncodes;
int nlit, ndist, nclen;
int dynsize, fixsize, uncsize;
- int *p;
- uchar *plit;
+ int *pn;
+ uchar *pc;
int dyntree;
+ /* calc dynamic codes */
hufflens(llen, s->lfreq, Nlitlen, CodeBits-1);
hufflens(dlen, s->dfreq, Ndist, CodeBits-1);
huffcodes(lcode, llen, Nlitlen);
huffcodes(dcode, dlen, Ndist);
for (nlit = Nlitlen; nlit > Nlit && llen[nlit-1] == 0; nlit--);
for (ndist = Ndist; ndist > 1 && dlen[ndist-1] == 0; ndist--);
- nc = clencodes(codes, extra, llen, nlit, dlen, ndist);
+ ncodes = clencodes(codes, extra, llen, nlit, dlen, ndist);
memset(cfreq, 0, sizeof(cfreq));
- for (i = 0; i < nc; i++)
+ for (i = 0; i < ncodes; i++)
cfreq[codes[i]]++;
hufflens(clen, cfreq, Nclen, 7);
huffcodes(ccode, clen, Nclen);
for (nclen = Nclen; nclen > 4 && clen[clenorder[nclen-1]] == 0; nclen--);
/* calc size */
- uncsize = 3 + 16 + 8*blocklen + (16 - 3 - s->nbits)%8; /* byte aligned */
+ uncsize = 3 + 16 + 8 * blocklen + (16 - 3 - s->nbits) % 8; /* byte aligned */
fixsize = 3;
- dynsize = 3 + 5 + 5 + 4;
- dynsize += 3 * nclen;
- for (i = 0; i < nc; i++) {
+ dynsize = 3 + 5 + 5 + 4 + 3 * nclen;
+ for (i = 0; i < ncodes; i++) {
c = codes[i];
dynsize += clen[c];
if (c == 16)
@@ -362,31 +360,32 @@ int dyntree;
dynsize += 7;
}
dyntree = dynsize - 3;
-/* TODO: */
- for (p = s->lzbuf, plit = s->src; p != s->lzp;) {
- c = *p++;
- if (c >= LzRunlen) {
- c %= LzRunlen;
- while (c--) {
- fixsize += fixed_llen[*plit];
- dynsize += llen[*plit++];
+ for (pn = s->rbuf, pc = s->src; pn != s->rend;) {
+ n = *pn++;
+ if (n & RunlenFlag) {
+ n %= RunlenFlag;
+ while (n--) {
+ fixsize += fixllen[*pc];
+ dynsize += llen[*pc++];
}
} else {
- fixsize += fixed_llen[(c & 0xff) + Nlit + 1];
- fixsize += (c >> 8) & 0xff;
- dynsize += llen[(c & 0xff) + Nlit + 1];
- dynsize += (c >> 8) & 0xff;
- plit += lenbase[c & 0xff] + (c >> 16);
- c = *p++;
- fixsize += fixed_dlen[c & 0xff];
- fixsize += (c >> 8) & 0xff;
- dynsize += dlen[c & 0xff];
- dynsize += (c >> 8) & 0xff;
+ pc += lenbase[n >> 16] + (n & 0xffff);
+ n >>= 16;
+ fixsize += fixllen[Nlit + n + 1];
+ dynsize += llen[Nlit + n + 1];
+ fixsize += lenbits[n];
+ dynsize += lenbits[n];
+ n = *pn++ >> 16;
+ fixsize += fixdlen[n];
+ dynsize += dlen[n];
+ fixsize += distbits[n];
+ dynsize += distbits[n];
}
}
- fixsize += fixed_llen[256];
- dynsize += llen[256];
+ fixsize += fixllen[EOB];
+ dynsize += llen[EOB];
+ /* emit block */
putbits(s, s->lastblock, 1);
if (dynsize < fixsize && dynsize < uncsize) {
/* dynamic code */
@@ -394,10 +393,9 @@ dyntree = dynsize - 3;
putbits(s, nlit - 257, 5);
putbits(s, ndist - 1, 5);
putbits(s, nclen - 4, 4);
- /* huffman code trees */
for (i = 0; i < nclen; i++)
putbits(s, clen[clenorder[i]], 3);
- for (i = 0; i < nc; i++) {
+ for (i = 0; i < ncodes; i++) {
c = codes[i];
putbits(s, ccode[c], clen[c]);
if (c == 16)
@@ -407,12 +405,11 @@ dyntree = dynsize - 3;
if (c == 18)
putbits(s, extra[i], 7);
}
- /* block data + eob */
putblock(s, lcode, llen, dcode, dlen);
} else if (fixsize < uncsize) {
/* fixed code */
putbits(s, 1, 2);
- putblock(s, fixed_lcode, fixed_llen, fixed_dcode, fixed_dlen);
+ putblock(s, fixlcode, fixllen, fixdcode, fixdlen);
} else {
/* uncompressed */
putbits(s, 0, 2);
@@ -423,16 +420,10 @@ dyntree = dynsize - 3;
memcpy(s->dst, s->src, blocklen);
s->dst += blocklen;
}
-fprintf(stderr, "blocklen: %d lzlen: %d dynsize: %d (tree: %d, rate: %.3f) fixsize: %d (rate: %.3f) uncsize: %d (rate: %.3f)\n", blocklen, s->lzp - s->lzbuf,
+fprintf(stderr, "blocklen: %d lzlen: %d dynsize: %d (tree: %d, rate: %.3f) fixsize: %d (rate: %.3f) uncsize: %d (rate: %.3f)\n", blocklen, s->rend - s->rbuf,
dynsize, dyntree, dynsize/(float)blocklen, fixsize, fixsize/(float)blocklen, uncsize, uncsize/(float)blocklen);
}
-
-static void recordlit(State *s, ushort lit) {
- s->runlen++;
- s->lfreq[lit]++;
-}
-
static int bisect(ushort *base, int len, int n) {
int lo = 0;
int hi = len;
@@ -449,23 +440,43 @@ static int bisect(ushort *base, int len, int n) {
}
static void recordmatch(State *s, Match m) {
- int i;
+ int n;
- if (s->runlen) {
- *s->lzp++ = s->runlen | LzRunlen;
- s->runlen = 0;
+ if (s->run) {
+ *s->rend++ = s->run | RunlenFlag;
+ s->run = 0;
}
- i = bisect(lenbase, Nlen, m.len);
- *s->lzp++ = i | lenbits[i] << 8 | (m.len - lenbase[i]) << 16;
- s->lfreq[Nlit + i + 1]++; /* TODO: no +1, lextra, dextra */
- i = bisect(distbase, Ndist, m.dist);
- *s->lzp++ = i | distbits[i] << 8 | (m.dist - distbase[i]) << 16;
- s->dfreq[i]++;
+ n = bisect(lenbase, Nlen, m.len);
+ *s->rend++ = n << 16 | (m.len - lenbase[n]);
+ s->lfreq[Nlit + n + 1]++;
+ n = bisect(distbase, Ndist, m.dist);
+ *s->rend++ = n << 16 | (m.dist - distbase[n]);
+ s->dfreq[n]++;
+}
+
+static void recordlit(State *s, int c) {
+ s->run++;
+ s->lfreq[c]++;
+}
+
+/* 32 bit multiplicative hash */
+static uint gethash(uchar *p) {
+ return 2654435761U * ((p[0]<<16) + (p[1]<<8) + p[2]) >> (32 - HashBits);
}
-static short gethash(uchar *p) {
- return (257 * p[0] + 263 * p[1] + 269 * p[2]) % HashSize;
+/*
+static uint murmurhash(uchar *p) {
+ enum {m = 0x5bd1e995};
+ uint h = 0x6789abcd;
+
+ h ^= (p[2] << 16) + (p[1] << 8) + p[0];
+ h *= m;
+ h ^= h >> 13;
+ h *= m;
+ h ^= h >> 15;
+ return h % HashSize;
}
+*/
static Match getmatch(State *s, uchar *src, int len) {
int i, j;
@@ -495,57 +506,48 @@ static Match getmatch(State *s, uchar *src, int len) {
}
pos = s->chain[pos];
}
- if (m.len < MinMatch || (m.len == MinMatch && m.dist > 1<<12))
+ if (m.len < MinMatch || (m.len == MinMatch && m.dist > BigDist))
m.len = 0;
return m;
}
-static void lzinit(State *s) {
+static void deflate_init(State *s) {
int i;
- for (i = 0; i < WinSize; i++)
- s->chain[i] = 0;
- for (i = 0; i < HashSize; i++)
- s->head[i] = 0;
- s->pos = 0;
+ memset(s->chain, 0, sizeof(s->chain));
+ memset(s->head, 0, sizeof(s->head));
+ s->pos = s->bits = s->nbits = s->lastblock = 0;
for (i = 0; i < 144; i++)
- fixed_llen[i] = 8;
+ fixllen[i] = 8;
for (; i < 256; i++)
- fixed_llen[i] = 9;
+ fixllen[i] = 9;
for (; i < 280; i++)
- fixed_llen[i] = 7;
+ fixllen[i] = 7;
for (; i < Nlitlen; i++)
- fixed_llen[i] = 8;
- huffcodes(fixed_lcode, fixed_llen, Nlitlen);
+ fixllen[i] = 8;
for (i = 0; i < Ndist; i++)
- fixed_dlen[i] = 5;
- huffcodes(fixed_dcode, fixed_dlen, Ndist);
- s->bits = 0;
- s->nbits = 0;
- s->lastblock = 0;
+ fixdlen[i] = 5;
+ huffcodes(fixlcode, fixllen, Nlitlen);
+ huffcodes(fixdcode, fixdlen, Ndist);
}
static void newblock(State *s) {
- int i;
-
- s->lzp = s->lzbuf;
- s->runlen = 0;
- for (i = 0; i < Nlitlen; i++)
- s->lfreq[i] = 0;
- for (i = 0; i < Ndist; i++)
- s->dfreq[i] = 0;
- s->lfreq[256]++; /* EOB */
+ s->rend = s->rbuf;
+ s->run = 0;
+ memset(s->lfreq, 0, sizeof(s->lfreq));
+ memset(s->dfreq, 0, sizeof(s->dfreq));
+ s->lfreq[EOB]++;
}
int deflate(uchar *src, int len, uchar *dst) {
State s;
int step;
- int hash;
+ uint hash;
Match m;
Match prevm = {0, 0};
uchar prevc = 0;
- lzinit(&s);
+ deflate_init(&s);
newblock(&s);
s.dst = dst;
s.src = src;
@@ -554,9 +556,9 @@ int deflate(uchar *src, int len, uchar *dst) {
if (m.len > prevm.len) {
if (prevm.len)
recordlit(&s, prevc);
+ step = 1;
prevm = m;
prevc = *src;
- step = 1;
} else if (prevm.len) {
recordmatch(&s, prevm);
step = prevm.len - 1;
@@ -576,16 +578,16 @@ int deflate(uchar *src, int len, uchar *dst) {
len--;
step--;
}
- if (prevm.len == 0 && src - s.src > 1<<15) {
- if (s.runlen)
- *s.lzp++ = s.runlen | LzRunlen;
+ if (prevm.len == 0 && src - s.src > BlockLen) {
+ if (s.run)
+ *s.rend++ = s.run | RunlenFlag;
deflate_block(&s, src - s.src);
newblock(&s);
s.src = src;
}
}
- if (s.runlen)
- *s.lzp++ = s.runlen | LzRunlen;
+ if (s.run)
+ *s.rend++ = s.run | RunlenFlag;
s.lastblock = 1;
deflate_block(&s, src - s.src);
putbits(&s, 0, 7);