commit a19449abc842b601ab7903089f6e87352c957b97
parent 2d40c927d7473c92826756c595461756bfdee095
Author: nsz <nszabolcs@gmail.com>
Date: Sun, 21 Jun 2009 21:23:24 +0200
deflate_simple wip
Diffstat:
deflate_simple.c | | | 692 | ++++++++++++++++++++++++++++++++++++------------------------------------------- |
1 file changed, 314 insertions(+), 378 deletions(-)
diff --git a/deflate_simple.c b/deflate_simple.c
@@ -1,17 +1,19 @@
#include <stdlib.h>
#include <string.h>
+#include <stdio.h>
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef unsigned int uint;
enum {
- Nmatch = 32,
+ MinMatch = 3,
+ MaxMatch = 258, /* TODO: unused */
+ MaxChainStep = 64,
HashSize = 2039,
- HashChars = 3,
+ HashChars = 3, /* TODO: unused */
WinSize = 1 << 15,
- WinMask = WinSize - 1,
- SymSize = 1 << 16,
+ SymSize = 1 << 16
};
enum {
@@ -29,31 +31,20 @@ typedef struct {
} Match;
typedef struct {
- short next;
- short prev; /* index within s->win */
- short hash;
-} Entry;
-
-typedef struct {
uchar *len; /* bit length of the code */
ushort *code;
} Huff;
typedef struct {
- short tab[HashSize]; /* position of hash chain heads */
- int pos; /* position in win and data */
- Entry win[WinSize]; /* hash chains */
- uchar data[WinSize]; /* uncompressed character data */
- uchar pending[HashChars]; /* characters from previous compress */
- int npending;
-
- ushort *symbuf; /* lz77 literal and (len,dist) sequence */
+ ushort head[HashSize]; /* position of hash chain heads */
+ ushort chain[WinSize]; /* hash chains */
+ int pos; /* position in hash chain */
+
+ ushort symbuf[SymSize]; /* lz77 literal and (len,dist) sequence */
int symstart;
int symlen;
- uchar *outbuf; /* encoded output */
- int outpos;
- int outlen;
+ uchar *dst; /* compressed output */
uint bits;
int nbits;
@@ -69,9 +60,11 @@ typedef struct {
Huff clen;
} State;
+#if 0
/* TODO: globals.. */
static Huff lhuff; /* fixed lit/len huffman code tree */
static Huff dhuff; /* fixed distance huffman code tree */
+#endif
/* base offset and extra bits tables */
static uchar lenbits[Nlen] = {
@@ -92,154 +85,97 @@ static uchar clenorder[Nclen] = {
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
};
-static short hash_chars(uchar *p) {
+static short gethash(uchar *p) {
return (257 * p[0] + 263 * p[1] + 269 * p[2]) % HashSize;
}
-static int lz_init(State *s) {
+static void lzinit(State *s) {
int i;
for (i = 0; i < WinSize; i++)
- s->win[i].next = s->win[i].prev = s->win[i].hash = -1;
+ s->chain[i] = 0;
for (i = 0; i < HashSize; i++)
- s->tab[i] = -1;
+ s->head[i] = 0;
s->pos = 0;
- s->npending = 0;
- return 1;
-}
-
-static void lz_advance(State *s, uchar c, int hash) {
- int pos = s->pos;
- int next;
-
- /* remove the hash entry at pos from its chain */
- if (s->win[pos].prev != -1)
- s->win[s->win[pos].prev].next = -1;
- else if (s->win[pos].hash != -1)
- s->tab[s->win[pos].hash] = -1;
-
- /* create a new entry at pos */
- s->win[pos].hash = hash;
- s->win[pos].prev = -1;
- s->win[pos].next = next = s->tab[hash];
- s->tab[hash] = pos;
- if (next != -1)
- s->win[next].prev = pos;
- s->data[pos] = c;
- s->pos = (pos + 1) & WinMask;
}
-/* s->data[]: past, s->data[s->pos]: present, data[]: future */
-static uchar getbyte(State *s, uchar *data, int n) {
- return n < 0 ? s->data[(s->pos + n) & WinMask] : data[n];
-}
-
-static void literal(State *s, ushort lit);
-static void match(State *s, ushort dist, ushort len);
-
-static void lz_compress(State *s, uchar *data, int len) {
- int i, hash, dist, off, nmatch, matchlen, nadvance;
- Match defermatch, matches[Nmatch];
- int deferchr;
+static Match getmatch(State *s, uchar *src, int len) {
+ int i, j;
int pos;
-
- /* add pending characters from previous compress */
- for (i = 0; i < s->npending; i++) {
- uchar a[HashChars];
- int j;
-
- if (len + s->npending - i < HashChars) {
- for (j = i; j < s->npending; j++)
- s->pending[j - i] = s->pending[j];
- break;
+ int dist;
+ int lastdist = 0;
+ Match m = {0, 0};
+
+ if (len >= MinMatch) {
+ pos = s->head[gethash(src)];
+ for (i = 0; i < MaxChainStep; i++) {
+ dist = (WinSize + s->pos - pos) % WinSize;
+ if (dist <= lastdist)
+ break;
+ lastdist = dist;
+ for (j = 0; j < len; j++)
+ if (src[j] != src[j - dist])
+ break;
+ if (j > m.len) {
+ m.dist = dist;
+ m.len = j;
+ }
+ pos = s->chain[pos];
}
- for (j = 0; j < HashChars; j++)
- a[j] = i + j < s->npending ? s->pending[i + j] : data[i + j - s->npending];
- lz_advance(s, a[0], hash_chars(a));
+ if (m.len < MinMatch)
+ m.len = 0;
}
- s->npending -= i;
+ return m;
+}
+
+static void emitlit(State *s, ushort lit);
+static void emitmatch(State *s, Match m);
+
+static void lzcompress(State *s, uchar *src, int len) {
+ int step;
+ int hash;
+ Match m;
+ Match prevm = {0, 0};
+ uchar prevc = '\0';
- defermatch.len = 0;
- deferchr = '\0';
while (len > 0) {
- if (len >= HashChars) {
- /* find matches at least HashChars long */
- hash = hash_chars(data);
- nmatch = 0;
- for (pos = s->tab[hash]; pos != -1; pos = s->win[pos].next) {
- /* dist = 1 if pos == s->pos - 1 */
- /* dist = WinSize if pos == s->pos */
- dist = WinSize - (pos + WinSize - s->pos) & WinMask;
- for (i = 0; i < HashChars; i++)
- if (data[i] != getbyte(s, data, i - dist))
- break;
- if (i == HashChars) {
- matches[nmatch].dist = dist;
- matches[nmatch].len = HashChars;
- if (++nmatch >= Nmatch)
- break;
- }
- }
- } else {
- nmatch = 0;
- hash = -1;
- }
- if (nmatch > 0) {
- /* nmatch potential matches, we assume longer match is better */
- matchlen = HashChars;
- while (matchlen < len) {
- int j;
-
- for (i = j = 0; i < nmatch; i++)
- if (getbyte(s, data, matchlen) == getbyte(s, data, matchlen - matches[i].dist))
- matches[j++] = matches[i];
- if (j == 0)
- break;
- matchlen++;
- nmatch = j;
- }
- /* among the longest matches favour the shorter distance (matches[0]) */
- matches[0].len = matchlen;
- if (defermatch.len > 0) {
- if (matches[0].len > defermatch.len + 1) {
- /* We have a better match. Emit the deferred char,
- * and defer this match. */
- literal(s, deferchr);
- defermatch = matches[0];
- deferchr = data[0];
- nadvance = 1;
+ m = getmatch(s, src, len);
+ if (m.len > 0)
+ if (prevm.len > 0) {
+ if (m.len > prevm.len + 1) {
+ emitlit(s, prevc);
+ prevm = m;
+ prevc = *src;
+ step = 1;
} else {
- /* We don't have a better match. Do the deferred one. */
- match(s, defermatch.dist, defermatch.len);
- nadvance = defermatch.len - 1;
- defermatch.len = 0;
+ emitmatch(s, prevm);
+ step = prevm.len - 1;
+ prevm.len = 0;
}
} else {
- /* There was no deferred match. Defer this one. */
- defermatch = matches[0];
- deferchr = data[0];
- nadvance = 1;
+ prevm = m;
+ prevc = *src;
+ step = 1;
}
- } else {
- /* no matches, emit the deferred match or a literal. */
- if (defermatch.len > 0) {
- match(s, defermatch.dist, defermatch.len);
- nadvance = defermatch.len - 1;
- defermatch.len = 0;
+ else
+ if (prevm.len > 0) {
+ emitmatch(s, prevm);
+ step = prevm.len - 1;
+ prevm.len = 0;
} else {
- literal(s, data[0]);
- nadvance = 1;
+ emitlit(s, *src);
+ step = 1;
}
- }
- /* advance winpos and keep the window and hash chains consistent. */
- while (nadvance > 0) {
- if (len >= HashChars)
- lz_advance(s, *data, hash_chars(data));
- else
- s->pending[s->npending++] = *data;
- data++;
+ while (step > 0) {
+ if (len >= MinMatch) {
+ hash = gethash(src);
+ s->chain[s->pos] = s->head[hash];
+ s->head[hash] = s->pos;
+ s->pos = (s->pos + 1) % WinSize;
+ }
+ src++;
len--;
- nadvance--;
+ step--;
}
}
}
@@ -255,44 +191,32 @@ static uint revcode(uint c, int n) {
return r;
}
-/* huffman codes from code lengths, return max code length */
-static int huffcodes(ushort *codes, const uchar *lens, int n) {
- int c[CodeBits];
- int i, left, max;
+/* build huffman code tree from code lengths */
+static void huffcodes(ushort *codes, const uchar *lens, int n) {
+ int count[CodeBits];
+ int offs[CodeBits];
+ int i, code;
- /* count code lengths */
+ /* count code lengths and calc first code (offs) for each length */
for (i = 0; i < CodeBits; i++)
- c[i] = 0;
+ count[i] = 0;
for (i = 0; i < n; i++)
- c[lens[i]]++;
-
- /* calc max code lengths */
- for (i = CodeBits - 1; i > 0; i--)
- if (c[i])
- break;
- max = i;
-
- /* check if length is over-subscribed or incomplete */
- for (left = 2, i = 1; i <= max; i++) {
- left -= c[i];
- if (left < 0)
- /* over-subscribed */
- return -1;
- left <<= 1;
+ count[lens[i]]++;
+ count[0] = 0;
+ for (code = i = 0; i < CodeBits; i++) {
+ offs[i] = code;
+ code += count[i];
+ if (code > (1 << i))
+ /* over-subscribed */return;
+ code <<= 1;
}
- if (left > 0)
- /* incomlpete */;
-
- /* calc first code for each length */
- for (i = 2; i <= max; i++)
- c[i] += c[i-1];
-
+ if (code < (1 << i))
+ /* incomplete */;
for (i = 0; i < n; i++)
if (lens[i])
- codes[i] = revcode(c[lens[i]]++, lens[i]);
+ codes[i] = revcode(offs[lens[i]]++, lens[i]);
else
codes[i] = 0;
- return max;
}
/* min-heap for buildhuf() */
@@ -364,23 +288,27 @@ static void hufflens(const int *freqs, uchar *lens, int nsym) {
if (nsym > Nlitlen)
/* TODO: error */;
- for (n = 0; n < nsym; n++)
+ for (len = n = 0; n < nsym; n++)
if (freqs[n] > 0)
- heappush(heap, n, freqs[n], n);
+ heappush(heap, len++, freqs[n], n);
/* build code tree */
- for (len = n; len > 1; n++) {
+ while (len > 1) {
heappop(heap, len--, &wi, &i);
heappop(heap, len--, &wj, &j);
parent[i] = n;
parent[j] = n;
- heappush(heap, len++, wi + wj, n);
+ heappush(heap, len++, wi + wj, n++);
+ /* TODO: strore used syms at heap end */
}
/* calc code length */
clen[--n] = 0;
while (n--)
- clen[n] = 1 + clen[parent[n]];
+ if (n >= nsym || freqs[n])
+ clen[n] = 1 + clen[parent[n]];
+ else
+ clen[n] = 0;
/* limit code length to 255 */
for (n = 0; n < nsym; n++)
@@ -470,17 +398,15 @@ static void hufflenslimit(int *freqs, uchar *lens, int nsym, int limit) {
}
/* adding < 16 bits to outbuf */
-static int outbits(State *s, uint bits, int nbits) {
+static void outbits(State *s, uint bits, int nbits) {
+fprintf(stderr, "b:%3d n:%3d\n", bits, nbits);
s->bits |= bits << s->nbits;
s->nbits += nbits;
while (s->nbits >= 8) {
- if (s->outpos == s->outlen)
- return -1;
- s->outbuf[s->outpos++] = s->bits & 0xff;
+ *s->dst++ = s->bits & 0xff;
s->bits >>= 8;
s->nbits -= 8;
}
- return 0;
}
static void putsym(State *s, ushort sym) {
@@ -491,17 +417,75 @@ static ushort getsym(State *s, int i) {
return s->symbuf[(s->symstart + i) % SymSize];
}
+
+static int clensymbols(int *lens, int n, int *syms) {
+ int i, j;
+ int runlen;
+ int nsym = 0;
+
+ for (i = j = 0; i < n; i = j) {
+ for (j++; j < n; j++)
+ if (lens[j] != lens[i])
+ break;
+ runlen = j - i;
+
+ /* encode economically */
+ if (lens[i] == 0) {
+ if (runlen < 3)
+ while (runlen--)
+ syms[nsym++] = 0;
+ else
+ while (runlen > 0) {
+ int k = runlen < 138 ? runlen : 138;
+
+ if (k > runlen - 3 && k < runlen)
+ k = runlen - 3;
+ if (k < 11) {
+ syms[nsym++] = 17;
+ syms[nsym++] = 3;
+ syms[nsym++] = k - 3;
+ } else {
+ syms[nsym++] = 18;
+ syms[nsym++] = 7;
+ syms[nsym++] = k - 11;
+ }
+ runlen -= k;
+ }
+ } else {
+ syms[nsym++] = lens[i];
+ runlen--;
+ if (runlen < 3)
+ while (runlen--)
+ syms[nsym++] = lens[i];
+ else
+ while (runlen > 0) {
+ int k = runlen < 6 ? runlen : 6;
+
+ if (k > runlen - 3 && k < runlen)
+ k = runlen - 3;
+ syms[nsym++] = 16;
+ syms[nsym++] = 2;
+ syms[nsym++] = k - 3;
+ runlen -= k;
+ }
+ }
+ }
+ return nsym;
+}
+
/* build dynamic block or a fixed block of given length */
static void block(State *s, int dynlen, int fixlen) {
int lfreq[Nlitlen], dfreq[Ndist], cfreq[Nclen];
uchar llen[Nlitlen], dlen[Ndist], clen[Nclen];
ushort lcode[Nlitlen], dcode[Ndist], ccode[Nclen];
- int nlit, ndist, nclen, bfinal, btype;
- int tree[Nlitlen + Ndist];
- int treesym[Nlitlen + Ndist];
+ int i, n, nlit, ndist, nclen;
+ int len[Nlitlen + Ndist];
+ int syms[Nlitlen + Ndist];
int codelen[Nclen];
- int i, ntree, ntreesym;
- int dynamic, blocklen;
+ int dynsize;
+ int blocklen;
+ int dynamic;
+ int lastblock;
memset(lfreq, 0, sizeof(lfreq));
memset(dfreq, 0, sizeof(dfreq));
@@ -519,75 +503,24 @@ static void block(State *s, int dynlen, int fixlen) {
i++; /* extra bits */
}
}
+
hufflenslimit(lfreq, llen, Nlitlen, CodeBits-1);
- hufflenslimit(dfreq, dlen, Ndist, CodeBits-1);
huffcodes(lcode, llen, Nlitlen);
- huffcodes(dcode, dlen, Ndist);
-
for (nlit = Nlitlen; nlit > Nlit && llen[nlit-1] == 0; nlit--);
+
+ hufflenslimit(dfreq, dlen, Ndist, CodeBits-1);
+ huffcodes(dcode, dlen, Ndist);
for (ndist = Ndist; ndist > 1 && dlen[ndist-1] == 0; ndist--);
- ntree = 0;
for (i = 0; i < nlit; i++)
- tree[ntree++] = llen[i];
+ len[i] = llen[i];
for (i = 0; i < ndist; i++)
- tree[ntree++] = dlen[i];
- ntreesym = 0;
- for (i = 0; i < ntree;) {
- int j = 1;
- int k;
-
- /* get run length of the same item */
- while (i+j < ntree && tree[i+j] == tree[i])
- j++;
-
- /* encode economically */
- k = j;
- if (tree[i] == 0) {
- if (k < 3)
- while (k--)
- treesym[ntreesym++] = 0;
- else
- while (k > 0) {
- int r = k < 138 ? k : 138;
-
- if (r > k - 3 && r < k)
- r = k - 3;
- if (r < 11) {
- treesym[ntreesym++] = 17;
- treesym[ntreesym++] = 3;
- treesym[ntreesym++] = r - 3;
- } else {
- treesym[ntreesym++] = 18;
- treesym[ntreesym++] = 7;
- treesym[ntreesym++] = r - 11;
- }
- k -= r;
- }
- } else {
- treesym[ntreesym++] = tree[i];
- k--;
- if (k < 3)
- while (k--)
- treesym[ntreesym++] = tree[i];
- else
- while (k > 0) {
- int r = k < 6 ? k : 6;
-
- if (r > k - 3 && r < k)
- r = k - 3;
- treesym[ntreesym++] = 16;
- treesym[ntreesym++] = 2;
- treesym[ntreesym++] = r - 3;
- k -= r;
- }
- }
- i += j;
- }
-
+ len[nlit + i] = dlen[i];
+ n = clensymbols(len, nlit + ndist, syms);
+ /* TODO: move to clensymbols */
memset(cfreq, 0, sizeof(cfreq));
- for (i = 0; i < ntreesym; i++) {
- uint sym = treesym[i];
+ for (i = 0; i < n; i++) {
+ uint sym = syms[i];
cfreq[sym]++;
if (sym >= 16)
@@ -595,103 +528,84 @@ static void block(State *s, int dynlen, int fixlen) {
}
hufflenslimit(cfreq, clen, Nclen, 7);
huffcodes(ccode, clen, Nclen);
-
- /* reorder */
for (i = 0; i < Nclen; i++)
codelen[i] = clen[clenorder[i]];
for (nclen = Nclen; nclen > 4 && codelen[nclen-1] == 0; nclen--);
- /* exact size of dynamic and fixed block */
- {
- int fsize, dsize;
+ /* calc len */
+ dynsize = 3 + 5 + 5 + 4;
+ dynsize += 3 * nclen;
+ for (i = 0; i < n; i++) {
+ uint sym = syms[i];
+
+ dynsize += clen[sym];
+ if (sym >= 16) {
+ i++;
+ dynsize += syms[i];
+ i++;
+ }
+ }
+ for (i = 0; i < dynlen; i++) {
+ ushort sym = getsym(s, i);
- /* dynamic block */
- dsize = 3 + 5 + 5 + 4; /* 3-bit header, nlit, ndist, nclen */
- dsize += 3 * nclen; /* code-length-alphabet code lengths */
+ dynsize += llen[sym];
+ if (sym > Nlit) {
+ i++; /* extra nbits */
+ dynsize += getsym(s, i);
+ i++; /* extra bits */
+ i++; /* dist sym */
+ dynsize += dlen[getsym(s, i)];
+ i++; /* extra nbits */
+ dynsize += getsym(s, i);
+ i++; /* extra bits */
+ }
+ }
+ dynsize += llen[256]; /* EOB */
+/*
+ dynamic = (long long)fsize * dynlen > (long long)dsize * fixlen;
+ blocklen = dynamic ? dlen : fixlen;
+*/
+ dynamic = 1;
+ blocklen = dynlen;
+ lastblock = 1;
+
+ outbits(s, lastblock ? 1 : 0, 1);
+ if (dynamic) {
+ outbits(s, 2, 2);
+ outbits(s, nlit - 257, 5);
+ outbits(s, ndist - 1, 5);
+ outbits(s, nclen - 4, 4);
+ /* clen code lengths */
+ for (i = 0; i < nclen; i++)
+ outbits(s, codelen[i], 3);
/* code lengths */
- for (i = 0; i < ntreesym; i++) {
- uint sym = treesym[i];
+ for (i = 0; i < n; i++) {
+ uint sym = syms[i];
- dsize += clen[sym];
+ outbits(s, ccode[sym], clen[sym]);
if (sym >= 16) {
- i++;
- dsize += treesym[i];
- i++;
+ outbits(s, syms[i+2], syms[i+1]);
+ i += 2;
}
}
/* block data */
for (i = 0; i < dynlen; i++) {
ushort sym = getsym(s, i);
- dsize += llen[sym];
- if (sym > Nlit) {
- i++; /* extra nbits */
- dsize += getsym(s, i);
- i++; /* extra bits */
- i++; /* dist sym */
- dsize += dlen[getsym(s, i)];
- i++; /* extra nbits */
- dsize += getsym(s, i);
- i++; /* extra bits */
- }
- }
- /* EOB */
- dsize += llen[256];
-
- /* fixed block.. TODO */
- fsize = 3;
- /* The actual block data */
- for (i = 0; i < fixlen; i++) {
- uint sym = getsym(s, i);
-
- fsize += fixedllen[sym];
+ outbits(s, lcode[sym], llen[sym]);
if (sym > Nlit) {
- i++; /* extra nbits */
- fsize += getsym(s, i);
- i++; /* extra bits */
- i++; /* dist sym */
- fsize += fixeddlen[getsym(s, i)];
- i++; /* extra nbits */
- fsize += getsym(s, i);
- i++; /* extra bits */
+ outbits(s, getsym(s, i+2), getsym(s, i+1));
+ sym = getsym(s, i+3);
+ outbits(s, dcode[sym], dlen[sym]);
+ outbits(s, getsym(s, i+5), getsym(s, i+4));
+ i += 5;
}
}
/* EOB */
- fsize += fixedllen[256];
-
- dynamic = (long long)fsize * dynlen > (long long)dsize * fixlen;
- blocklen = dynamic ? dlen : fixlen;
- }
-
- /* transmit the block */
-
- /* 3-bit block header */
- bfinal = s->lastblock ? 1 : 0;
- btype = dynamic ? 2 : 1;
- outbits(s, bfinal, 1);
- outbits(s, btype, 2);
-
- if (dynamic) {
- outbits(s, nlit - 257, 5);
- outbits(s, ndist - 1, 5);
- outbits(s, nclen - 4, 4);
-
- /* Code lengths for the auxiliary tree */
- for (i = 0; i < nclen; i++)
- outbits(s, codelen[i], 3);
-
- /* Code lengths for the literal/length and distance trees */
- for (i = 0; i < ntreesym; i++)
- writesym(s, treesym[i], ht);
+ outbits(s, lcode[256], llen[256]);
}
- /* Output the actual symbols from the buffer */
- for (i = 0; i < blocklen; i++)
- writesym(s, getsym(s, i), ht);
-
- /* Output the end-of-data symbol */
- writesym(s, 256, ht);
-
+fprintf(stderr, "blocklen: %d symlen: %d dynsize: %d\n", blocklen, s->symlen, dynsize);
s->symstart = (s->symstart + blocklen) % SymSize;
s->symlen -= blocklen;
}
@@ -708,7 +622,7 @@ static int log2_8(uint n) {
if (n < 1U<<31) n <<= 1, r -= 1*8;
/*
- * result is in [r,r+8), to determine the bottom 3 digits:
+ * result is in [r,r+8), to determine the bottom 3 bits:
* thresholds are 1<<31 times an odd power of 2^(1/16)
*/
if (n <= 0xAD583EEAU) {
@@ -736,48 +650,27 @@ static void chooseblock(State *s) {
lfreq[256] = 1; /* we're bound to need one EOB */
ltotal = 1;
dtotal = 0;
-
- /*
- * Iterate over all possible block lengths, computing the
- * entropic coding approximation to the final length at every
- * stage. We divide the result by the number of symbols
- * encoded, to determine the `value for money' (overall
- * bits-per-symbol count) of a block of that length.
- */
bestlen = -1;
bestvfm = 0;
-
len = 300 * 8; /* very approximate size of the Huffman trees */
-
for (i = 0; i < s->symlen; i++) {
ushort sym = getsym(s, i);
if (i > 0 && sym < Nlit) {
- /*
- * This is a viable point at which to end the block.
- * Compute the value for money.
- */
- int vfm = i * 32768 / len; /* symbols encoded per bit */
+ int vfm = i * 32768 / len;
if (bestlen < 0 || vfm > bestvfm) {
bestlen = i;
bestvfm = vfm;
}
-
longestlen = i;
}
-
- /*
- * Increment the occurrence counter for this symbol, if
- * it's in one of the Huffman alphabets and isn't extra
- * bits.
- */
- len += lfreq[sym] * approxlog2(lfreq[sym]);
- len -= ltotal * approxlog2(ltotal);
+ len += lfreq[sym] * log2_8(lfreq[sym]);
+ len -= ltotal * log2_8(ltotal);
lfreq[sym]++;
ltotal++;
- len -= lfreq[sym] * approxlog2(lfreq[sym]);
- len += ltotal * approxlog2(ltotal);
+ len -= lfreq[sym] * log2_8(lfreq[sym]);
+ len += ltotal * log2_8(ltotal);
if (sym > Nlit) {
i++;
sym = getsym(s, i);
@@ -785,19 +678,30 @@ static void chooseblock(State *s) {
i++;
i++;
sym = getsym(s, i);
- len += dfreq[sym] * approxlog2(dfreq[sym]);
- len -= dtotal * approxlog2(dtotal);
+ len += dfreq[sym] * log2_8(dfreq[sym]);
+ len -= dtotal * log2_8(dtotal);
dfreq[sym]++;
dtotal++;
- len -= dfreq[sym] * approxlog2(dfreq[sym]);
- len += dtotal * approxlog2(dtotal);
+ len -= dfreq[sym] * log2_8(dfreq[sym]);
+ len += dtotal * log2_8(dtotal);
i++;
sym = getsym(s, i);
len += 8 * sym;
i++;
}
}
- block(s, bestlen, longestlen);
+/* block(s, bestlen, longestlen);*/
+ block(s, s->symlen, longestlen);
+}
+
+static int count = 0;
+
+static void emitlit(State *s, ushort lit) {
+ if (s->symlen == SymSize)
+ chooseblock(s);
+fprintf(stderr, "%3d %c\n", lit, lit);
+count++;
+ putsym(s, lit);
}
static int bisect(ushort *base, int len, int n) {
@@ -815,36 +719,68 @@ static int bisect(ushort *base, int len, int n) {
return lo - 1;
}
-/* place a symbol into the symbuf */
-static void literal(State *s, ushort lit) {
- if (s->symlen == SymSize)
- chooseblock(s);
- putsym(s, lit);
-}
-
-static void match(State *s, ushort dist, ushort len) {
+static void emitmatch(State *s, Match m) {
if (s->symlen + 5 >= SymSize)
chooseblock(s);
- while (len > 0) {
- int n;
+fprintf(stderr, "m %d, %d\n", m.dist, m.len);
+count++;
+ while (m.len > 0) {
+ int len;
int i;
/* match length can be 3..258 */
- n = len > 260 ? 258 : len <= 258 ? len : len - 3;
- len -= n;
+ len = m.len > 260 ? 258 : m.len <= 258 ? m.len : m.len - 3;
+ m.len -= len;
- i = bisect(lenbase, Nlen, n);
+ i = bisect(lenbase, Nlen, len);
/* len symbol */
putsym(s, Nlit + i + 1);
/* len extra */
putsym(s, lenbits[i]);
- putsym(s, n - lenbase[i]);
+ putsym(s, len - lenbase[i]);
- i = bisect(distbase, Ndist, dist);
+ i = bisect(distbase, Ndist, m.dist);
/* dist symbol */
putsym(s, i);
/* dist extra */
putsym(s, distbits[i]);
- putsym(s, dist - distbase[i]);
+ putsym(s, m.dist - distbase[i]);
}
}
+
+int deflate(uchar *src, int len, uchar *dst) {
+ State s;
+
+ lzinit(&s);
+ s.symstart = 0;
+ s.symlen = 0;
+ s.bits = 0;
+ s.nbits = 0;
+ s.lastblock = 0;
+ s.dst = dst;
+ lzcompress(&s, src, len);
+ chooseblock(&s);
+ if (s.symlen)
+ block(&s, s.symlen, s.symlen);
+ outbits(&s, 0, 7);
+ return s.dst - dst;
+}
+
+
+#include <stdio.h>
+
+enum {Siz = 1<<24};
+
+int main() {
+ uchar *src;
+ uchar *dst;
+ int len;
+
+ src = malloc(Siz);
+ dst = malloc(Siz);
+ len = fread(src, 1, Siz, stdin);
+ len = deflate(src, len, dst);
+ fwrite(dst, 1, len, stdout);
+ fprintf(stderr, "compressed: %d\n", len);
+ return 0;
+}