flate

deflate implementation
git clone git://git.suckless.org/flate
Log | Files | Refs | README

commit a19449abc842b601ab7903089f6e87352c957b97
parent 2d40c927d7473c92826756c595461756bfdee095
Author: nsz <nszabolcs@gmail.com>
Date:   Sun, 21 Jun 2009 21:23:24 +0200

deflate_simple wip
Diffstat:
deflate_simple.c | 692++++++++++++++++++++++++++++++++++++-------------------------------------------
1 file changed, 314 insertions(+), 378 deletions(-)

diff --git a/deflate_simple.c b/deflate_simple.c @@ -1,17 +1,19 @@ #include <stdlib.h> #include <string.h> +#include <stdio.h> typedef unsigned char uchar; typedef unsigned short ushort; typedef unsigned int uint; enum { - Nmatch = 32, + MinMatch = 3, + MaxMatch = 258, /* TODO: unused */ + MaxChainStep = 64, HashSize = 2039, - HashChars = 3, + HashChars = 3, /* TODO: unused */ WinSize = 1 << 15, - WinMask = WinSize - 1, - SymSize = 1 << 16, + SymSize = 1 << 16 }; enum { @@ -29,31 +31,20 @@ typedef struct { } Match; typedef struct { - short next; - short prev; /* index within s->win */ - short hash; -} Entry; - -typedef struct { uchar *len; /* bit length of the code */ ushort *code; } Huff; typedef struct { - short tab[HashSize]; /* position of hash chain heads */ - int pos; /* position in win and data */ - Entry win[WinSize]; /* hash chains */ - uchar data[WinSize]; /* uncompressed character data */ - uchar pending[HashChars]; /* characters from previous compress */ - int npending; - - ushort *symbuf; /* lz77 literal and (len,dist) sequence */ + ushort head[HashSize]; /* position of hash chain heads */ + ushort chain[WinSize]; /* hash chains */ + int pos; /* position in hash chain */ + + ushort symbuf[SymSize]; /* lz77 literal and (len,dist) sequence */ int symstart; int symlen; - uchar *outbuf; /* encoded output */ - int outpos; - int outlen; + uchar *dst; /* compressed output */ uint bits; int nbits; @@ -69,9 +60,11 @@ typedef struct { Huff clen; } State; +#if 0 /* TODO: globals.. */ static Huff lhuff; /* fixed lit/len huffman code tree */ static Huff dhuff; /* fixed distance huffman code tree */ +#endif /* base offset and extra bits tables */ static uchar lenbits[Nlen] = { @@ -92,154 +85,97 @@ static uchar clenorder[Nclen] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; -static short hash_chars(uchar *p) { +static short gethash(uchar *p) { return (257 * p[0] + 263 * p[1] + 269 * p[2]) % HashSize; } -static int lz_init(State *s) { +static void lzinit(State *s) { int i; for (i = 0; i < WinSize; i++) - s->win[i].next = s->win[i].prev = s->win[i].hash = -1; + s->chain[i] = 0; for (i = 0; i < HashSize; i++) - s->tab[i] = -1; + s->head[i] = 0; s->pos = 0; - s->npending = 0; - return 1; -} - -static void lz_advance(State *s, uchar c, int hash) { - int pos = s->pos; - int next; - - /* remove the hash entry at pos from its chain */ - if (s->win[pos].prev != -1) - s->win[s->win[pos].prev].next = -1; - else if (s->win[pos].hash != -1) - s->tab[s->win[pos].hash] = -1; - - /* create a new entry at pos */ - s->win[pos].hash = hash; - s->win[pos].prev = -1; - s->win[pos].next = next = s->tab[hash]; - s->tab[hash] = pos; - if (next != -1) - s->win[next].prev = pos; - s->data[pos] = c; - s->pos = (pos + 1) & WinMask; } -/* s->data[]: past, s->data[s->pos]: present, data[]: future */ -static uchar getbyte(State *s, uchar *data, int n) { - return n < 0 ? s->data[(s->pos + n) & WinMask] : data[n]; -} - -static void literal(State *s, ushort lit); -static void match(State *s, ushort dist, ushort len); - -static void lz_compress(State *s, uchar *data, int len) { - int i, hash, dist, off, nmatch, matchlen, nadvance; - Match defermatch, matches[Nmatch]; - int deferchr; +static Match getmatch(State *s, uchar *src, int len) { + int i, j; int pos; - - /* add pending characters from previous compress */ - for (i = 0; i < s->npending; i++) { - uchar a[HashChars]; - int j; - - if (len + s->npending - i < HashChars) { - for (j = i; j < s->npending; j++) - s->pending[j - i] = s->pending[j]; - break; + int dist; + int lastdist = 0; + Match m = {0, 0}; + + if (len >= MinMatch) { + pos = s->head[gethash(src)]; + for (i = 0; i < MaxChainStep; i++) { + dist = (WinSize + s->pos - pos) % WinSize; + if (dist <= lastdist) + break; + lastdist = dist; + for (j = 0; j < len; j++) + if (src[j] != src[j - dist]) + break; + if (j > m.len) { + m.dist = dist; + m.len = j; + } + pos = s->chain[pos]; } - for (j = 0; j < HashChars; j++) - a[j] = i + j < s->npending ? s->pending[i + j] : data[i + j - s->npending]; - lz_advance(s, a[0], hash_chars(a)); + if (m.len < MinMatch) + m.len = 0; } - s->npending -= i; + return m; +} + +static void emitlit(State *s, ushort lit); +static void emitmatch(State *s, Match m); + +static void lzcompress(State *s, uchar *src, int len) { + int step; + int hash; + Match m; + Match prevm = {0, 0}; + uchar prevc = '\0'; - defermatch.len = 0; - deferchr = '\0'; while (len > 0) { - if (len >= HashChars) { - /* find matches at least HashChars long */ - hash = hash_chars(data); - nmatch = 0; - for (pos = s->tab[hash]; pos != -1; pos = s->win[pos].next) { - /* dist = 1 if pos == s->pos - 1 */ - /* dist = WinSize if pos == s->pos */ - dist = WinSize - (pos + WinSize - s->pos) & WinMask; - for (i = 0; i < HashChars; i++) - if (data[i] != getbyte(s, data, i - dist)) - break; - if (i == HashChars) { - matches[nmatch].dist = dist; - matches[nmatch].len = HashChars; - if (++nmatch >= Nmatch) - break; - } - } - } else { - nmatch = 0; - hash = -1; - } - if (nmatch > 0) { - /* nmatch potential matches, we assume longer match is better */ - matchlen = HashChars; - while (matchlen < len) { - int j; - - for (i = j = 0; i < nmatch; i++) - if (getbyte(s, data, matchlen) == getbyte(s, data, matchlen - matches[i].dist)) - matches[j++] = matches[i]; - if (j == 0) - break; - matchlen++; - nmatch = j; - } - /* among the longest matches favour the shorter distance (matches[0]) */ - matches[0].len = matchlen; - if (defermatch.len > 0) { - if (matches[0].len > defermatch.len + 1) { - /* We have a better match. Emit the deferred char, - * and defer this match. */ - literal(s, deferchr); - defermatch = matches[0]; - deferchr = data[0]; - nadvance = 1; + m = getmatch(s, src, len); + if (m.len > 0) + if (prevm.len > 0) { + if (m.len > prevm.len + 1) { + emitlit(s, prevc); + prevm = m; + prevc = *src; + step = 1; } else { - /* We don't have a better match. Do the deferred one. */ - match(s, defermatch.dist, defermatch.len); - nadvance = defermatch.len - 1; - defermatch.len = 0; + emitmatch(s, prevm); + step = prevm.len - 1; + prevm.len = 0; } } else { - /* There was no deferred match. Defer this one. */ - defermatch = matches[0]; - deferchr = data[0]; - nadvance = 1; + prevm = m; + prevc = *src; + step = 1; } - } else { - /* no matches, emit the deferred match or a literal. */ - if (defermatch.len > 0) { - match(s, defermatch.dist, defermatch.len); - nadvance = defermatch.len - 1; - defermatch.len = 0; + else + if (prevm.len > 0) { + emitmatch(s, prevm); + step = prevm.len - 1; + prevm.len = 0; } else { - literal(s, data[0]); - nadvance = 1; + emitlit(s, *src); + step = 1; } - } - /* advance winpos and keep the window and hash chains consistent. */ - while (nadvance > 0) { - if (len >= HashChars) - lz_advance(s, *data, hash_chars(data)); - else - s->pending[s->npending++] = *data; - data++; + while (step > 0) { + if (len >= MinMatch) { + hash = gethash(src); + s->chain[s->pos] = s->head[hash]; + s->head[hash] = s->pos; + s->pos = (s->pos + 1) % WinSize; + } + src++; len--; - nadvance--; + step--; } } } @@ -255,44 +191,32 @@ static uint revcode(uint c, int n) { return r; } -/* huffman codes from code lengths, return max code length */ -static int huffcodes(ushort *codes, const uchar *lens, int n) { - int c[CodeBits]; - int i, left, max; +/* build huffman code tree from code lengths */ +static void huffcodes(ushort *codes, const uchar *lens, int n) { + int count[CodeBits]; + int offs[CodeBits]; + int i, code; - /* count code lengths */ + /* count code lengths and calc first code (offs) for each length */ for (i = 0; i < CodeBits; i++) - c[i] = 0; + count[i] = 0; for (i = 0; i < n; i++) - c[lens[i]]++; - - /* calc max code lengths */ - for (i = CodeBits - 1; i > 0; i--) - if (c[i]) - break; - max = i; - - /* check if length is over-subscribed or incomplete */ - for (left = 2, i = 1; i <= max; i++) { - left -= c[i]; - if (left < 0) - /* over-subscribed */ - return -1; - left <<= 1; + count[lens[i]]++; + count[0] = 0; + for (code = i = 0; i < CodeBits; i++) { + offs[i] = code; + code += count[i]; + if (code > (1 << i)) + /* over-subscribed */return; + code <<= 1; } - if (left > 0) - /* incomlpete */; - - /* calc first code for each length */ - for (i = 2; i <= max; i++) - c[i] += c[i-1]; - + if (code < (1 << i)) + /* incomplete */; for (i = 0; i < n; i++) if (lens[i]) - codes[i] = revcode(c[lens[i]]++, lens[i]); + codes[i] = revcode(offs[lens[i]]++, lens[i]); else codes[i] = 0; - return max; } /* min-heap for buildhuf() */ @@ -364,23 +288,27 @@ static void hufflens(const int *freqs, uchar *lens, int nsym) { if (nsym > Nlitlen) /* TODO: error */; - for (n = 0; n < nsym; n++) + for (len = n = 0; n < nsym; n++) if (freqs[n] > 0) - heappush(heap, n, freqs[n], n); + heappush(heap, len++, freqs[n], n); /* build code tree */ - for (len = n; len > 1; n++) { + while (len > 1) { heappop(heap, len--, &wi, &i); heappop(heap, len--, &wj, &j); parent[i] = n; parent[j] = n; - heappush(heap, len++, wi + wj, n); + heappush(heap, len++, wi + wj, n++); + /* TODO: strore used syms at heap end */ } /* calc code length */ clen[--n] = 0; while (n--) - clen[n] = 1 + clen[parent[n]]; + if (n >= nsym || freqs[n]) + clen[n] = 1 + clen[parent[n]]; + else + clen[n] = 0; /* limit code length to 255 */ for (n = 0; n < nsym; n++) @@ -470,17 +398,15 @@ static void hufflenslimit(int *freqs, uchar *lens, int nsym, int limit) { } /* adding < 16 bits to outbuf */ -static int outbits(State *s, uint bits, int nbits) { +static void outbits(State *s, uint bits, int nbits) { +fprintf(stderr, "b:%3d n:%3d\n", bits, nbits); s->bits |= bits << s->nbits; s->nbits += nbits; while (s->nbits >= 8) { - if (s->outpos == s->outlen) - return -1; - s->outbuf[s->outpos++] = s->bits & 0xff; + *s->dst++ = s->bits & 0xff; s->bits >>= 8; s->nbits -= 8; } - return 0; } static void putsym(State *s, ushort sym) { @@ -491,17 +417,75 @@ static ushort getsym(State *s, int i) { return s->symbuf[(s->symstart + i) % SymSize]; } + +static int clensymbols(int *lens, int n, int *syms) { + int i, j; + int runlen; + int nsym = 0; + + for (i = j = 0; i < n; i = j) { + for (j++; j < n; j++) + if (lens[j] != lens[i]) + break; + runlen = j - i; + + /* encode economically */ + if (lens[i] == 0) { + if (runlen < 3) + while (runlen--) + syms[nsym++] = 0; + else + while (runlen > 0) { + int k = runlen < 138 ? runlen : 138; + + if (k > runlen - 3 && k < runlen) + k = runlen - 3; + if (k < 11) { + syms[nsym++] = 17; + syms[nsym++] = 3; + syms[nsym++] = k - 3; + } else { + syms[nsym++] = 18; + syms[nsym++] = 7; + syms[nsym++] = k - 11; + } + runlen -= k; + } + } else { + syms[nsym++] = lens[i]; + runlen--; + if (runlen < 3) + while (runlen--) + syms[nsym++] = lens[i]; + else + while (runlen > 0) { + int k = runlen < 6 ? runlen : 6; + + if (k > runlen - 3 && k < runlen) + k = runlen - 3; + syms[nsym++] = 16; + syms[nsym++] = 2; + syms[nsym++] = k - 3; + runlen -= k; + } + } + } + return nsym; +} + /* build dynamic block or a fixed block of given length */ static void block(State *s, int dynlen, int fixlen) { int lfreq[Nlitlen], dfreq[Ndist], cfreq[Nclen]; uchar llen[Nlitlen], dlen[Ndist], clen[Nclen]; ushort lcode[Nlitlen], dcode[Ndist], ccode[Nclen]; - int nlit, ndist, nclen, bfinal, btype; - int tree[Nlitlen + Ndist]; - int treesym[Nlitlen + Ndist]; + int i, n, nlit, ndist, nclen; + int len[Nlitlen + Ndist]; + int syms[Nlitlen + Ndist]; int codelen[Nclen]; - int i, ntree, ntreesym; - int dynamic, blocklen; + int dynsize; + int blocklen; + int dynamic; + int lastblock; memset(lfreq, 0, sizeof(lfreq)); memset(dfreq, 0, sizeof(dfreq)); @@ -519,75 +503,24 @@ static void block(State *s, int dynlen, int fixlen) { i++; /* extra bits */ } } + hufflenslimit(lfreq, llen, Nlitlen, CodeBits-1); - hufflenslimit(dfreq, dlen, Ndist, CodeBits-1); huffcodes(lcode, llen, Nlitlen); - huffcodes(dcode, dlen, Ndist); - for (nlit = Nlitlen; nlit > Nlit && llen[nlit-1] == 0; nlit--); + + hufflenslimit(dfreq, dlen, Ndist, CodeBits-1); + huffcodes(dcode, dlen, Ndist); for (ndist = Ndist; ndist > 1 && dlen[ndist-1] == 0; ndist--); - ntree = 0; for (i = 0; i < nlit; i++) - tree[ntree++] = llen[i]; + len[i] = llen[i]; for (i = 0; i < ndist; i++) - tree[ntree++] = dlen[i]; - ntreesym = 0; - for (i = 0; i < ntree;) { - int j = 1; - int k; - - /* get run length of the same item */ - while (i+j < ntree && tree[i+j] == tree[i]) - j++; - - /* encode economically */ - k = j; - if (tree[i] == 0) { - if (k < 3) - while (k--) - treesym[ntreesym++] = 0; - else - while (k > 0) { - int r = k < 138 ? k : 138; - - if (r > k - 3 && r < k) - r = k - 3; - if (r < 11) { - treesym[ntreesym++] = 17; - treesym[ntreesym++] = 3; - treesym[ntreesym++] = r - 3; - } else { - treesym[ntreesym++] = 18; - treesym[ntreesym++] = 7; - treesym[ntreesym++] = r - 11; - } - k -= r; - } - } else { - treesym[ntreesym++] = tree[i]; - k--; - if (k < 3) - while (k--) - treesym[ntreesym++] = tree[i]; - else - while (k > 0) { - int r = k < 6 ? k : 6; - - if (r > k - 3 && r < k) - r = k - 3; - treesym[ntreesym++] = 16; - treesym[ntreesym++] = 2; - treesym[ntreesym++] = r - 3; - k -= r; - } - } - i += j; - } - + len[nlit + i] = dlen[i]; + n = clensymbols(len, nlit + ndist, syms); + /* TODO: move to clensymbols */ memset(cfreq, 0, sizeof(cfreq)); - for (i = 0; i < ntreesym; i++) { - uint sym = treesym[i]; + for (i = 0; i < n; i++) { + uint sym = syms[i]; cfreq[sym]++; if (sym >= 16) @@ -595,103 +528,84 @@ static void block(State *s, int dynlen, int fixlen) { } hufflenslimit(cfreq, clen, Nclen, 7); huffcodes(ccode, clen, Nclen); - - /* reorder */ for (i = 0; i < Nclen; i++) codelen[i] = clen[clenorder[i]]; for (nclen = Nclen; nclen > 4 && codelen[nclen-1] == 0; nclen--); - /* exact size of dynamic and fixed block */ - { - int fsize, dsize; + /* calc len */ + dynsize = 3 + 5 + 5 + 4; + dynsize += 3 * nclen; + for (i = 0; i < n; i++) { + uint sym = syms[i]; + + dynsize += clen[sym]; + if (sym >= 16) { + i++; + dynsize += syms[i]; + i++; + } + } + for (i = 0; i < dynlen; i++) { + ushort sym = getsym(s, i); - /* dynamic block */ - dsize = 3 + 5 + 5 + 4; /* 3-bit header, nlit, ndist, nclen */ - dsize += 3 * nclen; /* code-length-alphabet code lengths */ + dynsize += llen[sym]; + if (sym > Nlit) { + i++; /* extra nbits */ + dynsize += getsym(s, i); + i++; /* extra bits */ + i++; /* dist sym */ + dynsize += dlen[getsym(s, i)]; + i++; /* extra nbits */ + dynsize += getsym(s, i); + i++; /* extra bits */ + } + } + dynsize += llen[256]; /* EOB */ +/* + dynamic = (long long)fsize * dynlen > (long long)dsize * fixlen; + blocklen = dynamic ? dlen : fixlen; +*/ + dynamic = 1; + blocklen = dynlen; + lastblock = 1; + + outbits(s, lastblock ? 1 : 0, 1); + if (dynamic) { + outbits(s, 2, 2); + outbits(s, nlit - 257, 5); + outbits(s, ndist - 1, 5); + outbits(s, nclen - 4, 4); + /* clen code lengths */ + for (i = 0; i < nclen; i++) + outbits(s, codelen[i], 3); /* code lengths */ - for (i = 0; i < ntreesym; i++) { - uint sym = treesym[i]; + for (i = 0; i < n; i++) { + uint sym = syms[i]; - dsize += clen[sym]; + outbits(s, ccode[sym], clen[sym]); if (sym >= 16) { - i++; - dsize += treesym[i]; - i++; + outbits(s, syms[i+2], syms[i+1]); + i += 2; } } /* block data */ for (i = 0; i < dynlen; i++) { ushort sym = getsym(s, i); - dsize += llen[sym]; - if (sym > Nlit) { - i++; /* extra nbits */ - dsize += getsym(s, i); - i++; /* extra bits */ - i++; /* dist sym */ - dsize += dlen[getsym(s, i)]; - i++; /* extra nbits */ - dsize += getsym(s, i); - i++; /* extra bits */ - } - } - /* EOB */ - dsize += llen[256]; - - /* fixed block.. TODO */ - fsize = 3; - /* The actual block data */ - for (i = 0; i < fixlen; i++) { - uint sym = getsym(s, i); - - fsize += fixedllen[sym]; + outbits(s, lcode[sym], llen[sym]); if (sym > Nlit) { - i++; /* extra nbits */ - fsize += getsym(s, i); - i++; /* extra bits */ - i++; /* dist sym */ - fsize += fixeddlen[getsym(s, i)]; - i++; /* extra nbits */ - fsize += getsym(s, i); - i++; /* extra bits */ + outbits(s, getsym(s, i+2), getsym(s, i+1)); + sym = getsym(s, i+3); + outbits(s, dcode[sym], dlen[sym]); + outbits(s, getsym(s, i+5), getsym(s, i+4)); + i += 5; } } /* EOB */ - fsize += fixedllen[256]; - - dynamic = (long long)fsize * dynlen > (long long)dsize * fixlen; - blocklen = dynamic ? dlen : fixlen; - } - - /* transmit the block */ - - /* 3-bit block header */ - bfinal = s->lastblock ? 1 : 0; - btype = dynamic ? 2 : 1; - outbits(s, bfinal, 1); - outbits(s, btype, 2); - - if (dynamic) { - outbits(s, nlit - 257, 5); - outbits(s, ndist - 1, 5); - outbits(s, nclen - 4, 4); - - /* Code lengths for the auxiliary tree */ - for (i = 0; i < nclen; i++) - outbits(s, codelen[i], 3); - - /* Code lengths for the literal/length and distance trees */ - for (i = 0; i < ntreesym; i++) - writesym(s, treesym[i], ht); + outbits(s, lcode[256], llen[256]); } - /* Output the actual symbols from the buffer */ - for (i = 0; i < blocklen; i++) - writesym(s, getsym(s, i), ht); - - /* Output the end-of-data symbol */ - writesym(s, 256, ht); - +fprintf(stderr, "blocklen: %d symlen: %d dynsize: %d\n", blocklen, s->symlen, dynsize); s->symstart = (s->symstart + blocklen) % SymSize; s->symlen -= blocklen; } @@ -708,7 +622,7 @@ static int log2_8(uint n) { if (n < 1U<<31) n <<= 1, r -= 1*8; /* - * result is in [r,r+8), to determine the bottom 3 digits: + * result is in [r,r+8), to determine the bottom 3 bits: * thresholds are 1<<31 times an odd power of 2^(1/16) */ if (n <= 0xAD583EEAU) { @@ -736,48 +650,27 @@ static void chooseblock(State *s) { lfreq[256] = 1; /* we're bound to need one EOB */ ltotal = 1; dtotal = 0; - - /* - * Iterate over all possible block lengths, computing the - * entropic coding approximation to the final length at every - * stage. We divide the result by the number of symbols - * encoded, to determine the `value for money' (overall - * bits-per-symbol count) of a block of that length. - */ bestlen = -1; bestvfm = 0; - len = 300 * 8; /* very approximate size of the Huffman trees */ - for (i = 0; i < s->symlen; i++) { ushort sym = getsym(s, i); if (i > 0 && sym < Nlit) { - /* - * This is a viable point at which to end the block. - * Compute the value for money. - */ - int vfm = i * 32768 / len; /* symbols encoded per bit */ + int vfm = i * 32768 / len; if (bestlen < 0 || vfm > bestvfm) { bestlen = i; bestvfm = vfm; } - longestlen = i; } - - /* - * Increment the occurrence counter for this symbol, if - * it's in one of the Huffman alphabets and isn't extra - * bits. - */ - len += lfreq[sym] * approxlog2(lfreq[sym]); - len -= ltotal * approxlog2(ltotal); + len += lfreq[sym] * log2_8(lfreq[sym]); + len -= ltotal * log2_8(ltotal); lfreq[sym]++; ltotal++; - len -= lfreq[sym] * approxlog2(lfreq[sym]); - len += ltotal * approxlog2(ltotal); + len -= lfreq[sym] * log2_8(lfreq[sym]); + len += ltotal * log2_8(ltotal); if (sym > Nlit) { i++; sym = getsym(s, i); @@ -785,19 +678,30 @@ static void chooseblock(State *s) { i++; i++; sym = getsym(s, i); - len += dfreq[sym] * approxlog2(dfreq[sym]); - len -= dtotal * approxlog2(dtotal); + len += dfreq[sym] * log2_8(dfreq[sym]); + len -= dtotal * log2_8(dtotal); dfreq[sym]++; dtotal++; - len -= dfreq[sym] * approxlog2(dfreq[sym]); - len += dtotal * approxlog2(dtotal); + len -= dfreq[sym] * log2_8(dfreq[sym]); + len += dtotal * log2_8(dtotal); i++; sym = getsym(s, i); len += 8 * sym; i++; } } - block(s, bestlen, longestlen); +/* block(s, bestlen, longestlen);*/ + block(s, s->symlen, longestlen); +} + +static int count = 0; + +static void emitlit(State *s, ushort lit) { + if (s->symlen == SymSize) + chooseblock(s); +fprintf(stderr, "%3d %c\n", lit, lit); +count++; + putsym(s, lit); } static int bisect(ushort *base, int len, int n) { @@ -815,36 +719,68 @@ static int bisect(ushort *base, int len, int n) { return lo - 1; } -/* place a symbol into the symbuf */ -static void literal(State *s, ushort lit) { - if (s->symlen == SymSize) - chooseblock(s); - putsym(s, lit); -} - -static void match(State *s, ushort dist, ushort len) { +static void emitmatch(State *s, Match m) { if (s->symlen + 5 >= SymSize) chooseblock(s); - while (len > 0) { - int n; +fprintf(stderr, "m %d, %d\n", m.dist, m.len); +count++; + while (m.len > 0) { + int len; int i; /* match length can be 3..258 */ - n = len > 260 ? 258 : len <= 258 ? len : len - 3; - len -= n; + len = m.len > 260 ? 258 : m.len <= 258 ? m.len : m.len - 3; + m.len -= len; - i = bisect(lenbase, Nlen, n); + i = bisect(lenbase, Nlen, len); /* len symbol */ putsym(s, Nlit + i + 1); /* len extra */ putsym(s, lenbits[i]); - putsym(s, n - lenbase[i]); + putsym(s, len - lenbase[i]); - i = bisect(distbase, Ndist, dist); + i = bisect(distbase, Ndist, m.dist); /* dist symbol */ putsym(s, i); /* dist extra */ putsym(s, distbits[i]); - putsym(s, dist - distbase[i]); + putsym(s, m.dist - distbase[i]); } } + +int deflate(uchar *src, int len, uchar *dst) { + State s; + + lzinit(&s); + s.symstart = 0; + s.symlen = 0; + s.bits = 0; + s.nbits = 0; + s.lastblock = 0; + s.dst = dst; + lzcompress(&s, src, len); + chooseblock(&s); + if (s.symlen) + block(&s, s.symlen, s.symlen); + outbits(&s, 0, 7); + return s.dst - dst; +} + + +#include <stdio.h> + +enum {Siz = 1<<24}; + +int main() { + uchar *src; + uchar *dst; + int len; + + src = malloc(Siz); + dst = malloc(Siz); + len = fread(src, 1, Siz, stdin); + len = deflate(src, len, dst); + fwrite(dst, 1, len, stdout); + fprintf(stderr, "compressed: %d\n", len); + return 0; +}