commit 0a06e6487a7525f7cb49d3b211c551abe56e596c
parent df9760ead6012c345cf371d650a617d540364a95
Author: nsz <nszabolcs@gmail.com>
Date: Sat, 4 Jul 2009 12:23:53 +0200
inflate: fix buildhuf when distlen = 0, simple deflate
Diffstat:
3 files changed, 198 insertions(+), 234 deletions(-)
diff --git a/deflate_simple.c b/deflate_simple.c
@@ -35,12 +35,25 @@ typedef struct {
ushort *code;
} Huff;
+enum {
+ SymLitlen,
+ SymDist,
+ SymExtra,
+ SymClen
+};
+
+typedef struct {
+ uchar type; /* litlen, dist, extra, clen */
+ uchar len; /* bit length if extra */
+ ushort n; /* symbol or code if extra */
+} Sym;
+
typedef struct {
ushort head[HashSize]; /* position of hash chain heads */
ushort chain[WinSize]; /* hash chains */
int pos; /* position in hash chain */
- ushort symbuf[SymSize]; /* lz77 literal and (len,dist) sequence */
+ Sym symbuf[SymSize]; /* lz77 literal and (len,dist) sequence */
int symstart;
int symlen;
@@ -193,68 +206,67 @@ static uint revcode(uint c, int n) {
/* build huffman code tree from code lengths */
static void huffcodes(ushort *codes, const uchar *lens, int n) {
- int count[CodeBits];
- int offs[CodeBits];
- int i, code;
+ int c[CodeBits];
+ int i, code, count;
- /* count code lengths and calc first code (offs) for each length */
+ /* count code lengths and calc first code for each length */
for (i = 0; i < CodeBits; i++)
- count[i] = 0;
+ c[i] = 0;
for (i = 0; i < n; i++)
- count[lens[i]]++;
- count[0] = 0;
- for (code = i = 0; i < CodeBits; i++) {
- offs[i] = code;
- code += count[i];
+ c[lens[i]]++;
+ for (code = 0, i = 1; i < CodeBits; i++) {
+ count = c[i];
+ c[i] = code;
+ code += count;
if (code > (1 << i))
- /* over-subscribed */return;
+ abort(); /* over-subscribed */
code <<= 1;
}
if (code < (1 << i))
/* incomplete */;
for (i = 0; i < n; i++)
if (lens[i])
- codes[i] = revcode(offs[lens[i]]++, lens[i]);
+ codes[i] = revcode(c[lens[i]]++, lens[i]);
else
codes[i] = 0;
}
-/* min-heap for buildhuf() */
+/* min-heap for hufflen() */
typedef struct {
- int key;
- int value;
-} Item;
+ int k;
+ int v;
+} HeapItem;
static int heapparent(int n) {return (n - 1)/2;}
static int heapleft(int n) {return 2 * n + 1;}
static int heapright(int n) {return 2 * n + 2;}
-static void heappush(Item *heap, int len, int key, int value) {
- int n, dad;
- Item tmp;
+static void heappush(HeapItem *heap, int len, int k, int v) {
+ int n, p;
+ HeapItem tmp;
- heap[len].key = key;
- heap[len].value = value;
+ heap[len].k = k;
+ heap[len].v = v;
n = len;
while (n > 0) {
- dad = heapparent(n);
- if (heap[n].key < heap[dad].key) {
+ p = heapparent(n);
+ if (heap[n].k < heap[p].k) {
tmp = heap[n];
- heap[n] = heap[dad];
- heap[dad] = tmp;
- n = dad;
+ heap[n] = heap[p];
+ heap[p] = tmp;
+ n = p;
} else
break;
}
}
-static void heappop(Item *heap, int len, int *key, int *value) {
- int n, lc, rc, child;
- Item tmp;
+static void heappop(HeapItem *heap, int len, int *k, int *v) {
+ int n, lc, rc, c;
+ HeapItem tmp;
- *key = heap[0].key;
- *value = heap[0].value;
+ *k = heap[0].k;
+ *v = heap[0].v;
heap[0] = heap[--len];
n = 0;
for (;;) {
@@ -262,46 +274,42 @@ static void heappop(Item *heap, int len, int *key, int *value) {
rc = heapright(n);
if (lc >= len)
break;
- else if (rc >= len || heap[lc].key < heap[rc].key)
- child = lc;
+ else if (rc >= len || heap[lc].k < heap[rc].k)
+ c = lc;
else
- child = rc;
- if (heap[n].key > heap[child].key) {
+ c = rc;
+ if (heap[n].k > heap[c].k) {
tmp = heap[n];
- heap[n] = heap[child];
- heap[child] = tmp;
+ heap[n] = heap[c];
+ heap[c] = tmp;
} else
break;
- n = child;
+ n = c;
}
}
/* symbol frequencies -> code lengths (limited to 255) */
-static void hufflens(const int *freqs, uchar *lens, int nsym) {
+static void hufflens(uchar *lens, const int *freqs, int nsym) {
int parent[2*Nlitlen-1];
int clen[2*Nlitlen-1];
- Item heap[Nlitlen];
+ HeapItem heap[Nlitlen];
int n, len;
int i, j;
- int wi, wj;
+ int fi, fj;
if (nsym > Nlitlen)
- /* TODO: error */;
-
+ abort();
for (len = n = 0; n < nsym; n++)
if (freqs[n] > 0)
heappush(heap, len++, freqs[n], n);
-
/* build code tree */
while (len > 1) {
- heappop(heap, len--, &wi, &i);
- heappop(heap, len--, &wj, &j);
+ heappop(heap, len--, &fi, &i);
+ heappop(heap, len--, &fj, &j);
parent[i] = n;
parent[j] = n;
- heappush(heap, len++, wi + wj, n++);
- /* TODO: strore used syms at heap end */
+ heappush(heap, len++, fi + fj, n++);
}
-
/* calc code length */
clen[--n] = 0;
while (n--)
@@ -309,7 +317,6 @@ static void hufflens(const int *freqs, uchar *lens, int nsym) {
clen[n] = 1 + clen[parent[n]];
else
clen[n] = 0;
-
/* limit code length to 255 */
for (n = 0; n < nsym; n++)
lens[n] = clen[n] > 255 ? 255 : clen[n];
@@ -327,15 +334,14 @@ static int fib(int n) {
return b;
}
-/* might modify freqs, limit code length */
-static void hufflenslimit(int *freqs, uchar *lens, int nsym, int limit) {
- uint minfreq, sumfreq, nrealsym;
- int num, den, adjust;
- int i, maxprob, count;
+/* symbol frequencies -> (limited) code lengths */
+static void hufflenslimit(uchar *lens, int *freqs, int nsym, int limit) {
+ uint minfreq, sumfreq;
+ int i, adjust, maxprob, count;
/* TODO: fewer than two symbols: add new */
if (nsym < 2)
- /* error */ abort();
+ abort();
count = 0;
for (i = 0; i < nsym; i++)
if (freqs[i] > 0)
@@ -346,58 +352,48 @@ static void hufflenslimit(int *freqs, uchar *lens, int nsym, int limit) {
freqs[i] = 1;
count--; /* what? */
}
-
/* try building the codes without limit */
- hufflens(freqs, lens, nsym);
-
+ hufflens(lens, freqs, nsym);
for (i = 0; i < nsym; i++)
if (lens[i] > limit)
break;
if (i == nsym)
return;
-
/*
- * The Huffman algorithm can only ever generate a code length
- * of N bits or more if there is a symbol whose probability is
- * less than the reciprocal of the (N+2)th Fibonacci number.
+ * code length can be N bits or more only if the probability of a
+ * symbol is less than the reciprocal of the (N+2)th fibonacci number
*
- * If hufflens() has returned a long code, we add a constant to
- * all the (non-zero) symbol frequencies, causing them to become
- * more balanced and call hufflens() again.
+ * if hufflens() returned a long code, we add a constant to all the
+ * symbol frequencies and call hufflens() again
*/
maxprob = fib(limit + 3);
- sumfreq = nrealsym = 0;
+ sumfreq = 0;
minfreq = (uint)-1;
- for (i = 0; i < nsym; i++) {
- if (freqs[i] == 0)
- continue;
- if (minfreq > freqs[i])
- minfreq = freqs[i];
- sumfreq += freqs[i];
- nrealsym++;
- }
+ for (i = 0; i < nsym; i++)
+ if (freqs[i]) {
+ if (minfreq > freqs[i])
+ minfreq = freqs[i];
+ sumfreq += freqs[i];
+ }
if (minfreq > sumfreq / maxprob)
- /* error */ abort();
-
+ abort();
/*
- * increase freqs with adjust, the smallest integer such that
- * (sumfreq + nrealsym * adjust)/(minfreq + adjust) < maxprob
+ * adjust is the smallest integer such that
+ * (sumfreq + count * adjust)/(minfreq + adjust) < maxprob
*/
- num = sumfreq - minfreq * maxprob;
- den = maxprob - nrealsym;
- adjust = (num + den - 1) / den;
+ i = maxprob - count;
+ adjust = (sumfreq - minfreq * maxprob + i - 1) / i;
for (i = 0; i < nsym; i++)
- if (freqs[i] != 0)
+ if (freqs[i])
freqs[i] += adjust;
-
/* rebuild the hufftree */
- hufflens(freqs, lens, nsym);
+ hufflens(lens, freqs, nsym);
for (i = 0; i < nsym; i++)
if (lens[i] > limit)
- /* error */ abort();
+ abort();
}
-/* adding < 16 bits to outbuf */
+/* adding < 16 bits to dst */
static void outbits(State *s, uint bits, int nbits) {
fprintf(stderr, "b:%3d n:%3d\n", bits, nbits);
s->bits |= bits << s->nbits;
@@ -409,31 +405,43 @@ fprintf(stderr, "b:%3d n:%3d\n", bits, nbits);
}
}
-static void putsym(State *s, ushort sym) {
+static Sym makesym(int type, int len, int sym) {
+ Sym s;
+
+ s.type = type;
+ s.len = len;
+ s.n = sym;
+ return s;
+}
+
+static void putsym(State *s, Sym sym) {
s->symbuf[(s->symstart + s->symlen++) % SymSize] = sym;
}
-static ushort getsym(State *s, int i) {
+static Sym getsym(State *s, int i) {
return s->symbuf[(s->symstart + i) % SymSize];
}
-
-static int clensymbols(int *lens, int n, int *syms) {
+static int clensymbols(Sym *syms, uchar *llen, int nlit, uchar *dlen, int ndist) {
+ int lens[Nlitlen + Ndist];
int i, j;
int runlen;
int nsym = 0;
- for (i = j = 0; i < n; i = j) {
- for (j++; j < n; j++)
+ for (i = 0; i < nlit; i++)
+ lens[i] = llen[i];
+ for (i = 0; i < ndist; i++)
+ lens[nlit + i] = dlen[i];
+ for (i = j = 0; i < nlit + ndist; i = j) {
+ for (j++; j < nlit + ndist; j++)
if (lens[j] != lens[i])
break;
runlen = j - i;
-
/* encode economically */
if (lens[i] == 0) {
if (runlen < 3)
while (runlen--)
- syms[nsym++] = 0;
+ syms[nsym++] = makesym(SymClen, 0, 0);
else
while (runlen > 0) {
int k = runlen < 138 ? runlen : 138;
@@ -441,31 +449,28 @@ static int clensymbols(int *lens, int n, int *syms) {
if (k > runlen - 3 && k < runlen)
k = runlen - 3;
if (k < 11) {
- syms[nsym++] = 17;
- syms[nsym++] = 3;
- syms[nsym++] = k - 3;
+ syms[nsym++] = makesym(SymClen, 0, 17);
+ syms[nsym++] = makesym(SymExtra, 3, k - 3);
} else {
- syms[nsym++] = 18;
- syms[nsym++] = 7;
- syms[nsym++] = k - 11;
+ syms[nsym++] = makesym(SymClen, 0, 18);
+ syms[nsym++] = makesym(SymExtra, 7, k - 11);
}
runlen -= k;
}
} else {
- syms[nsym++] = lens[i];
+ syms[nsym++] = makesym(SymClen, 0, lens[i]);
runlen--;
if (runlen < 3)
while (runlen--)
- syms[nsym++] = lens[i];
+ syms[nsym++] = makesym(SymClen, 0, lens[i]);
else
while (runlen > 0) {
int k = runlen < 6 ? runlen : 6;
if (k > runlen - 3 && k < runlen)
k = runlen - 3;
- syms[nsym++] = 16;
- syms[nsym++] = 2;
- syms[nsym++] = k - 3;
+ syms[nsym++] = makesym(SymClen, 0, 16);
+ syms[nsym++] = makesym(SymExtra, 2, k - 3);
runlen -= k;
}
}
@@ -478,10 +483,10 @@ static void block(State *s, int dynlen, int fixlen) {
int lfreq[Nlitlen], dfreq[Ndist], cfreq[Nclen];
uchar llen[Nlitlen], dlen[Ndist], clen[Nclen];
ushort lcode[Nlitlen], dcode[Ndist], ccode[Nclen];
- int i, n, nlit, ndist, nclen;
- int len[Nlitlen + Ndist];
- int syms[Nlitlen + Ndist];
- int codelen[Nclen];
+ Sym syms[Nlitlen + Ndist];
+ Sym sym;
+ int nsym;
+ int i, nlit, ndist, nclen;
int dynsize;
int blocklen;
int dynamic;
@@ -491,74 +496,48 @@ static void block(State *s, int dynlen, int fixlen) {
memset(dfreq, 0, sizeof(dfreq));
lfreq[256] = 1; /* EOB */
for (i = 0; i < s->symlen; i++) {
- ushort sym = getsym(s, i);
-
- lfreq[sym]++;
- if (sym > Nlit) {
- i++; /* extra nbits */
- i++; /* extra bits */
- i++; /* dist sym */
- dfreq[getsym(s, i)]++;
- i++; /* extra nbits */
- i++; /* extra bits */
- }
+ sym = getsym(s, i);
+ if (sym.type == SymLitlen)
+ lfreq[sym.n]++;
+ else if (sym.type == SymDist)
+ dfreq[sym.n]++;
}
-
- hufflenslimit(lfreq, llen, Nlitlen, CodeBits-1);
+ hufflenslimit(llen, lfreq, Nlitlen, CodeBits-1);
+ hufflenslimit(dlen, dfreq, Ndist, CodeBits-1);
huffcodes(lcode, llen, Nlitlen);
- for (nlit = Nlitlen; nlit > Nlit && llen[nlit-1] == 0; nlit--);
-
- hufflenslimit(dfreq, dlen, Ndist, CodeBits-1);
huffcodes(dcode, dlen, Ndist);
+ for (nlit = Nlitlen; nlit > Nlit && llen[nlit-1] == 0; nlit--);
for (ndist = Ndist; ndist > 1 && dlen[ndist-1] == 0; ndist--);
- for (i = 0; i < nlit; i++)
- len[i] = llen[i];
- for (i = 0; i < ndist; i++)
- len[nlit + i] = dlen[i];
- n = clensymbols(len, nlit + ndist, syms);
- /* TODO: move to clensymbols */
+ nsym = clensymbols(syms, llen, nlit, dlen, ndist);
memset(cfreq, 0, sizeof(cfreq));
- for (i = 0; i < n; i++) {
- uint sym = syms[i];
-
- cfreq[sym]++;
- if (sym >= 16)
- i += 2;
+ for (i = 0; i < nsym; i++) {
+ sym = syms[i];
+ if (sym.type == SymClen)
+ cfreq[sym.n]++;
}
- hufflenslimit(cfreq, clen, Nclen, 7);
+ hufflenslimit(clen, cfreq, Nclen, 7);
huffcodes(ccode, clen, Nclen);
- for (i = 0; i < Nclen; i++)
- codelen[i] = clen[clenorder[i]];
- for (nclen = Nclen; nclen > 4 && codelen[nclen-1] == 0; nclen--);
+ for (nclen = Nclen; nclen > 4 && clen[clenorder[nclen-1]] == 0; nclen--);
- /* calc len */
+ /* calc size */
dynsize = 3 + 5 + 5 + 4;
dynsize += 3 * nclen;
- for (i = 0; i < n; i++) {
- uint sym = syms[i];
-
- dynsize += clen[sym];
- if (sym >= 16) {
- i++;
- dynsize += syms[i];
- i++;
- }
+ for (i = 0; i < nsym; i++) {
+ sym = syms[i];
+ if (sym.type == SymClen)
+ dynsize += clen[sym.n];
+ if (sym.type == SymExtra)
+ dynsize += sym.len;
}
for (i = 0; i < dynlen; i++) {
- ushort sym = getsym(s, i);
-
- dynsize += llen[sym];
- if (sym > Nlit) {
- i++; /* extra nbits */
- dynsize += getsym(s, i);
- i++; /* extra bits */
- i++; /* dist sym */
- dynsize += dlen[getsym(s, i)];
- i++; /* extra nbits */
- dynsize += getsym(s, i);
- i++; /* extra bits */
- }
+ sym = getsym(s, i);
+ if (sym.type == SymLitlen)
+ dynsize += llen[sym.n];
+ if (sym.type == SymDist)
+ dynsize += dlen[sym.n];
+ if (sym.type == SymExtra)
+ dynsize += sym.len;
}
dynsize += llen[256]; /* EOB */
/*
@@ -577,29 +556,24 @@ static void block(State *s, int dynlen, int fixlen) {
outbits(s, nclen - 4, 4);
/* clen code lengths */
for (i = 0; i < nclen; i++)
- outbits(s, codelen[i], 3);
+ outbits(s, clen[clenorder[i]], 3);
/* code lengths */
- for (i = 0; i < n; i++) {
- uint sym = syms[i];
-
- outbits(s, ccode[sym], clen[sym]);
- if (sym >= 16) {
- outbits(s, syms[i+2], syms[i+1]);
- i += 2;
- }
+ for (i = 0; i < nsym; i++) {
+ sym = syms[i];
+ if (sym.type == SymClen)
+ outbits(s, ccode[sym.n], clen[sym.n]);
+ if (sym.type == SymExtra)
+ outbits(s, sym.n, sym.len);
}
/* block data */
for (i = 0; i < dynlen; i++) {
- ushort sym = getsym(s, i);
-
- outbits(s, lcode[sym], llen[sym]);
- if (sym > Nlit) {
- outbits(s, getsym(s, i+2), getsym(s, i+1));
- sym = getsym(s, i+3);
- outbits(s, dcode[sym], dlen[sym]);
- outbits(s, getsym(s, i+5), getsym(s, i+4));
- i += 5;
- }
+ sym = getsym(s, i);
+ if (sym.type == SymLitlen)
+ outbits(s, lcode[sym.n], llen[sym.n]);
+ if (sym.type == SymDist)
+ outbits(s, dcode[sym.n], dlen[sym.n]);
+ if (sym.type == SymExtra)
+ outbits(s, sym.n, sym.len);
}
/* EOB */
outbits(s, lcode[256], llen[256]);
@@ -620,10 +594,9 @@ static int log2_8(uint n) {
if (n < 1U<<28) n <<= 4, r -= 4*8;
if (n < 1U<<30) n <<= 2, r -= 2*8;
if (n < 1U<<31) n <<= 1, r -= 1*8;
-
/*
* result is in [r,r+8), to determine the bottom 3 bits:
- * thresholds are 1<<31 times an odd power of 2^(1/16)
+ * thresholds are 1 << 31 times an odd power of 2^(1/16)
*/
if (n <= 0xAD583EEAU) {
if (n <= 0x91C3D373U)
@@ -641,53 +614,46 @@ static int log2_8(uint n) {
static void chooseblock(State *s) {
int lfreq[Nlitlen], dfreq[Ndist];
- int i, len, bestlen, longestlen = 0;
+ int i, len, bestlen, longestlen;
int ltotal, dtotal;
int bestvfm;
+ Sym sym;
memset(lfreq, 0, sizeof(lfreq));
memset(dfreq, 0, sizeof(dfreq));
lfreq[256] = 1; /* we're bound to need one EOB */
ltotal = 1;
- dtotal = 0;
+ dtotal = longestlen = bestvfm = 0;
bestlen = -1;
- bestvfm = 0;
len = 300 * 8; /* very approximate size of the Huffman trees */
for (i = 0; i < s->symlen; i++) {
- ushort sym = getsym(s, i);
-
- if (i > 0 && sym < Nlit) {
- int vfm = i * 32768 / len;
-
- if (bestlen < 0 || vfm > bestvfm) {
- bestlen = i;
- bestvfm = vfm;
+ sym = getsym(s, i);
+ if (sym.type == SymLitlen) {
+ if (sym.n < Nlit) {
+ int vfm = i * 32768 / len;
+
+ if (vfm > bestvfm) {
+ bestlen = i;
+ bestvfm = vfm;
+ }
+ longestlen = i;
}
- longestlen = i;
+ len += lfreq[sym.n] * log2_8(lfreq[sym.n]);
+ len -= ltotal * log2_8(ltotal);
+ lfreq[sym.n]++;
+ ltotal++;
+ len -= lfreq[sym.n] * log2_8(lfreq[sym.n]);
+ len += ltotal * log2_8(ltotal);
}
- len += lfreq[sym] * log2_8(lfreq[sym]);
- len -= ltotal * log2_8(ltotal);
- lfreq[sym]++;
- ltotal++;
- len -= lfreq[sym] * log2_8(lfreq[sym]);
- len += ltotal * log2_8(ltotal);
- if (sym > Nlit) {
- i++;
- sym = getsym(s, i);
- len += 8 * sym;
- i++;
- i++;
- sym = getsym(s, i);
- len += dfreq[sym] * log2_8(dfreq[sym]);
+ if (sym.type == SymExtra)
+ len += 8 * sym.len;
+ if (sym.type == SymDist) {
+ len += dfreq[sym.n] * log2_8(dfreq[sym.n]);
len -= dtotal * log2_8(dtotal);
- dfreq[sym]++;
+ dfreq[sym.n]++;
dtotal++;
- len -= dfreq[sym] * log2_8(dfreq[sym]);
+ len -= dfreq[sym.n] * log2_8(dfreq[sym.n]);
len += dtotal * log2_8(dtotal);
- i++;
- sym = getsym(s, i);
- len += 8 * sym;
- i++;
}
}
/* block(s, bestlen, longestlen);*/
@@ -701,7 +667,7 @@ static void emitlit(State *s, ushort lit) {
chooseblock(s);
fprintf(stderr, "%3d %c\n", lit, lit);
count++;
- putsym(s, lit);
+ putsym(s, makesym(SymLitlen, 0, lit));
}
static int bisect(ushort *base, int len, int n) {
@@ -733,18 +699,12 @@ count++;
m.len -= len;
i = bisect(lenbase, Nlen, len);
- /* len symbol */
- putsym(s, Nlit + i + 1);
- /* len extra */
- putsym(s, lenbits[i]);
- putsym(s, len - lenbase[i]);
+ putsym(s, makesym(SymLitlen, 0, Nlit + i + 1));
+ putsym(s, makesym(SymExtra, lenbits[i], len - lenbase[i]));
i = bisect(distbase, Ndist, m.dist);
- /* dist symbol */
- putsym(s, i);
- /* dist extra */
- putsym(s, distbits[i]);
- putsym(s, m.dist - distbase[i]);
+ putsym(s, makesym(SymDist, 0, i));
+ putsym(s, makesym(SymExtra, distbits[i], m.dist - distbase[i]));
}
}
diff --git a/inflate.c b/inflate.c
@@ -136,8 +136,10 @@ static int build_huff(Huff *huff, uchar *lens, uint n, uint nbits) {
count[i] = 0;
for (i = 0; i < n; i++)
count[lens[i]]++;
- if (count[0] == n)
- return -1;
+ if (count[0] == n) {
+ huff->nbits = table[0].len = 0;
+ return 0;
+ }
count[0] = 0;
/* bound code lengths, force nbits to be within the bounds */
diff --git a/inflate_callback.c b/inflate_callback.c
@@ -115,8 +115,10 @@ static int build_huff(Huff *huff, uchar *lens, uint n, uint nbits) {
count[i] = 0;
for (i = 0; i < n; i++)
count[lens[i]]++;
- if (count[0] == n)
- return -1;
+ if (count[0] == n) {
+ huff->nbits = table[0].len = 0;
+ return 0;
+ }
count[0] = 0;
/* bound code lengths, force nbits to be within the bounds */