commit c10b847118d0735b2f65b002110a781ac2e8f0a5
parent b598e8d90d3680b89058f1bdc63ff598461091af
Author: nsz <nszabolcs@gmail.com>
Date: Sun, 23 Aug 2009 04:56:25 +0200
-avail +endpos +skip +fillwin mods (wip)
Diffstat:
4 files changed, 127 insertions(+), 74 deletions(-)
diff --git a/Makefile b/Makefile
@@ -30,8 +30,8 @@ profinf: inflate.c deflate.c sflate.c crc.c adler.c
# grep alloc a.valgrind
rm -f a.out a.valgrind *.gcno *.gcda gmon.out
-profdef: inflate.c deflate.c sflate.c crc.c adler.c
- gcc -O1 -fprofile-arcs -ftest-coverage -pg -g -Wall $+
+profdef: deflate.c inflate.c sflate.c crc.c adler.c
+ gcc -O0 -fprofile-arcs -ftest-coverage -pg -g -Wall $+
./a.out <a.u >/dev/null
gcov -b $+ >/dev/null
gprof a.out >$<.gprof
diff --git a/crc.c b/crc.c
@@ -3,7 +3,7 @@
uint crctab[256];
void crc32init(void) {
- static const uint poly = 0xEDB88320;
+ static const uint poly = 0xedb88320;
int i,j;
for (i = 0; i < 256; ++i) {
diff --git a/deflate.c b/deflate.c
@@ -2,6 +2,7 @@
#include <string.h>
#include "flate.h"
+#include <stdio.h>
enum {
CodeBits = 16, /* max number of bits in a code + 1 */
EOB = 256, /* end of block symbol */
@@ -18,12 +19,12 @@ enum {
HashBits = 13,
HashSize = 1 << HashBits, /* hash table size */
BigDist = 1 << 12, /* max match distance for short match length */
- MaxDist = WinSize - MaxMatch + 1, /* worst case available history */
- StartPos = WinSize - MaxMatch + 1, /* block start position */
- EndPos = 2*WinSize - MaxMatch + 1, /* block end position */
- DstSize = WinSize + MaxMatch + 6, /* worst case compressed block size */
+ BlockSize = (1 << 15) - 234, /* --- */
+ MaxDist = WinSize - MaxMatch,
+ WinBufSize = 2*WinSize + MaxMatch,
+ DstSize = BlockSize + MaxMatch + 6, /* worst case compressed block size */
LzSize = 1 << 13, /* lz buffer size */
- LzEnd = LzSize - 2, /* guard that lzbuf does not overflow in the next iteration */
+ LzEnd = LzSize - 2, /* guard lzbuf not to overflow in the next iteration */
LzLitFlag = 1 << 15 /* marks literal run length in lz buffer */
};
@@ -42,7 +43,8 @@ typedef struct {
int eof; /* end of input */
int pos; /* position in input win */
int startpos; /* block start pos in input win */
- int avail; /* available in input win */
+ int endpos; /* end of available bytes in win */
+ int skip; /* skipped hash chain updates (until next iter) */
Match prevm; /* previous (deferred) match */
uchar *src; /* input data (not yet in win) */
uchar *srcend;
@@ -50,7 +52,7 @@ typedef struct {
uchar *dstbegin; /* start position of unflushed data in dstbuf */
uint bits;
int nbits;
- uchar win[2*WinSize]; /* input window */
+ uchar win[WinBufSize]; /* input window */
ushort head[HashSize]; /* position of hash chain heads */
ushort chain[WinSize]; /* hash chain */
LzCode lzbuf[LzSize]; /* literal run length, match len, match dist */
@@ -326,7 +328,7 @@ static void deflate_block(State *s) {
uchar *p;
int dynsize, fixsize, uncsize;
int blocklen = s->pos - s->startpos;
-/* int dyntree; */
+ int dyntree;
/* calc dynamic codes */
hufflens(llen, s->lfreq, Nlitlen, CodeBits-1);
@@ -357,7 +359,7 @@ static void deflate_block(State *s) {
if (c == 18)
dynsize += 7;
}
-/* dyntree = dynsize - 3; */
+ dyntree = dynsize - 3;
for (lz = s->lzbuf, p = s->win + s->startpos; lz != s->lz; lz++)
if (lz->bits & LzLitFlag)
for (n = lz->n; n > 0; n--, p++) {
@@ -380,7 +382,7 @@ static void deflate_block(State *s) {
dynsize += llen[EOB];
/* emit block */
- putbits(s, s->eof, 1);
+ putbits(s, s->eof && s->pos == s->endpos, 1);
if (dynsize < fixsize && dynsize < uncsize) {
/* dynamic code */
putbits(s, 2, 2);
@@ -415,9 +417,9 @@ static void deflate_block(State *s) {
s->dst += blocklen;
}
/*
-fprintf(stderr, "blen:%d [%d,%d] lzlen:%d dynlen:%d (tree:%d rate:%.3f) fixlen:%d (rate:%.3f) unclen:%d (rate:%.3f) avail:%d\n",
+fprintf(stderr, "blen:%d [%d,%d] lzlen:%d dynlen:%d (tree:%d rate:%.3f) fixlen:%d (rate:%.3f) unclen:%d (rate:%.3f)\n",
blocklen, s->startpos, s->pos, s->lz - s->lzbuf, dynsize, dyntree, dynsize/(float)blocklen,
- fixsize, fixsize/(float)blocklen, uncsize, uncsize/(float)blocklen, s->avail);
+ fixsize, fixsize/(float)blocklen, uncsize, uncsize/(float)blocklen);
*/
}
@@ -451,6 +453,7 @@ static void flushlit(State *s) {
static void recordmatch(State *s, Match m) {
int n;
+/*fprintf(stderr, "m %d %d\n", m.len, m.dist);*/
flushlit(s);
n = bisect(lenbase, Nlen, m.len);
s->lz->n = n;
@@ -466,6 +469,7 @@ static void recordmatch(State *s, Match m) {
/* update literal run length */
static void recordlit(State *s, int c) {
+/*fprintf(stderr, "l %c\n", c);*/
s->nlit++;
s->lfreq[c]++;
}
@@ -477,46 +481,60 @@ static int gethash(uchar *p) {
/* update hash chain at the current position */
static int updatechain(State *s) {
- int h, i;
-
- if (s->avail < MinMatch)
- return 0;
- h = gethash(s->win + s->pos);
- i = s->head[h];
- s->head[h] = s->pos;
- if (i >= s->pos || s->pos - i > MaxDist)
- i = 0;
- s->chain[s->pos % WinSize] = i;
- return i;
+ int hash, next = 0, p = s->pos, i = p;
+
+ if (s->endpos - p < MinMatch)
+ p = s->endpos - MinMatch;
+ for (i -= s->skip; i <= p; i++) {
+ hash = gethash(s->win + i);
+ next = s->head[hash];
+ s->head[hash] = i;
+ if (next >= i || i - next >= MaxDist)
+ next = 0;
+ s->chain[i % WinSize] = next;
+ }
+ s->skip = 0;
+ return next;
}
-/* find longest match, mpos is the next in the hash chain */
-static Match getmatch(State *s, int mpos) {
+/* find longest match, next position in the hash chain is given */
+static Match getmatch(State *s, int next) {
Match m = {0, MinMatch-1};
int len;
- int maxlen = s->avail < MaxMatch ? s->avail : MaxMatch;
int limit = s->pos - MaxDist;
int chainlen = MaxChainLen;
uchar *q;
uchar *p = s->win + s->pos;
- uchar *end = p + maxlen;
+ uchar *end = p + MaxMatch;
do {
- q = s->win + mpos;
+ q = s->win + next;
+/*fprintf(stderr,"match: next:%d pos:%d limit:%d\n", next, s->pos, limit);*/
/* next match should be at least m.len+1 long */
if (q[m.len] != p[m.len] || q[m.len-1] != p[m.len-1] || q[0] != p[0])
continue;
+ /* loop unroll: MaxMatch % 10 == 8 */
+/* while (*++q == *++p && *++q == *++p
+ && *++q == *++p && *++q == *++p
+ && *++q == *++p && *++q == *++p
+ && *++q == *++p && ++p != end && *++q == *p
+ && *++q == *++p && *++q == *++p);
+*/
while (++p != end && *++q == *p);
- len = maxlen - (end - p);
+ len = MaxMatch - (end - p);
p -= len;
+/*fprintf(stderr,"match: len:%d dist:%d\n", len, s->pos - next);*/
if (len > m.len) {
- m.dist = s->pos - mpos;
+ m.dist = s->pos - next;
m.len = len;
- if (len == maxlen)
+ if (len == MaxMatch)
+ return m;
+ if (s->pos + len >= s->endpos) { /* TODO: overflow */
+ m.len = s->endpos - s->pos;
return m;
+ }
}
- /* >= limit can be allowed except when limit == 0 */
- } while ((mpos = s->chain[mpos % WinSize]) > limit && --chainlen);
+ } while ((next = s->chain[next % WinSize]) > limit && --chainlen);
if (m.len < MinMatch || (m.len == MinMatch && m.dist > BigDist))
m.len = 0;
return m;
@@ -533,27 +551,42 @@ static void startblock(State *s) {
s->lfreq[EOB]++;
}
-/* if block should be ended then end it and shift input window */
-static int endblock(State *s) {
+static int shiftwin(State *s) {
int n;
- if (s->pos >= EndPos || s->lz - s->lzbuf >= LzEnd || (s->eof && s->avail == 0)) {
+ if (s->startpos < WinSize)
+ return 0;
+ memmove(s->win, s->win + WinSize, WinBufSize - WinSize);
+ for (n = 0; n < HashSize; n++)
+ s->head[n] = s->head[n] > WinSize ? s->head[n] - WinSize : 0;
+ for (n = 0; n < WinSize; n++) {
+ s->chain[n] = s->chain[n] > WinSize ? s->chain[n] - WinSize : 0;
+/* n++;
+ s->chain[n] = s->chain[n] > WinSize ? s->chain[n] - WinSize : 0;
+ n++;
+ s->chain[n] = s->chain[n] > WinSize ? s->chain[n] - WinSize : 0;
+ n++;
+ s->chain[n] = s->chain[n] > WinSize ? s->chain[n] - WinSize : 0;
+*/ }
+ s->pos -= WinSize;
+ s->startpos -= WinSize;
+ s->endpos -= WinSize;
+ return 1;
+}
+
+/* if block should be ended then end it and shift input window */
+static int endblock(State *s) {
+ if ((s->pos >= 2*WinSize && !shiftwin(s)) ||
+ s->pos - s->startpos >= BlockSize ||
+ s->lz - s->lzbuf >= LzEnd ||
+ (s->eof && s->pos == s->endpos)) {
/* deflate block */
flushlit(s);
if (s->prevm.len)
s->pos--;
deflate_block(s);
- if (s->eof) {
+ if (s->eof && s->pos == s->endpos)
putbits(s, 0, 7);
- return 1;
- }
- /* shift input window */
- memcpy(s->win, s->win + WinSize, WinSize);
- for (n = 0; n < HashSize; n++)
- s->head[n] = s->head[n] > WinSize ? s->head[n] - WinSize : 0;
- for (n = 0; n < WinSize; n++)
- s->chain[n] = s->chain[n] > WinSize ? s->chain[n] - WinSize : 0;
- s->pos -= WinSize;
return 1;
}
return 0;
@@ -563,32 +596,44 @@ static int endblock(State *s) {
static int fillwin(State *s) {
int n, k;
- /* s->avail < MaxMatch && s->pos < EndPos */
- if (!s->eof) {
+/* todo: lzend */
+ if (s->endpos < WinBufSize && !s->eof) {
k = s->srcend - s->src;
- n = 2*WinSize - s->pos - s->avail; /* s->avail + n >= MaxMatch */
+ n = WinBufSize - s->endpos;
if (k > n)
k = n;
- memcpy(s->win + s->pos + s->avail, s->src, k);
+ memcpy(s->win + s->endpos, s->src, k);
s->src += k;
- s->avail += k;
- if (s->avail < MaxMatch) /* s->srcend == s->src */
+ s->endpos += k;
+/*
+fprintf(stderr,"fill: pos:%d k:%d n:%d end:%d start:%d\n", s->pos, k, n, s->endpos, s->startpos);
+*/
+ if (s->endpos < WinBufSize)
return 0;
}
return 1;
}
+static int calcguard(State *s) {
+ int p = s->endpos - MaxMatch;
+ int q = s->startpos + BlockSize;
+
+ return p < q ? p : q;
+}
+
/* deflate compress from s->src into s->dstbuf */
static int deflate_state(State *s) {
Match m;
- int head;
+ int next;
+ int guard;
+ LzCode *lzend;
if (s->state == FlateIn)
s->eof = s->src == s->srcend;
else if (s->state == FlateOut) {
if (s->dstbegin < s->dst)
return (s->state = FlateOut);
- if (s->eof)
+ if (s->eof && s->pos == s->endpos)
return (s->state = FlateOk);
startblock(s);
if (s->prevm.len)
@@ -596,32 +641,40 @@ static int deflate_state(State *s) {
} else
return s->state;
+ lzend = s->lzbuf + LzSize - 2;
+ guard = calcguard(s);
+/*fprintf(stderr,"guard:%d pos:%d\n", guard, s->pos);*/
for (;;) {
- if (s->avail < MaxMatch) {
+ if (s->pos >= guard || s->lz >= lzend) {
+/*fprintf(stderr,"guard:%d pos:%d len:%d end:%d start:%d\n", guard, s->pos, s->pos - s->startpos, s->endpos, s->startpos);*/
if (endblock(s))
return (s->state = FlateOut);
if (!fillwin(s))
return (s->state = FlateIn);
+ guard = calcguard(s);
}
- head = updatechain(s);
- if (head)
- m = getmatch(s, head);
- if (head && m.len > s->prevm.len) {
+/*
+if (s->lz - s->lzbuf > LzEnd - 3)
+fprintf(stderr, "near lzend: %d, len:%d\n", s->lz-s->lzbuf, s->pos - s->startpos);
+*/
+ next = updatechain(s);
+ if (next)
+{
+ m = getmatch(s, next);
+/*fprintf(stderr, "pos:%d len:%d next:%d match:%d,%d\n", s->pos, s->pos - s->startpos, next, m.len, m.dist);*/
+}
+ if (next && m.len > s->prevm.len) {
if (s->prevm.len)
recordlit(s, s->win[s->pos-1]);
s->prevm = m;
} else if (s->prevm.len) {
recordmatch(s, s->prevm);
- s->prevm.len -= 2;
- do {
- s->pos++;
- s->avail--;
- updatechain(s);
- } while (--s->prevm.len);
+ s->skip = s->prevm.len - 2;
+ s->prevm.len = 0;
+ s->pos += s->skip;
} else
recordlit(s, s->win[s->pos]);
s->pos++;
- s->avail--;
}
}
@@ -650,9 +703,9 @@ static State *alloc_state(void) {
s->state = FlateOut;
s->src = s->srcend = 0;
s->dst = s->dstbegin = s->dstbuf;
- s->pos = s->startpos = StartPos;
+ s->pos = s->startpos = s->endpos = WinSize;
s->eof = 0;
- s->avail = 0;
+ s->skip = 0;
s->prevm.len = 0;
return s;
}
diff --git a/sflate.c b/sflate.c
@@ -162,7 +162,7 @@ static uint dummysum(uchar *p, int n, uint sum) {
int compress_stream(FILE *in, FILE *out) {
FlateStream s;
int k, n;
- enum {Nin = 1<<13, Nout = 1<<15};
+ enum {Nin = 1<<15, Nout = 1<<15};
s.in = malloc(Nin);
s.out = malloc(Nout);
@@ -218,7 +218,7 @@ int decompress_stream(FILE *in, FILE *out) {
FlateStream s;
uchar *begin;
int k, n;
- enum {Nin = 1<<13, Nout = 1<<15};
+ enum {Nin = 1<<15, Nout = 1<<15};
s.in = begin = malloc(Nin);
s.out = malloc(Nout);
@@ -349,7 +349,7 @@ int main(int argc, char *argv[]) {
break;
}
if (verbose == 'v')
- fprintf(stderr, "in:%d out:%d checksum: 0x%08x (header:%d compressed len:%d footer:%d extra input bytes:%s)\n",
+ fprintf(stderr, "in:%d out:%d checksum: 0x%08x (header:%d data:%d footer:%d extra input:%s)\n",
nin, nout, sum, headerlen, (comp == 'c' ? nout : nin) - headerlen - footerlen - extralen,
footerlen, extralen ? "yes" : "no");
if (n != FlateOk)