flate

deflate implementation
git clone git://git.suckless.org/flate
Log | Files | Refs | README

commit 3928a4216079f8891ba262c78cfa1e02ad75d998
parent 44ccdf16a1e951aff295edcbc76b23b4117a36e8
Author: nsz <nszabolcs@gmail.com>
Date:   Tue, 11 Aug 2009 14:46:50 +0200

todo update
Diffstat:
README | 4++--
TODO | 44++++++++++++++++++++++++++++++++++++++++++++
deflate.c | 85+++++++++++++++++++++++++++----------------------------------------------------
flate.h | 2--
inflate.c | 10----------
5 files changed, 75 insertions(+), 70 deletions(-)

diff --git a/README b/README @@ -14,6 +14,6 @@ build: make features: - inflate: decode raw deflate compressed data - inflate_simple: minimal implementation deflate: raw deflate compression + inflate: decode raw deflate compressed data + inflate_simple: minimal inflate diff --git a/TODO b/TODO @@ -0,0 +1,44 @@ +flate +----- +man +error message ? +inflate assumes Flate* < 0 +_reset _free +_zlib _gzip _zip +deflate, inflate exmplae -> flate_example +test, benchmark: + empty block + all zero (rle) + uncompressed block + test with small nin/nout (1,2..9 bytes) + + +inflate +------- +error message +init globals +(rev lookup vs revinc) +(test/optimize uncompressed block) +read less than 7 bits in clen decode +bound checks in clen decode + + +deflate +------- +space opt: + overlap rbuf/dstwin + hufflen: overlap arrays (parent+heap only) +time opt: + match loop unroll + less branching in deflate_state (check avail > MinMatch once) + bisect len/distbase -> lookup table + rolling hash +better compression: + block split heuristics with lfreq, dfreq + (zlib huffcode trick: if same freq then shorter code goes to the one with smaller subtree) +code cleanups: + bounds on the compressend size + verify dstsize + input from in+nin instead of src+srcend + setting s->state.. + ushort vs int diff --git a/deflate.c b/deflate.c @@ -1,54 +1,27 @@ #include <stdlib.h> #include <string.h> -#include <stdio.h> #include "flate.h" -/* -TODO: - space opt: - overlap rbuf/dstwin - hufflen: overlap arrays (parent+heap only) - time opt: - match loop unroll - less branching in deflate_state (check avail > MinMatch once) - bisect len/distbase -> lookup table - rolling hash - better compression: - block split heuristics with lfreq, dfreq - (zlib huffcode trick: if same freq then shorter code goes to the one with smaller subtree) - code cleanups: - bounds on the compressend size - verify dstsize - input from in+nin instead of src+srcend - setting s->state.. - ushort vs int - tests: - empty block - all zero (rle) - uncompressed block - test with small nin/nout (1,2..9 bytes) -*/ - enum { - MinMatch = 3, /* min match length */ - MaxMatch = 258, /* max match length */ - MaxChainLen = 256, /* max length of hash chain */ + CodeBits = 16, /* max number of bits in a code + 1 */ + EOB = 256, /* end of block symbol */ + Nlit = 256, /* number of lit codes */ + Nlen = 29, /* number of len codes */ + Nlitlen = Nlit + Nlen + 3, /* litlen codes + block end + 2 unused */ + Ndist = 30, /* number of distance codes */ + Nclen = 19 /* number of code length codes */ + MinMatch = 3, /* min match length */ + MaxMatch = 258, /* max match length */ + MaxDist = 1 << 15, /* max match distance */ + + MaxChainLen = 256, /* max length of hash chain */ HashBits = 13, HashSize = 1 << HashBits, /* hash table size */ BigDist = 1 << 12, /* max match distance for short match length */ - MaxDist = 1 << 15, /* max match distance */ WinSize = 2*MaxDist + MaxMatch, /* input window size */ DstSize = MaxDist + 2*MaxMatch + 6, /* output window size (worst case compressed block size) */ LzSize = 1 << 13, /* lz buffer size */ LzLitFlag = 1 << 15, /* marks literal run length in lz buffer */ - - CodeBits = 16, /* max number of bits in a code + 1 */ - EOB = 256, /* end of block symbol */ - Nlit = 256, /* number of lit codes */ - Nlen = 29, /* number of len codes */ - Nlitlen = Nlit + Nlen + 3, /* litlen codes + block end + 2 unused */ - Ndist = 30, /* number of distance codes */ - Nclen = 19 /* number of code length codes */ }; typedef struct { @@ -62,32 +35,32 @@ typedef struct { } LzCode; typedef struct { - int state; /* prev return value */ - int eof; /* end of input */ - int pos; /* position in input win */ - int startpos; /* block start pos in input win */ - int avail; /* available in input win */ - Match prevm; /* previous (deferred) match */ - uchar *src; /* input data (not yet in win) */ + int state; /* prev return value */ + int eof; /* end of input */ + int pos; /* position in input win */ + int startpos; /* block start pos in input win */ + int avail; /* available in input win */ + Match prevm; /* previous (deferred) match */ + uchar *src; /* input data (not yet in win) */ uchar *srcend; - uchar *dst; /* compressed output (position in dstbuf) */ - uchar *dstbegin; /* start position of unflushed data in dstbuf */ + uchar *dst; /* compressed output (position in dstbuf) */ + uchar *dstbegin; /* start position of unflushed data in dstbuf */ uint bits; int nbits; - uchar win[WinSize]; /* input window */ - ushort head[HashSize]; /* position of hash chain heads */ - ushort chain[MaxDist]; /* hash chain */ - LzCode lzbuf[LzSize]; /* literal run length, match len, match dist */ - LzCode *lz; /* current pos in lzbuf */ - int nlit; /* literal run length */ + uchar win[WinSize]; /* input window */ + ushort head[HashSize]; /* position of hash chain heads */ + ushort chain[MaxDist]; /* hash chain */ + LzCode lzbuf[LzSize]; /* literal run length, match len, match dist */ + LzCode *lz; /* current pos in lzbuf */ + int nlit; /* literal run length */ ushort lfreq[Nlitlen]; ushort dfreq[Ndist]; uchar dstbuf[DstSize]; } State; -static uchar fixllen[Nlitlen]; /* fixed lit/len huffman code tree */ +static uchar fixllen[Nlitlen]; /* fixed lit/len huffman code tree */ static ushort fixlcode[Nlitlen]; -static uchar fixdlen[Ndist]; /* fixed distance huffman code tree */ +static uchar fixdlen[Ndist]; /* fixed distance huffman code tree */ static ushort fixdcode[Ndist]; /* base offset and extra bits tables */ diff --git a/flate.h b/flate.h @@ -9,7 +9,6 @@ enum { FlateIn = -2, FlateOut = -3 }; -/* TODO inflate assumes Flate* < 0 */ typedef struct { int nin; @@ -20,7 +19,6 @@ typedef struct { void *state; } FlateStream; -/* TODO: _reset _free */ int deflate(FlateStream *s); int deflate_callback(int (*r)(void *, int, void *), void *rdata, int (*w)(void *, int, void *), void *wdata); int inflate(FlateStream *s); diff --git a/inflate.c b/inflate.c @@ -2,16 +2,6 @@ #include <string.h> #include "flate.h" -/* -TODO: - error message - init globals - (rev lookup vs revinc) - (test/optimize uncompressed block) - read less than 7 bits in clen decode - bound checks in clen decode -*/ - enum { CodeBits = 16, /* max number of bits in a code + 1 */ LitlenTableBits = 9, /* litlen code bits used in lookup table */