flate

deflate implementation
git clone git://git.suckless.org/flate
Log | Files | Refs | README

commit 76b3cac7919f6ea0663abb27e104a7c7a4235f56
parent c4c8fd422936440a24e5972a115be1b253076d0a
Author: nsz <nszabolcs@gmail.com>
Date:   Fri, 21 Aug 2009 15:03:20 +0200

+sflate, inflate returns extra bytes after compressed input
Diffstat:
Makefile | 7+++++--
TODO | 5++++-
adler.c | 4+++-
crc.c | 12++++++++----
flate.h | 4++++
flatez.c | 132-------------------------------------------------------------------------------
inflate.c | 7++++++-
sflate.c | 359+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 files changed, 389 insertions(+), 141 deletions(-)

diff --git a/Makefile b/Makefile @@ -2,11 +2,14 @@ CFLAGS=-O3 -Wall -ansi -pedantic LDFLAGS= SRC=inflate.c inflate_example.c inflate_simple.c \ - deflate.c deflate_example.c + deflate.c deflate_example.c \ + sflate.c crc.c adler.c OBJ=${SRC:.c=.o} -EXE=inflate inflate_simple deflate +EXE=sflate inflate inflate_simple deflate all: ${EXE} +sflate: sflate.o crc.o adler.o inflate.o deflate.o + ${CC} -o $@ $^ ${LDFLAGS} inflate: inflate.o inflate_example.o ${CC} -o $@ $^ ${LDFLAGS} inflate_simple: inflate_simple.o diff --git a/TODO b/TODO @@ -1,6 +1,8 @@ flate ----- +fill output entirely man +globals error message ? inflate assumes Flate* < 0 _init _reset _free ? @@ -15,7 +17,7 @@ test, benchmark: inflate ------- -init globals +callback interface: reading past the end of compressed data: unreachable data (rev lookup vs revinc) (test/optimize uncompressed block) read less than 7 bits in clen decode @@ -36,6 +38,7 @@ better compression: (zlib huffcode trick: if same freq then shorter code goes to the one with smaller subtree) last block can be allowed to be larger code cleanups: + better organization (configurable blocksize) bounds on the compressend size input from in+nin instead of src+srcend setting s->state.. diff --git a/adler.c b/adler.c @@ -1,9 +1,11 @@ +#include "flate.h" + enum { AdlerBase = 65521, /* largest 16bit prime */ AdlerN = 5552 /* max iters before 32bit overflow */ }; -uint adlersum(uchar *p, int n, uint adler) { +uint adler32(uchar *p, int n, uint adler) { uint s1 = adler & 0xffff; uint s2 = (adler >> 16) & 0xffff; uchar *ep; diff --git a/crc.c b/crc.c @@ -1,10 +1,14 @@ +#include "flate.h" + uint crctab[256]; -void crcinit(void) { +void crc32init(void) { static const uint poly = 0xEDB88320; + int i,j; for (i = 0; i < 256; ++i) { - crc = i; + uint crc = i; + for (j = 0; j < 8; j++) { if (crc & 1) crc = (crc >> 1) ^ poly; @@ -15,11 +19,11 @@ void crcinit(void) { } } -uint crcsum(uchar *p, int n, uint crc) { +uint crc32(uchar *p, int n, uint crc) { uchar *ep = p + n; crc ^= 0xffffffff; while (p < ep) - crc = crctab[(crc & 0xff) ^ *buf++] ^ (crc >> 8); + crc = crctab[(crc & 0xff) ^ *p++] ^ (crc >> 8); return crc ^ 0xffffffff; } diff --git a/flate.h b/flate.h @@ -23,3 +23,7 @@ int deflate(FlateStream *s); int deflate_callback(int (*r)(void *, int, void *), void *rdata, int (*w)(void *, int, void *), void *wdata); int inflate(FlateStream *s); int inflate_callback(int (*r)(void *, int, void *), void *rdata, int (*w)(void *, int, void *), void *wdata); + +uint adler32(uchar *p, int n, uint adler); +void crc32init(void); +uint crc32(uchar *p, int n, uint crc); diff --git a/flatez.c b/flatez.c @@ -1,132 +0,0 @@ - -static int checkfooter(uchar *p, uint sum) { - return sum == ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]); -} - -enum { - ZlibCM = 7 << 4, - ZlibCINFO = 8, - ZlibFLEV = 3 << 6, - ZlibFDICT = 1 << 5, - ZlibFCHK = 31 - (((ZlibCM | ZlibCINFO) << 8) | ZlibFLEV) % 31, -}; - -int deflate_zlib_header(uchar *p, int n) { - if (n < 2) - return FlateErr; - p[0] = ZlibCM | ZlibCINFO; /* deflate method, 32K window size */ - p[1] = ZlibFLEV | ZlibFCHK; /* highest compression */ - return 2; -} - -int deflate_zlib_footer(uchar *p, int n, uint sum) { - if (n < 4) - return FlateErr; - p[0] = sum >> 24; - p[1] = sum >> 16; - p[2] = sum >> 8; - p[3] = sum; - return 4; -} - -int inflate_zlib_header(uchar *p, int n) { - if (n < 2) - return FlateErr; - if (((p[0] << 8) | p[1]) % 31) - return FlateErr; - if ((p[0] & 0xf0) != ZlibCM || (p[0] & 0x0f) > ZlibCINFO) - return FlateErr; - if (p[1] & ZlibFDICT) - return FlateErr; - return 2; -} - -int inflate_zlib_footer(uchar *p, int n, uint sum) { - if (n < 4 || !checkfooter(p, sum)) - return FlateErr; - return 4; -} - - -enum { - GZipID1 = 0x1f, - GZipID2 = 0x8b, - GZipCM = 8, - GZipFHCRC = 1 << 1, - GZipFEXTRA = 1 << 2, - GZipFNAME = 1 << 3, - GZipFCOMM = 1 << 4, - GZipXFL = 2, - GZipOS = 255 -}; - -int deflate_gzip_header(uchar *p, int n) { - int k; - - if (n < 10) - return FlateErr; - for (k = 0; k < n; k++) - p[k] = 0; - p[0] = GZipID1; - p[1] = GZipID2; - p[2] = GZipCM; - p[8] = GZipXFL; - p[9] = GZipOS; - return 10; -} - -int deflate_gzip_footer(uchar *p, int n, uint sum, uint len) { - if (n < 8) - return FlateErr; - p[0] = sum >> 24; - p[1] = sum >> 16; - p[2] = sum >> 8; - p[3] = sum; - p[4] = len >> 24; - p[5] = len >> 16; - p[6] = len >> 8; - p[7] = len; - return 8; -} - -int inflate_gzip_header(uchar *p, int n) { - int k = 10; - - if (k > n) - return FlateErr; - if (p[0] != GZipID1 || p[1] != GZipID2 || p[2] != GZipCM) - return FlateErr; - if (p[3] & GZipFEXTRA) { - k += 2 + ((p[k] << 8) | p[k+1]); - if (k > n) - return FlateErr; - } - if (p[3] & GZipFNAME) { - for (; k < n; k++) - if (p[k] == 0) - break; - k++; - if (k > n) - return FlateErr; - } - if (p[3] & GZipFCOMM) { - for (; k < n; k++) - if (p[k] == 0) - break; - k++; - if (k > n) - return FlateErr; - } - if (p[3] & GZipFHCRC) { - k += 2; - if (k > n) - return FlateErr; - } - return k; -} - -int inflate_gzip_footer(uchar *p, int n, uint sum, uint len) { - if (n < 8 || !checkfooter(p, sum) || !checkfooter(p+4, len)) - return FlateErr; - return 8; -} diff --git a/inflate.c b/inflate.c @@ -575,7 +575,7 @@ static int inflate_state(State *s) { s->state = BlockHead; break; default: - return s->err = "corrupt state.", FlateErr; + return s->err = "corrupt internal state.", FlateErr; } } } @@ -631,6 +631,11 @@ int inflate(FlateStream *stream) { s->posout = 0; } if (n == FlateOk || n == FlateErr) { + if (s->nbits || s->src < s->srcend) { + s->nbits /= 8; + stream->in = s->src - s->nbits; + stream->nin = s->srcend - s->src + s->nbits; + } stream->err = s->err; free(s); stream->state = 0; diff --git a/sflate.c b/sflate.c @@ -0,0 +1,359 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "flate.h" + +static int checkfooter(uchar *p, uint sum) { + return sum == ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]); +} + +enum { + ZlibCM = 7 << 4, + ZlibCINFO = 8, + ZlibFLEV = 3 << 6, + ZlibFDICT = 1 << 5, + ZlibFCHK = 31 - (((ZlibCM | ZlibCINFO) << 8) | ZlibFLEV) % 31 +}; + +int deflate_zlib_header(uchar *p, int n) { + if (n < 2) + return FlateErr; + p[0] = ZlibCM | ZlibCINFO; /* deflate method, 32K window size */ + p[1] = ZlibFLEV | ZlibFCHK; /* highest compression */ + return 2; +} + +int deflate_zlib_footer(uchar *p, int n, uint sum, uint len, uint zlen) { + if (n < 4) + return FlateErr; + p[0] = sum >> 24; + p[1] = sum >> 16; + p[2] = sum >> 8; + p[3] = sum; + return 4; +} + +int inflate_zlib_header(uchar *p, int n) { + if (n < 2) + return FlateErr; + if (((p[0] << 8) | p[1]) % 31) + return FlateErr; + if ((p[0] & 0xf0) != ZlibCM || (p[0] & 0x0f) > ZlibCINFO) + return FlateErr; + if (p[1] & ZlibFDICT) + return FlateErr; + return 2; +} + +int inflate_zlib_footer(uchar *p, int n, uint sum, uint len, uint zlen) { + if (n < 4 || !checkfooter(p, sum)) + return FlateErr; + return 4; +} + + +enum { + GZipID1 = 0x1f, + GZipID2 = 0x8b, + GZipCM = 8, + GZipFHCRC = 1 << 1, + GZipFEXTRA = 1 << 2, + GZipFNAME = 1 << 3, + GZipFCOMM = 1 << 4, + GZipXFL = 2, + GZipOS = 255 +}; + +int deflate_gzip_header(uchar *p, int n) { + int k; + + if (n < 10) + return FlateErr; + for (k = 0; k < n; k++) + p[k] = 0; + p[0] = GZipID1; + p[1] = GZipID2; + p[2] = GZipCM; + p[8] = GZipXFL; + p[9] = GZipOS; + return 10; +} + +int deflate_gzip_footer(uchar *p, int n, uint sum, uint len, uint zlen) { + if (n < 8) + return FlateErr; + p[0] = sum >> 24; + p[1] = sum >> 16; + p[2] = sum >> 8; + p[3] = sum; + p[4] = len >> 24; + p[5] = len >> 16; + p[6] = len >> 8; + p[7] = len; + return 8; +} + +int inflate_gzip_header(uchar *p, int n) { + int k = 10; + + if (k > n) + return FlateErr; + if (p[0] != GZipID1 || p[1] != GZipID2 || p[2] != GZipCM) + return FlateErr; + if (p[3] & GZipFEXTRA) { + k += 2 + ((p[k] << 8) | p[k+1]); + if (k > n) + return FlateErr; + } + if (p[3] & GZipFNAME) { + for (; k < n; k++) + if (p[k] == 0) + break; + k++; + if (k > n) + return FlateErr; + } + if (p[3] & GZipFCOMM) { + for (; k < n; k++) + if (p[k] == 0) + break; + k++; + if (k > n) + return FlateErr; + } + if (p[3] & GZipFHCRC) { + k += 2; + if (k > n) + return FlateErr; + } + return k; +} + +int inflate_gzip_footer(uchar *p, int n, uint sum, uint len, uint zlen) { + if (n < 8 || !checkfooter(p, sum) || !checkfooter(p+4, len)) + return FlateErr; + return 8; +} + + +/* example usage */ + +static int (*header)(uchar *, int); +static int (*footer)(uchar *, int, uint, uint, uint); +static uint (*checksum)(uchar *, int, uint); +static char *err; +static uint sum; +static uint nin; +static uint nout; +static uint headerlen; +static uint footerlen; +static uint extralen; + +static int dummyheader(uchar *p, int n) { + return 0; +} +static int dummyfooter(uchar *p, int n, uint sum, uint len, uint zlen) { + return 0; +} +static uint dummysum(uchar *p, int n, uint sum) { + return 0; +} + +/* compress, using FlateStream interface */ +int compress_stream(FILE *in, FILE *out) { + FlateStream s; + int k, n; + enum {Nin = 1<<13, Nout = 1<<15}; + + s.in = malloc(Nin); + s.out = malloc(Nout); + s.nin = 0; + s.nout = Nout; + s.err = 0; + s.state = 0; + + k = header(s.out, s.nout); + if (k == FlateErr) { + s.err = "header error."; + n = FlateErr; + } else { + headerlen = s.nout = k; + n = FlateOut; + } + for (;; n = deflate(&s)) + switch (n) { + case FlateOk: + k = footer(s.out, s.nout, sum, nin, nout - headerlen); + if (k == FlateErr) { + s.err = "footer error."; + n = FlateErr; + } else if (k != fwrite(s.out, 1, k, out)) { + s.err = "write error."; + n = FlateErr; + } else { + footerlen = k; + nout += k; + } + case FlateErr: + free(s.in); + free(s.out); + err = s.err; + return n; + case FlateIn: + s.nin = fread(s.in, 1, Nin, in); + nin += s.nin; + sum = checksum(s.in, s.nin, sum); + break; + case FlateOut: + k = fwrite(s.out, 1, s.nout, out); + if (k != s.nout) + s.err = "write error."; + nout += k; + s.nout = Nout; + break; + } +} + +/* decompress, using FlateStream interface */ +int decompress_stream(FILE *in, FILE *out) { + FlateStream s; + uchar *begin; + int k, n; + enum {Nin = 1<<13, Nout = 1<<15}; + + s.in = begin = malloc(Nin); + s.out = malloc(Nout); + s.nout = Nout; + s.err = 0; + s.state = 0; + + s.nin = fread(s.in, 1, Nin, in); + nin += s.nin; + k = header(s.in, s.nin); + if (k == FlateErr) { + s.err = "header error."; + n = FlateErr; + } else { + headerlen = k; + s.nin -= k; + s.in += k; + n = inflate(&s); + } + for (;; n = inflate(&s)) + switch (n) { + case FlateOk: + memmove(begin, s.in, s.nin); + k = fread(begin, 1, Nin-s.nin, in); + nin += k; + s.nin += k; + k = footer(begin, s.nin, sum, nout, nin - s.nin - headerlen); + if (k == FlateErr) { + s.err = "footer error."; + n = FlateErr; + } else { + footerlen = k; + extralen = s.nin - k; + } + case FlateErr: + free(begin); + free(s.out); + err = s.err; + return n; + case FlateIn: + s.in = begin; + s.nin = fread(s.in, 1, Nin, in); + nin += s.nin; + break; + case FlateOut: + k = fwrite(s.out, 1, s.nout, out); + if (k != s.nout) + s.err = "write error."; + sum = checksum(s.out, k, sum); + nout += k; + s.nout = Nout; + break; + } +} + +int main(int argc, char *argv[]) { + char comp = 'c'; + char fmt = 'r'; + char verbose = 'q'; + int (*call)(FILE *, FILE*); + int n, i; + + for (i = 1; i < argc; i++) { + if (argv[i][0] == '-' && argv[i][1] && argv[i][2] == 0) + switch (argv[i][1]) { + case 'q': + case 'v': + verbose = argv[i][1]; + continue; + case 'c': + case 'd': + comp = argv[i][1]; + continue; + case 'r': + case 'g': + case 'z': + case 'p': + fmt = argv[i][1]; + continue; + } + fprintf(stderr, "usage: %s [-q|-v] [-c|-d] [-r|-g|-z|-p]\n\n" + "deflate stream compression" + " -q quiet (default)\n" + " -v verbose\n" + " -c compress (default)\n" + " -d decompress\n" + " -r raw (default)\n" + " -g gzip\n" + " -z zlib\n" + " -p pkzip\n", argv[0]); + return -1; + } + call = comp == 'c' ? compress_stream : decompress_stream; + switch (fmt) { + case 'r': + header = dummyheader; + footer = dummyfooter; + checksum = dummysum; + n = call(stdin, stdout); + break; + case 'g': + if (comp == 'c') { + header = deflate_gzip_header; + footer = deflate_gzip_footer; + } else { + header = inflate_gzip_header; + footer = inflate_gzip_footer; + } + checksum = crc32; + crc32init(); + n = call(stdin, stdout); + break; + case 'z': + if (comp == 'c') { + header = deflate_zlib_header; + footer = deflate_zlib_footer; + } else { + header = inflate_zlib_header; + footer = inflate_zlib_footer; + } + checksum = adler32; + n = call(stdin, stdout); + break; + case 'p': + default: + err = "uninplemented."; + n = FlateErr; + break; + } + if (verbose == 'v') + fprintf(stderr, "in:%d out:%d checksum: 0x%08x (header:%d compressed len:%d footer:%d extra input bytes:%s)\n", + nin, nout, sum, headerlen, (comp == 'c' ? nout : nin) - headerlen - footerlen - extralen, + footerlen, extralen ? "yes" : "no"); + if (n != FlateOk) + fprintf(stderr, "error: %s\n", err); + return n; +}