commit 76b3cac7919f6ea0663abb27e104a7c7a4235f56
parent c4c8fd422936440a24e5972a115be1b253076d0a
Author: nsz <nszabolcs@gmail.com>
Date: Fri, 21 Aug 2009 15:03:20 +0200
+sflate, inflate returns extra bytes after compressed input
Diffstat:
Makefile | | | 7 | +++++-- |
TODO | | | 5 | ++++- |
adler.c | | | 4 | +++- |
crc.c | | | 12 | ++++++++---- |
flate.h | | | 4 | ++++ |
flatez.c | | | 132 | ------------------------------------------------------------------------------- |
inflate.c | | | 7 | ++++++- |
sflate.c | | | 359 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
8 files changed, 389 insertions(+), 141 deletions(-)
diff --git a/Makefile b/Makefile
@@ -2,11 +2,14 @@
CFLAGS=-O3 -Wall -ansi -pedantic
LDFLAGS=
SRC=inflate.c inflate_example.c inflate_simple.c \
- deflate.c deflate_example.c
+ deflate.c deflate_example.c \
+ sflate.c crc.c adler.c
OBJ=${SRC:.c=.o}
-EXE=inflate inflate_simple deflate
+EXE=sflate inflate inflate_simple deflate
all: ${EXE}
+sflate: sflate.o crc.o adler.o inflate.o deflate.o
+ ${CC} -o $@ $^ ${LDFLAGS}
inflate: inflate.o inflate_example.o
${CC} -o $@ $^ ${LDFLAGS}
inflate_simple: inflate_simple.o
diff --git a/TODO b/TODO
@@ -1,6 +1,8 @@
flate
-----
+fill output entirely
man
+globals
error message ?
inflate assumes Flate* < 0
_init _reset _free ?
@@ -15,7 +17,7 @@ test, benchmark:
inflate
-------
-init globals
+callback interface: reading past the end of compressed data: unreachable data
(rev lookup vs revinc)
(test/optimize uncompressed block)
read less than 7 bits in clen decode
@@ -36,6 +38,7 @@ better compression:
(zlib huffcode trick: if same freq then shorter code goes to the one with smaller subtree)
last block can be allowed to be larger
code cleanups:
+ better organization (configurable blocksize)
bounds on the compressend size
input from in+nin instead of src+srcend
setting s->state..
diff --git a/adler.c b/adler.c
@@ -1,9 +1,11 @@
+#include "flate.h"
+
enum {
AdlerBase = 65521, /* largest 16bit prime */
AdlerN = 5552 /* max iters before 32bit overflow */
};
-uint adlersum(uchar *p, int n, uint adler) {
+uint adler32(uchar *p, int n, uint adler) {
uint s1 = adler & 0xffff;
uint s2 = (adler >> 16) & 0xffff;
uchar *ep;
diff --git a/crc.c b/crc.c
@@ -1,10 +1,14 @@
+#include "flate.h"
+
uint crctab[256];
-void crcinit(void) {
+void crc32init(void) {
static const uint poly = 0xEDB88320;
+ int i,j;
for (i = 0; i < 256; ++i) {
- crc = i;
+ uint crc = i;
+
for (j = 0; j < 8; j++) {
if (crc & 1)
crc = (crc >> 1) ^ poly;
@@ -15,11 +19,11 @@ void crcinit(void) {
}
}
-uint crcsum(uchar *p, int n, uint crc) {
+uint crc32(uchar *p, int n, uint crc) {
uchar *ep = p + n;
crc ^= 0xffffffff;
while (p < ep)
- crc = crctab[(crc & 0xff) ^ *buf++] ^ (crc >> 8);
+ crc = crctab[(crc & 0xff) ^ *p++] ^ (crc >> 8);
return crc ^ 0xffffffff;
}
diff --git a/flate.h b/flate.h
@@ -23,3 +23,7 @@ int deflate(FlateStream *s);
int deflate_callback(int (*r)(void *, int, void *), void *rdata, int (*w)(void *, int, void *), void *wdata);
int inflate(FlateStream *s);
int inflate_callback(int (*r)(void *, int, void *), void *rdata, int (*w)(void *, int, void *), void *wdata);
+
+uint adler32(uchar *p, int n, uint adler);
+void crc32init(void);
+uint crc32(uchar *p, int n, uint crc);
diff --git a/flatez.c b/flatez.c
@@ -1,132 +0,0 @@
-
-static int checkfooter(uchar *p, uint sum) {
- return sum == ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]);
-}
-
-enum {
- ZlibCM = 7 << 4,
- ZlibCINFO = 8,
- ZlibFLEV = 3 << 6,
- ZlibFDICT = 1 << 5,
- ZlibFCHK = 31 - (((ZlibCM | ZlibCINFO) << 8) | ZlibFLEV) % 31,
-};
-
-int deflate_zlib_header(uchar *p, int n) {
- if (n < 2)
- return FlateErr;
- p[0] = ZlibCM | ZlibCINFO; /* deflate method, 32K window size */
- p[1] = ZlibFLEV | ZlibFCHK; /* highest compression */
- return 2;
-}
-
-int deflate_zlib_footer(uchar *p, int n, uint sum) {
- if (n < 4)
- return FlateErr;
- p[0] = sum >> 24;
- p[1] = sum >> 16;
- p[2] = sum >> 8;
- p[3] = sum;
- return 4;
-}
-
-int inflate_zlib_header(uchar *p, int n) {
- if (n < 2)
- return FlateErr;
- if (((p[0] << 8) | p[1]) % 31)
- return FlateErr;
- if ((p[0] & 0xf0) != ZlibCM || (p[0] & 0x0f) > ZlibCINFO)
- return FlateErr;
- if (p[1] & ZlibFDICT)
- return FlateErr;
- return 2;
-}
-
-int inflate_zlib_footer(uchar *p, int n, uint sum) {
- if (n < 4 || !checkfooter(p, sum))
- return FlateErr;
- return 4;
-}
-
-
-enum {
- GZipID1 = 0x1f,
- GZipID2 = 0x8b,
- GZipCM = 8,
- GZipFHCRC = 1 << 1,
- GZipFEXTRA = 1 << 2,
- GZipFNAME = 1 << 3,
- GZipFCOMM = 1 << 4,
- GZipXFL = 2,
- GZipOS = 255
-};
-
-int deflate_gzip_header(uchar *p, int n) {
- int k;
-
- if (n < 10)
- return FlateErr;
- for (k = 0; k < n; k++)
- p[k] = 0;
- p[0] = GZipID1;
- p[1] = GZipID2;
- p[2] = GZipCM;
- p[8] = GZipXFL;
- p[9] = GZipOS;
- return 10;
-}
-
-int deflate_gzip_footer(uchar *p, int n, uint sum, uint len) {
- if (n < 8)
- return FlateErr;
- p[0] = sum >> 24;
- p[1] = sum >> 16;
- p[2] = sum >> 8;
- p[3] = sum;
- p[4] = len >> 24;
- p[5] = len >> 16;
- p[6] = len >> 8;
- p[7] = len;
- return 8;
-}
-
-int inflate_gzip_header(uchar *p, int n) {
- int k = 10;
-
- if (k > n)
- return FlateErr;
- if (p[0] != GZipID1 || p[1] != GZipID2 || p[2] != GZipCM)
- return FlateErr;
- if (p[3] & GZipFEXTRA) {
- k += 2 + ((p[k] << 8) | p[k+1]);
- if (k > n)
- return FlateErr;
- }
- if (p[3] & GZipFNAME) {
- for (; k < n; k++)
- if (p[k] == 0)
- break;
- k++;
- if (k > n)
- return FlateErr;
- }
- if (p[3] & GZipFCOMM) {
- for (; k < n; k++)
- if (p[k] == 0)
- break;
- k++;
- if (k > n)
- return FlateErr;
- }
- if (p[3] & GZipFHCRC) {
- k += 2;
- if (k > n)
- return FlateErr;
- }
- return k;
-}
-
-int inflate_gzip_footer(uchar *p, int n, uint sum, uint len) {
- if (n < 8 || !checkfooter(p, sum) || !checkfooter(p+4, len))
- return FlateErr;
- return 8;
-}
diff --git a/inflate.c b/inflate.c
@@ -575,7 +575,7 @@ static int inflate_state(State *s) {
s->state = BlockHead;
break;
default:
- return s->err = "corrupt state.", FlateErr;
+ return s->err = "corrupt internal state.", FlateErr;
}
}
}
@@ -631,6 +631,11 @@ int inflate(FlateStream *stream) {
s->posout = 0;
}
if (n == FlateOk || n == FlateErr) {
+ if (s->nbits || s->src < s->srcend) {
+ s->nbits /= 8;
+ stream->in = s->src - s->nbits;
+ stream->nin = s->srcend - s->src + s->nbits;
+ }
stream->err = s->err;
free(s);
stream->state = 0;
diff --git a/sflate.c b/sflate.c
@@ -0,0 +1,359 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "flate.h"
+
+static int checkfooter(uchar *p, uint sum) {
+ return sum == ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]);
+}
+
+enum {
+ ZlibCM = 7 << 4,
+ ZlibCINFO = 8,
+ ZlibFLEV = 3 << 6,
+ ZlibFDICT = 1 << 5,
+ ZlibFCHK = 31 - (((ZlibCM | ZlibCINFO) << 8) | ZlibFLEV) % 31
+};
+
+int deflate_zlib_header(uchar *p, int n) {
+ if (n < 2)
+ return FlateErr;
+ p[0] = ZlibCM | ZlibCINFO; /* deflate method, 32K window size */
+ p[1] = ZlibFLEV | ZlibFCHK; /* highest compression */
+ return 2;
+}
+
+int deflate_zlib_footer(uchar *p, int n, uint sum, uint len, uint zlen) {
+ if (n < 4)
+ return FlateErr;
+ p[0] = sum >> 24;
+ p[1] = sum >> 16;
+ p[2] = sum >> 8;
+ p[3] = sum;
+ return 4;
+}
+
+int inflate_zlib_header(uchar *p, int n) {
+ if (n < 2)
+ return FlateErr;
+ if (((p[0] << 8) | p[1]) % 31)
+ return FlateErr;
+ if ((p[0] & 0xf0) != ZlibCM || (p[0] & 0x0f) > ZlibCINFO)
+ return FlateErr;
+ if (p[1] & ZlibFDICT)
+ return FlateErr;
+ return 2;
+}
+
+int inflate_zlib_footer(uchar *p, int n, uint sum, uint len, uint zlen) {
+ if (n < 4 || !checkfooter(p, sum))
+ return FlateErr;
+ return 4;
+}
+
+
+enum {
+ GZipID1 = 0x1f,
+ GZipID2 = 0x8b,
+ GZipCM = 8,
+ GZipFHCRC = 1 << 1,
+ GZipFEXTRA = 1 << 2,
+ GZipFNAME = 1 << 3,
+ GZipFCOMM = 1 << 4,
+ GZipXFL = 2,
+ GZipOS = 255
+};
+
+int deflate_gzip_header(uchar *p, int n) {
+ int k;
+
+ if (n < 10)
+ return FlateErr;
+ for (k = 0; k < n; k++)
+ p[k] = 0;
+ p[0] = GZipID1;
+ p[1] = GZipID2;
+ p[2] = GZipCM;
+ p[8] = GZipXFL;
+ p[9] = GZipOS;
+ return 10;
+}
+
+int deflate_gzip_footer(uchar *p, int n, uint sum, uint len, uint zlen) {
+ if (n < 8)
+ return FlateErr;
+ p[0] = sum >> 24;
+ p[1] = sum >> 16;
+ p[2] = sum >> 8;
+ p[3] = sum;
+ p[4] = len >> 24;
+ p[5] = len >> 16;
+ p[6] = len >> 8;
+ p[7] = len;
+ return 8;
+}
+
+int inflate_gzip_header(uchar *p, int n) {
+ int k = 10;
+
+ if (k > n)
+ return FlateErr;
+ if (p[0] != GZipID1 || p[1] != GZipID2 || p[2] != GZipCM)
+ return FlateErr;
+ if (p[3] & GZipFEXTRA) {
+ k += 2 + ((p[k] << 8) | p[k+1]);
+ if (k > n)
+ return FlateErr;
+ }
+ if (p[3] & GZipFNAME) {
+ for (; k < n; k++)
+ if (p[k] == 0)
+ break;
+ k++;
+ if (k > n)
+ return FlateErr;
+ }
+ if (p[3] & GZipFCOMM) {
+ for (; k < n; k++)
+ if (p[k] == 0)
+ break;
+ k++;
+ if (k > n)
+ return FlateErr;
+ }
+ if (p[3] & GZipFHCRC) {
+ k += 2;
+ if (k > n)
+ return FlateErr;
+ }
+ return k;
+}
+
+int inflate_gzip_footer(uchar *p, int n, uint sum, uint len, uint zlen) {
+ if (n < 8 || !checkfooter(p, sum) || !checkfooter(p+4, len))
+ return FlateErr;
+ return 8;
+}
+
+
+/* example usage */
+
+static int (*header)(uchar *, int);
+static int (*footer)(uchar *, int, uint, uint, uint);
+static uint (*checksum)(uchar *, int, uint);
+static char *err;
+static uint sum;
+static uint nin;
+static uint nout;
+static uint headerlen;
+static uint footerlen;
+static uint extralen;
+
+static int dummyheader(uchar *p, int n) {
+ return 0;
+}
+static int dummyfooter(uchar *p, int n, uint sum, uint len, uint zlen) {
+ return 0;
+}
+static uint dummysum(uchar *p, int n, uint sum) {
+ return 0;
+}
+
+/* compress, using FlateStream interface */
+int compress_stream(FILE *in, FILE *out) {
+ FlateStream s;
+ int k, n;
+ enum {Nin = 1<<13, Nout = 1<<15};
+
+ s.in = malloc(Nin);
+ s.out = malloc(Nout);
+ s.nin = 0;
+ s.nout = Nout;
+ s.err = 0;
+ s.state = 0;
+
+ k = header(s.out, s.nout);
+ if (k == FlateErr) {
+ s.err = "header error.";
+ n = FlateErr;
+ } else {
+ headerlen = s.nout = k;
+ n = FlateOut;
+ }
+ for (;; n = deflate(&s))
+ switch (n) {
+ case FlateOk:
+ k = footer(s.out, s.nout, sum, nin, nout - headerlen);
+ if (k == FlateErr) {
+ s.err = "footer error.";
+ n = FlateErr;
+ } else if (k != fwrite(s.out, 1, k, out)) {
+ s.err = "write error.";
+ n = FlateErr;
+ } else {
+ footerlen = k;
+ nout += k;
+ }
+ case FlateErr:
+ free(s.in);
+ free(s.out);
+ err = s.err;
+ return n;
+ case FlateIn:
+ s.nin = fread(s.in, 1, Nin, in);
+ nin += s.nin;
+ sum = checksum(s.in, s.nin, sum);
+ break;
+ case FlateOut:
+ k = fwrite(s.out, 1, s.nout, out);
+ if (k != s.nout)
+ s.err = "write error.";
+ nout += k;
+ s.nout = Nout;
+ break;
+ }
+}
+
+/* decompress, using FlateStream interface */
+int decompress_stream(FILE *in, FILE *out) {
+ FlateStream s;
+ uchar *begin;
+ int k, n;
+ enum {Nin = 1<<13, Nout = 1<<15};
+
+ s.in = begin = malloc(Nin);
+ s.out = malloc(Nout);
+ s.nout = Nout;
+ s.err = 0;
+ s.state = 0;
+
+ s.nin = fread(s.in, 1, Nin, in);
+ nin += s.nin;
+ k = header(s.in, s.nin);
+ if (k == FlateErr) {
+ s.err = "header error.";
+ n = FlateErr;
+ } else {
+ headerlen = k;
+ s.nin -= k;
+ s.in += k;
+ n = inflate(&s);
+ }
+ for (;; n = inflate(&s))
+ switch (n) {
+ case FlateOk:
+ memmove(begin, s.in, s.nin);
+ k = fread(begin, 1, Nin-s.nin, in);
+ nin += k;
+ s.nin += k;
+ k = footer(begin, s.nin, sum, nout, nin - s.nin - headerlen);
+ if (k == FlateErr) {
+ s.err = "footer error.";
+ n = FlateErr;
+ } else {
+ footerlen = k;
+ extralen = s.nin - k;
+ }
+ case FlateErr:
+ free(begin);
+ free(s.out);
+ err = s.err;
+ return n;
+ case FlateIn:
+ s.in = begin;
+ s.nin = fread(s.in, 1, Nin, in);
+ nin += s.nin;
+ break;
+ case FlateOut:
+ k = fwrite(s.out, 1, s.nout, out);
+ if (k != s.nout)
+ s.err = "write error.";
+ sum = checksum(s.out, k, sum);
+ nout += k;
+ s.nout = Nout;
+ break;
+ }
+}
+
+int main(int argc, char *argv[]) {
+ char comp = 'c';
+ char fmt = 'r';
+ char verbose = 'q';
+ int (*call)(FILE *, FILE*);
+ int n, i;
+
+ for (i = 1; i < argc; i++) {
+ if (argv[i][0] == '-' && argv[i][1] && argv[i][2] == 0)
+ switch (argv[i][1]) {
+ case 'q':
+ case 'v':
+ verbose = argv[i][1];
+ continue;
+ case 'c':
+ case 'd':
+ comp = argv[i][1];
+ continue;
+ case 'r':
+ case 'g':
+ case 'z':
+ case 'p':
+ fmt = argv[i][1];
+ continue;
+ }
+ fprintf(stderr, "usage: %s [-q|-v] [-c|-d] [-r|-g|-z|-p]\n\n"
+ "deflate stream compression"
+ " -q quiet (default)\n"
+ " -v verbose\n"
+ " -c compress (default)\n"
+ " -d decompress\n"
+ " -r raw (default)\n"
+ " -g gzip\n"
+ " -z zlib\n"
+ " -p pkzip\n", argv[0]);
+ return -1;
+ }
+ call = comp == 'c' ? compress_stream : decompress_stream;
+ switch (fmt) {
+ case 'r':
+ header = dummyheader;
+ footer = dummyfooter;
+ checksum = dummysum;
+ n = call(stdin, stdout);
+ break;
+ case 'g':
+ if (comp == 'c') {
+ header = deflate_gzip_header;
+ footer = deflate_gzip_footer;
+ } else {
+ header = inflate_gzip_header;
+ footer = inflate_gzip_footer;
+ }
+ checksum = crc32;
+ crc32init();
+ n = call(stdin, stdout);
+ break;
+ case 'z':
+ if (comp == 'c') {
+ header = deflate_zlib_header;
+ footer = deflate_zlib_footer;
+ } else {
+ header = inflate_zlib_header;
+ footer = inflate_zlib_footer;
+ }
+ checksum = adler32;
+ n = call(stdin, stdout);
+ break;
+ case 'p':
+ default:
+ err = "uninplemented.";
+ n = FlateErr;
+ break;
+ }
+ if (verbose == 'v')
+ fprintf(stderr, "in:%d out:%d checksum: 0x%08x (header:%d compressed len:%d footer:%d extra input bytes:%s)\n",
+ nin, nout, sum, headerlen, (comp == 'c' ? nout : nin) - headerlen - footerlen - extralen,
+ footerlen, extralen ? "yes" : "no");
+ if (n != FlateOk)
+ fprintf(stderr, "error: %s\n", err);
+ return n;
+}