commit ccb962b2ea91a7764fac2f3755c9d25b9ccdde66
parent 63d4079b8a326d43e70ad72dd219860fcb586354
Author: Kris Maglione <kris@suckless.org>
Date: Mon, 19 Jul 2010 12:50:19 -0400
[9libs] Sync with plan9port.
Diffstat:
30 files changed, 837 insertions(+), 325 deletions(-)
diff --git a/include/fmt.h b/include/fmt.h
@@ -27,10 +27,22 @@ struct Fmt{
void *farg; /* to make flush a closure */
int nfmt; /* num chars formatted so far */
va_list args; /* args passed to dofmt */
- int r; /* % format Rune */
+ Rune r; /* % format Rune */
int width;
int prec;
unsigned long flags;
+ char *decimal; /* decimal point; cannot be "" */
+
+ /* For %'d */
+ char *thousands; /* separator for thousands */
+
+ /*
+ * Each char is an integer indicating #digits before next separator. Values:
+ * \xFF: no more grouping (or \x7F; defined to be CHAR_MAX in POSIX)
+ * \x00: repeat previous indefinitely
+ * \x**: count that many
+ */
+ char *grouping; /* descriptor of separator placement */
};
enum{
@@ -40,7 +52,8 @@ enum{
FmtSharp = FmtPrec << 1,
FmtSpace = FmtSharp << 1,
FmtSign = FmtSpace << 1,
- FmtZero = FmtSign << 1,
+ FmtApost = FmtSign << 1,
+ FmtZero = FmtApost << 1,
FmtUnsigned = FmtZero << 1,
FmtShort = FmtUnsigned << 1,
FmtLong = FmtShort << 1,
@@ -121,6 +134,7 @@ double fmtcharstod(int(*f)(void*), void*);
int fmtfdflush(Fmt*);
int fmtfdinit(Fmt*, int fd, char *buf, int size);
int fmtinstall(int, int (*f)(Fmt*));
+void fmtlocaleinit(Fmt*, char *decimal, char *thousands, char *grouping);
int fmtprint(Fmt*, const char*, ...);
int fmtrune(Fmt*, int);
int fmtrunestrcpy(Fmt*, Rune*);
diff --git a/include/stuff/util.h b/include/stuff/util.h
@@ -28,10 +28,6 @@ enum {
GInvert = 1<<0,
};
-enum {
- Runemax = (1 << (sizeof(Rune) * 8)) - 1,
-};
-
#define utf8locale() (!strcmp(nl_langinfo(CODESET), "UTF-8"))
#ifdef VARARGCK
diff --git a/include/utf.h b/include/utf.h
@@ -1,14 +1,15 @@
#ifndef _UTF_H_
#define _UTF_H_ 1
-typedef unsigned short Rune; /* 16 bits */
+typedef unsigned int Rune; /* 32 bits */
enum
{
- UTFmax = 3, /* maximum bytes per rune */
+ UTFmax = 4, /* maximum bytes per rune */
Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
Runeself = 0x80, /* rune and UTF sequences are the same (<) */
- Runeerror = 0xFFFD, /* decoding error in UTF */
+ Runeerror = 0xFFFD, /* decoding error in UTF */
+ Runemax = 0x10FFFF /* maximum rune value */
};
/* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */
@@ -19,7 +20,7 @@ int islowerrune(Rune);
int isspacerune(Rune);
int istitlerune(Rune);
int isupperrune(Rune);
-int runelen(Rune);
+int runelen(long);
int runenlen(const Rune*, int);
Rune* runestrcat(Rune*, const Rune*);
Rune* runestrchr(const Rune*, Rune);
diff --git a/lib/libbio/bgetrune.c b/lib/libbio/bgetrune.c
@@ -7,7 +7,7 @@ Bgetrune(Biobuf *bp)
{
int c, i;
Rune rune;
- char str[4];
+ char str[UTFmax];
c = Bgetc(bp);
if(c < Runeself) { /* one char */
diff --git a/lib/libbio/binit.c b/lib/libbio/binit.c
@@ -123,13 +123,13 @@ Bopen(const char *name, int mode)
return 0;
case OREAD:
- f = open(name, OREAD);
+ f = open(name, mode);
if(f < 0)
return 0;
break;
case OWRITE:
- f = creat(name, 0666);
+ f = creat(name, mode);
if(f < 0)
return 0;
}
diff --git a/lib/libbio/bputrune.c b/lib/libbio/bputrune.c
@@ -6,7 +6,7 @@ int
Bputrune(Biobuf *bp, long c)
{
Rune rune;
- char str[4];
+ char str[UTFmax];
int n;
rune = c;
diff --git a/lib/libbio/bvprint.c b/lib/libbio/bvprint.c
@@ -29,8 +29,11 @@ Bvprint(Biobuf *bp, const char *fmt, va_list arg)
f.flush = fmtBflush;
f.farg = bp;
f.nfmt = 0;
+ fmtlocaleinit(&f, nil, nil, nil);
n = fmtvprint(&f, fmt, arg);
bp->ocount = (char*)f.to - (char*)f.stop;
+ if(n == 0)
+ n = f.nfmt;
return n;
}
diff --git a/lib/libfmt/Makefile b/lib/libfmt/Makefile
@@ -17,6 +17,7 @@ OBJ=\
fmt\
fmtfd\
fmtfdflush\
+ fmtlocale\
fmtlock\
fmtprint\
fmtquote\
diff --git a/lib/libfmt/dofmt.c b/lib/libfmt/dofmt.c
@@ -139,8 +139,10 @@ __fmtcpy(Fmt *f, const void *vm, int n, int sz)
m = (char*)vm;
me = m + sz;
- w = f->width;
fl = f->flags;
+ w = 0;
+ if(fl & FmtWidth)
+ w = f->width;
if((fl & FmtPrec) && n > f->prec)
n = f->prec;
if(f->runes){
@@ -194,8 +196,10 @@ __fmtrcpy(Fmt *f, const void *vm, int n)
int w;
m = (Rune*)vm;
- w = f->width;
fl = f->flags;
+ w = 0;
+ if(fl & FmtWidth)
+ w = f->width;
if((fl & FmtPrec) && n > f->prec)
n = f->prec;
if(f->runes){
@@ -324,10 +328,14 @@ __percentfmt(Fmt *f)
int
__ifmt(Fmt *f)
{
- char buf[70], *p, *conv;
+ char buf[140], *p, *conv;
+ /* 140: for 64 bits of binary + 3-byte sep every 4 digits */
uvlong vu;
ulong u;
int neg, base, i, n, fl, w, isv;
+ int ndig, len, excess, bytelen;
+ char *grouping;
+ char *thousands;
neg = 0;
fl = f->flags;
@@ -366,21 +374,25 @@ __ifmt(Fmt *f)
u = va_arg(f->args, int);
}
conv = "0123456789abcdef";
+ grouping = "\4"; /* for hex, octal etc. (undefined by spec but nice) */
+ thousands = f->thousands;
switch(f->r){
case 'd':
case 'i':
case 'u':
base = 10;
- break;
- case 'x':
- base = 16;
+ grouping = f->grouping;
break;
case 'X':
- base = 16;
conv = "0123456789ABCDEF";
+ /* fall through */
+ case 'x':
+ base = 16;
+ thousands = ":";
break;
case 'b':
base = 2;
+ thousands = ":";
break;
case 'o':
base = 8;
@@ -398,7 +410,11 @@ __ifmt(Fmt *f)
}
}
p = buf + sizeof buf - 1;
- n = 0;
+ n = 0; /* in runes */
+ excess = 0; /* number of bytes > number runes */
+ ndig = 0;
+ len = utflen(thousands);
+ bytelen = strlen(thousands);
if(isv){
while(vu){
i = vu % base;
@@ -407,6 +423,12 @@ __ifmt(Fmt *f)
*p-- = ',';
n++;
}
+ if((fl & FmtApost) && __needsep(&ndig, &grouping)){
+ n += len;
+ excess += bytelen - len;
+ p -= bytelen;
+ memmove(p+1, thousands, bytelen);
+ }
*p-- = conv[i];
n++;
}
@@ -418,16 +440,47 @@ __ifmt(Fmt *f)
*p-- = ',';
n++;
}
+ if((fl & FmtApost) && __needsep(&ndig, &grouping)){
+ n += len;
+ excess += bytelen - len;
+ p -= bytelen;
+ memmove(p+1, thousands, bytelen);
+ }
*p-- = conv[i];
n++;
}
}
if(n == 0){
- *p-- = '0';
- n = 1;
+ /*
+ * "The result of converting a zero value with
+ * a precision of zero is no characters." - ANSI
+ *
+ * "For o conversion, # increases the precision, if and only if
+ * necessary, to force the first digit of the result to be a zero
+ * (if the value and precision are both 0, a single 0 is printed)." - ANSI
+ */
+ if(!(fl & FmtPrec) || f->prec != 0 || (f->r == 'o' && (fl & FmtSharp))){
+ *p-- = '0';
+ n = 1;
+ if(fl & FmtApost)
+ __needsep(&ndig, &grouping);
+ }
+
+ /*
+ * Zero values don't get 0x.
+ */
+ if(f->r == 'x' || f->r == 'X')
+ fl &= ~FmtSharp;
}
- for(w = f->prec; n < w && p > buf+3; n++)
+ for(w = f->prec; n < w && p > buf+3; n++){
+ if((fl & FmtApost) && __needsep(&ndig, &grouping)){
+ n += len;
+ excess += bytelen - len;
+ p -= bytelen;
+ memmove(p+1, thousands, bytelen);
+ }
*p-- = '0';
+ }
if(neg || (fl & (FmtSign|FmtSpace)))
n++;
if(fl & FmtSharp){
@@ -441,9 +494,19 @@ __ifmt(Fmt *f)
}
}
if((fl & FmtZero) && !(fl & (FmtLeft|FmtPrec))){
- for(w = f->width; n < w && p > buf+3; n++)
+ w = 0;
+ if(fl & FmtWidth)
+ w = f->width;
+ for(; n < w && p > buf+3; n++){
+ if((fl & FmtApost) && __needsep(&ndig, &grouping)){
+ n += len;
+ excess += bytelen - len;
+ p -= bytelen;
+ memmove(p+1, thousands, bytelen);
+ }
*p-- = '0';
- f->width = 0;
+ }
+ f->flags &= ~FmtWidth;
}
if(fl & FmtSharp){
if(base == 16)
@@ -458,7 +521,7 @@ __ifmt(Fmt *f)
else if(fl & FmtSpace)
*p-- = ' ';
f->flags &= ~FmtPrec;
- return __fmtcpy(f, p + 1, n, n);
+ return __fmtcpy(f, p + 1, n, n + excess);
}
int
@@ -499,6 +562,9 @@ __flagfmt(Fmt *f)
case '#':
f->flags |= FmtSharp;
break;
+ case '\'':
+ f->flags |= FmtApost;
+ break;
case ' ':
f->flags |= FmtSpace;
break;
@@ -526,12 +592,13 @@ __flagfmt(Fmt *f)
int
__badfmt(Fmt *f)
{
- char x[3];
+ char x[2+UTFmax];
+ int n;
x[0] = '%';
- x[1] = f->r;
- x[2] = '%';
- f->prec = 3;
- __fmtcpy(f, (const void*)x, 3, 3);
+ n = 1 + runetochar(x+1, &f->r);
+ x[n++] = '%';
+ f->prec = n;
+ __fmtcpy(f, (const void*)x, n, n);
return 0;
}
diff --git a/lib/libfmt/dorfmt.c b/lib/libfmt/dorfmt.c
@@ -19,6 +19,7 @@
/* format the output into f->to and return the number of characters fmted */
+/* BUG: THIS FILE IS NOT UPDATED TO THE NEW SPEC */
int
dorfmt(Fmt *f, const Rune *fmt)
{
@@ -30,8 +31,8 @@ dorfmt(Fmt *f, const Rune *fmt)
nfmt = f->nfmt;
for(;;){
if(f->runes){
- rt = f->to;
- rs = f->stop;
+ rt = (Rune*)f->to;
+ rs = (Rune*)f->stop;
while((r = *fmt++) && r != '%'){
FMTRCHAR(f, rt, rs, r);
}
@@ -41,8 +42,8 @@ dorfmt(Fmt *f, const Rune *fmt)
return f->nfmt - nfmt;
f->stop = rs;
}else{
- t = f->to;
- s = f->stop;
+ t = (char*)f->to;
+ s = (char*)f->stop;
while((r = *fmt++) && r != '%'){
FMTRUNE(f, t, f->stop, r);
}
@@ -53,8 +54,9 @@ dorfmt(Fmt *f, const Rune *fmt)
f->stop = s;
}
- fmt = __fmtdispatch(f, (Rune*)fmt, 1);
+ fmt = (Rune*)__fmtdispatch(f, (Rune*)fmt, 1);
if(fmt == nil)
return -1;
}
+ return 0; /* not reached */
}
diff --git a/lib/libfmt/fltfmt.c b/lib/libfmt/fltfmt.c
@@ -18,8 +18,10 @@
#include <stdlib.h>
#include <errno.h>
#include <stdarg.h>
-#include <ctype.h>
+#include <fmt.h>
+#include <assert.h>
#include "plan9.h"
+#include "fmt.h"
#include "fmtdef.h"
enum
@@ -52,8 +54,8 @@ static double pows10[] =
1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149,
1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159,
};
-
-#define pow10(x) fmtpow10(x)
+#define npows10 ((int)(sizeof(pows10)/sizeof(pows10[0])))
+#define pow10(x) fmtpow10(x)
static double
pow10(int n)
@@ -63,330 +65,615 @@ pow10(int n)
neg = 0;
if(n < 0){
- if(n < DBL_MIN_10_EXP){
- return 0.;
- }
neg = 1;
n = -n;
- }else if(n > DBL_MAX_10_EXP){
- return HUGE_VAL;
}
- if(n < (int)(sizeof(pows10)/sizeof(pows10[0])))
+
+ if(n < npows10)
d = pows10[n];
else{
- d = pows10[sizeof(pows10)/sizeof(pows10[0]) - 1];
+ d = pows10[npows10-1];
for(;;){
- n -= sizeof(pows10)/sizeof(pows10[0]) - 1;
- if(n < (int)(sizeof(pows10)/sizeof(pows10[0]))){
+ n -= npows10 - 1;
+ if(n < npows10){
d *= pows10[n];
break;
}
- d *= pows10[sizeof(pows10)/sizeof(pows10[0]) - 1];
+ d *= pows10[npows10 - 1];
}
}
- if(neg){
+ if(neg)
return 1./d;
- }
return d;
}
+/*
+ * add 1 to the decimal integer string a of length n.
+ * if 99999 overflows into 10000, return 1 to tell caller
+ * to move the virtual decimal point.
+ */
static int
-xadd(char *a, int n, int v)
+xadd1(char *a, int n)
{
char *b;
int c;
- if(n < 0 || n >= NSIGNIF)
+ if(n < 0 || n > NSIGNIF)
return 0;
- for(b = a+n; b >= a; b--) {
- c = *b + v;
+ for(b = a+n-1; b >= a; b--) {
+ c = *b + 1;
if(c <= '9') {
*b = c;
return 0;
}
*b = '0';
- v = 1;
}
- *a = '1'; /* overflow adding */
+ /*
+ * need to overflow adding digit.
+ * shift number down and insert 1 at beginning.
+ * decimal is known to be 0s or we wouldn't
+ * have gotten this far. (e.g., 99999+1 => 00000)
+ */
+ a[0] = '1';
return 1;
}
+/*
+ * subtract 1 from the decimal integer string a.
+ * if 10000 underflows into 09999, make it 99999
+ * and return 1 to tell caller to move the virtual
+ * decimal point. this way, xsub1 is inverse of xadd1.
+ */
static int
-xsub(char *a, int n, int v)
+xsub1(char *a, int n)
{
char *b;
int c;
- for(b = a+n; b >= a; b--) {
- c = *b - v;
+ if(n < 0 || n > NSIGNIF)
+ return 0;
+ for(b = a+n-1; b >= a; b--) {
+ c = *b - 1;
if(c >= '0') {
+ if(c == '0' && b == a) {
+ /*
+ * just zeroed the top digit; shift everyone up.
+ * decimal is known to be 9s or we wouldn't
+ * have gotten this far. (e.g., 10000-1 => 09999)
+ */
+ *b = '9';
+ return 1;
+ }
*b = c;
return 0;
}
*b = '9';
- v = 1;
}
- *a = '9'; /* underflow subtracting */
- return 1;
+ /*
+ * can't get here. the number a is always normalized
+ * so that it has a nonzero first digit.
+ */
+ abort();
}
+/*
+ * format exponent like sprintf(p, "e%+03d", e): sign plus at least two digits
+ */
static void
-xdtoa(Fmt *fmt, char *s2, double f)
+xfmtexp(char *p, int e, int ucase)
{
- char s1[NSIGNIF+10];
- double g, h;
- int e, d, i, n;
- int c1, c2, c3, c4, ucase, sign, chr, prec;
+ char se[9];
+ int i;
- prec = FDEFLT;
- if(fmt->flags & FmtPrec)
- prec = fmt->prec;
- if(prec > FDIGIT)
- prec = FDIGIT;
- if(__isNaN(f)) {
- strcpy(s2, "NaN");
- return;
- }
- if(__isInf(f, 1)) {
- strcpy(s2, "+Inf");
- return;
- }
- if(__isInf(f, -1)) {
- strcpy(s2, "-Inf");
- return;
+ *p++ = ucase ? 'E' : 'e';
+ if(e < 0) {
+ *p++ = '-';
+ e = -e;
+ } else
+ *p++ = '+';
+ i = 0;
+ while(e) {
+ se[i++] = e % 10 + '0';
+ e /= 10;
}
- sign = 0;
+ while(i < 2)
+ se[i++] = '0';
+ while(i > 0)
+ *p++ = se[--i];
+ *p++ = '\0';
+}
+
+/*
+ * compute decimal integer m, exp such that:
+ * f = m*10^exp
+ * m is as short as possible without losing exactness
+ * assumes special cases (NaN, +Inf, -Inf) have been handled.
+ */
+static void
+xdtoa(double f, char *s, int *exp, int *neg, int *ns)
+{
+ int c, d, e2, e, ee, i, ndigit, oerrno;
+ char tmp[NSIGNIF+10];
+ double g;
+
+ oerrno = errno; /* in case strtod smashes errno */
+
+ /*
+ * make f non-negative.
+ */
+ *neg = 0;
if(f < 0) {
f = -f;
- sign++;
- }
- ucase = 0;
- chr = fmt->r;
- if(isupper(chr)) {
- ucase = 1;
- chr = tolower(chr);
+ *neg = 1;
}
- e = 0;
- g = f;
- if(g != 0) {
- frexp(f, &e);
- e = e * .301029995664;
- if(e >= -150 && e <= +150) {
- d = 0;
- h = f;
- } else {
- d = e/2;
- h = f * pow10(-d);
- }
- g = h * pow10(d-e);
- while(g < 1) {
- e--;
- g = h * pow10(d-e);
- }
- while(g >= 10) {
- e++;
- g = h * pow10(d-e);
- }
+ /*
+ * must handle zero specially.
+ */
+ if(f == 0){
+ *exp = 0;
+ s[0] = '0';
+ s[1] = '\0';
+ *ns = 1;
+ return;
+ }
+
+ /*
+ * find g,e such that f = g*10^e.
+ * guess 10-exponent using 2-exponent, then fine tune.
+ */
+ frexp(f, &e2);
+ e = (int)(e2 * .301029995664);
+ g = f * pow10(-e);
+ while(g < 1) {
+ e--;
+ g = f * pow10(-e);
+ }
+ while(g >= 10) {
+ e++;
+ g = f * pow10(-e);
}
/*
- * convert NSIGNIF digits and convert
- * back to get accuracy.
+ * convert NSIGNIF digits as a first approximation.
*/
for(i=0; i<NSIGNIF; i++) {
- d = g;
- s1[i] = d + '0';
- g = (g - d) * 10;
+ d = (int)g;
+ s[i] = d+'0';
+ g = (g-d) * 10;
}
- s1[i] = 0;
+ s[i] = 0;
/*
- * try decimal rounding to eliminate 9s
+ * adjust e because s is 314159... not 3.14159...
*/
- c2 = prec + 1;
- if(chr == 'f')
- c2 += e;
- if(c2 >= NSIGNIF-2) {
- strcpy(s2, s1);
- d = e;
- s1[NSIGNIF-2] = '0';
- s1[NSIGNIF-1] = '0';
- sprint(s1+NSIGNIF, "e%d", e-NSIGNIF+1);
- g = strtod(s1, nil);
- if(g == f)
- goto found;
- if(xadd(s1, NSIGNIF-3, 1)) {
- e++;
- sprint(s1+NSIGNIF, "e%d", e-NSIGNIF+1);
- }
- g = strtod(s1, nil);
- if(g == f)
- goto found;
- strcpy(s1, s2);
- e = d;
- }
+ e -= NSIGNIF-1;
+ xfmtexp(s+NSIGNIF, e, 0);
/*
- * convert back so s1 gets exact answer
+ * adjust conversion until strtod(s) == f exactly.
*/
- for(;;) {
- sprint(s1+NSIGNIF, "e%d", e-NSIGNIF+1);
- g = strtod(s1, nil);
+ for(i=0; i<10; i++) {
+ g = fmtstrtod(s, nil);
if(f > g) {
- if(xadd(s1, NSIGNIF-1, 1))
+ if(xadd1(s, NSIGNIF)) {
+ /* gained a digit */
e--;
+ xfmtexp(s+NSIGNIF, e, 0);
+ }
continue;
}
if(f < g) {
- if(xsub(s1, NSIGNIF-1, 1))
+ if(xsub1(s, NSIGNIF)) {
+ /* lost a digit */
e++;
+ xfmtexp(s+NSIGNIF, e, 0);
+ }
continue;
}
break;
}
-found:
/*
- * sign
+ * play with the decimal to try to simplify.
*/
- d = 0;
- i = 0;
- if(sign)
- s2[d++] = '-';
- else if(fmt->flags & FmtSign)
- s2[d++] = '+';
- else if(fmt->flags & FmtSpace)
- s2[d++] = ' ';
/*
- * copy into final place
- * c1 digits of leading '0'
- * c2 digits from conversion
- * c3 digits of trailing '0'
- * c4 digits after '.'
+ * bump last few digits up to 9 if we can
+ */
+ for(i=NSIGNIF-1; i>=NSIGNIF-3; i--) {
+ c = s[i];
+ if(c != '9') {
+ s[i] = '9';
+ g = fmtstrtod(s, nil);
+ if(g != f) {
+ s[i] = c;
+ break;
+ }
+ }
+ }
+
+ /*
+ * add 1 in hopes of turning 9s to 0s
+ */
+ if(s[NSIGNIF-1] == '9') {
+ strcpy(tmp, s);
+ ee = e;
+ if(xadd1(tmp, NSIGNIF)) {
+ ee--;
+ xfmtexp(tmp+NSIGNIF, ee, 0);
+ }
+ g = fmtstrtod(tmp, nil);
+ if(g == f) {
+ strcpy(s, tmp);
+ e = ee;
+ }
+ }
+
+ /*
+ * bump last few digits down to 0 as we can.
+ */
+ for(i=NSIGNIF-1; i>=NSIGNIF-3; i--) {
+ c = s[i];
+ if(c != '0') {
+ s[i] = '0';
+ g = fmtstrtod(s, nil);
+ if(g != f) {
+ s[i] = c;
+ break;
+ }
+ }
+ }
+
+ /*
+ * remove trailing zeros.
+ */
+ ndigit = NSIGNIF;
+ while(ndigit > 1 && s[ndigit-1] == '0'){
+ e++;
+ --ndigit;
+ }
+ s[ndigit] = 0;
+ *exp = e;
+ *ns = ndigit;
+ errno = oerrno;
+}
+
+#ifdef PLAN9PORT
+static char *special[] = { "NaN", "NaN", "+Inf", "+Inf", "-Inf", "-Inf" };
+#else
+static char *special[] = { "nan", "NAN", "inf", "INF", "-inf", "-INF" };
+#endif
+
+int
+__efgfmt(Fmt *fmt)
+{
+ char buf[NSIGNIF+10], *dot, *digits, *p, *s, suf[10], *t;
+ double f;
+ int c, chr, dotwid, e, exp, fl, ndigits, neg, newndigits;
+ int pad, point, prec, realchr, sign, sufwid, ucase, wid, z1, z2;
+ Rune r, *rs, *rt;
+
+ if(fmt->flags&FmtLong)
+ f = va_arg(fmt->args, long double);
+ else
+ f = va_arg(fmt->args, double);
+
+ /*
+ * extract formatting flags
*/
- c1 = 0;
- c2 = prec + 1;
- c3 = 0;
- c4 = prec;
+ fl = fmt->flags;
+ fmt->flags = 0;
+ prec = FDEFLT;
+ if(fl & FmtPrec)
+ prec = fmt->prec;
+ chr = fmt->r;
+ ucase = 0;
switch(chr) {
- default:
- if(xadd(s1, c2, 5))
- e++;
+ case 'A':
+ case 'E':
+ case 'F':
+ case 'G':
+ chr += 'a'-'A';
+ ucase = 1;
break;
+ }
+
+ /*
+ * pick off special numbers.
+ */
+ if(__isNaN(f)) {
+ s = special[0+ucase];
+ special:
+ fmt->flags = fl & (FmtWidth|FmtLeft);
+ return __fmtcpy(fmt, s, strlen(s), strlen(s));
+ }
+ if(__isInf(f, 1)) {
+ s = special[2+ucase];
+ goto special;
+ }
+ if(__isInf(f, -1)) {
+ s = special[4+ucase];
+ goto special;
+ }
+
+ /*
+ * get exact representation.
+ */
+ digits = buf;
+ xdtoa(f, digits, &exp, &neg, &ndigits);
+
+ /*
+ * get locale's decimal point.
+ */
+ dot = fmt->decimal;
+ if(dot == nil)
+ dot = ".";
+ dotwid = utflen(dot);
+
+ /*
+ * now the formatting fun begins.
+ * compute parameters for actual fmt:
+ *
+ * pad: number of spaces to insert before/after field.
+ * z1: number of zeros to insert before digits
+ * z2: number of zeros to insert after digits
+ * point: number of digits to print before decimal point
+ * ndigits: number of digits to use from digits[]
+ * suf: trailing suffix, like "e-5"
+ */
+ realchr = chr;
+ switch(chr){
case 'g':
/*
- * decide on 'e' of 'f' style convers
+ * convert to at most prec significant digits. (prec=0 means 1)
*/
- if(xadd(s1, c2, 5))
- e++;
- if(e >= -5 && e <= prec) {
- c1 = -e - 1;
- c4 = prec - e;
- chr = 'h'; // flag for 'f' style
+ if(prec == 0)
+ prec = 1;
+ if(ndigits > prec) {
+ if(digits[prec] >= '5' && xadd1(digits, prec))
+ exp++;
+ exp += ndigits-prec;
+ ndigits = prec;
+ }
+
+ /*
+ * extra rules for %g (implemented below):
+ * trailing zeros removed after decimal unless FmtSharp.
+ * decimal point only if digit follows.
+ */
+
+ /* fall through to %e */
+ default:
+ case 'e':
+ /*
+ * one significant digit before decimal, no leading zeros.
+ */
+ point = 1;
+ z1 = 0;
+
+ /*
+ * decimal point is after ndigits digits right now.
+ * slide to be after first.
+ */
+ e = exp + (ndigits-1);
+
+ /*
+ * if this is %g, check exponent and convert prec
+ */
+ if(realchr == 'g') {
+ if(-4 <= e && e < prec)
+ goto casef;
+ prec--; /* one digit before decimal; rest after */
+ }
+
+ /*
+ * compute trailing zero padding or truncate digits.
+ */
+ if(1+prec >= ndigits)
+ z2 = 1+prec - ndigits;
+ else {
+ /*
+ * truncate digits
+ */
+ assert(realchr != 'g');
+ newndigits = 1+prec;
+ if(digits[newndigits] >= '5' && xadd1(digits, newndigits)) {
+ /*
+ * had 999e4, now have 100e5
+ */
+ e++;
+ }
+ ndigits = newndigits;
+ z2 = 0;
}
+ xfmtexp(suf, e, ucase);
+ sufwid = strlen(suf);
break;
+
+ casef:
case 'f':
- if(xadd(s1, c2+e, 5))
- e++;
- c1 = -e;
- if(c1 > prec)
- c1 = c2;
- c2 += e;
+ /*
+ * determine where digits go with respect to decimal point
+ */
+ if(ndigits+exp > 0) {
+ point = ndigits+exp;
+ z1 = 0;
+ } else {
+ point = 1;
+ z1 = 1 + -(ndigits+exp);
+ }
+
+ /*
+ * %g specifies prec = number of significant digits
+ * convert to number of digits after decimal point
+ */
+ if(realchr == 'g')
+ prec += z1 - point;
+
+ /*
+ * compute trailing zero padding or truncate digits.
+ */
+ if(point+prec >= z1+ndigits)
+ z2 = point+prec - (z1+ndigits);
+ else {
+ /*
+ * truncate digits
+ */
+ assert(realchr != 'g');
+ newndigits = point+prec - z1;
+ if(newndigits < 0) {
+ z1 += newndigits;
+ newndigits = 0;
+ } else if(newndigits == 0) {
+ /* perhaps round up */
+ if(digits[0] >= '5'){
+ digits[0] = '1';
+ newndigits = 1;
+ goto newdigit;
+ }
+ } else if(digits[newndigits] >= '5' && xadd1(digits, newndigits)) {
+ /*
+ * digits was 999, is now 100; make it 1000
+ */
+ digits[newndigits++] = '0';
+ newdigit:
+ /*
+ * account for new digit
+ */
+ if(z1) /* 0.099 => 0.100 or 0.99 => 1.00*/
+ z1--;
+ else /* 9.99 => 10.00 */
+ point++;
+ }
+ z2 = 0;
+ ndigits = newndigits;
+ }
+ sufwid = 0;
break;
}
-
+
/*
- * clean up c1 c2 and c3
+ * if %g is given without FmtSharp, remove trailing zeros.
+ * must do after truncation, so that e.g. print %.3g 1.001
+ * produces 1, not 1.00. sorry, but them's the rules.
*/
- if(c1 < 0)
- c1 = 0;
- if(c2 < 0)
- c2 = 0;
- if(c2 > NSIGNIF) {
- c3 = c2-NSIGNIF;
- c2 = NSIGNIF;
+ if(realchr == 'g' && !(fl & FmtSharp)) {
+ if(z1+ndigits+z2 >= point) {
+ if(z1+ndigits < point)
+ z2 = point - (z1+ndigits);
+ else{
+ z2 = 0;
+ while(z1+ndigits > point && digits[ndigits-1] == '0')
+ ndigits--;
+ }
+ }
}
/*
- * copy digits
+ * compute width of all digits and decimal point and suffix if any
*/
- while(c1 > 0) {
- if(c1+c2+c3 == c4)
- s2[d++] = '.';
- s2[d++] = '0';
- c1--;
- }
- while(c2 > 0) {
- if(c2+c3 == c4)
- s2[d++] = '.';
- s2[d++] = s1[i++];
- c2--;
+ wid = z1+ndigits+z2;
+ if(wid > point)
+ wid += dotwid;
+ else if(wid == point){
+ if(fl & FmtSharp)
+ wid += dotwid;
+ else
+ point++; /* do not print any decimal point */
}
- while(c3 > 0) {
- if(c3 == c4)
- s2[d++] = '.';
- s2[d++] = '0';
- c3--;
+ wid += sufwid;
+
+ /*
+ * determine sign
+ */
+ sign = 0;
+ if(neg)
+ sign = '-';
+ else if(fl & FmtSign)
+ sign = '+';
+ else if(fl & FmtSpace)
+ sign = ' ';
+ if(sign)
+ wid++;
+
+ /*
+ * compute padding
+ */
+ pad = 0;
+ if((fl & FmtWidth) && fmt->width > wid)
+ pad = fmt->width - wid;
+ if(pad && !(fl & FmtLeft) && (fl & FmtZero)){
+ z1 += pad;
+ point += pad;
+ pad = 0;
}
/*
- * strip trailing '0' on g conv
+ * format the actual field. too bad about doing this twice.
*/
- if(fmt->flags & FmtSharp) {
- if(0 == c4)
- s2[d++] = '.';
- } else
- if(chr == 'g' || chr == 'h') {
- for(n=d-1; n>=0; n--)
- if(s2[n] != '0')
- break;
- for(i=n; i>=0; i--)
- if(s2[i] == '.') {
- d = n;
- if(i != n)
- d++;
- break;
+ if(fmt->runes){
+ if(pad && !(fl & FmtLeft) && __rfmtpad(fmt, pad) < 0)
+ return -1;
+ rt = (Rune*)fmt->to;
+ rs = (Rune*)fmt->stop;
+ if(sign)
+ FMTRCHAR(fmt, rt, rs, sign);
+ while(z1>0 || ndigits>0 || z2>0) {
+ if(z1 > 0){
+ z1--;
+ c = '0';
+ }else if(ndigits > 0){
+ ndigits--;
+ c = *digits++;
+ }else{
+ z2--;
+ c = '0';
}
- }
- if(chr == 'e' || chr == 'g') {
- if(ucase)
- s2[d++] = 'E';
- else
- s2[d++] = 'e';
- c1 = e;
- if(c1 < 0) {
- s2[d++] = '-';
- c1 = -c1;
- } else
- s2[d++] = '+';
- if(c1 >= 100) {
- s2[d++] = c1/100 + '0';
- c1 = c1%100;
+ FMTRCHAR(fmt, rt, rs, c);
+ if(--point == 0) {
+ for(p = dot; *p; ){
+ p += chartorune(&r, p);
+ FMTRCHAR(fmt, rt, rs, r);
+ }
+ }
+ }
+ fmt->nfmt += rt - (Rune*)fmt->to;
+ fmt->to = rt;
+ if(sufwid && __fmtcpy(fmt, suf, sufwid, sufwid) < 0)
+ return -1;
+ if(pad && (fl & FmtLeft) && __rfmtpad(fmt, pad) < 0)
+ return -1;
+ }else{
+ if(pad && !(fl & FmtLeft) && __fmtpad(fmt, pad) < 0)
+ return -1;
+ t = (char*)fmt->to;
+ s = (char*)fmt->stop;
+ if(sign)
+ FMTCHAR(fmt, t, s, sign);
+ while(z1>0 || ndigits>0 || z2>0) {
+ if(z1 > 0){
+ z1--;
+ c = '0';
+ }else if(ndigits > 0){
+ ndigits--;
+ c = *digits++;
+ }else{
+ z2--;
+ c = '0';
+ }
+ FMTCHAR(fmt, t, s, c);
+ if(--point == 0)
+ for(p=dot; *p; p++)
+ FMTCHAR(fmt, t, s, *p);
}
- s2[d++] = c1/10 + '0';
- s2[d++] = c1%10 + '0';
+ fmt->nfmt += t - (char*)fmt->to;
+ fmt->to = t;
+ if(sufwid && __fmtcpy(fmt, suf, sufwid, sufwid) < 0)
+ return -1;
+ if(pad && (fl & FmtLeft) && __fmtpad(fmt, pad) < 0)
+ return -1;
}
- s2[d] = 0;
-}
-
-static int
-floatfmt(Fmt *fmt, double f)
-{
- char s[FDIGIT+10];
-
- xdtoa(fmt, s, f);
- fmt->flags &= FmtWidth|FmtLeft;
- __fmtcpy(fmt, s, strlen(s), strlen(s));
return 0;
}
-int
-__efgfmt(Fmt *f)
-{
- double d;
-
- d = va_arg(f->args, double);
- return floatfmt(f, d);
-}
diff --git a/lib/libfmt/fmt.c b/lib/libfmt/fmt.c
@@ -40,6 +40,7 @@ static Convfmt knownfmt[] = {
' ', __flagfmt,
'#', __flagfmt,
'%', __percentfmt,
+ '\'', __flagfmt,
'+', __flagfmt,
',', __flagfmt,
'-', __flagfmt,
diff --git a/lib/libfmt/fmtdef.h b/lib/libfmt/fmtdef.h
@@ -53,6 +53,7 @@ void __fmtunlock(void);
int __ifmt(Fmt *f);
int __isInf(double d, int sign);
int __isNaN(double d);
+int __needsep(int *ndig, char **grouping);
int __needsquotes(char *s, int *quotelenp);
int __percentfmt(Fmt *f);
void __quotesetup(char *s, Rune *r, int nin, int nout, Quoteinfo *q, int sharp, int runesout);
diff --git a/lib/libfmt/fmtfd.c b/lib/libfmt/fmtfd.c
@@ -41,6 +41,8 @@ fmtfdinit(Fmt *f, int fd, char *buf, int size)
f->stop = buf + size;
f->flush = __fmtFdFlush;
f->farg = (void*)(uintptr_t)fd;
+ f->flags = 0;
f->nfmt = 0;
+ fmtlocaleinit(f, nil, nil, nil);
return 0;
}
diff --git a/lib/libfmt/fmtlocale.c b/lib/libfmt/fmtlocale.c
@@ -0,0 +1,55 @@
+/* Copyright (c) 2004 Google Inc.; see LICENSE */
+
+#include <stdarg.h>
+#include <string.h>
+#include "plan9.h"
+#include "fmt.h"
+#include "fmtdef.h"
+
+/*
+ * Fill in the internationalization stuff in the Fmt structure.
+ * For nil arguments, provide the sensible defaults:
+ * decimal is a period
+ * thousands separator is a comma
+ * thousands are marked every three digits
+ */
+void
+fmtlocaleinit(Fmt *f, char *decimal, char *thousands, char *grouping)
+{
+ if(decimal == nil || decimal[0] == '\0')
+ decimal = ".";
+ if(thousands == nil)
+ thousands = ",";
+ if(grouping == nil)
+ grouping = "\3";
+ f->decimal = decimal;
+ f->thousands = thousands;
+ f->grouping = grouping;
+}
+
+/*
+ * We are about to emit a digit in e.g. %'d. If that digit would
+ * overflow a thousands (e.g.) grouping, tell the caller to emit
+ * the thousands separator. Always advance the digit counter
+ * and pointer into the grouping descriptor.
+ */
+int
+__needsep(int *ndig, char **grouping)
+{
+ int group;
+
+ (*ndig)++;
+ group = *(unsigned char*)*grouping;
+ /* CHAR_MAX means no further grouping. \0 means we got the empty string */
+ if(group == 0xFF || group == 0x7f || group == 0x00)
+ return 0;
+ if(*ndig > group){
+ /* if we're at end of string, continue with this grouping; else advance */
+ if((*grouping)[1] != '\0')
+ (*grouping)++;
+ *ndig = 1;
+ return 1;
+ }
+ return 0;
+}
+
diff --git a/lib/libfmt/fmtquote.c b/lib/libfmt/fmtquote.c
@@ -120,8 +120,10 @@ qstrfmt(char *sin, Rune *rin, Quoteinfo *q, Fmt *f)
rm = rin;
rme = rm + q->nrunesin;
- w = f->width;
fl = f->flags;
+ w = 0;
+ if(fl & FmtWidth)
+ w = f->width;
if(f->runes){
if(!(fl & FmtLeft) && __rfmtpad(f, w - q->nrunesout) < 0)
return -1;
@@ -209,7 +211,7 @@ __quotestrfmt(int runesin, Fmt *f)
outlen = (char*)f->stop - (char*)f->to;
__quotesetup(s, r, nin, outlen, &q, f->flags&FmtSharp, f->runes);
-//print("bytes in %d bytes out %d runes in %d runesout %d\n", q.nbytesin, q.nbytesout, q.nrunesin, q.nrunesout);
+/*print("bytes in %d bytes out %d runes in %d runesout %d\n", q.nbytesin, q.nbytesout, q.nrunesin, q.nrunesout); */
if(runesin){
if(!q.quoted)
diff --git a/lib/libfmt/fmtstr.c b/lib/libfmt/fmtstr.c
@@ -23,5 +23,6 @@ fmtstrflush(Fmt *f)
if(f->start == nil)
return nil;
*(char*)f->to = '\0';
+ f->to = f->start;
return (char*)f->start;
}
diff --git a/lib/libfmt/nan64.c b/lib/libfmt/nan64.c
@@ -6,58 +6,67 @@
*/
#include "plan9.h"
+#include <assert.h>
#include "fmt.h"
#include "fmtdef.h"
-#if defined (__APPLE__) || (__powerpc__)
-#define _NEEDLL
-#endif
-
static uvlong uvnan = ((uvlong)0x7FF00000<<32)|0x00000001;
static uvlong uvinf = ((uvlong)0x7FF00000<<32)|0x00000000;
static uvlong uvneginf = ((uvlong)0xFFF00000<<32)|0x00000000;
+/* gcc sees through the obvious casts. */
+static uvlong
+d2u(double d)
+{
+ union {
+ uvlong v;
+ double d;
+ } u;
+ assert(sizeof(u.d) == sizeof(u.v));
+ u.d = d;
+ return u.v;
+}
+
+static double
+u2d(uvlong v)
+{
+ union {
+ uvlong v;
+ double d;
+ } u;
+ assert(sizeof(u.d) == sizeof(u.v));
+ u.v = v;
+ return u.d;
+}
+
double
__NaN(void)
{
- uvlong *p;
-
- /* gcc complains about "return *(double*)&uvnan;" */
- p = &uvnan;
- return *(double*)p;
+ return u2d(uvnan);
}
int
__isNaN(double d)
{
uvlong x;
- double *p;
-
- p = &d;
- x = *(uvlong*)p;
- return (ulong)(x>>32)==0x7FF00000 && !__isInf(d, 0);
+
+ x = d2u(d);
+ /* IEEE 754: exponent bits 0x7FF and non-zero mantissa */
+ return (x&uvinf) == uvinf && (x&~uvneginf) != 0;
}
double
__Inf(int sign)
{
- uvlong *p;
-
- if(sign < 0)
- p = &uvinf;
- else
- p = &uvneginf;
- return *(double*)p;
+ return u2d(sign < 0 ? uvneginf : uvinf);
}
int
__isInf(double d, int sign)
{
uvlong x;
- double *p;
-
- p = &d;
- x = *(uvlong*)p;
+
+ x = d2u(d);
if(sign == 0)
return x==uvinf || x==uvneginf;
else if(sign > 0)
diff --git a/lib/libfmt/runefmtstr.c b/lib/libfmt/runefmtstr.c
@@ -23,5 +23,6 @@ runefmtstrflush(Fmt *f)
if(f->start == nil)
return nil;
*(Rune*)f->to = '\0';
+ f->to = f->start;
return f->start;
}
diff --git a/lib/libfmt/runevseprint.c b/lib/libfmt/runevseprint.c
@@ -32,6 +32,7 @@ runevseprint(Rune *buf, Rune *e, const char *fmt, va_list args)
f.farg = nil;
f.nfmt = 0;
va_copy(f.args,args);
+ fmtlocaleinit(&f, nil, nil, nil);
dofmt(&f, fmt);
va_end(f.args);
*(Rune*)f.to = '\0';
diff --git a/lib/libfmt/runevsmprint.c b/lib/libfmt/runevsmprint.c
@@ -63,6 +63,7 @@ runefmtstrinit(Fmt *f)
f->flush = runeFmtStrFlush;
f->farg = (void*)(uintptr_t)n;
f->nfmt = 0;
+ fmtlocaleinit(f, nil, nil, nil);
return 0;
}
diff --git a/lib/libfmt/runevsnprint.c b/lib/libfmt/runevsnprint.c
@@ -32,6 +32,7 @@ runevsnprint(Rune *buf, int len, const char *fmt, va_list args)
f.farg = nil;
f.nfmt = 0;
va_copy(f.args,args);
+ fmtlocaleinit(&f, nil, nil, nil);
dofmt(&f, fmt);
va_end(f.args);
*(Rune*)f.to = '\0';
diff --git a/lib/libfmt/sprint.c b/lib/libfmt/sprint.c
@@ -26,8 +26,11 @@ sprint(char *buf, const char *fmt, ...)
/*
* on PowerPC, the stack is near the top of memory, so
* we must be sure not to overflow a 32-bit pointer.
+ *
+ * careful! gcc-4.2 assumes buf+len < buf can never be true and
+ * optimizes the test away. casting to uintptr works around this bug.
*/
- if(buf+len < buf)
+ if((uintptr_t)buf+len < (uintptr_t)buf)
len = -(uintptr_t)buf-1;
va_start(args, fmt);
diff --git a/lib/libfmt/strtod.c b/lib/libfmt/strtod.c
@@ -67,7 +67,7 @@ enum
S4, /* _+#.# #S4 eS5 */
S5, /* _+#.#e +S6 #S7 */
S6, /* _+#.#e+ #S7 */
- S7, /* _+#.#e+# #S7 */
+ S7 /* _+#.#e+# #S7 */
};
static int xcmp(char*, char*);
@@ -239,7 +239,7 @@ fmtstrtod(const char *as, char **aas)
/* close approx by naive conversion */
mid[0] = 0;
mid[1] = 1;
- for(i=0; c=a[i]; i++) {
+ for(i=0; (c=a[i]) != '\0'; i++) {
mid[0] = mid[0]*10 + (c-'0');
mid[1] = mid[1]*10;
if(i >= 8)
@@ -521,7 +521,7 @@ xcmp(char *a, char *b)
{
int c1, c2;
- while(c1 = *b++) {
+ while((c1 = *b++) != '\0') {
c2 = *a++;
if(isupper(c2))
c2 = tolower(c2);
diff --git a/lib/libfmt/test.c b/lib/libfmt/test.c
@@ -40,5 +40,24 @@ main(int argc, char *argv[])
print("%d\n", 23);
print("%i\n", 23);
print("%0.10d\n", 12345);
+
+ /* test %4$d formats */
+ print("%3$d %4$06d %2$d %1$d\n", 444, 333, 111, 222);
+ print("%3$d %4$06d %2$d %1$d\n", 444, 333, 111, 222);
+ print("%3$d %4$*5$06d %2$d %1$d\n", 444, 333, 111, 222, 20);
+ print("%3$hd %4$*5$06d %2$d %1$d\n", 444, 333, (short)111, 222, 20);
+ print("%3$lld %4$*5$06d %2$d %1$d\n", 444, 333, 111LL, 222, 20);
+
+ /* test %'d formats */
+ print("%'d %'d %'d\n", 1, 2222, 33333333);
+ print("%'019d\n", 0);
+ print("%08d %08d %08d\n", 1, 2222, 33333333);
+ print("%'08d %'08d %'08d\n", 1, 2222, 33333333);
+ print("%'x %'X %'b\n", 0x11111111, 0xabcd1234, 12345);
+ print("%'lld %'lld %'lld\n", 1LL, 222222222LL, 3333333333333LL);
+ print("%019lld %019lld %019lld\n", 1LL, 222222222LL, 3333333333333LL);
+ print("%'019lld %'019lld %'019lld\n", 1LL, 222222222LL, 3333333333333LL);
+ print("%'020lld %'020lld %'020lld\n", 1LL, 222222222LL, 3333333333333LL);
+ print("%'llx %'llX %'llb\n", 0x111111111111LL, 0xabcd12345678LL, 112342345LL);
return 0;
}
diff --git a/lib/libfmt/vseprint.c b/lib/libfmt/vseprint.c
@@ -31,6 +31,7 @@ vseprint(char *buf, char *e, const char *fmt, va_list args)
f.farg = nil;
f.nfmt = 0;
va_copy(f.args,args);
+ fmtlocaleinit(&f, nil, nil, nil);
dofmt(&f, fmt);
va_end(f.args);
*(char*)f.to = '\0';
diff --git a/lib/libfmt/vsmprint.c b/lib/libfmt/vsmprint.c
@@ -63,6 +63,7 @@ fmtstrinit(Fmt *f)
f->flush = fmtStrFlush;
f->farg = (void*)(uintptr_t)n;
f->nfmt = 0;
+ fmtlocaleinit(f, nil, nil, nil);
return 0;
}
diff --git a/lib/libfmt/vsnprint.c b/lib/libfmt/vsnprint.c
@@ -32,6 +32,7 @@ vsnprint(char *buf, int len, const char *fmt, va_list args)
f.farg = nil;
f.nfmt = 0;
va_copy(f.args,args);
+ fmtlocaleinit(&f, nil, nil, nil);
dofmt(&f, fmt);
va_end(f.args);
*(char*)f.to = '\0';
diff --git a/lib/libregexp/rregexec.c b/lib/libregexp/rregexec.c
@@ -25,6 +25,7 @@ rregexec1(Reprog *progp, /* program to run */
Relist* tle; /* ends of this and next list */
Relist* nle;
int match;
+ Rune *p;
match = 0;
checkstart = j->startchar;
@@ -44,20 +45,18 @@ rregexec1(Reprog *progp, /* program to run */
if(checkstart) {
switch(j->starttype) {
case RUNE:
- while(*s != j->startchar) {
- if(*s == 0 || s == j->reol)
- return match;
- s++;
- }
+ p = runestrchr(s, j->startchar);
+ if(p == 0 || p == j->reol)
+ return match;
+ s = p;
break;
case BOL:
if(s == bol)
break;
- while(*s != '\n') {
- if(*s == 0 || s == j->reol)
- return match;
- s++;
- }
+ p = runestrchr(s, '\n');
+ if(p == 0 || s == j->reol)
+ return match;
+ s = p+1;
break;
}
}
diff --git a/lib/libutf/rune.c b/lib/libutf/rune.c
@@ -23,27 +23,30 @@ enum
Bit2 = 5,
Bit3 = 4,
Bit4 = 3,
+ Bit5 = 2,
T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
+ T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
- Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
- Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
- Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
+ Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */
+ Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */
+ Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */
+ Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */
Maskx = (1<<Bitx)-1, /* 0011 1111 */
Testx = Maskx ^ 0xFF, /* 1100 0000 */
- Bad = Runeerror,
+ Bad = Runeerror
};
int
chartorune(Rune *rune, const char *str)
{
- int c, c1, c2;
+ int c, c1, c2, c3;
long l;
/*
@@ -89,6 +92,25 @@ chartorune(Rune *rune, const char *str)
}
/*
+ * four character sequence
+ * 10000-10FFFF => T4 Tx Tx Tx
+ */
+ if(UTFmax >= 4) {
+ c3 = *(uchar*)(str+3) ^ Tx;
+ if(c3 & Testx)
+ goto bad;
+ if(c < T5) {
+ l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+ if(l <= Rune3)
+ goto bad;
+ if(l > Runemax)
+ goto bad;
+ *rune = l;
+ return 4;
+ }
+ }
+
+ /*
* bad decoding
*/
bad:
@@ -113,7 +135,7 @@ runetochar(char *str, const Rune *rune)
/*
* two character sequence
- * 0080-07FF => T2 Tx
+ * 00080-007FF => T2 Tx
*/
if(c <= Rune2) {
str[0] = T2 | (c >> 1*Bitx);
@@ -123,20 +145,36 @@ runetochar(char *str, const Rune *rune)
/*
* three character sequence
- * 0800-FFFF => T3 Tx Tx
+ * 00800-0FFFF => T3 Tx Tx
*/
- str[0] = T3 | (c >> 2*Bitx);
- str[1] = Tx | ((c >> 1*Bitx) & Maskx);
- str[2] = Tx | (c & Maskx);
- return 3;
+ if(c > Runemax)
+ c = Runeerror;
+ if(c <= Rune3) {
+ str[0] = T3 | (c >> 2*Bitx);
+ str[1] = Tx | ((c >> 1*Bitx) & Maskx);
+ str[2] = Tx | (c & Maskx);
+ return 3;
+ }
+
+ /*
+ * four character sequence
+ * 10000-Runemax => T4 Tx Tx Tx (c > Runemax was already replaced by Runeerror)
+ */
+ str[0] = T4 | (c >> 3*Bitx);
+ str[1] = Tx | ((c >> 2*Bitx) & Maskx);
+ str[2] = Tx | ((c >> 1*Bitx) & Maskx);
+ str[3] = Tx | (c & Maskx);
+ return 4;
}
int
-runelen(Rune c)
+runelen(long c)
{
+ Rune rune;
char str[10];
- return runetochar(str, &c);
+ rune = c;
+ return runetochar(str, &rune);
}
int
@@ -153,7 +191,10 @@ runenlen(const Rune *r, int nrune)
if(c <= Rune2)
nb += 2;
else
+ if(c <= Rune3 || c > Runemax)
nb += 3;
+ else
+ nb += 4;
}
return nb;
}
@@ -163,13 +204,14 @@ fullrune(const char *str, int n)
{
int c;
- if(n > 0) {
- c = *(uchar*)str;
- if(c < Tx)
- return 1;
- if(n > 1)
- if(c < T3 || n > 2)
- return 1;
- }
- return 0;
+ if(n <= 0)
+ return 0;
+ c = *(uchar*)str;
+ if(c < Tx)
+ return 1;
+ if(c < T3)
+ return n >= 2;
+ if(UTFmax == 3 || c < T4)
+ return n >= 3;
+ return n >= 4;
}