commit c18c526e623588d636ca179efd0ad4ff3d149634
parent ec79f75778ab99c82b16d01786b8fe4a64242b10
Author: Connor Lane Smith <cls@lubutu.com>
Date: Mon, 21 May 2012 17:57:46 +0100
reject bad runes in runelen, runetochar
Diffstat:
rune.c | 36 ++++++++++++++++++++----------------
utf.h  |  5 +++--
2 files changed, 23 insertions(+), 18 deletions(-)
diff --git a/rune.c b/rune.c
@@ -4,14 +4,19 @@
#define MIN(x,y) ((x) < (y) ? (x) : (y))
-#define UTFSEQ(x) ((((x) & 0x80) == 0x00) ? 1 /* 0xxxxxxx */ \
- : (((x) & 0xC0) == 0x80) ? 0 /* 10xxxxxx */ \
- : (((x) & 0xE0) == 0xC0) ? 2 /* 110xxxxx */ \
- : (((x) & 0xF0) == 0xE0) ? 3 /* 1110xxxx */ \
- : (((x) & 0xF8) == 0xF0) ? 4 /* 11110xxx */ \
- : (((x) & 0xFC) == 0xF8) ? 5 /* 111110xx */ \
- : (((x) & 0xFE) == 0xFC) ? 6 /* 1111110x */ \
- : 0 )
+#define UTFSEQ(x) ((((x) & 0x80) == 0x00) ? 1 /* 0xxxxxxx */ \
+ : (((x) & 0xC0) == 0x80) ? 0 /* 10xxxxxx */ \
+ : (((x) & 0xE0) == 0xC0) ? 2 /* 110xxxxx */ \
+ : (((x) & 0xF0) == 0xE0) ? 3 /* 1110xxxx */ \
+ : (((x) & 0xF8) == 0xF0) ? 4 /* 11110xxx */ \
+ : (((x) & 0xFC) == 0xF8) ? 5 /* 111110xx */ \
+ : (((x) & 0xFE) == 0xFC) ? 6 /* 1111110x */ \
+ : 0 )
+
+#define BADRUNE(x) ((x) > Runemax \
+ || ((x) & 0xFFFE) == 0xFFFE \
+ || ((x) >= 0xD800 && (x) <= 0xDFFF) \
+ || ((x) >= 0xFDD0 && (x) <= 0xFDEF))
/*
* runetochar copies one rune at p to at most UTFmax bytes starting at s and
@@ -21,7 +26,7 @@
* If the rune is illegal, runetochar will return 0.
*/
int
-runetochar(char *s, Rune *p)
+runetochar(char *s, const Rune *p)
{
Rune r = *p;
@@ -104,8 +109,7 @@ charntorune(Rune *p, const char *s, size_t len)
return 0;
/* reject invalid runes and overlong sequences */
- if(n > UTFmax || r > 0x10FFFF || runelen(r) < (int)n || (r & 0xFFFE) == 0xFFFE
- || (r >= 0xD800 && r <= 0xDFFF) || (r >= 0xFDD0 && r <= 0xFDEF))
+ if(n > UTFmax || runelen(r) < (int)n || BADRUNE(r))
r = Runeerror;
*p = r;
@@ -119,16 +123,16 @@ charntorune(Rune *p, const char *s, size_t len)
int
runelen(Rune r)
{
- if(r <= 0x7F)
+ if(BADRUNE(r))
+ return 0; /* error */
+ else if(r <= 0x7F)
return 1;
else if(r <= 0x07FF)
return 2;
else if(r <= 0xFFFF)
return 3;
- else if(r <= Runemax)
- return 4;
else
- return 0; /* error */
+ return 4;
}
/*
@@ -136,7 +140,7 @@ runelen(Rune r)
* length len pointed to by p into UTF-8.
*/
size_t
-runenlen(Rune *p, size_t len)
+runenlen(const Rune *p, size_t len)
{
size_t i, n = 0;
diff --git a/utf.h b/utf.h
@@ -13,10 +13,11 @@ enum {
Runemax = 0x10FFFF /* maximum rune value */
};
-int runetochar(char *, Rune *);
+int runetochar(char *, const Rune *);
int chartorune(Rune *, const char *);
int charntorune(Rune *, const char *, size_t);
-int runelen(Rune);
+int runelen(const Rune);
+size_t runenlen(const Rune *, size_t);
int fullrune(const char *, size_t);
char *utfecpy(char *, char *, const char *);
size_t utflen(const char *);