libutf

UTF-8 library
git clone git://git.suckless.org/libutf
Log | Files | Refs | README | LICENSE

commit fb9f3be7e21c40b4994bc33e9722a1ec01d6abf7
parent ce71fa4415c040d404dd2eee246fe532342035c1
Author: Connor Lane Smith <cls@lubutu.com>
Date:   Sat, 26 May 2012 12:45:42 +0100

separate utf string functions from rune.c
Diffstat:
Makefile | 2+-
rune.c | 108-------------------------------------------------------------------------------
utf.c | 110+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 111 insertions(+), 109 deletions(-)

diff --git a/Makefile b/Makefile
@@ -2,7 +2,7 @@
 
 include config.mk
 
-SRC = rune.c runetype.c
+SRC = rune.c runetype.c utf.c
 OBJ = $(SRC:.c=.o)
 
 LIB = libutf.a
diff --git a/rune.c b/rune.c
@@ -1,5 +1,4 @@
 /* See LICENSE file for copyright and license details. */
-#include <string.h>
 #include "utf.h"
 
 #define MIN(x,y) ((x) < (y) ? (x) : (y))
@@ -126,110 +125,3 @@ fullrune(const char *s, size_t len)
 
 	return charntorune(&r, s, len) > 0;
 }
-
-char *
-utfecpy(char *to, char *end, const char *from)
-{
-	Rune r = Runeerror;
-	size_t i, n;
-
-	/* seek through to find final full rune */
-	for(i = 0; r != '\0' && (n = charntorune(&r, &from[i], end - &to[i])); i += n)
-		;
-	memcpy(to, from, i); /* copy over bytes up to this rune */
-
-	if(i > 0 && r != '\0')
-		to[i] = '\0'; /* terminate if unterminated */
-	return &to[i];
-}
-
-size_t
-utflen(const char *s)
-{
-	const char *p = s;
-	size_t i;
-	Rune r;
-
-	for(i = 0; *p != '\0'; i++)
-		p += chartorune(&r, p);
-	return i;
-}
-
-size_t
-utfnlen(const char *s, size_t len)
-{
-	const char *p = s;
-	size_t i;
-	Rune r;
-	int n;
-
-	for(i = 0; (n = charntorune(&r, p, len-(p-s))) && r != '\0'; i++)
-		p += n;
-	return i;
-}
-
-char *
-utfrune(const char *s, Rune r)
-{
-	if(r < Runeself) {
-		return strchr(s, r);
-	}
-	else if(r == Runeerror) {
-		Rune r0;
-		int n;
-
-		for(; *s != '\0'; s += n) {
-			n = chartorune(&r0, s);
-			if(r == r0)
-				return (char *)s;
-		}
-	}
-	else {
-		char buf[UTFmax+1];
-		int n;
-
-		if(!(n = runetochar(buf, &r)))
-			return NULL;
-		buf[n] = '\0';
-		return strstr(s, buf);
-	}
-	return NULL;
-}
-
-char *
-utfrrune(const char *s, Rune r)
-{
-	const char *p = NULL;
-	Rune r0;
-	int n;
-
-	if(r < Runeself)
-		return strrchr(s, r);
-
-	for(; *s != '\0'; s += n) {
-		n = chartorune(&r0, s);
-		if(r == r0)
-			p = s;
-	}
-	return (char *)p;
-}
-
-char *
-utfutf(const char *s, const char *t)
-{
-	const char *p, *q;
-	Rune r0, r1, r2;
-	int n, m;
-
-	for(chartorune(&r0, t); (s = utfrune(s, r0)); s++) {
-		for(p = s, q = t; *q && *p; p += n, q += m) {
-			n = chartorune(&r1, p);
-			m = chartorune(&r2, q);
-			if(r1 != r2)
-				break;
-		}
-		if(!*q)
-			return (char *)s;
-	}
-	return NULL;
-}
diff --git a/utf.c b/utf.c
@@ -0,0 +1,110 @@
+/* See LICENSE file for copyright and license details. */
+#include <string.h>
+#include "utf.h"
+
+char *
+utfecpy(char *to, char *end, const char *from)
+{
+	Rune r = Runeerror;
+	size_t i, n;
+
+	/* seek through to find final full rune */
+	for(i = 0; r != '\0' && (n = charntorune(&r, &from[i], end - &to[i])); i += n)
+		;
+	memcpy(to, from, i); /* copy over bytes up to this rune */
+
+	if(i > 0 && r != '\0')
+		to[i] = '\0'; /* terminate if unterminated */
+	return &to[i];
+}
+
+size_t
+utflen(const char *s)
+{
+	const char *p = s;
+	size_t i;
+	Rune r;
+
+	for(i = 0; *p != '\0'; i++)
+		p += chartorune(&r, p);
+	return i;
+}
+
+size_t
+utfnlen(const char *s, size_t len)
+{
+	const char *p = s;
+	size_t i;
+	Rune r;
+	int n;
+
+	for(i = 0; (n = charntorune(&r, p, len-(p-s))) && r != '\0'; i++)
+		p += n;
+	return i;
+}
+
+char *
+utfrune(const char *s, Rune r)
+{
+	if(r < Runeself) {
+		return strchr(s, r);
+	}
+	else if(r == Runeerror) {
+		Rune r0;
+		int n;
+
+		for(; *s != '\0'; s += n) {
+			n = chartorune(&r0, s);
+			if(r == r0)
+				return (char *)s;
+		}
+	}
+	else {
+		char buf[UTFmax+1];
+		int n;
+
+		if(!(n = runetochar(buf, &r)))
+			return NULL;
+		buf[n] = '\0';
+		return strstr(s, buf);
+	}
+	return NULL;
+}
+
+char *
+utfrrune(const char *s, Rune r)
+{
+	const char *p = NULL;
+	Rune r0;
+	int n;
+
+	if(r < Runeself)
+		return strrchr(s, r);
+
+	for(; *s != '\0'; s += n) {
+		n = chartorune(&r0, s);
+		if(r == r0)
+			p = s;
+	}
+	return (char *)p;
+}
+
+char *
+utfutf(const char *s, const char *t)
+{
+	const char *p, *q;
+	Rune r0, r1, r2;
+	int n, m;
+
+	for(chartorune(&r0, t); (s = utfrune(s, r0)); s++) {
+		for(p = s, q = t; *q && *p; p += n, q += m) {
+			n = chartorune(&r1, p);
+			m = chartorune(&r2, q);
+			if(r1 != r2)
+				break;
+		}
+		if(!*q)
+			return (char *)s;
+	}
+	return NULL;
+}