add manpages rune.3, isalpharune.3 - libutf

commit 74d431464aada2142f77fc661bd6ecc289d780ed
parent c18c526e623588d636ca179efd0ad4ff3d149634
Author: Connor Lane Smith <cls@lubutu.com>
Date:   Mon, 21 May 2012 19:00:45 +0100

add manpages rune.3, isalpharune.3
Diffstat:
Makefile  | 22 +++++++++++++---------
config.mk  | 2 ++
isalpharune.3  | 29 +++++++++++++++++++++++++++++
rune.3  | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
rune.c  | 71 +++--------------------------------------------------------------------
utf.h  | 4 ++--

6 files changed, 206 insertions(+), 79 deletions(-)
diff --git a/Makefile b/Makefile
@@ -8,8 +8,6 @@ OBJ = $(SRC:.c=.o)
 LIB = libutf.a
 INC = utf.h
 
-UCD = UnicodeData-6.1.0.txt
-
 all: $(LIB) utftest
 
 $(LIB): $(OBJ)
@@ -21,18 +19,24 @@ utftest: utftest.o $(LIB)
 .c.o:
 	$(CC) $(CFLAGS) -c $<
 
-runetypebody.h: mkrunetype.awk $(UCD)
-	$(AWK) -f mkrunetype.awk $(UCD) > $@
+runetypebody.h: mkrunetype.awk UnicodeData-$(UNICODE).txt
+	$(AWK) -f mkrunetype.awk UnicodeData-$(UNICODE).txt > $@
 
-install: $(LIB)
-	mkdir -p $(DESTDIR)$(PREFIX)/lib
-	cp $(LIB) $(DESTDIR)$(PREFIX)/lib/$(LIB)
-	mkdir -p $(DESTDIR)$(PREFIX)/include
-	cp $(INC) $(DESTDIR)$(PREFIX)/include/$(INC)
+install: $(LIB) $(INC) $(MAN)
+	@echo @ install libutf
+	@mkdir -p $(DESTDIR)$(PREFIX)/lib
+	@cp $(LIB) $(DESTDIR)$(PREFIX)/lib/$(LIB)
+	@mkdir -p $(DESTDIR)$(PREFIX)/include
+	@cp $(INC) $(DESTDIR)$(PREFIX)/include/$(INC)
+	@mkdir -p $(DESTDIR)$(PREFIX)/share/man/man3
+	@cp rune.3 $(DESTDIR)$(PREFIX)/share/man/man3/rune.3
+	@sed 's/$$UNICODE/$(UNICODE)/g' isalpharune.3 > $(DESTDIR)$(PREFIX)/share/man/man3/isalpharune.3
 
 uninstall:
 	rm -f $(DESTDIR)$(PREFIX)/lib/$(LIB)
 	rm -f $(DESTDIR)$(PREFIX)/include/$(INC)
+	rm -f $(DESTDIR)$(PREFIX)/share/man/man3/rune.3
+	rm -f $(DESTDIR)$(PREFIX)/share/man/man3/isalpharune.3
 
 clean:
 	rm -f $(LIB) utftest utftest.o $(OBJ)
diff --git a/config.mk b/config.mk
@@ -2,6 +2,8 @@
 
 PREFIX = /usr/local
 
+UNICODE = 6.1.0
+
 CFLAGS  = -ansi -pedantic -Os -Wall -Wextra
 LDFLAGS = -s
 
diff --git a/isalpharune.3 b/isalpharune.3
@@ -0,0 +1,29 @@
+.Dd $Mdocdate$
+.Dt ISALPHARUNE 3
+.Os
+.Sh NAME
+.Nm isalpharune, islowerrune, isspacerune, istitlerune, isupperrune, isdigitrune
+.Nd Unicode rune classification
+.Sh SYNOPSIS
+.Ft int
+.Fn isalpharune "Rune r"
+.Ft int
+.Fn islowerrune "Rune r"
+.Ft int
+.Fn isspacerune "Rune r"
+.Ft int
+.Fn istitlerune "Rune r"
+.Ft int
+.Fn isupperrune "Rune r"
+.Ft int
+.Fn isdigitrune "Rune r"
+.Sh DESCRIPTION
+These functions classify Unicode runes according to their properties defined in the Unicode standard, analogously to
+.Xr ctype 3
+for ASCII.
+.Sh CONFORMING TO
+These functions are compatible with those defined in the Plan 9 C library, but are generated automatically from the Unicode $UNICODE Character Database, so classifications may differ.
+.Sh SEE ALSO
+.Xr ctype 3 ,
+.Xr rune 3 ,
+The Unicode $UNICODE Standard
diff --git a/rune.3 b/rune.3
@@ -0,0 +1,157 @@
+.Dd $Mdocdate$
+.Dt RUNE 3
+.Os
+.Sh NAME
+.Nm runetochar, chartorune, charntorune, runelen, runenlen, fullrune, utfecpy, utflen, utfnlen, utfrune, utfrrune, utfutf
+.Nd UTF-8 rune conversion
+.Sh SYNOPSIS
+.In utf.h
+.Ft int
+.Fn runetochar "char *s" "Rune *p"
+.Ft int
+.Fn chartorune "Rune *p" "char *s"
+.Ft int
+.Fn charntorune "Rune *p" "char *s" "size_t len"
+.Ft int
+.Fn runelen "Rune r"
+.Ft size_t
+.Fn runenlen "Rune *p" "size_t len"
+.Ft int
+.Fn fullrune "char *s" "size_t len"
+.Ft char *
+.Fn utfecpy "char *to" "char *end" "char *from"
+.Ft size_t
+.Fn utflen "char *s"
+.Ft size_t
+.Fn utfnlen "char *s" "size_t len"
+.Ft char *
+.Fn utfrune "char *s" "Rune r"
+.Ft char *
+.Fn utfrrune "char *s" "Rune r"
+.Ft char *
+.Fn utfutf "char *s" "char *t"
+.Sh DESCRIPTION
+The following functions convert to and from a UTF-8 byte stream and Unicode runes.
+.Pp
+.Fn runetochar
+converts one rune at
+.Fa p
+to at most
+.Dv UTFmax
+bytes starting at
+.Fa s ,
+and returns the number of bytes copied.
+.Dv UTFmax
+is the maximum number of bytes required to represent a rune.
+If the rune is illegal,
+.Fn runetochar
+will return 0.
+.Pp
+.Fn chartorune
+converts at most
+.Dv UTFmax
+bytes starting at
+.Fa s
+to one rune at
+.Fa p ,
+and returns the number of bytes copied.
+If the input is invalid UTF-8,
+.Fn chartorune
+will convert the sequence to
+.Dv Runeerror
+(0xFFFD) and return the number of bytes in the invalid sequence.
+.Pp
+.Fn charntorune
+converts at most
+.Fa len
+bytes starting at
+.Fa s
+to one rune at
+.Fa p ,
+and returns the number of bytes copied.
+If the next sequence is longer than
+.Fa len
+bytes,
+.Fn charntorune
+will return 0.
+.Pp
+.Fn runelen
+returns the number of bytes required to convert the rune
+.Fa r
+into UTF-8.
+If the rune is illegal,
+.Fn runelen
+will return 0.
+.Pp
+.Fn runenlen
+returns the number of bytes required to convert the
+.Fa len
+runes pointed to by
+.Fa p
+into UTF-8.
+.Pp
+.Fn fullrune
+returns 1 if the first
+.Fa len
+bytes of the UTF-8 string
+.Fa s
+form a complete rune, and 0 otherwise.
+.Pp
+The following functions are analogous to the corresponding string routines, with `utf' substituted for `str', and `rune' for `chr'.
+.Pp
+.Fn utfecpy
+copies UTF-8 sequences until a nul byte has been copied, but writes no sequences beyond
+.Fa end .
+If any sequences are copied,
+.Fa to
+is terminated with a nul byte and a pointer to that byte is returned.
+Otherwise the original
+.Fa to
+is returned.
+.Pp
+.Fn utflen
+returns the number of runes represented by the UTF-8 string
+.Fa s .
+.Pp
+.Fn utfnlen
+returns the number of runes represented by the first
+.Fa len
+bytes of the UTF-8 string
+.Fa s .
+If the final sequence is incomplete it will not be counted.
+.Pp
+.Fn utfrune
+.Pq Fn utfrrune
+returns a pointer to the first
+.Pq last
+occurrence of the rune
+.Fa r
+in the UTF-8 string
+.Fa s ,
+or
+.Dv NULL
+if there is none.
+The terminating nul byte is considered a part of the string
+.Fa s .
+.Pp
+.Fn utfutf
+returns a pointer to the first occurrence of the UTF-8 string
+.Fa t
+as a UTF-8 substring of
+.Fa s ,
+or
+.Dv NULL
+if there is none.
+If
+.Fa t
+is the null string,
+.Fn utfutf
+returns
+.Fa s .
+.Sh CONFORMING TO
+These functions are compatible with those defined in the Plan 9 C library, with the exception of
+.Fn charntorune ,
+which is an extension.
+However, these functions are much stricter about UTF-8 validity than their Plan 9 counterparts.
+.Sh SEE ALSO
+.Xr isalpharune 3
diff --git a/rune.c b/rune.c
@@ -18,13 +18,6 @@
                 || ((x) >= 0xD800 && (x) <= 0xDFFF) \
                 || ((x) >= 0xFDD0 && (x) <= 0xFDEF))
 
-/*
- * runetochar copies one rune at p to at most UTFmax bytes starting at s and
- * returns the number of bytes copied. UTFmax is the maximum number of bytes
- * required to represent a legal rune.
- *
- * If the rune is illegal, runetochar will return 0.
- */
 int
 runetochar(char *s, const Rune *p)
 {
@@ -54,27 +47,12 @@ runetochar(char *s, const Rune *p)
 	}
 }
 
-/*
- * chartorune copies at most UTFmax bytes starting at s to one rune at p and
- * returns the number of bytes copied. If the input is not valid UTF-8,
- * chartorune will convert the sequence to Runeerror (0xFFFD), returning the
- * number of bytes in the invalid sequence.
- */
 int
 chartorune(Rune *p, const char *s)
 {
 	return charntorune(p, s, UTFmax);
 }
 
-/* 
- * charntorune copies at most len bytes starting at s to one rune at p and
- * returns the number of bytes copied. If the input is not valid UTF-8,
- * charntorune will convert the sequence to Runeerror (0xFFFD), returning the
- * number of bytes in the invalid sequence.
- *
- * If a potentially valid sequence is cut off by the len limit, charntorune will
- * return 0.
- */
 int
 charntorune(Rune *p, const char *s, size_t len)
 {
@@ -116,29 +94,21 @@ charntorune(Rune *p, const char *s, size_t len)
 	return n;
 }
 
-/*
- * runelen returns the number of bytes required to convert r into UTF-8. If the
- * rune is illegal, runelen will return 0.
- */
 int
 runelen(Rune r)
 {
-	if(BADRUNE(r))
-		return 0; /* error */
-	else if(r <= 0x7F)
+	if(r <= 0x7F)
 		return 1;
 	else if(r <= 0x07FF)
 		return 2;
+	else if(BADRUNE(r))
+		return 0; /* error */
 	else if(r <= 0xFFFF)
 		return 3;
 	else
 		return 4;
 }
 
-/*
- * runelen returns the number of bytes required to convert the rune-string of
- * length len pointed to by p into UTF-8.
- */
 size_t
 runenlen(const Rune *p, size_t len)
 {
@@ -149,10 +119,6 @@ runenlen(const Rune *p, size_t len)
 	return n;
 }
 
-/*
- * fullrune returns 1 if the string s of length len is long enough to be
- * decoded by chartorune, and 0 otherwise.
- */
 int
 fullrune(const char *s, size_t len)
 {
@@ -161,12 +127,6 @@ fullrune(const char *s, size_t len)
 	return charntorune(&r, s, len) > 0;
 }
 
-/*
- * utfecpy copies UTF-8 sequences until a null sequence has been copied, but
- * writes no sequences beyond end. If any sequences are copied, the to string is
- * terminated by a null sequence, and a pointer to that sequence is returned.
- * Otherwise, the original to string is returned.
- */
 char *
 utfecpy(char *to, char *end, const char *from)
 {
@@ -183,10 +143,6 @@ utfecpy(char *to, char *end, const char *from)
 	return &to[i];
 }
 
-/*
- * utflen returns the number of runes that are represented by the UTF-8 string
- * s.
- */
 size_t
 utflen(const char *s)
 {
@@ -199,12 +155,6 @@ utflen(const char *s)
 	return i;
 }
 
-/*
- * utfnlen returns the number of runes that are represented by the UTF-8 string
- * s of length len. If the last few bytes contain an incompletely coded rune,
- * utfnlen will not count them; in this way it differs from utflen, which
- * includes every byte of the string.
- */
 size_t
 utfnlen(const char *s, size_t len)
 {
@@ -218,11 +168,6 @@ utfnlen(const char *s, size_t len)
 	return i;
 }
 
-/*
- * utfrune returns a pointer to the first ocurrence of r in the UTF-8 string s,
- * or NULL if r does not occur in s. The null byte terminating a string is
- * considered to be part of the string s.
- */
 char *
 utfrune(const char *s, Rune r)
 {
@@ -251,11 +196,6 @@ utfrune(const char *s, Rune r)
 	return NULL;
 }
 
-/*
- * utfrrune returns a pointer to the last ocurrence of r in the UTF-8 string s,
- * or NULL if r does not occur in s. The null byte terminating a string is
- * considered to be part of the string s.
- */
 char *
 utfrrune(const char *s, Rune r)
 {
@@ -274,11 +214,6 @@ utfrrune(const char *s, Rune r)
 	return (char *)p;
 }
 
-/*
- * utfutf returns a pointer to the first occurrence of the UTF-8 string t as a
- * UTF-8 substring of s, or NULL if there is none. If t is the null string,
- * utfutf returns s.
- */
 char *
 utfutf(const char *s, const char *t)
 {
diff --git a/utf.h b/utf.h
@@ -27,10 +27,10 @@ char *utfrrune(const char *, Rune);
 char *utfutf(const char *, const char *);
 
 int isalpharune(Rune);
-int isspacerune(Rune);
-int isupperrune(Rune);
 int islowerrune(Rune);
+int isspacerune(Rune);
 int istitlerune(Rune);
+int isupperrune(Rune);
 int isdigitrune(Rune);
 
 #endif

	libutf UTF-8 library
	git clone git://git.suckless.org/libutf
	Log \| Files \| Refs \| README \| LICENSE

Makefile	\|	22	+++++++++++++---------
config.mk	\|	2	++
isalpharune.3	\|	29	+++++++++++++++++++++++++++++
rune.3	\|	157	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
rune.c	\|	71	+++--------------------------------------------------------------------
utf.h	\|	4	++--