diff options
Diffstat (limited to 'src/xutf8/utf8Utils.c')
| -rw-r--r-- | src/xutf8/utf8Utils.c | 227 |
1 files changed, 111 insertions, 116 deletions
diff --git a/src/xutf8/utf8Utils.c b/src/xutf8/utf8Utils.c index ad2bc83ff..846846c93 100644 --- a/src/xutf8/utf8Utils.c +++ b/src/xutf8/utf8Utils.c @@ -1,5 +1,4 @@ -/* "$Id$" - * +/* * Author: Jean-Marc Lienher ( http://oksid.ch ) * Copyright 2000-2003 by O'ksi'D. * @@ -7,11 +6,11 @@ * the file "COPYING" which should have been included with this file. If this * file is missing or damaged, see the license at: * - * http://www.fltk.org/COPYING.php + * https://www.fltk.org/COPYING.php * - * Please report all bugs and problems on the following page: + * Please see the following page on how to report bugs and issues: * - * http://www.fltk.org/str.php + * https://www.fltk.org/bugs.php */ /* @@ -24,84 +23,84 @@ /*** NOTE : all functions are LIMITED to 24 bits Unicode values !!! ***/ -/* - * Converts the first char of the UTF-8 string to an Unicode value - * Returns the byte length of the converted UTF-8 char - * Returns -1 if the UTF-8 string is not valid +/* + * Converts the first char of the UTF-8 string to an Unicode value + * Returns the byte length of the converted UTF-8 char + * Returns -1 if the UTF-8 string is not valid */ int XConvertUtf8ToUcs(const unsigned char *buf, - int len, - unsigned int *ucs) { + int len, + unsigned int *ucs) { if (buf[0] & 0x80) { if (buf[0] & 0x40) { if (buf[0] & 0x20) { - if (buf[0] & 0x10) { - if (buf[0] & 0x08) { - if (buf[0] & 0x04) { - if (buf[0] & 0x02) { - /* bad UTF-8 string */ - } else { - /* 0x04000000 - 0x7FFFFFFF */ - } - } else if (len > 4 - && (buf[1] & 0xC0) == 0x80 - && (buf[2] & 0xC0) == 0x80 - && (buf[3] & 0xC0) == 0x80 - && (buf[4] & 0xC0) == 0x80) { - /* 0x00200000 - 0x03FFFFFF */ - *ucs = ((buf[0] & ~0xF8) << 24) + - ((buf[1] & ~0x80) << 18) + - ((buf[2] & ~0x80) << 12) + - ((buf[3] & ~0x80) << 6) + - (buf[4] & ~0x80); - if (*ucs > 0x001FFFFF && *ucs < 0x01000000) return 5; - } - } else if (len > 3 - && (buf[1] & 0xC0) == 0x80 - && (buf[2] & 0xC0) == 0x80 - && (buf[3] & 0xC0) == 0x80) { - /* 0x00010000 - 0x001FFFFF */ - *ucs = ((buf[0] & ~0xF0) << 18) + - ((buf[1] & ~0x80) << 12) + - ((buf[2] & ~0x80) << 6) + - (buf[3] & ~0x80); - if (*ucs > 0x0000FFFF) return 4; - } - } else if (len > 2 - && (buf[1] & 0xC0) == 0x80 - && (buf[2] & 0xC0) == 0x80) { - /* 0x00000800 - 0x0000FFFF */ - *ucs = ((buf[0] & ~0xE0) << 12) + - ((buf[1] & ~0x80) << 6) + - (buf[2] & ~0x80); - if (*ucs > 0x000007FF) return 3; - } + if (buf[0] & 0x10) { + if (buf[0] & 0x08) { + if (buf[0] & 0x04) { + if (buf[0] & 0x02) { + /* bad UTF-8 string */ + } else { + /* 0x04000000 - 0x7FFFFFFF */ + } + } else if (len > 4 + && (buf[1] & 0xC0) == 0x80 + && (buf[2] & 0xC0) == 0x80 + && (buf[3] & 0xC0) == 0x80 + && (buf[4] & 0xC0) == 0x80) { + /* 0x00200000 - 0x03FFFFFF */ + *ucs = ((buf[0] & ~0xF8) << 24) + + ((buf[1] & ~0x80) << 18) + + ((buf[2] & ~0x80) << 12) + + ((buf[3] & ~0x80) << 6) + + (buf[4] & ~0x80); + if (*ucs > 0x001FFFFF && *ucs < 0x01000000) return 5; + } + } else if (len > 3 + && (buf[1] & 0xC0) == 0x80 + && (buf[2] & 0xC0) == 0x80 + && (buf[3] & 0xC0) == 0x80) { + /* 0x00010000 - 0x001FFFFF */ + *ucs = ((buf[0] & ~0xF0) << 18) + + ((buf[1] & ~0x80) << 12) + + ((buf[2] & ~0x80) << 6) + + (buf[3] & ~0x80); + if (*ucs > 0x0000FFFF) return 4; + } + } else if (len > 2 + && (buf[1] & 0xC0) == 0x80 + && (buf[2] & 0xC0) == 0x80) { + /* 0x00000800 - 0x0000FFFF */ + *ucs = ((buf[0] & ~0xE0) << 12) + + ((buf[1] & ~0x80) << 6) + + (buf[2] & ~0x80); + if (*ucs > 0x000007FF) return 3; + } } else if (len > 1 && (buf[1] & 0xC0) == 0x80) { - /* 0x00000080 - 0x000007FF */ - *ucs = ((buf[0] & ~0xC0) << 6) + - (buf[1] & ~0x80); - if (*ucs > 0x0000007F) return 2; + /* 0x00000080 - 0x000007FF */ + *ucs = ((buf[0] & ~0xC0) << 6) + + (buf[1] & ~0x80); + if (*ucs > 0x0000007F) return 2; } } } else if (len > 0) { /* 0x00000000 - 0x0000007F */ *ucs = buf[0]; return 1; - } + } *ucs = (unsigned int) '?'; /* bad UTF-8 string */ return -1; } -/* - * Converts an Unicode value to an UTF-8 string - * NOTE : the buffer (buf) must be at least 5 bytes long !!! +/* + * Converts an Unicode value to an UTF-8 string + * NOTE : the buffer (buf) must be at least 5 bytes long !!! */ -int -XConvertUcsToUtf8(unsigned int ucs, - char *buf) { +int +XConvertUcsToUtf8(unsigned int ucs, + char *buf) { if (ucs < 0x000080) { buf[0] = ucs; @@ -110,7 +109,7 @@ XConvertUcsToUtf8(unsigned int ucs, buf[0] = 0xC0 | (ucs >> 6); buf[1] = 0x80 | (ucs & 0x3F); return 2; - } else if (ucs < 0x010000) { + } else if (ucs < 0x010000) { buf[0] = 0xE0 | (ucs >> 12); buf[1] = 0x80 | ((ucs >> 6) & 0x3F); buf[2] = 0x80 | (ucs & 0x3F); @@ -133,23 +132,23 @@ XConvertUcsToUtf8(unsigned int ucs, return -1; } -/* - * returns the byte length of the first UTF-8 char - * (returns -1 if not valid) +/* + * returns the byte length of the first UTF-8 char + * (returns -1 if not valid) */ int XUtf8CharByteLen(const unsigned char *buf, - int len) { + int len) { unsigned int ucs; return XConvertUtf8ToUcs(buf, len, &ucs); } /* - * returns the quantity of Unicode chars in the UTF-8 string + * returns the quantity of Unicode chars in the UTF-8 string */ -int -XCountUtf8Char(const unsigned char *buf, - int len) { +int +XCountUtf8Char(const unsigned char *buf, + int len) { int i = 0; int nbc = 0; @@ -162,66 +161,62 @@ XCountUtf8Char(const unsigned char *buf, return nbc; } -/* +/* * Same as XConvertUtf8ToUcs but no sanity check is done. */ int XFastConvertUtf8ToUcs(const unsigned char *buf, - int len, - unsigned int *ucs) { + int len, + unsigned int *ucs) { if (buf[0] & 0x80) { if (buf[0] & 0x40) { if (buf[0] & 0x20) { - if (buf[0] & 0x10) { - if (buf[0] & 0x08) { - if (buf[0] & 0x04) { - if (buf[0] & 0x02) { - /* bad UTF-8 string */ - } else { - /* 0x04000000 - 0x7FFFFFFF */ - } - } else if (len > 4) { - /* 0x00200000 - 0x03FFFFFF */ - *ucs = ((buf[0] & ~0xF8) << 24) + - ((buf[1] & ~0x80) << 18) + - ((buf[2] & ~0x80) << 12) + - ((buf[3] & ~0x80) << 6) + - (buf[4] & ~0x80); - return 5; - } - } else if (len > 3) { - /* 0x00010000 - 0x001FFFFF */ - *ucs = ((buf[0] & ~0xF0) << 18) + - ((buf[1] & ~0x80) << 12) + - ((buf[2] & ~0x80) << 6) + - (buf[3] & ~0x80); - return 4; - } - } else if (len > 2) { - /* 0x00000800 - 0x0000FFFF */ - *ucs = ((buf[0] & ~0xE0) << 12) + - ((buf[1] & ~0x80) << 6) + - (buf[2] & ~0x80); - return 3; - } + if (buf[0] & 0x10) { + if (buf[0] & 0x08) { + if (buf[0] & 0x04) { + if (buf[0] & 0x02) { + /* bad UTF-8 string */ + } else { + /* 0x04000000 - 0x7FFFFFFF */ + } + } else if (len > 4) { + /* 0x00200000 - 0x03FFFFFF */ + *ucs = ((buf[0] & ~0xF8) << 24) + + ((buf[1] & ~0x80) << 18) + + ((buf[2] & ~0x80) << 12) + + ((buf[3] & ~0x80) << 6) + + (buf[4] & ~0x80); + return 5; + } + } else if (len > 3) { + /* 0x00010000 - 0x001FFFFF */ + *ucs = ((buf[0] & ~0xF0) << 18) + + ((buf[1] & ~0x80) << 12) + + ((buf[2] & ~0x80) << 6) + + (buf[3] & ~0x80); + return 4; + } + } else if (len > 2) { + /* 0x00000800 - 0x0000FFFF */ + *ucs = ((buf[0] & ~0xE0) << 12) + + ((buf[1] & ~0x80) << 6) + + (buf[2] & ~0x80); + return 3; + } } else if (len > 1) { - /* 0x00000080 - 0x000007FF */ - *ucs = ((buf[0] & ~0xC0) << 6) + - (buf[1] & ~0x80); - return 2; + /* 0x00000080 - 0x000007FF */ + *ucs = ((buf[0] & ~0xC0) << 6) + + (buf[1] & ~0x80); + return 2; } } } else if (len > 0) { /* 0x00000000 - 0x0000007F */ *ucs = buf[0]; return 1; - } + } *ucs = (unsigned int) '?'; /* bad UTF-8 string */ return -1; } - -/* - * End of "$Id$". - */ |
