diff options
| author | Greg Ercolano <erco@seriss.com> | 2009-03-18 04:47:01 +0000 |
|---|---|---|
| committer | Greg Ercolano <erco@seriss.com> | 2009-03-18 04:47:01 +0000 |
| commit | 6cbde8909b551f9561f0a06c210fb9fc059c4485 (patch) | |
| tree | 3d24e8bf8197cd62ccc5662523d0e88428d2e3a5 /src/xutf8/utf8Utils.c | |
| parent | 5d601837b99ceb6285718034959bf38ca4a749cb (diff) | |
xutf8 files code conformance:
o C files containing C++ "//" comments -> C-style "/* */" comments
o Converted unintended doxygen-style comments to regular C comments
o FLTK brace/indent coding standard conformance
o Tested on Linux + SGI
o Avoided mods to xutf8/lcUniConv [libiconv/FSF code]
  to avoid unwanted diffs with future updates of that lib,
  as per Fabien's fltk.dev request 03/14/09.
  (Those files are already compliant anyway.)
git-svn-id: file:///fltk/svn/fltk/branches/branch-1.3@6698 ea41ed52-d2ee-0310-a9c1-e6b18d33e121
Diffstat (limited to 'src/xutf8/utf8Utils.c')
| -rw-r--r-- | src/xutf8/utf8Utils.c | 333 |
1 file changed, 162 insertions, 171 deletions
diff --git a/src/xutf8/utf8Utils.c b/src/xutf8/utf8Utils.c index 9ad8e4d81..6c177a719 100644 --- a/src/xutf8/utf8Utils.c +++ b/src/xutf8/utf8Utils.c @@ -39,73 +39,69 @@ * Returns -1 if the UTF-8 string is not valid */ int -XConvertUtf8ToUcs( - const unsigned char *buf, - int len, - unsigned int *ucs) -{ - if (buf[0] & 0x80) { - if (buf[0] & 0x40) { - if (buf[0] & 0x20) { - if (buf[0] & 0x10) { - if (buf[0] & 0x08) { - if (buf[0] & 0x04) { - if (buf[0] & 0x02) { - /* bad UTF-8 string */ - } else { - /* 0x04000000 - 0x7FFFFFFF */ - } - } else if (len > 4 - && (buf[1] & 0xC0) == 0x80 - && (buf[2] & 0xC0) == 0x80 - && (buf[3] & 0xC0) == 0x80 - && (buf[4] & 0xC0) == 0x80) - { - /* 0x00200000 - 0x03FFFFFF */ - *ucs = ((buf[0] & ~0xF8) << 24) + - ((buf[1] & ~0x80) << 18) + - ((buf[2] & ~0x80) << 12) + - ((buf[3] & ~0x80) << 6) + - (buf[4] & ~0x80); - if (*ucs > 0x001FFFFF && *ucs < 0x01000000) return 5; - } - } else if (len > 3 - && (buf[1] & 0xC0) == 0x80 - && (buf[2] & 0xC0) == 0x80 - && (buf[3] & 0xC0) == 0x80) - { - /* 0x00010000 - 0x001FFFFF */ - *ucs = ((buf[0] & ~0xF0) << 18) + - ((buf[1] & ~0x80) << 12) + - ((buf[2] & ~0x80) << 6) + - (buf[3] & ~0x80); - if (*ucs > 0x0000FFFF) return 4; - } - } else if (len > 2 && - (buf[1] & 0xC0) == 0x80 && - (buf[2] & 0xC0) == 0x80) - { - /* 0x00000800 - 0x0000FFFF */ - *ucs = ((buf[0] & ~0xE0) << 12) + - ((buf[1] & ~0x80) << 6) + - (buf[2] & ~0x80); - if (*ucs > 0x000007FF) return 3; - } - } else if (len > 1 && (buf[1] & 0xC0) == 0x80) { - /* 0x00000080 - 0x000007FF */ - *ucs = ((buf[0] & ~0xC0) << 6) + - (buf[1] & ~0x80); - if (*ucs > 0x0000007F) return 2; +XConvertUtf8ToUcs(const unsigned char *buf, + int len, + unsigned int *ucs) { + + if (buf[0] & 0x80) { + if (buf[0] & 0x40) { + if (buf[0] & 0x20) { + if (buf[0] & 0x10) { + if (buf[0] & 0x08) { + if (buf[0] & 0x04) { + if (buf[0] & 0x02) { + /* bad UTF-8 string */ + } else { + /* 0x04000000 - 0x7FFFFFFF */ + } + } else if (len > 4 + && (buf[1] & 0xC0) == 0x80 + && (buf[2] 
& 0xC0) == 0x80 + && (buf[3] & 0xC0) == 0x80 + && (buf[4] & 0xC0) == 0x80) { + /* 0x00200000 - 0x03FFFFFF */ + *ucs = ((buf[0] & ~0xF8) << 24) + + ((buf[1] & ~0x80) << 18) + + ((buf[2] & ~0x80) << 12) + + ((buf[3] & ~0x80) << 6) + + (buf[4] & ~0x80); + if (*ucs > 0x001FFFFF && *ucs < 0x01000000) return 5; + } + } else if (len > 3 + && (buf[1] & 0xC0) == 0x80 + && (buf[2] & 0xC0) == 0x80 + && (buf[3] & 0xC0) == 0x80) { + /* 0x00010000 - 0x001FFFFF */ + *ucs = ((buf[0] & ~0xF0) << 18) + + ((buf[1] & ~0x80) << 12) + + ((buf[2] & ~0x80) << 6) + + (buf[3] & ~0x80); + if (*ucs > 0x0000FFFF) return 4; } - } - } else if (len > 0) { - /* 0x00000000 - 0x0000007F */ - *ucs = buf[0]; - return 1; - } - - *ucs = (unsigned int) '?'; /* bad utf-8 string */ - return -1; + } else if (len > 2 + && (buf[1] & 0xC0) == 0x80 + && (buf[2] & 0xC0) == 0x80) { + /* 0x00000800 - 0x0000FFFF */ + *ucs = ((buf[0] & ~0xE0) << 12) + + ((buf[1] & ~0x80) << 6) + + (buf[2] & ~0x80); + if (*ucs > 0x000007FF) return 3; + } + } else if (len > 1 && (buf[1] & 0xC0) == 0x80) { + /* 0x00000080 - 0x000007FF */ + *ucs = ((buf[0] & ~0xC0) << 6) + + (buf[1] & ~0x80); + if (*ucs > 0x0000007F) return 2; + } + } + } else if (len > 0) { + /* 0x00000000 - 0x0000007F */ + *ucs = buf[0]; + return 1; + } + + *ucs = (unsigned int) '?'; /* bad utf-8 string */ + return -1; } /* @@ -113,38 +109,37 @@ XConvertUtf8ToUcs( * NOTE : the buffer (buf) must be at least 5 bytes long !!! 
*/ int -XConvertUcsToUtf8( - unsigned int ucs, - char *buf) -{ - if (ucs < 0x000080) { - buf[0] = ucs; - return 1; - } else if (ucs < 0x000800) { - buf[0] = 0xC0 | (ucs >> 6); - buf[1] = 0x80 | (ucs & 0x3F); - return 2; - } else if (ucs < 0x010000) { - buf[0] = 0xE0 | (ucs >> 12); - buf[1] = 0x80 | ((ucs >> 6) & 0x3F); - buf[2] = 0x80 | (ucs & 0x3F); - return 3; - } else if (ucs < 0x00200000) { - buf[0] = 0xF0 | (ucs >> 18); - buf[1] = 0x80 | ((ucs >> 12) & 0x3F); - buf[2] = 0x80 | ((ucs >> 6) & 0x3F); - buf[3] = 0x80 | (ucs & 0x3F); - return 4; - } else if (ucs < 0x01000000) { - buf[0] = 0xF8 | (ucs >> 24); - buf[1] = 0x80 | ((ucs >> 18) & 0x3F); - buf[2] = 0x80 | ((ucs >> 12) & 0x3F); - buf[3] = 0x80 | ((ucs >> 6) & 0x3F); - buf[4] = 0x80 | (ucs & 0x3F); - return 5; - } - buf[0] = '?'; - return -1; +XConvertUcsToUtf8(unsigned int ucs, + char *buf) { + + if (ucs < 0x000080) { + buf[0] = ucs; + return 1; + } else if (ucs < 0x000800) { + buf[0] = 0xC0 | (ucs >> 6); + buf[1] = 0x80 | (ucs & 0x3F); + return 2; + } else if (ucs < 0x010000) { + buf[0] = 0xE0 | (ucs >> 12); + buf[1] = 0x80 | ((ucs >> 6) & 0x3F); + buf[2] = 0x80 | (ucs & 0x3F); + return 3; + } else if (ucs < 0x00200000) { + buf[0] = 0xF0 | (ucs >> 18); + buf[1] = 0x80 | ((ucs >> 12) & 0x3F); + buf[2] = 0x80 | ((ucs >> 6) & 0x3F); + buf[3] = 0x80 | (ucs & 0x3F); + return 4; + } else if (ucs < 0x01000000) { + buf[0] = 0xF8 | (ucs >> 24); + buf[1] = 0x80 | ((ucs >> 18) & 0x3F); + buf[2] = 0x80 | ((ucs >> 12) & 0x3F); + buf[3] = 0x80 | ((ucs >> 6) & 0x3F); + buf[4] = 0x80 | (ucs & 0x3F); + return 5; + } + buf[0] = '?'; + return -1; } /* @@ -152,92 +147,88 @@ XConvertUcsToUtf8( * (returns -1 if not valid) */ int -XUtf8CharByteLen( - const unsigned char *buf, - int len) -{ - unsigned int ucs; - return XConvertUtf8ToUcs(buf, len, &ucs); +XUtf8CharByteLen(const unsigned char *buf, + int len) { + unsigned int ucs; + return XConvertUtf8ToUcs(buf, len, &ucs); } /* * returns the quantity of Unicode chars in the UTF-8 
string */ int -XCountUtf8Char( - const unsigned char *buf, - int len) -{ - int i = 0; - int nbc = 0; - while (i < len) { - int cl = XUtf8CharByteLen(buf + i, len - i); - if (cl < 1) cl = 1; - nbc++; - i += cl; - } - return nbc; +XCountUtf8Char(const unsigned char *buf, + int len) { + + int i = 0; + int nbc = 0; + while (i < len) { + int cl = XUtf8CharByteLen(buf + i, len - i); + if (cl < 1) cl = 1; + nbc++; + i += cl; + } + return nbc; } /* * Same as XConvertUtf8ToUcs but no sanity check is done. */ int -XFastConvertUtf8ToUcs( - const unsigned char *buf, - int len, - unsigned int *ucs) -{ - if (buf[0] & 0x80) { - if (buf[0] & 0x40) { - if (buf[0] & 0x20) { - if (buf[0] & 0x10) { - if (buf[0] & 0x08) { - if (buf[0] & 0x04) { - if (buf[0] & 0x02) { - /* bad UTF-8 string */ - } else { - /* 0x04000000 - 0x7FFFFFFF */ - } - } else if (len > 4) { - /* 0x00200000 - 0x03FFFFFF */ - *ucs = ((buf[0] & ~0xF8) << 24) + - ((buf[1] & ~0x80) << 18) + - ((buf[2] & ~0x80) << 12) + - ((buf[3] & ~0x80) << 6) + - (buf[4] & ~0x80); - return 5; - } - } else if (len > 3) { - /* 0x00010000 - 0x001FFFFF */ - *ucs = ((buf[0] & ~0xF0) << 18) + - ((buf[1] & ~0x80) << 12) + - ((buf[2] & ~0x80) << 6) + - (buf[3] & ~0x80); - return 4; - } - } else if (len > 2) { - /* 0x00000800 - 0x0000FFFF */ - *ucs = ((buf[0] & ~0xE0) << 12) + - ((buf[1] & ~0x80) << 6) + - (buf[2] & ~0x80); - return 3; - } - } else if (len > 1) { - /* 0x00000080 - 0x000007FF */ - *ucs = ((buf[0] & ~0xC0) << 6) + - (buf[1] & ~0x80); - return 2; +XFastConvertUtf8ToUcs(const unsigned char *buf, + int len, + unsigned int *ucs) { + + if (buf[0] & 0x80) { + if (buf[0] & 0x40) { + if (buf[0] & 0x20) { + if (buf[0] & 0x10) { + if (buf[0] & 0x08) { + if (buf[0] & 0x04) { + if (buf[0] & 0x02) { + /* bad UTF-8 string */ + } else { + /* 0x04000000 - 0x7FFFFFFF */ + } + } else if (len > 4) { + /* 0x00200000 - 0x03FFFFFF */ + *ucs = ((buf[0] & ~0xF8) << 24) + + ((buf[1] & ~0x80) << 18) + + ((buf[2] & ~0x80) << 12) + + ((buf[3] & ~0x80) << 
6) + + (buf[4] & ~0x80); + return 5; + } + } else if (len > 3) { + /* 0x00010000 - 0x001FFFFF */ + *ucs = ((buf[0] & ~0xF0) << 18) + + ((buf[1] & ~0x80) << 12) + + ((buf[2] & ~0x80) << 6) + + (buf[3] & ~0x80); + return 4; } - } - } else if (len > 0) { - /* 0x00000000 - 0x0000007F */ - *ucs = buf[0]; - return 1; - } - - *ucs = (unsigned int) '?'; /* bad utf-8 string */ - return -1; + } else if (len > 2) { + /* 0x00000800 - 0x0000FFFF */ + *ucs = ((buf[0] & ~0xE0) << 12) + + ((buf[1] & ~0x80) << 6) + + (buf[2] & ~0x80); + return 3; + } + } else if (len > 1) { + /* 0x00000080 - 0x000007FF */ + *ucs = ((buf[0] & ~0xC0) << 6) + + (buf[1] & ~0x80); + return 2; + } + } + } else if (len > 0) { + /* 0x00000000 - 0x0000007F */ + *ucs = buf[0]; + return 1; + } + + *ucs = (unsigned int) '?'; /* bad utf-8 string */ + return -1; } #endif /* X11 only */ |
