summaryrefslogtreecommitdiff
path: root/src/xutf8/utf8Utils.c
diff options
context:
space:
mode:
authorMatthias Melcher <fltk@matthiasm.com>2008-09-10 23:56:49 +0000
committerMatthias Melcher <fltk@matthiasm.com>2008-09-10 23:56:49 +0000
commitb6bde2e4569aa617c8a6af64947c688c624ed7f8 (patch)
tree010d15843eb7d4faf7cd1b0cd44d5b9c00462a83 /src/xutf8/utf8Utils.c
parentdfb50e85292687561927610e689eb5ab30d0ba26 (diff)
Merging the UTF8 patch, consisting of O'ksi'd s original 1.1.6 patch and additions by Ian. PLEASE BE AWARE that the patch in its current incarnation is a regression in many aspects and further work is required before we can announce Unicode support.
git-svn-id: file:///fltk/svn/fltk/branches/branch-1.3@6212 ea41ed52-d2ee-0310-a9c1-e6b18d33e121
Diffstat (limited to 'src/xutf8/utf8Utils.c')
-rw-r--r--src/xutf8/utf8Utils.c259
1 files changed, 259 insertions, 0 deletions
diff --git a/src/xutf8/utf8Utils.c b/src/xutf8/utf8Utils.c
new file mode 100644
index 000000000..50482baca
--- /dev/null
+++ b/src/xutf8/utf8Utils.c
@@ -0,0 +1,259 @@
+/*
+ * "$Id: $"
+ *
+ * Unicode to UTF-8 conversion functions.
+ *
+ * Copyright (c) 2000,2001 by O'ksi'D.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * Neither the name of O'ksi'D nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Author: Jean-Marc Lienher ( http://oksid.ch )
+ */
+
+#if !defined(WIN32) && !defined(__APPLE__)
+
+#include "../../FL/Xutf8.h"
+
+/*** NOTE : all functions are LIMITED to 24 bits Unicode values !!! ***/
+
+/*
+ * Converts the first char of the UTF-8 string to an Unicode value
+ * Returns the byte length of the converted UTF-8 char
+ * Returns -1 if the UTF-8 string is not valid
+ */
+int
+XConvertUtf8ToUcs(
+ const unsigned char *buf,
+ int len,
+ unsigned int *ucs)
+{
+ if (buf[0] & 0x80) {
+ if (buf[0] & 0x40) {
+ if (buf[0] & 0x20) {
+ if (buf[0] & 0x10) {
+ if (buf[0] & 0x08) {
+ if (buf[0] & 0x04) {
+ if (buf[0] & 0x02) {
+ /* bad UTF-8 string */
+ } else {
+ /* 0x04000000 - 0x7FFFFFFF */
+ }
+ } else if (len > 4
+ && (buf[1] & 0xC0) == 0x80
+ && (buf[2] & 0xC0) == 0x80
+ && (buf[3] & 0xC0) == 0x80
+ && (buf[4] & 0xC0) == 0x80)
+ {
+ /* 0x00200000 - 0x03FFFFFF */
+ *ucs = ((buf[0] & ~0xF8) << 24) +
+ ((buf[1] & ~0x80) << 18) +
+ ((buf[2] & ~0x80) << 12) +
+ ((buf[3] & ~0x80) << 6) +
+ (buf[4] & ~0x80);
+ if (*ucs > 0x001FFFFF && *ucs < 0x01000000) return 5;
+ }
+ } else if (len > 3
+ && (buf[1] & 0xC0) == 0x80
+ && (buf[2] & 0xC0) == 0x80
+ && (buf[3] & 0xC0) == 0x80)
+ {
+ /* 0x00010000 - 0x001FFFFF */
+ *ucs = ((buf[0] & ~0xF0) << 18) +
+ ((buf[1] & ~0x80) << 12) +
+ ((buf[2] & ~0x80) << 6) +
+ (buf[3] & ~0x80);
+ if (*ucs > 0x0000FFFF) return 4;
+ }
+ } else if (len > 2 &&
+ (buf[1] & 0xC0) == 0x80 &&
+ (buf[2] & 0xC0) == 0x80)
+ {
+ /* 0x00000800 - 0x0000FFFF */
+ *ucs = ((buf[0] & ~0xE0) << 12) +
+ ((buf[1] & ~0x80) << 6) +
+ (buf[2] & ~0x80);
+ if (*ucs > 0x000007FF) return 3;
+ }
+ } else if (len > 1 && (buf[1] & 0xC0) == 0x80) {
+ /* 0x00000080 - 0x000007FF */
+ *ucs = ((buf[0] & ~0xC0) << 6) +
+ (buf[1] & ~0x80);
+ if (*ucs > 0x0000007F) return 2;
+ }
+ }
+ } else if (len > 0) {
+ /* 0x00000000 - 0x0000007F */
+ *ucs = buf[0];
+ return 1;
+ }
+
+ *ucs = (unsigned int) '?'; /* bad utf-8 string */
+ return -1;
+}
+
+/*
+ * Converts an Unicode value to an UTF-8 string
+ * NOTE : the buffer (buf) must be at least 5 bytes long !!!
+ */
+int
+XConvertUcsToUtf8(
+ unsigned int ucs,
+ char *buf)
+{
+ if (ucs < 0x000080) {
+ buf[0] = ucs;
+ return 1;
+ } else if (ucs < 0x000800) {
+ buf[0] = 0xC0 | (ucs >> 6);
+ buf[1] = 0x80 | (ucs & 0x3F);
+ return 2;
+ } else if (ucs < 0x010000) {
+ buf[0] = 0xE0 | (ucs >> 12);
+ buf[1] = 0x80 | ((ucs >> 6) & 0x3F);
+ buf[2] = 0x80 | (ucs & 0x3F);
+ return 3;
+ } else if (ucs < 0x00200000) {
+ buf[0] = 0xF0 | (ucs >> 18);
+ buf[1] = 0x80 | ((ucs >> 12) & 0x3F);
+ buf[2] = 0x80 | ((ucs >> 6) & 0x3F);
+ buf[3] = 0x80 | (ucs & 0x3F);
+ return 4;
+ } else if (ucs < 0x01000000) {
+ buf[0] = 0xF8 | (ucs >> 24);
+ buf[1] = 0x80 | ((ucs >> 18) & 0x3F);
+ buf[2] = 0x80 | ((ucs >> 12) & 0x3F);
+ buf[3] = 0x80 | ((ucs >> 6) & 0x3F);
+ buf[4] = 0x80 | (ucs & 0x3F);
+ return 5;
+ }
+ buf[0] = '?';
+ return -1;
+}
+
+/*
+ * returns the byte length of the first UTF-8 char
+ * (returns -1 if not valid)
+ */
+int
+XUtf8CharByteLen(
+ const unsigned char *buf,
+ int len)
+{
+ unsigned int ucs;
+ return XConvertUtf8ToUcs(buf, len, &ucs);
+}
+
+/*
+ * returns the quantity of Unicode chars in the UTF-8 string
+ */
+int
+XCountUtf8Char(
+ const unsigned char *buf,
+ int len)
+{
+ int i = 0;
+ int nbc = 0;
+ while (i < len) {
+ int cl = XUtf8CharByteLen(buf + i, len - i);
+ if (cl < 1) cl = 1;
+ nbc++;
+ i += cl;
+ }
+ return nbc;
+}
+
+/*
+ * Same as XConvertUtf8ToUcs but no sanity check is done.
+ */
+int
+XFastConvertUtf8ToUcs(
+ const unsigned char *buf,
+ int len,
+ unsigned int *ucs)
+{
+ if (buf[0] & 0x80) {
+ if (buf[0] & 0x40) {
+ if (buf[0] & 0x20) {
+ if (buf[0] & 0x10) {
+ if (buf[0] & 0x08) {
+ if (buf[0] & 0x04) {
+ if (buf[0] & 0x02) {
+ /* bad UTF-8 string */
+ } else {
+ /* 0x04000000 - 0x7FFFFFFF */
+ }
+ } else if (len > 4) {
+ /* 0x00200000 - 0x03FFFFFF */
+ *ucs = ((buf[0] & ~0xF8) << 24) +
+ ((buf[1] & ~0x80) << 18) +
+ ((buf[2] & ~0x80) << 12) +
+ ((buf[3] & ~0x80) << 6) +
+ (buf[4] & ~0x80);
+ return 5;
+ }
+ } else if (len > 3) {
+ /* 0x00010000 - 0x001FFFFF */
+ *ucs = ((buf[0] & ~0xF0) << 18) +
+ ((buf[1] & ~0x80) << 12) +
+ ((buf[2] & ~0x80) << 6) +
+ (buf[3] & ~0x80);
+ return 4;
+ }
+ } else if (len > 2) {
+ /* 0x00000800 - 0x0000FFFF */
+ *ucs = ((buf[0] & ~0xE0) << 12) +
+ ((buf[1] & ~0x80) << 6) +
+ (buf[2] & ~0x80);
+ return 3;
+ }
+ } else if (len > 1) {
+ /* 0x00000080 - 0x000007FF */
+ *ucs = ((buf[0] & ~0xC0) << 6) +
+ (buf[1] & ~0x80);
+ return 2;
+ }
+ }
+ } else if (len > 0) {
+ /* 0x00000000 - 0x0000007F */
+ *ucs = buf[0];
+ return 1;
+ }
+
+ *ucs = (unsigned int) '?'; /* bad utf-8 string */
+ return -1;
+}
+
+#endif // X11 only
+
+/*
+ * End of "$Id: $".
+ */
+