diff options
| author | Matthias Melcher <fltk@matthiasm.com> | 2008-09-10 23:56:49 +0000 |
|---|---|---|
| committer | Matthias Melcher <fltk@matthiasm.com> | 2008-09-10 23:56:49 +0000 |
| commit | b6bde2e4569aa617c8a6af64947c688c624ed7f8 (patch) | |
| tree | 010d15843eb7d4faf7cd1b0cd44d5b9c00462a83 /src/xutf8/utf8Input.c | |
| parent | dfb50e85292687561927610e689eb5ab30d0ba26 (diff) | |
Merging the UTF8 patch, consisting of O'ksi'd s original 1.1.6 patch and additions by Ian. PLEASE BE AWARE that the patch in its current incarnation is a regression in many aspects and further work is required before we can announce Unicode support.
git-svn-id: file:///fltk/svn/fltk/branches/branch-1.3@6212 ea41ed52-d2ee-0310-a9c1-e6b18d33e121
Diffstat (limited to 'src/xutf8/utf8Input.c')
| -rw-r--r-- | src/xutf8/utf8Input.c | 446 |
1 files changed, 446 insertions, 0 deletions
diff --git a/src/xutf8/utf8Input.c b/src/xutf8/utf8Input.c new file mode 100644 index 000000000..205e7d0fc --- /dev/null +++ b/src/xutf8/utf8Input.c @@ -0,0 +1,446 @@ +/****************************************************************************** + Copyright (c) 2000-2002 by O'ksi'D + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * Neither the name of O'ksi'D nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + +******************************************************************************/ + +#if !defined(WIN32) && !defined(__APPLE__) + +#include "config.h" +#include "../../FL/Xutf8.h" +#include <X11/X.h> +#include <X11/Xlib.h> +#include <X11/Xutil.h> +#include <X11/Intrinsic.h> +#include <stdlib.h> + +#if HAVE_LIBC_ICONV +#include <iconv.h> +#endif +/* + I haven't found much doc on the web about EUC encodings, so I've used + GNU libiconv source code as a reference. + http://clisp.cons.org/~haible/packages-libiconv.html +*/ + +#define RET_ILSEQ -1 +#define RET_TOOFEW(x) (-10 - x) +#define RET_TOOSMALL -2 +#define conv_t void* +#define ucs4_t unsigned int +typedef struct { + unsigned short indx; + unsigned short used; +} Summary16; + +#include "lcUniConv/big5.h" +#include "lcUniConv/gb2312.h" +#include "lcUniConv/jisx0201.h" +#include "lcUniConv/jisx0208.h" +#include "lcUniConv/jisx0212.h" +#include "lcUniConv/ksc5601.h" + +int +XConvertEucTwToUtf8( + char* buffer_return, + int len) +{ + /* FIXME */ +#if HAVE_LIBC_ICONV + iconv_t cd; + int cdl; +#else + int i = 0; +#endif + int l = 0; + char *buf, *b; + + if (len < 1) return 0; + b = buf = (char*) malloc((unsigned)len); + memcpy(buf, buffer_return, (unsigned) len); + +#if HAVE_LIBC_ICONV + l = cdl = len; + cd = iconv_open("EUC-TW", "UTF-8"); + iconv(cd, &b, &len, &buffer_return, &cdl); + iconv_close(cd); + l -= cdl; +#else + while (i < len) { + unsigned int ucs; + unsigned char c; + c = (unsigned char) buf[i]; + if (c < 0x80) { + ucs = c; + i++; + } else if (c >= 0xa1 && c < 0xff && len - i > 1 ) { + unsigned char b[2]; + b[0] = (unsigned char) c - 0x80; + b[1] = (unsigned char) buf[i + 1] - 0x80; + ucs = ' '; i += 2; + } else if (c == 0x8e && len - i > 3) { + unsigned char b[2]; + unsigned char c1 = buf[i + 1]; + unsigned char c2 = buf[i + 2]; + unsigned char c3 = buf[i + 3]; + b[0] = (unsigned char) buf[i + 2] - 0x80; + b[1] = (unsigned char) buf[i + 3] - 0x80; + if (c1 >= 0xa1 && c1 <= 0xb0) { + if (c2 >= 0xa1 && c2 < 0xff && c3 >= 0xa1 && + c3 < 0xff) + { + ucs = ' '; i += 4; + } else { + ucs = '?'; i++; + } + } else { + ucs = '?'; i++; + } + } else { + ucs = '?'; + i++; + } + l += XConvertUcsToUtf8(ucs, buffer_return + l); + } +#endif + free(buf); + return l; +} + +int +XConvertEucKrToUtf8( + char* buffer_return, + int len) +{ + int i = 0, l = 0; + char *buf; + + if (len < 1) return 0; + + buf = (char*) malloc((unsigned)len); + memcpy(buf, buffer_return, (unsigned)len); + + while (i < len) { + unsigned int ucs; + unsigned char c, c1; + c = (unsigned char) buf[i]; + if (c < 0x80) { + ucs = c; + i++; + } else if (c >= 0xA1 && c < 0xFF && len - i > 1) { + c1 = (unsigned char) buf[i + 1]; + if (c1 >= 0xa1 && c1 < 0xff) { + unsigned char b[2]; + b[0] = c - 0x80; + b[1] = c1 - 0x80; + if (ksc5601_mbtowc(NULL, &ucs, b, 2) < 1) { + ucs = '?'; + } + } else { + ucs = '?'; + } + i += 2; + } else { + ucs = '?'; + i++; + } + l += XConvertUcsToUtf8(ucs, buffer_return + l); + } + free(buf); + return l; +} + +int +XConvertBig5ToUtf8( + char* buffer_return, + int len) +{ + int i = 0, l = 0; + char *buf; + + if (len < 1) return 0; + buf = (char*) malloc((unsigned)len); + memcpy(buf, buffer_return, (unsigned)len); + + if (len == 1) { + l += XConvertUcsToUtf8((unsigned int)buf[i], buffer_return + l); + } + while (i + 1 < len) { + unsigned int ucs; + unsigned char b[2]; + b[0] = (unsigned char) buf[i]; + b[1] = (unsigned char) buf[i + 1]; + if (big5_mbtowc(NULL, &ucs, b, 2) == 2) { + i += 2; + } else { + ucs = '?'; + i++; + } + l += XConvertUcsToUtf8(ucs, buffer_return + l); + } + free(buf); + return l; +} + +int +XConvertGb2312ToUtf8( + char* buffer_return, + int len) +{ + int i = 0, l = 0; + char *buf; + + if (len < 1) return 0; + buf = (char*) malloc((unsigned)len); + memcpy(buf, buffer_return, (unsigned)len); + + if (len == 1) { + l += XConvertUcsToUtf8((unsigned int)buf[i], buffer_return + l); + } + while (i + 1 < len) { + unsigned int ucs; + unsigned char b[2]; + b[0] = (unsigned char) buf[i]; + b[1] = (unsigned char) buf[i + 1]; + if (gb2312_mbtowc(NULL, &ucs, b, 2) == 2) { + i += 2; + } else { + ucs = '?'; + i++; + } + l += XConvertUcsToUtf8(ucs, buffer_return + l); + } + free(buf); + return l; +} + +int +XConvertEucCnToUtf8( + char* buffer_return, + int len) +{ + int i = 0, l = 0; + char *buf; + + if (len < 1) return 0; + buf = (char*) malloc((unsigned)len); + memcpy(buf, buffer_return, (unsigned)len); + + while (i < len) { + unsigned int ucs; + unsigned char c, c1; + c = (unsigned char) buf[i]; + if (c < 0x80) { + ucs = c; + i++; + } else if (c >= 0xA1 && c < 0xFF && len - i > 1) { + c1 = (unsigned char) buf[i + 1]; + if (c1 >= 0xa1 && c1 < 0xff) { + unsigned char b[2]; + b[0] = (unsigned char) c; + b[1] = (unsigned char) c1; + if (gb2312_mbtowc(NULL, &ucs, b, 2) < 1) { + ucs = '?'; + } + } else { + ucs = '?'; + } + i += 2; + } else { + ucs = '?'; + i++; + } + l += XConvertUcsToUtf8(ucs, buffer_return + l); + } + free(buf); + return l; +} + +int +XConvertEucJpToUtf8( + char* buffer_return, + int len) +{ + int i = 0, l = 0; + char *buf; + + if (len < 1) return 0; + buf = (char*) malloc((unsigned)len); + memcpy(buf, buffer_return, (unsigned)len); + + while (i < len) { + unsigned int ucs; + unsigned char c, c1; + c = (unsigned char) buf[i]; + if (c < 0x80) { + ucs = c; + i++; + } else if (c >= 0xA1 && c < 0xFF && len - i > 1) { + c1 = (unsigned char) buf[i + 1]; + if (c < 0xF5 && c1 >= 0xa1) { + unsigned char b[2]; + b[0] = c - 0x80; + b[1] = c1 - 0x80; + if (jisx0208_mbtowc(NULL, &ucs, b, 2) < 1) { + ucs = '?'; + } + } else if (c1 >= 0xA1 && c1 < 0xFF) { + ucs = 0xE000 + 94 * (c - 0xF5) + (c1 - 0xA1); + } else { + ucs = '?'; + } + i += 2; + } else if (c == 0x8E && len - i > 1) { + c1 = (unsigned char) buf[i + 1]; + if (c1 >= 0xa1 && c1 <= 0xe0) { + if (jisx0201_mbtowc(NULL, &ucs, &c1, 1) != 1) { + ucs = '?'; + } + } else { + ucs = '?'; + } + i += 2; + } else if (c == 0x8F && len - i > 2) { + c = (unsigned char) buf[i + 1]; + c1 = (unsigned char) buf[i + 2]; + if (c >= 0xa1 && c < 0xff) { + if (c < 0xf5 && c1 >= 0xa1 && c1 < 0xff) { + unsigned char b[2]; + b[0] = c - 0x80; + b[1] = c1 - 0x80; + if (jisx0212_mbtowc(NULL, &ucs, b, 2) + < 1) + { + ucs = '?'; + } + } else { + ucs = '?'; + } + } else { + if (c1 >= 0xa1 && c1 < 0xff) { + ucs = 0xe3ac + 94 * (c - 0xF5) + + (c1 - 0xA1); + } else { + ucs = '?'; + } + } + i += 3; + } else { + ucs = '?'; + i++; + } + l += XConvertUcsToUtf8(ucs, buffer_return + l); + } + free(buf); + return l; +} + +int +XConvertEucToUtf8( + const char* locale, + char* buffer_return, + int len, + int bytes_buffer) +{ + if (!locale/* || strstr(locale, "UTF") || strstr(locale, "utf")*/) { + return len; + } + + if (strstr(locale, "ja")) { + return XConvertEucJpToUtf8(buffer_return, len); + } else if (strstr(locale, "Big5") || strstr(locale, "big5")) { // BIG5 + return XConvertBig5ToUtf8(buffer_return, len); + } else if (strstr(locale, "zh") || strstr(locale, "chinese-")) { + if (strstr(locale, "TW") || strstr(locale, "chinese-t")) { + if (strstr(locale, "EUC") || strstr(locale, "euc") || + strstr(locale, "chinese-t")) + { + return XConvertEucTwToUtf8(buffer_return, len); + } + return XConvertBig5ToUtf8(buffer_return, len); + } + if (strstr(locale, "EUC") || strstr(locale, "euc")) { + return XConvertEucCnToUtf8(buffer_return, len); + } + return XConvertGb2312ToUtf8(buffer_return, len); + } else if (strstr(locale, "ko")) { + return XConvertEucKrToUtf8(buffer_return, len); + } + return len; +} + + +int +XUtf8LookupString( + XIC ic, + XKeyPressedEvent* event, + char* buffer_return, + int bytes_buffer, + KeySym* keysym, + Status* status_return) +{ + long ucs = -1; + int len; + len = XmbLookupString(ic, event, buffer_return, bytes_buffer / 5, + keysym, status_return); + if (*status_return == XBufferOverflow) { + return len * 5; + } + if (*keysym > 0 && *keysym < 0x100 && len == 1) { + if (*keysym < 0x80) { + ucs = (unsigned char)buffer_return[0]; + } else { + ucs = *keysym; + } + } else if (((*keysym >= 0x100 && *keysym <= 0xf000) || + (*keysym & 0xff000000U) == 0x01000000)) + { + ucs = XKeysymToUcs(*keysym); + } else { + ucs = -2; + } + + if (ucs > 0) { + len = XConvertUcsToUtf8((unsigned)ucs, (char *)buffer_return); + } else if (len > 0) { + XIM im; + if (!ic) return 0; + im = XIMOfIC(ic); + if (!im) return 0; + len = XConvertEucToUtf8(XLocaleOfIM(im), + buffer_return, len, bytes_buffer); + } + return len; +} + +#endif // X11 only + |
