diff options
Diffstat (limited to 'src/xutf8/utf8Input.c')
| -rw-r--r-- | src/xutf8/utf8Input.c | 51 |
1 files changed, 49 insertions, 2 deletions
diff --git a/src/xutf8/utf8Input.c b/src/xutf8/utf8Input.c index 8882c9ecf..4108c9f11 100644 --- a/src/xutf8/utf8Input.c +++ b/src/xutf8/utf8Input.c @@ -54,6 +54,7 @@ typedef struct { #include "lcUniConv/big5.h" #include "lcUniConv/gb2312.h" +#include "lcUniConv/cp936ext.h" #include "lcUniConv/jisx0201.h" #include "lcUniConv/jisx0208.h" #include "lcUniConv/jisx0212.h" @@ -191,6 +192,43 @@ XConvertBig5ToUtf8(char* buffer_return, int len) { } int +XConvertCp936extToUtf8(char* buffer_return, int len) +{ + int i = 0, l = 0; + char *buf; + + if (len < 1) return 0; + buf = (char*) malloc((unsigned)len); + memcpy(buf, buffer_return, (unsigned)len); + + if (len == 1) { + l += XConvertUcsToUtf8((unsigned int)buf[i], buffer_return + l); + } + while (i + 1 < len) { + unsigned int ucs; + unsigned char b[2]; + b[0] = (unsigned char) buf[i]; + b[1] = (unsigned char) buf[i + 1]; + if (cp936ext_mbtowc(NULL, &ucs, b, 2) == 2) { + i += 2; + } else { + if ( b[0] < 0x80) { + ucs = b[0]; + }else{ + ucs = '?'; + } + i++; + } + l += XConvertUcsToUtf8(ucs, buffer_return + l); + } + if(i + 1 == len) { + l += XConvertUcsToUtf8((unsigned int)buf[i], buffer_return + l); + } + free(buf); + return l; +} + +int XConvertGb2312ToUtf8(char* buffer_return, int len) { int i = 0, l = 0; char *buf; @@ -207,7 +245,10 @@ XConvertGb2312ToUtf8(char* buffer_return, int len) { unsigned char b[2]; b[0] = (unsigned char) buf[i]; b[1] = (unsigned char) buf[i + 1]; - if (gb2312_mbtowc(NULL, &ucs, b, 2) == 2) { + if ( b[0] < 0x80 ) { + ucs = b[0]; + i++; + } else if (gb2312_mbtowc(NULL, &ucs, b, 2) == 2) { i += 2; } else { ucs = '?'; @@ -215,6 +256,9 @@ XConvertGb2312ToUtf8(char* buffer_return, int len) { } l += XConvertUcsToUtf8(ucs, buffer_return + l); } + if (i + 1 == len) { + l += XConvertUcsToUtf8((unsigned int)buf[i], buffer_return + l); + } free(buf); return l; } @@ -337,7 +381,8 @@ XConvertEucToUtf8(const char* locale, int len, int bytes_buffer) { - if (!locale/* || strstr(locale, "UTF") || strstr(locale, "utf")*/) { + //if (!locale/* || strstr(locale, "UTF") || strstr(locale, "utf")*/) { + if (!locale || strstr(locale, "UTF") || strstr(locale, "utf")) { return len; } @@ -345,6 +390,8 @@ XConvertEucToUtf8(const char* locale, return XConvertEucJpToUtf8(buffer_return, len); } else if (strstr(locale, "Big5") || strstr(locale, "big5")) { /* BIG5 */ return XConvertBig5ToUtf8(buffer_return, len); + } else if (strstr(locale, "GBK") || strstr(locale, "gbk")) { + return XConvertCp936extToUtf8(buffer_return, len); } else if (strstr(locale, "zh") || strstr(locale, "chinese-")) { if (strstr(locale, "TW") || strstr(locale, "chinese-t")) { if (strstr(locale, "EUC") || strstr(locale, "euc") || strstr(locale, "chinese-t")) { |
