summaryrefslogtreecommitdiff
path: root/src/xutf8/utils
diff options
context:
space:
mode:
Diffstat (limited to 'src/xutf8/utils')
-rw-r--r--src/xutf8/utils/INSTALL26
-rw-r--r--src/xutf8/utils/Makefile22
-rw-r--r--src/xutf8/utils/README14
-rwxr-xr-xsrc/xutf8/utils/case.sh107
-rw-r--r--src/xutf8/utils/conv_gen.c170
-rw-r--r--src/xutf8/utils/convert_map.c174
-rw-r--r--src/xutf8/utils/create_table.c89
-rw-r--r--src/xutf8/utils/euc_tw.c61
-rwxr-xr-xsrc/xutf8/utils/non_spacing.sh103
-rwxr-xr-xsrc/xutf8/utils/tbl_gen.sh183
10 files changed, 949 insertions, 0 deletions
diff --git a/src/xutf8/utils/INSTALL b/src/xutf8/utils/INSTALL
new file mode 100644
index 000000000..a9fbd7cf3
--- /dev/null
+++ b/src/xutf8/utils/INSTALL
@@ -0,0 +1,26 @@
+
+Default install :
+
+ tar -xvzf Xutf8.tar.gz
+ cd Xutf8
+ ./configure --prefix=/usr
+ su -c "make install"
+
+
+
+Install with tables generation :
+
+ You must have a copy of http://www.unicode.org/Public/MAPPINGS/ in
+ the ./MAPPINGS/ directory and a copy of UnicodeData-2.txt in ./
+ to generate the conversion tables.
+
+ To add a table, edit utils/tbl_gen.sh and utils/convert_map.c.
+
+ tar -xvzf Xutf8.tar.gz
+ cd Xutf8
+ ./configure
+ cd utils
+ make
+ cd ..
+ su -c "make install"
+
diff --git a/src/xutf8/utils/Makefile b/src/xutf8/utils/Makefile
new file mode 100644
index 000000000..3bed422c7
--- /dev/null
+++ b/src/xutf8/utils/Makefile
@@ -0,0 +1,22 @@
+# "all" is the first target so that a bare "make" (as described in INSTALL)
+# builds the helper tools and regenerates the tables; euc_tw is optional.
+all: convert_map create_table conv_gen
+	./tbl_gen.sh
+	./non_spacing.sh
+	./case.sh
+
+conv_gen: conv_gen.c
+	gcc conv_gen.c -o conv_gen
+
+convert_map: convert_map.c
+	gcc convert_map.c -o convert_map
+
+create_table: create_table.c
+	gcc create_table.c -o create_table
+
+euc_tw: euc_tw.c
+	gcc euc_tw.c -o euc_tw
+
+clean:
+	rm -f *.o convert_map create_table conv_gen euc_tw
+.PHONY: all clean
diff --git a/src/xutf8/utils/README b/src/xutf8/utils/README
new file mode 100644
index 000000000..5f6771d7a
--- /dev/null
+++ b/src/xutf8/utils/README
@@ -0,0 +1,14 @@
+tbl_gen.sh :
+ shell script that converts the unicode.org mapping tables to C headers.
+ Also creates ucs2fontmap.c.
+
+conv_gen.c:
+ creates the ucs2fontmap.c functions.
+
+convert_map.c:
+ converts the different unicode.org mapping formats to one common format.
+
+create_table.c:
+ converts the text files created by convert_map.c to C tables.
+
+
diff --git a/src/xutf8/utils/case.sh b/src/xutf8/utils/case.sh
new file mode 100755
index 000000000..a824ad638
--- /dev/null
+++ b/src/xutf8/utils/case.sh
@@ -0,0 +1,107 @@
+#!/bin/sh
+
+
+nopsc=`grep 'CAPITAL' ../UnicodeData-2.txt`
+
+IFS="
+"
+
+#echo "#
+# List of case chars
+#
+#
+# Format: Three tab-separated columns
+# Column #1 is the non-spacing Unicode (in hex as 0xXXXX)
+# Column #2 is the spacing Unicode (in hex as 0xXXXX)
+# Column #3 the Unicode name (follows a comment sign, '#')
+# " > case.txt
+
+rm -f case.txt
+
+
+for line in ${nopsc}
+do
+ ucs=`echo ${line} | cut -d\; -f1`
+ name=`echo ${line} | cut -d\; -f2 | cut -d\; -f1| sed s/CAPITAL/SMALL/`
+ small=`grep ";${name};" ../UnicodeData-2.txt`
+ if test "X${small}" != X ;then
+ tbl=`echo ${small} | cut -d\; -f1`
+ # echo "0x${ucs} 0x${tbl} # ${name}" >> space.txt
+ echo "/* U+${ucs} */ 0x${tbl}," >> case.txt
+ else
+ # echo "0x${ucs} 0x${ucs} # ${name}" >> space.txt
+ echo "/* U+${ucs} */ 0x0," >> case.txt
+ fi
+done
+
+unset nospc
+
+# echo "/* EOF */" >> space.txt
+
+cat case.txt | ./create_table "spacing" > "../headers/case.h" 2> ../headers/case_tbl.txt
+
+rm -f case.txt
+# Start ../case.c with the licence header and the include of the table header.
+cat >../case.c << ENDOFTEXT
+/******************************************************************************
+ Copyright 2001 by O'ksi'D
+
+Permission to use, copy, modify, distribute, and sell this software
+and its documentation for any purpose is hereby granted without fee,
+provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear
+in supporting documentation, and that the name of O'ksi'D
+not be used in advertising or publicity pertaining to distribution
+of the software without specific, written prior permission.
+O'ksi'D makes no representations about the suitability of
+this software for any purpose. It is provided "as is" without
+express or implied warranty.
+
+O'ksi'D DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
+IN NO EVENT SHALL O'ksi'D BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
+
+ Author: Jean-Marc Lienher ( http://oksid.ch )
+
+******************************************************************************/
+
+#include "headers/case.h"
+
+ENDOFTEXT
+# Open the XUtf8Tolower() definition.
+echo "
+int
+XUtf8Tolower(
+ int ucs)
+{
+ int ret;
+" >>../case.c
+# case_tbl.txt holds one line per table: table name, then "end: U+LAST".
+tables=`cat ../headers/case_tbl.txt`
+# eot is captured with sed so it does not depend on the blank/tab layout.
+for line in ${tables}
+do
+ tbl=`echo ${line} | cut -d']' -f1`
+ bot=`echo ${line} | cut -d'_' -f3 | cut -d'[' -f1`
+ eot=`echo ${line} | sed 's/.*U+\([0-9A-Fa-f]*\).*/\1/'`
+ echo "\
+ if (ucs <= 0x${eot}) {
+ if (ucs >= 0x${bot}) {
+ ret = ${tbl}ucs - 0x${bot}];
+ if (ret > 0) return ret;
+ }
+ return ucs;
+ }
+" >>../case.c
+
+done
+
+echo " return ucs;
+}
+" >>../case.c
+
+
diff --git a/src/xutf8/utils/conv_gen.c b/src/xutf8/utils/conv_gen.c
new file mode 100644
index 000000000..26c03589f
--- /dev/null
+++ b/src/xutf8/utils/conv_gen.c
@@ -0,0 +1,170 @@
+/******************************************************************************
+ *
+ * generate the "if(){} else if ..." structure of ucs2fontmap()
+ *
+ * Copyright (c) 2000 O'ksi'D
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Author : Jean-Marc Lienher ( http://oksid.ch )
+ *
+ ******************************************************************************/
+
+#include <wchar.h>
+#include <stdio.h>
+char buffer[1000000];
+/* Reads the table list that create_table wrote to stderr (here: stdin) and
+ * prints the C source of ucs2fontmap(), encoding_number() and a commented-out
+ * encoding_name() to stdout. */
+int main(int argc, char **argv)
+{
+ int len;
+ char *encode[256];
+ int encode_number = 0;
+ unsigned int i = 0;
+ unsigned char *ptr;
+ unsigned char *lst = "";
+ /* Leave room for the NUL stored at buffer[len] below. */
+ len = fread(buffer, 1, sizeof(buffer) - 1, stdin);
+
+ puts(" ");
+ puts(" /*************** conv_gen.c ************/");
+ buffer[len] = '\0';
+ ptr = buffer;
+
+ printf("const int ucs2fontmap"
+ "(char *s, unsigned int ucs, int enc)\n");
+ printf("{\n");
+ printf(" switch(enc) {\n");
+ printf(" case 0:\n");
+ printf(" s[0] = (char) ((ucs & 0xFF00) >> 8);\n");
+ printf(" s[1] = (char) (ucs & 0xFF);\n");
+ printf(" return 0;");
+ while (len > 0) { /* one "name_Nb_XXXX[]; ... U+YYYY" line per pass */
+ unsigned char *p = ptr;
+ unsigned char *f, *t;
+
+ while (*p != ']') {
+ i++;
+ p++;
+ }
+ *(p - 1) = '\0'; /* overwrite '[' : ends the 4-digit start code */
+ *(p - 6) = '\0'; /* overwrite '_' : ends the table-name prefix at ptr */
+ f = p - 5; /* f = first code point of the table (hex digits) */
+ while (*p != '+') { i++; p++;}
+ p++;
+ t = p; /* t = last code point of the table (hex digits) */
+ *(p + 4) = '\0';
+ if (strcmp(lst, ptr)) { /* prefix changed: a new encoding starts */
+ encode_number++;
+ encode[encode_number] = ptr;
+ printf("\n break;");
+ printf("\n case %d:\n", encode_number);
+ printf(" ");
+ } else {
+ printf(" else ");
+ }
+ lst = ptr;
+ printf("if (ucs <= 0x%s) {\n", t);
+ printf(" if (ucs >= 0x%s) {\n", f);
+ if (*(f - 3) == '2') { /* the "Nb" digit: two bytes per entry */
+ printf(" int i = (ucs - 0x%s) * 2;\n", f);
+ printf(" s[0] = %s_%s[i++];\n", ptr, f);
+ printf(" s[1] = %s_%s[i];\n", ptr, f);
+ printf(" if (s[0] || s[1]) return %d;\n",
+ encode_number);
+ } else {
+ printf(" s[0] = 0;\n");
+ printf(" s[1] = %s_%s[ucs - 0x%s];\n",
+ ptr, f, f);
+ printf(" if (s[1]) return %d;\n", encode_number);
+ }
+ printf(" }\n");
+ printf(" }");
+ while (*ptr != '\n') { /* advance to the next input line */
+ ptr++;
+ len--;
+ }
+ ptr++;
+ len--;
+ }
+ printf("\n break;\n");
+ printf("\n default:\n");
+ printf(" break;\n");
+ printf(" };\n");
+ printf(" return -1;\n");
+ printf("};\n\n");
+
+ printf("const int encoding_number(const char *enc)\n{\n");
+ printf(" if (!enc || !strcmp(enc, \"iso10646-1\")) {\n");
+ printf(" return 0;\n");
+ i = 1;
+ while (i <= encode_number) {
+ int l;
+ char *ptr;
+ l = strlen(encode[i]) - 3; /* drop the trailing "_Nb" */
+ ptr = encode[i] + l;
+ *(ptr) = '\0';
+ ptr--;
+ while (ptr != encode[i]) { /* last '_' becomes '-' */
+ if (*ptr == '_') {
+ *ptr = '-';
+ ptr--;
+ break;
+ }
+ ptr--;
+ }
+ while (ptr != encode[i]) { /* every earlier '_' becomes '.' */
+ if (*ptr == '_') {
+ *ptr = '.';
+ }
+ ptr--;
+ }
+ printf(" } else if (!strcmp(enc, \"%s\")", encode[i] +11);
+
+ if (!strcmp(encode[i] + 11, "big5-0")) {
+ printf(" || !strcmp(enc, \"big5.eten-0\")");
+ } else if (!strcmp(encode[i] + 11, "dingbats")) {
+ printf(" || !strcmp(enc, \"zapfdingbats\")");
+ printf(" || !strcmp(enc, \"zapf dingbats\")");
+ printf(" || !strcmp(enc, \"itc zapf dingbats\")");
+ } else if (!strcmp(encode[i] + 11, "jisx0208.1983-0")) {
+ printf(" || !strcmp(enc, \"jisx0208.1990-0\")");
+ }
+
+ printf(") {\n");
+ printf(" return %d;\n", i);
+ i++;
+ }
+ printf(" };\n");
+ printf(" return -1;\n");
+ printf("};\n\n");
+
+
+ printf("/*\n");
+ printf("const char *encoding_name(int num)\n{\n");
+ printf(" switch (num) {\n");
+ i = 1;
+ while (i <= encode_number) {
+ printf(" case %d:\n", i);
+ printf(" return \"%s\";\n", encode[i] + 11);
+ i++;
+ }
+ printf(" };\n");
+ printf(" return \"iso10646-1\";\n");
+ printf("};\n\n");
+ printf("*/\n");
+ return 0;
+}
diff --git a/src/xutf8/utils/convert_map.c b/src/xutf8/utils/convert_map.c
new file mode 100644
index 000000000..9e4a23e10
--- /dev/null
+++ b/src/xutf8/utils/convert_map.c
@@ -0,0 +1,174 @@
+/******************************************************************************* * $Id: $
+ *
+ * read the http://www.unicode.org/Public/MAPPINGS/ and create something
+ * usable in C.
+ *
+ * Copyright (c) 2000 O'ksi'D
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Author : Jean-Marc Lienher ( http://oksid.ch )
+ *
+ ******************************************************************************/
+
+#include <wchar.h>
+#include <stdio.h>
+
+char buffer[1000000];
+/* Parses one tab-separated line at ptr (column 1 skipped, column 2 = font code, column 3 = UCS), prints the entry, returns the bytes consumed. */
+int JIS0208(unsigned char * ptr)
+{
+ int i = 0; /* bytes consumed so far */
+ unsigned int fmap;
+ unsigned int ucs;
+ while(*ptr != '\t') { ptr++; i++; } /* skip the first column */
+ ptr++; i++; *(ptr+6) = '\0'; /* NUL-terminate the 6-character field in place */
+ fmap = (unsigned int)strtoul(ptr, NULL, 16);
+ while(*ptr != '\0') { ptr++; i++; } /* move to the NUL just written */
+ i++; ptr++; *(ptr+6) = '\0';
+ ucs = (unsigned int)strtoul(ptr, NULL, 16);
+ if (ucs) printf("/* U+%04X */ 0x%02X, 0x%02X,\n", ucs,
+ (fmap & 0xFF00) >> 8, fmap & 0xFF); /* high byte, then low byte */
+ while(*ptr != '\0') { ptr++; i++; }
+ i++; ptr++;
+ while(*ptr != '\n') { ptr++; i++; } /* the rest of the line is ignored */
+ i++; /* count the newline itself */
+ return i;
+}
+/* Parses one line at ptr: a 4-character font code, then a 6-character UCS field; prints a one-byte entry and returns the bytes consumed. */
+int JIS0201(unsigned char * ptr)
+{
+ int i = 0; /* bytes consumed so far */
+ unsigned int fmap;
+ unsigned int ucs;
+ *(ptr+4) = '\0'; /* NUL-terminate the first field in place */
+ fmap = (unsigned int)strtoul(ptr, NULL, 16);
+ while(*ptr != '\0') { ptr++; i++; }
+ i++; ptr++; *(ptr+6) = '\0';
+ ucs = (unsigned int)strtoul(ptr, NULL, 16);
+ if (*(ptr + 1) != 'x') { /* second field does not look like "0x...." */
+ printf("/* EOF */\n");
+ abort(); /* treated as end of data: the process stops here */
+ }
+ if (ucs) printf("/* U+%04X */ 0x%02X,\n", ucs, (unsigned char)fmap);
+ while(*ptr != '\0') { ptr++; i++; }
+ i++; ptr++;
+ while(*ptr != '\n') { ptr++; i++; } /* the rest of the line is ignored */
+ i++; /* count the newline itself */
+ return i;
+}
+/* Parses one line at ptr: a 4-character hex UCS value first, then a 2-character hex font code; prints a one-byte entry and returns the bytes consumed. */
+int ADOBE(unsigned char * ptr)
+{
+ int i = 0; /* bytes consumed so far */
+ unsigned int fmap;
+ unsigned int ucs;
+ *(ptr+4) = '\0'; /* NUL-terminate the first field in place */
+ ucs = (unsigned int)strtoul(ptr, NULL, 16);
+ while(*ptr != '\0') { ptr++; i++; }
+ i++; ptr++; *(ptr+2) = '\0';
+ fmap = (unsigned int)strtoul(ptr, NULL, 16);
+ if (fmap < 1) { /* a font code of 0 (or unparsable text) ends the data */
+ printf("/* EOF */\n");
+ abort(); /* the process stops here */
+ }
+ if (ucs) printf("/* U+%04X */ 0x%02X,\n", ucs, (unsigned char)fmap);
+ while(*ptr != '\0') { ptr++; i++; }
+ i++; ptr++;
+ while(*ptr != '\n') { ptr++; i++; } /* the rest of the line is ignored */
+ i++; /* count the newline itself */
+ return i;
+}
+/* Parses one line at ptr: a 6-character font code, then (after optional
+ * blanks) a 6-character UCS field; prints a two-byte entry, returns the bytes consumed. */
+int JIS0212(unsigned char * ptr)
+{
+ int i = 0; /* bytes consumed so far */
+ unsigned int fmap;
+ unsigned int ucs;
+ *(ptr+6) = '\0'; /* NUL-terminate the first field in place */
+ fmap = (unsigned int)strtoul(ptr, NULL, 16);
+ ptr += 7; /* step over the field and its separator */
+ i += 7;
+ while(*ptr == ' ') { ptr++; i++; }
+ //i++; ptr++;
+ *(ptr+6) = '\0';
+ ucs = (unsigned int)strtoul(ptr, NULL, 16);
+ if (*(ptr + 1) != 'x') { /* second field does not look like "0x...." */
+ printf("/* EOF */\n");
+ abort(); /* treated as end of data: the process stops here */
+ }
+ if (ucs) printf("/* U+%04X */ 0x%02X, 0x%02X,\n", ucs,
+ (fmap & 0xFF00) >> 8, fmap & 0xFF); /* high byte, then low byte */
+ while(*ptr != '\0') { ptr++; i++; }
+ i++; ptr++;
+ while(*ptr != '\n') { ptr++; i++; } /* the rest of the line is ignored */
+ i++; /* count the newline itself */
+ return i;
+}
+/* Converts the mapping file on stdin to "U+XXXX" entry lines; argv[1] names the encoding and selects the line parser. */
+int main(int argc, char **argv)
+{
+ int len;
+ unsigned char *ptr;
+ size_t nb;
+ if (argc < 2) { fprintf(stderr, "usage: convert_map <encoding>\n"); return 1; }
+ /* Leave room for the NUL stored at buffer[len] below. */
+ len = fread(buffer, 1, sizeof(buffer) - 1, stdin);
+
+ buffer[len] = '\0';
+ ptr = (unsigned char *)buffer;
+ while (*ptr !='\n') {ptr++; len--;}; /* the first line is always dropped */
+ ptr++; len--;
+ while (*ptr == '#') { /* then every following '#' comment line */
+ while (*ptr !='\n') {
+ ptr++;
+ len--;
+ }
+ ptr++;
+ len--;
+ }
+ /* Each parser handles one line and returns its length in bytes; an
+ * unrecognised encoding name ends the loop without output. */
+ while (len > 0) {
+ nb = 0;
+ if (!strcmp("jisx0208.1983-0", argv[1])) {
+ nb = JIS0208(ptr);
+ } else if (!strcmp("jisx0201.1976-0", argv[1])) {
+ nb = JIS0201(ptr);
+ } else if (!strcmp("jisx0212.1990-0", argv[1])) {
+ nb = JIS0212(ptr);
+ } else if (!strcmp("gb2312.1980-0", argv[1])) {
+ nb = JIS0212(ptr);
+ } else if (!strcmp("ksc5601.1987-0", argv[1])) {
+ nb = JIS0212(ptr);
+ } else if (!strcmp("big5-0", argv[1])) {
+ nb = JIS0212(ptr);
+ } else if (!strncmp("iso8859", argv[1], 7)) {
+ nb = JIS0201(ptr);
+ } else if (!strcmp("koi8-1", argv[1])) {
+ nb = JIS0201(ptr);
+ } else if (!strcmp("dingbats", argv[1]) ||
+ !strcmp("symbol", argv[1]))
+ {
+ nb = ADOBE(ptr);
+ } else {
+ len = 0;
+ }
+ ptr += nb;
+ len = len - nb;
+ }
+ return 0;
+}
diff --git a/src/xutf8/utils/create_table.c b/src/xutf8/utils/create_table.c
new file mode 100644
index 000000000..5bc702952
--- /dev/null
+++ b/src/xutf8/utils/create_table.c
@@ -0,0 +1,89 @@
+#include <wchar.h>
+#include <stdio.h>
+char buffer[1000000];
+
+/*** you can try to modifie this value to have better performences **/
+#define MAX_DELTA 0x80
+/* Groups the "U+XXXX" entry lines read from stdin into C arrays on stdout and
+ * writes a one-line summary (array name, last code point) per array to stderr. */
+int main(int argc, char **argv)
+{
+ int len;
+ unsigned int i = 0; /* last code point written; 0 = nothing written yet */
+ unsigned char *ptr;
+ int nbb = 0; /* commas on the second input line = bytes per entry */
+ if (argc < 2) { fprintf(stderr, "usage: create_table <name>\n"); return 1; }
+ len = fread(buffer, 1, sizeof(buffer) - 1, stdin); /* keep room for NUL */
+
+ buffer[len] = '\0';
+ ptr = (unsigned char *)buffer;
+ while (*ptr && *ptr != '\n') ptr++; /* never scan past the final NUL */
+ if (*ptr) ptr++;
+ while (*ptr && *ptr != '\n') {
+ if (*ptr == ',') nbb++;
+ ptr++;
+ }
+ ptr = (unsigned char *)buffer;
+ printf("/* %s */\n", argv[1]);
+ while (len > 0) {
+ unsigned int ucs = 0;
+ char *p = (char *)ptr;
+ char pp[20];
+ /* Turn the "U+XXXX" tag into "0xXXXX" so that strtoul() can parse it. */
+ pp[0] = '\0';
+ while (*p != 'U') p++;
+ strncat(pp, p, 6);
+ *pp = '0';
+ *(pp+1) = 'x';
+ ucs = (unsigned int)strtoul(pp, NULL, 16);;
+ if (ucs < 1) {
+ printf("ERROR %d %d\n", len, ucs);
+ abort();
+ }
+ if (i != ucs - 1 || !i) { /* first entry, or not the direct successor */
+ if ((ucs - i) > MAX_DELTA || !i) { /* gap too large: open a new array */
+ if (i) {
+ printf("};\n");
+ fprintf(stderr, "\t/* end: U+%04X */\n",
+ i);
+ }
+ if (strcmp(argv[1], "spacing")) {
+ printf("\nstatic const char"
+ " unicode_to_%s_%db_%04X[]"
+ " = {\n", argv[1], nbb, ucs);
+ fprintf(stderr,
+ "unicode_to_%s_%db_%04X[]; ",
+ argv[1], nbb, ucs);
+ } else {
+ printf("\nstatic const unsigned short"
+ " ucs_table_%04X[]"
+ " = {\n", ucs);
+ fprintf(stderr,
+ "ucs_table_%04X[]; ",
+ ucs);
+ }
+ } else { /* small gap: pad the current array with zero entries */
+ while (i < ucs - 1) {
+ i++;
+ if (nbb == 1) {
+ printf("0x00,\n");
+ } else {
+ printf("0x00, 0x00,\n");
+ }
+ };
+ }
+ }
+ i = ucs;
+ while (*ptr != '\n') { /* copy the entry line through unchanged */
+ printf("%c", *ptr);
+ ptr++;
+ len--;
+ }
+ printf("\n");
+ ptr++;
+ len--;
+ }
+ printf("};\n");
+ fprintf(stderr, "\t/* end: U+%04X */\n", i);
+ return 0;
+}
diff --git a/src/xutf8/utils/euc_tw.c b/src/xutf8/utils/euc_tw.c
new file mode 100644
index 000000000..11c08b617
--- /dev/null
+++ b/src/xutf8/utils/euc_tw.c
@@ -0,0 +1,61 @@
+/******************************************************************************
+ *
+ * generate the "if(){} else if ..." structure of ucs2fontmap()
+ *
+ * Copyright (c) 2000 O'ksi'D
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Author : Jean-Marc Lienher ( http://oksid.ch )
+ *
+ ******************************************************************************/
+
+#include <wchar.h>
+#include <stdio.h>
+#include <iconv.h>
+char uni[0x10000];
+#include "../utf8Utils.c"
+/* Feeds every 32-bit value below 0xFFFFFFFF, as four little-endian bytes, through a "UTF16" to "EUC-TW" iconv converter and prints each value that produced output. */
+int main(int argc, char **argv)
+{
+ iconv_t cd;
+ unsigned int i; /* unsigned: the loop bound exceeds INT_MAX */
+
+ cd = iconv_open("EUC-TW", "UTF16");
+ if (cd == (iconv_t)-1) { perror("iconv_open"); return 1; }
+ for(i = 0; i < 0x10000; i++) uni[i] = 0;
+ for(i = 0x00000000; i < 0xFFFFFFFF; i++) {
+ char buf[4], ob[6];
+ char *b = buf;
+ int ucs = -1;
+ size_t l1 = 4, l2 = 6; /* iconv() takes size_t byte counts */
+ char *o = ob ;
+ buf[0] = i & 0xff;
+ buf[1] = (i >> 8) & 0xFF;
+ buf[2] = (i >> 16) & 0xFF;
+ buf[3] = (i >> 24) & 0xFF;
+ iconv(cd, NULL, NULL, NULL, NULL); /* reset the conversion state */
+ iconv(cd, &b, &l1, &o, &l2);
+ if (l2 != 6) { /* something was written to ob */
+ /* Go through unsigned char so bytes >= 0x80 are not sign-extended. */
+ ucs = (unsigned char)ob[0];
+ ucs += (unsigned char)ob[1] << 8;
+ //XConvertUtf8ToUcs((unsigned char*)ob, 6 - l2, &ucs);
+ printf ("%x --> %X\n", i, ucs & 0xFFFF);
+ }
+ }
+ iconv_close(cd);
+ return 0;
+}
diff --git a/src/xutf8/utils/non_spacing.sh b/src/xutf8/utils/non_spacing.sh
new file mode 100755
index 000000000..ba86bec0f
--- /dev/null
+++ b/src/xutf8/utils/non_spacing.sh
@@ -0,0 +1,103 @@
+#!/bin/sh
+
+
+nopsc=`grep ';Mn;' ../UnicodeData-2.txt`
+
+IFS="
+"
+
+#echo "#
+# List of non-spacing chars
+#
+#
+# Format: Three tab-separated columns
+# Column #1 is the non-spacing Unicode (in hex as 0xXXXX)
+# Column #2 is the spacing Unicode (in hex as 0xXXXX)
+# Column #3 the Unicode name (follows a comment sign, '#')
+# " > space.txt
+
+rm -f space.txt
+
+
+for line in ${nopsc}
+do
+ ucs=`echo ${line} | cut -d\; -f1`
+ name=`echo ${line} | cut -d\; -f2`
+ space=`grep " 0020 ${ucs};" ../UnicodeData-2.txt`
+ if test "X${space}" != X ;then
+ tbl=`echo ${space} | cut -d\; -f1`
+ # echo "0x${ucs} 0x${tbl} # ${name}" >> space.txt
+ echo "/* U+${ucs} */ 0x${tbl}," >> space.txt
+ else
+ # echo "0x${ucs} 0x${ucs} # ${name}" >> space.txt
+ echo "/* U+${ucs} */ 0x${ucs}," >> space.txt
+ fi
+done
+
+unset nospc
+
+# echo "/* EOF */" >> space.txt
+
+cat space.txt | ./create_table "spacing" > "../headers/spacing.h" 2> ../headers/spacing_tbl.txt
+
+rm -f space.txt
+# Start ../is_spacing.c with the licence header and the table header include.
+cat >../is_spacing.c << ENDOFTEXT
+/******************************************************************************
+ Copyright 2000-2001 by O'ksi'D
+
+Permission to use, copy, modify, distribute, and sell this software
+and its documentation for any purpose is hereby granted without fee,
+provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear
+in supporting documentation, and that the name of O'ksi'D
+not be used in advertising or publicity pertaining to distribution
+of the software without specific, written prior permission.
+O'ksi'D makes no representations about the suitability of
+this software for any purpose. It is provided "as is" without
+express or implied warranty.
+
+O'ksi'D DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
+IN NO EVENT SHALL O'ksi'D BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
+
+ Author: Jean-Marc Lienher ( http://oksid.ch )
+
+******************************************************************************/
+
+#include "headers/spacing.h"
+
+ENDOFTEXT
+# Open the XUtf8IsNonSpacing() definition.
+echo "
+unsigned short
+XUtf8IsNonSpacing(
+ unsigned int ucs)
+{
+" >>../is_spacing.c
+# spacing_tbl.txt holds one line per table: table name, then "end: U+LAST".
+tables=`cat ../headers/spacing_tbl.txt`
+# eot is captured with sed so it does not depend on the blank/tab layout.
+for line in ${tables}
+do
+ tbl=`echo ${line} | cut -d']' -f1`
+ bot=`echo ${line} | cut -d'_' -f3 | cut -d'[' -f1`
+ eot=`echo ${line} | sed 's/.*U+\([0-9A-Fa-f]*\).*/\1/'`
+ echo "\
+ if (ucs <= 0x${eot}) {
+ if (ucs >= 0x${bot}) return ${tbl}ucs - 0x${bot}];
+ return 0;
+ }
+" >>../is_spacing.c
+
+done
+
+echo " return 0;
+}
+" >>../is_spacing.c
+
+
diff --git a/src/xutf8/utils/tbl_gen.sh b/src/xutf8/utils/tbl_gen.sh
new file mode 100755
index 000000000..601dba315
--- /dev/null
+++ b/src/xutf8/utils/tbl_gen.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+#/******************************************************************************
+#*
+#* generates ucs2fontmap.c and headers/*_.h
+#*
+#* Copyright (c) 2000-2001 O'ksi'D
+#*
+#* Permission to use, copy, modify, distribute, and sell this software
+#* and its documentation for any purpose is hereby granted without fee,
+#* provided that the above copyright notice appear in all copies and
+#* that both that copyright notice and this permission notice appear
+#* in supporting documentation, and that the name of O'ksi'D
+#* not be used in advertising or publicity pertaining to distribution
+#* of the software without specific, written prior permission.
+#* O'ksi'D makes no representations about the suitability of
+#* this software for any purpose. It is provided "as is" without
+#* express or implied warranty.
+#*
+#* O'ksi'D DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
+#* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
+#* IN NO EVENT SHALL O'ksi'D BE LIABLE FOR ANY SPECIAL, INDIRECT
+#* OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+#* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+#* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+#* OR PERFORMANCE OF THIS SOFTWARE.
+#*
+#* Author : Jean-Marc Lienher ( http://oksid.ch )
+#*
+#******************************************************************************/
+
+# iso10646-1
+# Font encodings for which conversion tables are generated.
+encode="iso8859-1 iso8859-2 iso8859-3 \
+ iso8859-4 iso8859-5 iso8859-6 iso8859-7 iso8859-8 iso8859-9 \
+ iso8859-10 iso8859-13 iso8859-14 iso8859-15 \
+ koi8-1 big5-0 ksc5601.1987-0 gb2312.1980-0 jisx0201.1976-0 \
+ jisx0208.1983-0 jisx0212.1990-0 symbol dingbats"
+# Start from an empty ../headers/ directory.
+mkdir -p ../headers/
+rm -f ../headers/* ucs2fontmap
+# Per encoding: convert its mapping file to ${enc}.txt, then append its tables.
+for enc in ${encode}
+do
+ echo ${enc}
+
+ case ${enc} in
+ ksc5601.1987-0)
+# cat ../MAPPINGS/EASTASIA/KSC/KSC5601.TXT | \
+ cat ../MAPPINGS/EASTASIA/KSC/KSX1001.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ koi8-1)
+ cat ../MAPPINGS/VENDORS/MISC/KOI8-R.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ iso8859-14)
+ cat ../MAPPINGS/ISO8859/8859-14.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ iso8859-13)
+ cat ../MAPPINGS/ISO8859/8859-13.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ iso8859-5)
+ cat ../MAPPINGS/ISO8859/8859-5.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ iso8859-6)
+ cat ../MAPPINGS/ISO8859/8859-6.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ iso8859-1)
+ cat ../MAPPINGS/ISO8859/8859-1.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ iso8859-10)
+ cat ../MAPPINGS/ISO8859/8859-10.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ iso8859-15)
+ cat ../MAPPINGS/ISO8859/8859-15.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ iso8859-2)
+ cat ../MAPPINGS/ISO8859/8859-2.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ iso8859-3)
+ cat ../MAPPINGS/ISO8859/8859-3.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ iso8859-4)
+ cat ../MAPPINGS/ISO8859/8859-4.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ iso8859-7)
+ cat ../MAPPINGS/ISO8859/8859-7.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ iso8859-8)
+ cat ../MAPPINGS/ISO8859/8859-8.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ iso8859-9)
+ cat ../MAPPINGS/ISO8859/8859-9.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ dingbats)
+ cat ../MAPPINGS/VENDORS/ADOBE/zdingbat.txt | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ symbol)
+ cat ../MAPPINGS/VENDORS/ADOBE/symbol.txt | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ big5-0)
+ cat ../MAPPINGS/EASTASIA/OTHER/BIG5.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ gb2312.1980-0)
+ cat ../MAPPINGS/EASTASIA/GB/GB2312.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ jisx0212.1990-0)
+ cat ../MAPPINGS/EASTASIA/JIS/JIS0212.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ jisx0208.1983-0)
+ cat ../MAPPINGS/EASTASIA/JIS/JIS0208.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ jisx0201.1976-0)
+ cat ../MAPPINGS/EASTASIA/JIS/JIS0201.TXT | \
+ ./convert_map "${enc}" > ${enc}.txt
+ ;;
+ esac
+ nm=`echo ${enc} |tr '.' '_' | tr '-' '_'` # identifier form: '.' and '-' become '_'
+ cat ${enc}.txt | sort | uniq | \
+ ./create_table "${nm}" >> ../headers/${nm}_.h 2>> ../headers/tbl.txt
+ rm -f ${enc}.txt
+ enc=" "
+done
+# Start ../ucs2fontmap.c with the licence header.
+cat > ../ucs2fontmap.c << ENDOFTEXT
+/******************************************************************************
+ Copyright 2000-2001 by O'ksi'D
+
+Permission to use, copy, modify, distribute, and sell this software
+and its documentation for any purpose is hereby granted without fee,
+provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear
+in supporting documentation, and that the name of O'ksi'D
+not be used in advertising or publicity pertaining to distribution
+of the software without specific, written prior permission.
+O'ksi'D makes no representations about the suitability of
+this software for any purpose. It is provided "as is" without
+express or implied warranty.
+
+O'ksi'D DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
+SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
+IN NO EVENT SHALL O'ksi'D BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
+
+ Author: Jean-Marc Lienher ( http://oksid.ch )
+
+******************************************************************************/
+
+ENDOFTEXT
+# Add one #include per header found in ../headers/, using paths relative to
+# the parent directory (hence the "cd ..").
+he=`cd ..; ls headers/*.h`
+for hea in ${he}
+do
+ echo "#include \"${hea}\"" >> ../ucs2fontmap.c
+done
+# Append the output of conv_gen, which is fed the collected table list.
+
+cat ../headers/tbl.txt | ./conv_gen >> ../ucs2fontmap.c
+
+