// // "$Id$" // // Unicode to UTF-8 conversion functions. // // Author: Jean-Marc Lienher ( http://oksid.ch ) // Copyright 2000-2010 by O'ksi'D. // Copyright 2016 by Bill Spitzak and others. // // This library is free software. Distribution and use rights are outlined in // the file "COPYING" which should have been included with this file. If this // file is missing or damaged, see the license at: // // http://www.fltk.org/COPYING.php // // Please report all bugs and problems on the following page: // // http://www.fltk.org/str.php // #include #include #include #include #include #include #undef fl_open extern "C" { int XUtf8Tolower(int ucs); // in src/xutf8/case.c unsigned short XUtf8IsNonSpacing(unsigned int ucs); // in src/xutf8/is_spacing.c } /** \addtogroup fl_unicode @{ */ // *** NOTE : All functions are LIMITED to 24 bits Unicode values !!! *** // *** But only 16 bits are really used under Linux and win32 *** #define NBC 0xFFFF + 1 static int Toupper(int ucs) { long i; static unsigned short *table = NULL; if (!table) { table = (unsigned short*) malloc( sizeof(unsigned short) * (NBC)); for (i = 0; i < NBC; i++) { table[i] = (unsigned short) i; } for (i = 0; i < NBC; i++) { int l; l = XUtf8Tolower(i); if (l != i) table[l] = (unsigned short) i; } } if (ucs >= NBC || ucs < 0) return ucs; return table[ucs]; } /** Returns the byte length of the UTF-8 sequence with first byte \p c, or -1 if \p c is not valid. This function is helpful for finding faulty UTF-8 sequences. \see fl_utf8len1 */ int fl_utf8len(char c) { if (!(c & 0x80)) return 1; if (c & 0x40) { if (c & 0x20) { if (c & 0x10) { if (c & 0x08) { if (c & 0x04) { return 6; } return 5; } return 4; } return 3; } return 2; } return -1; } // fl_utf8len /** Returns the byte length of the UTF-8 sequence with first byte \p c, or 1 if \p c is not valid. This function can be used to scan faulty UTF-8 sequences, albeit ignoring invalid codes. \see fl_utf8len */ int fl_utf8len1(char c) { if (!(c & 0x80)) return 1; if (c & 0x40) { if (c & 0x20) { if (c & 0x10) { if (c & 0x08) { if (c & 0x04) { return 6; } return 5; } return 4; } return 3; } return 2; } return 1; } // fl_utf8len1 /** Returns the number of Unicode chars in the UTF-8 string. */ int fl_utf_nb_char( const unsigned char *buf, int len) { int i = 0; int nbc = 0; while (i < len) { int cl = fl_utf8len((buf+i)[0]); if (cl < 1) cl = 1; nbc++; i += cl; } return nbc; } /** UTF-8 aware strncasecmp - converts to lower case Unicode and tests. \param s1, s2 the UTF-8 strings to compare \param n the maximum number of UTF-8 characters to compare \return result of comparison \retval 0 if the strings are equal \retval >0 if s1 is greater than s2 \retval <0 if s1 is less than s2 */ int fl_utf_strncasecmp(const char *s1, const char *s2, int n) { int i; for (i = 0; i < n; i++) { int l1, l2; unsigned int u1, u2; if (*s1==0 && *s2==0) return 0; // all compared equal, return 0 u1 = fl_utf8decode(s1, 0, &l1); u2 = fl_utf8decode(s2, 0, &l2); int res = XUtf8Tolower(u1) - XUtf8Tolower(u2); if (res) return res; s1 += l1; s2 += l2; } return 0; } /** UTF-8 aware strcasecmp - converts to Unicode and tests. \return result of comparison \retval 0 if the strings are equal \retval 1 if s1 is greater than s2 \retval -1 if s1 is less than s2 */ int fl_utf_strcasecmp(const char *s1, const char *s2) { return fl_utf_strncasecmp(s1, s2, 0x7fffffff); } /** Returns the Unicode lower case value of \p ucs. */ int fl_tolower(unsigned int ucs) { return XUtf8Tolower(ucs); } /** Returns the Unicode upper case value of \p ucs. */ int fl_toupper(unsigned int ucs) { return Toupper(ucs); } /** Converts the string \p str to its lower case equivalent into buf. Warning: to be safe buf length must be at least 3 * len [for 16-bit Unicode] */ int fl_utf_tolower(const unsigned char *str, int len, char *buf) { int i; int l = 0; char *end = (char *)&str[len]; for (i = 0; i < len;) { int l1, l2; unsigned int u1; u1 = fl_utf8decode((const char*)(str + i), end, &l1); l2 = fl_utf8encode((unsigned int) XUtf8Tolower(u1), buf + l); if (l1 < 1) { i += 1; } else { i += l1; } if (l2 < 1) { l += 1; } else { l += l2; } } return l; } /** Converts the string \p str to its upper case equivalent into buf. Warning: to be safe buf length must be at least 3 * len [for 16-bit Unicode] */ int fl_utf_toupper(const unsigned char *str, int len, char *buf) { int i; int l = 0; char *end = (char *)&str[len]; for (i = 0; i < len;) { int l1, l2; unsigned int u1; u1 = fl_utf8decode((const char*)(str + i), end, &l1); l2 = fl_utf8encode((unsigned int) Toupper(u1), buf + l); if (l1 < 1) { i += 1; } else { i += l1; } if (l2 < 1) { l += 1; } else { l += l2; } } return l; } /** Returns true if the Unicode character \p ucs is non-spacing. Non-spacing characters in Unicode are typically combining marks like tilde (~), diaeresis (¨), or other marks that are added to a base character, for instance 'a' (base character) + '¨' (combining mark) = 'ä' (German Umlaut). - http://unicode.org/glossary/#base_character - http://unicode.org/glossary/#nonspacing_mark - http://unicode.org/glossary/#combining_character */ unsigned int fl_nonspacing(unsigned int ucs) { return (unsigned int) XUtf8IsNonSpacing(ucs); } /** Converts UTF-8 string \p s to a local multi-byte character string. */ char * fl_utf2mbcs(const char *s) { return Fl_System_Driver::driver()->utf2mbcs(s); } /** Cross-platform function to get environment variables with a UTF-8 encoded name or value. This function is especially useful under the MSWindows platform where non-ASCII environment variables are encoded as wide characters. The returned value of the variable is encoded in UTF-8 as well. On platforms other than MSWindows this function calls getenv directly. The return value is returned as-is. \param[in] v the UTF-8 encoded environment variable \return the environment variable in UTF-8 encoding, or NULL in case of error. */ char *fl_getenv(const char* v) { return Fl_System_Driver::driver()->getenv(v); } /** Cross-platform function to open files with a UTF-8 encoded name. This function is especially useful under the MSWindows platform where the standard open() function fails with UTF-8 encoded non-ASCII filenames. \param f the UTF-8 encoded filename \param oflags other arguments are as in the standard open() function \return a file descriptor upon successful completion, or -1 in case of error. \sa fl_fopen(). */ int fl_open(const char* f, int oflags, ...) { int pmode; va_list ap; va_start(ap, oflags); pmode = va_arg (ap, int); va_end(ap); return Fl_System_Driver::driver()->open(f, oflags, pmode); } /** Cross-platform function to open files with a UTF-8 encoded name. This function is especially useful under the MSWindows platform where the standard fopen() function fails with UTF-8 encoded non-ASCII filenames. \param f the UTF-8 encoded filename \param mode same as the second argument of the standard fopen() function \return a FILE pointer upon successful completion, or NULL in case of error. \sa fl_open(). */ FILE *fl_fopen(const char* f, const char *mode) { return Fl_System_Driver::driver()->fopen(f, mode); } /** Cross-platform function to run a system command with a UTF-8 encoded string. This function is especially useful under the MSWindows platform where non-ASCII program (file) names must be encoded as wide characters. On platforms other than MSWindows this function calls system() directly. \param[in] cmd the UTF-8 encoded command string \return the return value of _wsystem() on Windows or system() on other platforms. */ int fl_system(const char* cmd) { return Fl_System_Driver::driver()->system(cmd); } int fl_execvp(const char *file, char *const *argv) { return Fl_System_Driver::driver()->execvp(file, argv); } /** Cross-platform function to set a files mode() with a UTF-8 encoded name or value. This function is especially useful under the MSWindows platform where the standard chmod() function fails with UTF-8 encoded non-ASCII filenames. \param[in] f the UTF-8 encoded filename \param[in] mode the mode to set \return the return value of _wchmod() on Windows or chmod() on other platforms. */ int fl_chmod(const char* f, int mode) { return Fl_System_Driver::driver()->chmod(f, mode); } /** Cross-platform function to test a files access() with a UTF-8 encoded name or value. This function is especially useful under the MSWindows platform where the standard access() function fails with UTF-8 encoded non-ASCII filenames. \param[in] f the UTF-8 encoded filename \param[in] mode the mode to test \return the return value of _waccess() on Windows or access() on other platforms. */ int fl_access(const char* f, int mode) { return Fl_System_Driver::driver()->access(f, mode); } /** Cross-platform function to stat() a file using a UTF-8 encoded name or value. This function is especially useful under the MSWindows platform where the standard stat() function fails with UTF-8 encoded non-ASCII filenames. \param[in] f the UTF-8 encoded filename \param b the stat struct to populate \return the return value of _wstat() on Windows or stat() on other platforms. */ int fl_stat(const char* f, struct stat *b) { return Fl_System_Driver::driver()->stat(f, b); } // TODO: add fl_chdir if we have fl_getcwd /** Cross-platform function to get the current working directory as a UTF-8 encoded value. This function is especially useful under the MSWindows platform where the standard _wgetcwd() function returns UTF-16 encoded non-ASCII filenames. \param b the buffer to populate \param l the length of the buffer \return the CWD encoded as UTF-8. */ char *fl_getcwd(char* b, int l) { if (b == NULL) { b = (char*) malloc(l+1); } return Fl_System_Driver::driver()->getcwd(b, l); } /** Cross-platform function to unlink() (that is, delete) a file using a UTF-8 encoded filename. This function is especially useful under the MSWindows platform where the standard function expects UTF-16 encoded non-ASCII filenames. \param f the filename to unlink \return the return value of _wunlink() on Windows or unlink() on other platforms. */ int fl_unlink(const char* f) { return Fl_System_Driver::driver()->unlink(f); } /** Cross-platform function to create a directory with a UTF-8 encoded name. This function is especially useful on the MSWindows platform where the standard _wmkdir() function expects UTF-16 encoded non-ASCII filenames. \param[in] f the UTF-8 encoded filename \param[in] mode the mode of the directory \return the return value of _wmkdir() on Windows or mkdir() on other platforms. */ int fl_mkdir(const char* f, int mode) { return Fl_System_Driver::driver()->mkdir(f, mode); } /** Cross-platform function to remove a directory with a UTF-8 encoded name. This function is especially useful on the MSWindows platform where the standard _wrmdir() function expects UTF-16 encoded non-ASCII filenames. \param[in] f the UTF-8 encoded filename to remove \return the return value of _wrmdir() on Windows or rmdir() on other platforms. */ int fl_rmdir(const char* f) { return Fl_System_Driver::driver()->rmdir(f); } /** Cross-platform function to rename a filesystem object using UTF-8 encoded names. This function is especially useful on the MSWindows platform where the standard _wrename() function expects UTF-16 encoded non-ASCII filenames. \param[in] f the UTF-8 encoded filename to change \param[in] n the new UTF-8 encoded filename to set \return the return value of _wrename() on Windows or rename() on other platforms. */ int fl_rename(const char* f, const char *n) { return Fl_System_Driver::driver()->rename(f, n); } /** Cross-platform function to recursively create a path in the file system. This function creates a \p path in the file system by recursively creating all directories. */ char fl_make_path( const char *path ) { if (fl_access(path, 0)) { const char *s = strrchr( path, '/' ); if ( !s ) return 0; size_t len = (size_t) (s-path); char *p = (char*)malloc( len+1 ); memcpy( p, path, len ); p[len] = 0; fl_make_path( p ); free( p ); fl_mkdir(path, 0700); } return 1; } /** Cross-platform function to create a path for the file in the file system. This function strips the filename from the given \p path and creates a path in the file system by recursively creating all directories. */ void fl_make_path_for_file( const char *path ) { const char *s = strrchr( path, '/' ); if ( !s ) return; size_t len = (s-path); char *p = (char*)malloc( len+1 ); memcpy( p, path, len ); p[len] = 0; fl_make_path( p ); free( p ); } // fl_make_path_for_file() /** @} */ // // End of "$Id$". //