diff options
| author | Fabien Costantini <fabien@onepost.net> | 2008-11-05 01:55:17 +0000 |
|---|---|---|
| committer | Fabien Costantini <fabien@onepost.net> | 2008-11-05 01:55:17 +0000 |
| commit | 8094043bcb0a2b010bae7cfc461fd3886f710383 (patch) | |
| tree | 4147cdcd9cd516d4ee08222a6c938262566d42fd /src | |
| parent | a26fe4d3200b0dd2f1edf60161579a72d1296ba5 (diff) | |
Added fast utf8 string detection routine fl_is_valid_utf8(). Solves the drawing problem in #2080 related fl_expand_text() use. The particularity of this routine is that the complete scan is cached so that it executes 0 to 1 time in the fl_expand_text() loop. Now chinese example in str 2080 seems to works perfectly here. Still, incorrect windows title in Win32 need to be adressed.
git-svn-id: file:///fltk/svn/fltk/branches/branch-1.3@6497 ea41ed52-d2ee-0310-a9c1-e6b18d33e121
Diffstat (limited to 'src')
| -rw-r--r-- | src/fl_draw.cxx | 56 |
1 files changed, 54 insertions, 2 deletions
diff --git a/src/fl_draw.cxx b/src/fl_draw.cxx index e4a617b85..3815ab657 100644 --- a/src/fl_draw.cxx +++ b/src/fl_draw.cxx @@ -55,6 +55,56 @@ static char* underline_at; // Sets n to the number of characters put into the buffer. // Sets width to the width of the string in the current font. +#define C_IN(c,a,b) ((c)>=(a) && (c)<=(b)) +#define C_UTF8(c) C_IN(c,0x80,0xBF) + +/** fast utf8 string detection routine. \reval 0 if not utf8, 1 otherwise */ +int fl_is_valid_utf8(int& init_scan, int& scan_ret, const char* s) { + if (init_scan) return scan_ret; // scan only once the string + init_scan=1; + if ( !s || !(*s) ) return 0; + + register const unsigned char* p=(const unsigned char*)s; + while (*p) { + if ( p[0]==0x09 || p[0]==0x0d || p[0]==0x0a || (p[0]>0x1f && p[0]<0x80) ) { + p++; + continue; // Ascii + } + if ( C_IN(p[0], 0xc2, 0xdf) && C_UTF8(p[1]) ) { + p+=2; + continue; // non-overlong 2-byte + } + if ( p[0]==0xe0 && C_IN(p[1], 0xa0, 0xbf) && C_UTF8(p[2]) ) { + p+=3; + continue; // excluding overlongs + } + if (p[0]==0xed && C_IN(p[1], 0x80, 0x9f) && C_UTF8(p[2]) ) { + p+=3; + continue; // excluding surrogates + } + if (p[0]!=0xed && C_IN(p[0], 0xe1, 0xef) && C_UTF8(p[1]) && C_UTF8(p[2]) ) { + p+=3; + continue; // straight 3-byte + } + if (p[0]==0xf0 && C_IN(p[1], 0x90, 0xbf) && C_UTF8(p[2]) && C_UTF8(p[3]) ) { + p+=4; + continue; // planes 1-3 + } + if (C_IN(p[0], 0xf1, 0xf3) && C_UTF8(p[1]) && C_UTF8(p[2]) && C_UTF8(p[3]) ) { + p+=4; + continue; // planes 4-15 + } + if (p[0]==0xf4 && C_IN(p[1], 0x80, 0x8f) && C_UTF8(p[2]) && C_UTF8(p[3]) ) { + p+=4; + continue; // planes 16 + } + scan_ret=0; + return scan_ret; // not utf8 + } + scan_ret=1; + return scan_ret; +} + const char* fl_expand_text(const char* from, char* buf, int maxbuf, double maxw, int& n, double &width, int wrap, int draw_symbols) { @@ -65,6 +115,8 @@ fl_expand_text(const char* from, char* buf, int maxbuf, double maxw, int& n, const char* word_start = from; double w = 0; + int init_scan=0, scan_ret; + const char* p = from; for (;; p++) { @@ -99,9 +151,9 @@ fl_expand_text(const char* from, char* buf, int maxbuf, double maxw, int& n, *o++ = '^'; *o++ = c ^ 0x40; #ifdef __APPLE__ - } else if (c == 0xCA) { // non-breaking space in MacRoman + } else if (c == 0xCA && !fl_is_valid_utf8(init_scan, scan_ret,from) ) { // non-breaking space in MacRoman #else - } else if (c == 0xA0) { // non-breaking space in ISO 8859 + } else if (c == 0xA0 && !fl_is_valid_utf8(init_scan, scan_ret,from) ) { // non-breaking space in ISO 8859 #endif *o++ = ' '; } else if (c == '@' && draw_symbols) { // Symbol??? |
