diff options
| -rw-r--r-- | CHANGES | 2 | ||||
| -rw-r--r-- | FL/Fl_Text_Buffer.H | 21 | ||||
| -rw-r--r-- | FL/fl_utf8.h | 7 | ||||
| -rw-r--r-- | src/Fl_Text_Buffer.cxx | 6 | ||||
| -rw-r--r-- | src/Fl_Text_Display.cxx | 10 | ||||
| -rw-r--r-- | src/fl_utf8.cxx | 43 |
6 files changed, 52 insertions, 37 deletions
@@ -1,5 +1,7 @@ CHANGES IN FLTK 1.3.0 + - Fixed crashes when detecting illegal UTF-8 sequences + in Fl_Text_* widgets (STR #2348) - Fixed Fl_Text_Display Tabulator calculations (STR #2450) - Fixed file access code to use UTF-8 strings (STR #2440) - Fixed ARM Unicode cross compilation issue (STR #2432) diff --git a/FL/Fl_Text_Buffer.H b/FL/Fl_Text_Buffer.H index 29ca2cd9d..3cc65da8d 100644 --- a/FL/Fl_Text_Buffer.H +++ b/FL/Fl_Text_Buffer.H @@ -34,7 +34,7 @@ #define FL_TEXT_BUFFER_H -#define ASSERT_UTF8 +#undef ASSERT_UTF8 #ifdef ASSERT_UTF8 # include <assert.h> @@ -47,22 +47,11 @@ /* - Suggested UTF-8 terminology for this file: - - ?? "length" is the number of characters in a string - ?? "size" is the number of bytes - ?? "index" is the position in a string in number of characters - ?? "offset" is the position in a string in bytes (and must be kept on a charater boundary) - (there seems to be no standard in Uncode documents, howevere "length" is commonly - referencing the number of bytes. Maybe "bytes" and "glyphs" would be the most - obvious way to describe sizes?) - "character size" is the size of a UTF-8 character in bytes - "character width" is the width of a Unicode character in pixels - - "column" was orginally defined as a character offset from the left margin. It was - identical to the byte offset. In UTF-8, we have neither a byte offset nor - truly fixed width fonts (*). Column could be a pixel value multiplied with + "character width" is the width of a Unicode character in pixels + "column" was originally defined as a character offset from the left margin. + It was identical to the byte offset. In UTF-8, we have neither a byte offset + nor truly fixed width fonts (*). Column could be a pixel value multiplied with an average character width (which is a bearable approximation). * in Unicode, there are no fixed width fonts! 
Even if the ASCII characters may diff --git a/FL/fl_utf8.h b/FL/fl_utf8.h index fd54b3350..22f8ade0b 100644 --- a/FL/fl_utf8.h +++ b/FL/fl_utf8.h @@ -99,13 +99,16 @@ FL_EXPORT int fl_utf8bytes(unsigned ucs); /* OD: returns the byte length of the first UTF-8 char sequence (returns -1 if not valid) */ FL_EXPORT int fl_utf8len(char c); - + +/* OD: returns the byte length of the first UTF-8 char sequence (returns +1 if not valid) */ +FL_EXPORT int fl_utf8len1(char c); + /* OD: returns the number of Unicode chars in the UTF-8 string */ FL_EXPORT int fl_utf_nb_char(const unsigned char *buf, int len); /* F2: Convert the next UTF8 char-sequence into a Unicode value (and say how many bytes were used) */ FL_EXPORT unsigned fl_utf8decode(const char* p, const char* end, int* len); - + /* F2: Encode a Unicode value into a UTF8 sequence, return the number of bytes used */ FL_EXPORT int fl_utf8encode(unsigned ucs, char* buf); diff --git a/src/Fl_Text_Buffer.cxx b/src/Fl_Text_Buffer.cxx index c70ae1692..d1aea36f1 100644 --- a/src/Fl_Text_Buffer.cxx +++ b/src/Fl_Text_Buffer.cxx @@ -1025,7 +1025,7 @@ int Fl_Text_Buffer::search_forward(int startPos, const char *searchString, *foundPos = startPos; return 1; } - int l = fl_utf8len(c); + int l = fl_utf8len1(c); if (memcmp(sp, address(bp), l)) break; sp += l; bp += l; @@ -1077,7 +1077,7 @@ int Fl_Text_Buffer::search_backward(int startPos, const char *searchString, *foundPos = startPos; return 1; } - int l = fl_utf8len(c); + int l = fl_utf8len1(c); if (memcmp(sp, address(bp), l)) break; sp += l; bp += l; @@ -1602,7 +1602,7 @@ int Fl_Text_Buffer::prev_char(int pos) const int Fl_Text_Buffer::next_char(int pos) const { IS_UTF8_ALIGNED2(this, (pos)) - int n = fl_utf8len(byte_at(pos)); + int n = fl_utf8len1(byte_at(pos)); pos += n; if (pos>=mLength) return mLength; diff --git a/src/Fl_Text_Display.cxx b/src/Fl_Text_Display.cxx index d54bbcca9..c55a0d17c 100644 --- a/src/Fl_Text_Display.cxx +++ b/src/Fl_Text_Display.cxx @@ -753,7 +753,7 @@ void 
Fl_Text_Display::overstrike(const char* text) { /* determine how many displayed character positions are covered */ startIndent = mBuffer->count_displayed_characters( lineStart, startPos ); indent = startIndent; - for ( c = text; *c != '\0'; c += fl_utf8len(*c) ) + for ( c = text; *c != '\0'; c += fl_utf8len1(*c) ) indent++; endIndent = indent; @@ -1735,7 +1735,7 @@ int Fl_Text_Display::handle_vline( style = position_style(lineStartPos, lineLen, 0); for (i=0; i<lineLen; ) { currChar = lineStr[i]; // one byte is enough to handele tabs and other cases - int len = fl_utf8len(currChar); + int len = fl_utf8len1(currChar); if (len<=0) len = 1; // OUCH! charStyle = position_style(lineStartPos, lineLen, i); if (charStyle!=style || currChar=='\t' || prevChar=='\t') { @@ -1829,7 +1829,7 @@ int Fl_Text_Display::find_x(const char *s, int len, int style, int x) const { // TODO: use binary search which may be quicker. int i = 0; while (i<len) { - int cl = fl_utf8len(s[i]); + int cl = fl_utf8len1(s[i]); int w = int( string_width(s, i+cl, style) ); if (w>x) return i; @@ -3204,7 +3204,7 @@ double Fl_Text_Display::measure_proportional_character(const char *s, int xPix, return (((xPix/tab)+1)*tab) - xPix; } - int charLen = fl_utf8len(*s), style = 0; + int charLen = fl_utf8len1(*s), style = 0; if (mStyleBuffer) { style = mStyleBuffer->byte_at(pos); } @@ -3284,7 +3284,7 @@ int Fl_Text_Display::wrap_uses_character(int lineEndPos) const { c = buffer()->char_at(lineEndPos); return c == '\n' || ((c == '\t' || c == ' ') && - lineEndPos + fl_utf8len(c) < buffer()->length()); + lineEndPos + fl_utf8len1(c) < buffer()->length()); } diff --git a/src/fl_utf8.cxx b/src/fl_utf8.cxx index 94aff0fb8..ccbe98e95 100644 --- a/src/fl_utf8.cxx +++ b/src/fl_utf8.cxx @@ -112,9 +112,11 @@ Toupper( } /** - return the byte length of the UTF-8 sequence with first byte \p c, - or -1 if \p c is not valid. - */ + return the byte length of the UTF-8 sequence with first byte \p c, + or -1 if \p c is not valid. 
+ This function is helpful for finding faulty UTF-8 sequences. + \see fl_utf8len1 + */ int fl_utf8len(char c) { if (!(c & 0x80)) return 1; @@ -137,15 +139,34 @@ int fl_utf8len(char c) } // fl_utf8len -#if 0 -int fl_utflen( - const unsigned char *buf, - int len) +/** + Return the byte length of the UTF-8 sequence with first byte \p c, + or 1 if \p c is not valid. + This function can be used to scan faulty UTF-8 sequences, albeit ignoring invalid + codes. + \see fl_utf8len + */ +int fl_utf8len1(char c) { - unsigned int ucs; - return fl_utf2ucs(buf, len, &ucs); -} -#endif + if (!(c & 0x80)) return 1; + if (c & 0x40) { + if (c & 0x20) { + if (c & 0x10) { + if (c & 0x08) { + if (c & 0x04) { + return 6; + } + return 5; + } + return 4; + } + return 3; + } + return 2; + } + return 1; +} // fl_utf8len1 + /** returns the number of Unicode chars in the UTF-8 string |
