summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGES2
-rw-r--r--FL/Fl_Text_Buffer.H21
-rw-r--r--FL/fl_utf8.h7
-rw-r--r--src/Fl_Text_Buffer.cxx6
-rw-r--r--src/Fl_Text_Display.cxx10
-rw-r--r--src/fl_utf8.cxx43
6 files changed, 52 insertions, 37 deletions
diff --git a/CHANGES b/CHANGES
index 7cef1a91d..d2347a5f5 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,5 +1,7 @@
CHANGES IN FLTK 1.3.0
+ - Fixed crashes when detecting illegal UTF-8 sequences
+ in Fl_Text_* widgets (STR #2348)
- Fixed Fl_Text_Display Tabulator calculations (STR #2450)
- Fixed file access code to use UTF-8 strings (STR #2440)
- Fixed ARM Unicode cross compilation issue (STR #2432)
diff --git a/FL/Fl_Text_Buffer.H b/FL/Fl_Text_Buffer.H
index 29ca2cd9d..3cc65da8d 100644
--- a/FL/Fl_Text_Buffer.H
+++ b/FL/Fl_Text_Buffer.H
@@ -34,7 +34,7 @@
#define FL_TEXT_BUFFER_H
-#define ASSERT_UTF8
+#undef ASSERT_UTF8
#ifdef ASSERT_UTF8
# include <assert.h>
@@ -47,22 +47,11 @@
/*
- Suggested UTF-8 terminology for this file:
-
- ?? "length" is the number of characters in a string
- ?? "size" is the number of bytes
- ?? "index" is the position in a string in number of characters
- ?? "offset" is the position in a string in bytes (and must be kept on a charater boundary)
- (there seems to be no standard in Uncode documents, howevere "length" is commonly
- referencing the number of bytes. Maybe "bytes" and "glyphs" would be the most
- obvious way to describe sizes?)
-
"character size" is the size of a UTF-8 character in bytes
- "character width" is the width of a Unicode character in pixels
-
- "column" was orginally defined as a character offset from the left margin. It was
- identical to the byte offset. In UTF-8, we have neither a byte offset nor
- truly fixed width fonts (*). Column could be a pixel value multiplied with
+ "character width" is the width of a Unicode character in pixels
+ "column" was originally defined as a character offset from the left margin.
+ It was identical to the byte offset. In UTF-8, we have neither a byte offset
+ nor truly fixed width fonts (*). Column could be a pixel value multiplied with
an average character width (which is a bearable approximation).
* in Unicode, there are no fixed width fonts! Even if the ASCII characters may
diff --git a/FL/fl_utf8.h b/FL/fl_utf8.h
index fd54b3350..22f8ade0b 100644
--- a/FL/fl_utf8.h
+++ b/FL/fl_utf8.h
@@ -99,13 +99,16 @@ FL_EXPORT int fl_utf8bytes(unsigned ucs);
/* OD: returns the byte length of the first UTF-8 char sequence (returns -1 if not valid) */
FL_EXPORT int fl_utf8len(char c);
-
+
+/* OD: returns the byte length of the UTF-8 char sequence starting with byte c (returns 1 if c is not valid) */
+FL_EXPORT int fl_utf8len1(char c);
+
/* OD: returns the number of Unicode chars in the UTF-8 string */
FL_EXPORT int fl_utf_nb_char(const unsigned char *buf, int len);
/* F2: Convert the next UTF8 char-sequence into a Unicode value (and say how many bytes were used) */
FL_EXPORT unsigned fl_utf8decode(const char* p, const char* end, int* len);
-
+
/* F2: Encode a Unicode value into a UTF8 sequence, return the number of bytes used */
FL_EXPORT int fl_utf8encode(unsigned ucs, char* buf);
diff --git a/src/Fl_Text_Buffer.cxx b/src/Fl_Text_Buffer.cxx
index c70ae1692..d1aea36f1 100644
--- a/src/Fl_Text_Buffer.cxx
+++ b/src/Fl_Text_Buffer.cxx
@@ -1025,7 +1025,7 @@ int Fl_Text_Buffer::search_forward(int startPos, const char *searchString,
*foundPos = startPos;
return 1;
}
- int l = fl_utf8len(c);
+ int l = fl_utf8len1(c);
if (memcmp(sp, address(bp), l))
break;
sp += l; bp += l;
@@ -1077,7 +1077,7 @@ int Fl_Text_Buffer::search_backward(int startPos, const char *searchString,
*foundPos = startPos;
return 1;
}
- int l = fl_utf8len(c);
+ int l = fl_utf8len1(c);
if (memcmp(sp, address(bp), l))
break;
sp += l; bp += l;
@@ -1602,7 +1602,7 @@ int Fl_Text_Buffer::prev_char(int pos) const
int Fl_Text_Buffer::next_char(int pos) const
{
IS_UTF8_ALIGNED2(this, (pos))
- int n = fl_utf8len(byte_at(pos));
+ int n = fl_utf8len1(byte_at(pos));
pos += n;
if (pos>=mLength)
return mLength;
diff --git a/src/Fl_Text_Display.cxx b/src/Fl_Text_Display.cxx
index d54bbcca9..c55a0d17c 100644
--- a/src/Fl_Text_Display.cxx
+++ b/src/Fl_Text_Display.cxx
@@ -753,7 +753,7 @@ void Fl_Text_Display::overstrike(const char* text) {
/* determine how many displayed character positions are covered */
startIndent = mBuffer->count_displayed_characters( lineStart, startPos );
indent = startIndent;
- for ( c = text; *c != '\0'; c += fl_utf8len(*c) )
+ for ( c = text; *c != '\0'; c += fl_utf8len1(*c) )
indent++;
endIndent = indent;
@@ -1735,7 +1735,7 @@ int Fl_Text_Display::handle_vline(
style = position_style(lineStartPos, lineLen, 0);
for (i=0; i<lineLen; ) {
currChar = lineStr[i]; // one byte is enough to handele tabs and other cases
- int len = fl_utf8len(currChar);
+ int len = fl_utf8len1(currChar);
if (len<=0) len = 1; // OUCH!
charStyle = position_style(lineStartPos, lineLen, i);
if (charStyle!=style || currChar=='\t' || prevChar=='\t') {
@@ -1829,7 +1829,7 @@ int Fl_Text_Display::find_x(const char *s, int len, int style, int x) const {
// TODO: use binary search which may be quicker.
int i = 0;
while (i<len) {
- int cl = fl_utf8len(s[i]);
+ int cl = fl_utf8len1(s[i]);
int w = int( string_width(s, i+cl, style) );
if (w>x)
return i;
@@ -3204,7 +3204,7 @@ double Fl_Text_Display::measure_proportional_character(const char *s, int xPix,
return (((xPix/tab)+1)*tab) - xPix;
}
- int charLen = fl_utf8len(*s), style = 0;
+ int charLen = fl_utf8len1(*s), style = 0;
if (mStyleBuffer) {
style = mStyleBuffer->byte_at(pos);
}
@@ -3284,7 +3284,7 @@ int Fl_Text_Display::wrap_uses_character(int lineEndPos) const {
c = buffer()->char_at(lineEndPos);
return c == '\n' || ((c == '\t' || c == ' ') &&
- lineEndPos + fl_utf8len(c) < buffer()->length());
+ lineEndPos + fl_utf8len1(c) < buffer()->length());
}
diff --git a/src/fl_utf8.cxx b/src/fl_utf8.cxx
index 94aff0fb8..ccbe98e95 100644
--- a/src/fl_utf8.cxx
+++ b/src/fl_utf8.cxx
@@ -112,9 +112,11 @@ Toupper(
}
/**
- return the byte length of the UTF-8 sequence with first byte \p c,
- or -1 if \p c is not valid.
- */
+ return the byte length of the UTF-8 sequence with first byte \p c,
+ or -1 if \p c is not valid.
+ This function is helpful for finding faulty UTF-8 sequences.
+ \see fl_utf8len1
+ */
int fl_utf8len(char c)
{
if (!(c & 0x80)) return 1;
@@ -137,15 +139,34 @@ int fl_utf8len(char c)
} // fl_utf8len
-#if 0
-int fl_utflen(
- const unsigned char *buf,
- int len)
+/**
+ Return the byte length of the UTF-8 sequence with first byte \p c,
+ or 1 if \p c is not valid.
+ This function can be used to scan faulty UTF-8 sequences, albeit ignoring invalid
+ codes.
+ \see fl_utf8len
+ */
+int fl_utf8len1(char c)
{
- unsigned int ucs;
- return fl_utf2ucs(buf, len, &ucs);
-}
-#endif
+ if (!(c & 0x80)) return 1;
+ if (c & 0x40) {
+ if (c & 0x20) {
+ if (c & 0x10) {
+ if (c & 0x08) {
+ if (c & 0x04) {
+ return 6;
+ }
+ return 5;
+ }
+ return 4;
+ }
+ return 3;
+ }
+ return 2;
+ }
+ return 1;
+} // fl_utf8len1
+
/**
returns the number of Unicode chars in the UTF-8 string