From 1bac8a0ccae1f8993714e795d7da2e78245182d2 Mon Sep 17 00:00:00 2001
From: Matthias Melcher <fltk@matthiasm.com>
Date: Mon, 6 Dec 2010 18:22:22 +0000
Subject: Fixed crashes when Fl_Text_* detects illegal UTF-8 sequences. Widgets
 will not do any further processing but just jump over the character. Screen
 representation depends largely on whatever the underlying OS does with those
 sequences, but I feel that this is out of the scope of this library. (STR
 2348)

git-svn-id: file:///fltk/svn/fltk/branches/branch-1.3@7965 ea41ed52-d2ee-0310-a9c1-e6b18d33e121
---
 src/fl_utf8.cxx | 43 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 32 insertions(+), 11 deletions(-)

(limited to 'src/fl_utf8.cxx')

diff --git a/src/fl_utf8.cxx b/src/fl_utf8.cxx
index 94aff0fb8..ccbe98e95 100644
--- a/src/fl_utf8.cxx
+++ b/src/fl_utf8.cxx
@@ -112,9 +112,11 @@ Toupper(
 }
 
 /**
-  return the byte length of the UTF-8 sequence with first byte \p c,
-  or -1 if \p c is not valid.
-  */
+ Return the byte length of the UTF-8 sequence with first byte \p c,
+ or -1 if \p c is not valid.
+ This function is helpful for finding faulty UTF-8 sequences.
+ \see fl_utf8len1
+ */
 int fl_utf8len(char c)
 {
   if (!(c & 0x80)) return 1;
@@ -137,15 +139,34 @@ int fl_utf8len(char c)
 } // fl_utf8len
 
 
-#if 0
-int fl_utflen(
-        const unsigned char     *buf,
-        int                     len)
+/**
+ Return the byte length of the UTF-8 sequence with first byte \p c,
+ or 1 if \p c is not valid.
+ This function can be used to scan faulty UTF-8 sequences: an invalid first
+ byte is not reported but counted as a sequence of length 1.
+ \see fl_utf8len
+ */
+int fl_utf8len1(char c)
 {
-	unsigned int ucs;
-	return fl_utf2ucs(buf, len, &ucs);
-}
-#endif
+  if (!(c & 0x80)) return 1;
+  if (c & 0x40) {
+    if (c & 0x20) {
+      if (c & 0x10) {
+        if (c & 0x08) {
+          if (c & 0x04) {
+            return 6;
+          }
+          return 5;
+        }
+        return 4;
+      }
+      return 3;
+    }
+    return 2;
+  }
+  return 1;
+} // fl_utf8len1
+
 
 /**
   returns the number of Unicode chars in the UTF-8 string
-- 
cgit v1.2.3