From a128b7c95aad0a287b57a110034ed81b4166ca78 Mon Sep 17 00:00:00 2001
From: Matthias Melcher <github@matthiasm.com>
Date: Sat, 1 Nov 2025 21:23:18 +0100
Subject: Fix UTF-8 documentation

Fix Unicode buffer allocation
---
 src/drivers/WinAPI/Fl_WinAPI_System_Driver.cxx |  2 +-
 src/fl_utf8.cxx                                | 40 ++++++++++++++++----------
 2 files changed, 26 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/drivers/WinAPI/Fl_WinAPI_System_Driver.cxx b/src/drivers/WinAPI/Fl_WinAPI_System_Driver.cxx
index 19b01190b..9eab455df 100644
--- a/src/drivers/WinAPI/Fl_WinAPI_System_Driver.cxx
+++ b/src/drivers/WinAPI/Fl_WinAPI_System_Driver.cxx
@@ -190,7 +190,7 @@ char *Fl_WinAPI_System_Driver::utf2mbcs(const char *utf8) {
   unsigned len = (unsigned)strlen(utf8);
 
   unsigned wn = fl_utf8toUtf16(utf8, len, NULL, 0) + 7; // Query length
-  mbwbuf = (wchar_t *)realloc(mbwbuf, sizeof(wchar_t) * (wn+1));
+  mbwbuf = (wchar_t *)realloc(mbwbuf, sizeof(wchar_t) * wn);
   len = fl_utf8toUtf16(utf8, len, (unsigned short *)mbwbuf, wn); // Convert string
   mbwbuf[len] = 0;
 
diff --git a/src/fl_utf8.cxx b/src/fl_utf8.cxx
index 3fa29aeab..cfca03732 100644
--- a/src/fl_utf8.cxx
+++ b/src/fl_utf8.cxx
@@ -69,17 +69,26 @@ static int Toupper(int ucs) {
   \code{.cpp}
   #include <FL/fl_utf8.h>
 
-  char utf8_string[] = "Hello 世界";
-  char *p = utf8_string;
-
-  while (*p) {
-    int len = fl_utf8len(*p);
-    if (len == -1) {
-      printf("Invalid UTF-8 byte: 0x%02x\n", (unsigned char)*p);
-      p++;  // Skip invalid byte
-    } else {
-      printf("Character uses %d bytes\n", len);
-      p += len;  // Move to next character
+  bool test(const char *str) {
+    if (str == nullptr) return true;
+    const char *src = str;
+    for (int p = 0; ; p++) {
+      if (*src == 0) return true;
+      int len = fl_utf8len(*src);
+      if (len == -1) {
+        printf("Invalid UTF-8 character start: 0x%02x\n", (unsigned char)*src);
+        return false;
+      } else {
+        for (int i = 0; i < len; i++) {
+          if (src[i] == 0) {
+            printf("Interrupted UTF-8 sequence at %d\n", (int)(src-str)+i);
+            return false;
+          }
+        }
+        // All bytes of the sequence are present: report it, then advance.
+        printf("Character %d at %d uses %d bytes\n", p, (int)(src-str), len);
+        src += len;
+      }
     }
   }
   \endcode
@@ -113,10 +122,11 @@ int fl_utf8len(char c)
 
 
 /**
-  Returns the byte length of a UTF-8 sequence, or -1.
+ Returns the byte length of the UTF-8 sequence with first byte \p c,
+ or 1 if \p c is not valid.
 
-  This function can be used to scan faulty UTF-8 sequences, albeit
-  ignoring invalid codes.
+ This function can be used to scan faulty UTF-8 sequences, albeit
+ ignoring invalid codes.
 
   Example:
   \code
@@ -132,7 +142,7 @@ int fl_utf8len(char c)
   }
   \endcode
 
-  \param[in] c the first character in a UTF- sequence
+  \param[in] c the first character in a UTF-8 sequence
   \return the number of bytes in that sequence, or 1 if c is not a recognized
     character for UTF-8 style encoding, so a loop can continue to scan a string.
 
-- 
cgit v1.2.3