From a128b7c95aad0a287b57a110034ed81b4166ca78 Mon Sep 17 00:00:00 2001 From: Matthias Melcher Date: Sat, 1 Nov 2025 21:23:18 +0100 Subject: Fix UTF-8 documentation; Fix Unicode buffer allocation --- src/drivers/WinAPI/Fl_WinAPI_System_Driver.cxx | 2 +- src/fl_utf8.cxx | 40 ++++++++++++++++---------- 2 files changed, 26 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/drivers/WinAPI/Fl_WinAPI_System_Driver.cxx b/src/drivers/WinAPI/Fl_WinAPI_System_Driver.cxx index 19b01190b..9eab455df 100644 --- a/src/drivers/WinAPI/Fl_WinAPI_System_Driver.cxx +++ b/src/drivers/WinAPI/Fl_WinAPI_System_Driver.cxx @@ -190,7 +190,7 @@ char *Fl_WinAPI_System_Driver::utf2mbcs(const char *utf8) { unsigned len = (unsigned)strlen(utf8); unsigned wn = fl_utf8toUtf16(utf8, len, NULL, 0) + 7; // Query length - mbwbuf = (wchar_t *)realloc(mbwbuf, sizeof(wchar_t) * (wn+1)); + mbwbuf = (wchar_t *)realloc(mbwbuf, sizeof(wchar_t) * wn); len = fl_utf8toUtf16(utf8, len, (unsigned short *)mbwbuf, wn); // Convert string mbwbuf[len] = 0; diff --git a/src/fl_utf8.cxx b/src/fl_utf8.cxx index 3fa29aeab..cfca03732 100644 --- a/src/fl_utf8.cxx +++ b/src/fl_utf8.cxx @@ -69,17 +69,26 @@ static int Toupper(int ucs) { \code{.cpp} #include <FL/fl_utf8.h> - char utf8_string[] = "Hello 世界"; - char *p = utf8_string; - - while (*p) { - int len = fl_utf8len(*p); - if (len == -1) { - printf("Invalid UTF-8 byte: 0x%02x\n", (unsigned char)*p); - p++; // Skip invalid byte - } else { - printf("Character uses %d bytes\n", len); - p += len; // Move to next character + bool test(const char *str) { + if (str == nullptr) return true; + const char *src = str; + for (int p = 0; ; p++) { + if (*src == 0) return true; + int len = fl_utf8len(*src); + if (len == -1) { + printf("Invalid UTF-8 character start: 0x%02x\n", (unsigned char)*src); + return false; + } else { + while (len > 0) { + if (*src == 0) { + printf("Interrupted UTF-8 sequence at %d\n", (int)(src-str)); + return false; + } + src++; + len--; + } + printf("Character %d 
at %d uses %d bytes\n", p, (int)(src-str), len); + } } } \endcode @@ -113,10 +122,11 @@ int fl_utf8len(char c) /** - Returns the byte length of a UTF-8 sequence, or -1. + Returns the byte length of the UTF-8 sequence with first byte \p c, + or 1 if \p c is not valid. - This function can be used to scan faulty UTF-8 sequences, albeit - ignoring invalid codes. + This function can be used to scan faulty UTF-8 sequences, albeit + ignoring invalid codes. Example: \code @@ -132,7 +142,7 @@ int fl_utf8len(char c) } \endcode - \param[in] c the first character in a UTF- sequence + \param[in] c the first character in a UTF-8 sequence \return the number of bytes in that sequence, or 1 if c is not a recognized character for UTF-8 style encoding, so a loop can continue to scan a string. -- cgit v1.2.3