summaryrefslogtreecommitdiff
path: root/src/fl_utf8.cxx
diff options
context:
space:
mode:
authorMatthias Melcher <github@matthiasm.com>2025-11-01 21:23:18 +0100
committerMatthias Melcher <github@matthiasm.com>2025-11-01 21:23:18 +0100
commita128b7c95aad0a287b57a110034ed81b4166ca78 (patch)
tree532baf02f2b197b353765682d6237f4bee2fdb35 /src/fl_utf8.cxx
parentccbb424046c76e6b41c0a0f3c3804e53c359d169 (diff)
Fix UTF-8 documentation
Fix Unicode buffer allocation
Diffstat (limited to 'src/fl_utf8.cxx')
-rw-r--r--src/fl_utf8.cxx40
1 files changed, 25 insertions, 15 deletions
diff --git a/src/fl_utf8.cxx b/src/fl_utf8.cxx
index 3fa29aeab..cfca03732 100644
--- a/src/fl_utf8.cxx
+++ b/src/fl_utf8.cxx
@@ -69,17 +69,26 @@ static int Toupper(int ucs) {
\code{.cpp}
#include <FL/fl_utf8.h>
- char utf8_string[] = "Hello 世界";
- char *p = utf8_string;
-
- while (*p) {
- int len = fl_utf8len(*p);
- if (len == -1) {
- printf("Invalid UTF-8 byte: 0x%02x\n", (unsigned char)*p);
- p++; // Skip invalid byte
- } else {
- printf("Character uses %d bytes\n", len);
- p += len; // Move to next character
+ // Walks a NUL-terminated string one UTF-8 sequence at a time, printing the
+ // index, start offset and byte length of each sequence. Returns true when
+ // the terminator is reached (or str is null), false as soon as
+ // fl_utf8len() reports -1 or a sequence is cut short by the terminator.
+ bool test(const char *str) {
+   if (str == nullptr) return true;
+   const char *src = str;
+   for (int p = 0; ; p++) {
+     if (*src == 0) return true;  // test the byte, not the pointer
+     int len = fl_utf8len(*src);
+     if (len == -1) {
+       printf("Invalid UTF-8 character start: 0x%02x\n", (unsigned char)*src);
+       return false;
+     }
+     const char *start = src;  // keep the start so the report is not off by len
+     for (int i = 0; i < len; i++) {
+       if (*src == 0) {
+         printf("Interrupted UTF-8 sequence at %d\n", (int)(src-str));
+         return false;
+       }
+       src++;
+     }
+     // len is left untouched by the loop above, so the real length is printed
+     printf("Character %d at %d uses %d bytes\n", p, (int)(start-str), len);
+   }
+ }
\endcode
@@ -113,10 +122,11 @@ int fl_utf8len(char c)
/**
- Returns the byte length of a UTF-8 sequence, or -1.
+ Returns the byte length of the UTF-8 sequence with first byte \p c,
+ or 1 if \p c is not valid.
- This function can be used to scan faulty UTF-8 sequences, albeit
- ignoring invalid codes.
+ This function can be used to scan faulty UTF-8 sequences, albeit
+ ignoring invalid codes.
Example:
\code
@@ -132,7 +142,7 @@ int fl_utf8len(char c)
}
\endcode
- \param[in] c the first character in a UTF- sequence
+ \param[in] c the first character in a UTF-8 sequence
\return the number of bytes in that sequence, or 1 if c is not a recognized
character for UTF-8 style encoding, so a loop can continue to scan a string.