Let text widgets gracefully handle composed Unicode characters - Cont'd.

This commit adds support for another type of composed character: flags. It also fixes Fl_Text_Buffer::prev_char() and Fl_Text_Buffer::next_char(), which must use Fl_Text_Buffer::byte_at() to access the content of the text buffer.
author: ManoloFLTK <41016272+ManoloFLTK@users.noreply.github.com> 2026-01-26 09:17:18 +0100
committer: ManoloFLTK <41016272+ManoloFLTK@users.noreply.github.com> 2026-01-26 09:17:18 +0100
commit: 521f7b3c5b39e1dc6c0560869d811a6730554eca (patch)
tree: 33377467ff2ed55d26712d5b71bb19fcaaa9eb07 /src/Fl_Text_Buffer.cxx
parent: 188aa0be0ed5e366a35d521d173c6248137ba8a6 (diff)
1 files changed, 37 insertions, 12 deletions
diff --git a/src/Fl_Text_Buffer.cxx b/src/Fl_Text_Buffer.cxx
index 907a5cce6..848cacd9a 100644
--- a/src/Fl_Text_Buffer.cxx
+++ b/src/Fl_Text_Buffer.cxx
@@ -2080,9 +2080,8 @@ int Fl_Text_Buffer::outputfile(const char *file,
 }
 
 
-/*
- Return the previous character position.
- Unicode safe.
+/**
+ As prev_char() but returns 0 if the beginning of the buffer is reached.
  */
 int Fl_Text_Buffer::prev_char_clipped(int pos) const
 {
@@ -2090,17 +2089,27 @@ int Fl_Text_Buffer::prev_char_clipped(int pos) const
     return 0;
 
   IS_UTF8_ALIGNED2(this, (pos))
-  const char *previous = fl_utf8_previous_composed_char(address(0) + pos, address(0));
-  pos = previous - address(0);
-
+  const int l_t = 40;
+  char t[l_t + 1]; t[l_t] = 0;
+  int l = l_t, p = pos, ll;
+  for (int i = l_t; i > 0 && p > 0; i--) {
+    t[--l] = byte_at(--p);
+    ll = fl_utf8len(t[l]);
+    if (ll == 1 || ll == 2) break;
+  }
+  const char *previous = fl_utf8_previous_composed_char(t + l_t, t + l);
+  ll = strlen(t + l);
+  pos = (pos - ll) + (previous - (t+l));
   IS_UTF8_ALIGNED2(this, (pos))
   return pos;
 }
 
 
-/*
- Return the previous character position.
+/**
+ Returns the index of the previous character.
+ This function processes a composed character (e.g., a flag emoji) as a single character.
  Returns -1 if the beginning of the buffer is reached.
+ \param pos index to the current character
  */
 int Fl_Text_Buffer::prev_char(int pos) const
 {
@@ -2109,15 +2118,31 @@ int Fl_Text_Buffer::prev_char(int pos) const
 }
 
 
-/*
- Return the next character position.
+/**
+ Returns the index of the next character.
+ This function processes a composed character (e.g., a flag emoji) as a single character.
  Returns length() if the end of the buffer is reached.
+ \param pos index to the current character
  */
 int Fl_Text_Buffer::next_char(int pos) const
 {
   IS_UTF8_ALIGNED2(this, (pos))
-  const char *next = fl_utf8_next_composed_char(address(0) + pos, address(0) + mLength);
-  pos = next - address(0);
+  int l = fl_utf8len1(byte_at(pos));
+  if (l > 2) { // test for composed character only if pos is at long codepoint
+    int p = pos, ll, b;
+    char t[40]; // crazyest composed characters I know use 28 bytes in UTF8 (e.g., 🏴󠁧󠁢󠁷󠁬󠁳󠁿)
+    l = 0;
+    // extract bytes after pos stopping after short codepoint or 40 bytes at most
+    while (p < mLength && l < sizeof(t)) {
+      b = byte_at(p++);
+      t[l++] = b;
+      ll = fl_utf8len(b);
+      for (int i = 1; i < ll && l < sizeof(t); i++) t[l++] = byte_at(p++);
+      if (ll == 1 || ll == 2) break; // stop after short codepoint (includes '\n')
+    }
+    l = fl_utf8_next_composed_char(t, t + l) - t; // length of possibly composed character starting at pos
+  }
+  pos += l;
   if (pos>=mLength)
     return mLength;
   IS_UTF8_ALIGNED2(this, (pos))
author	ManoloFLTK <41016272+ManoloFLTK@users.noreply.github.com>	2026-01-26 09:17:18 +0100
committer	ManoloFLTK <41016272+ManoloFLTK@users.noreply.github.com>	2026-01-26 09:17:18 +0100
commit	521f7b3c5b39e1dc6c0560869d811a6730554eca (patch)
tree	33377467ff2ed55d26712d5b71bb19fcaaa9eb07 /src/Fl_Text_Buffer.cxx
parent	188aa0be0ed5e366a35d521d173c6248137ba8a6 (diff)