From 64064378309ae7ae92b7c53a7615cfa607cd7907 Mon Sep 17 00:00:00 2001 From: ManoloFLTK <41016272+ManoloFLTK@users.noreply.github.com> Date: Sat, 31 Jan 2026 11:24:29 +0100 Subject: Improve documentation of functions dedicated to processing of emoji sequences. --- src/Fl_Text_Buffer.cxx | 8 ++++---- src/fl_utf8.cxx | 16 +++++++++++----- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/Fl_Text_Buffer.cxx b/src/Fl_Text_Buffer.cxx index b0be2d94c..8f7ae98a5 100644 --- a/src/Fl_Text_Buffer.cxx +++ b/src/Fl_Text_Buffer.cxx @@ -2107,7 +2107,7 @@ int Fl_Text_Buffer::prev_char_clipped(int pos) const /** Returns the index of the previous character. - This function processes a composed character (e.g., a flag emoji) as a single character. + This function processes an emoji sequence (see \ref fl_utf8_next_composed_char) as a single character. Returns -1 if the beginning of the buffer is reached. \param pos index to the current character */ @@ -2120,7 +2120,7 @@ int Fl_Text_Buffer::prev_char(int pos) const /** Returns the index of the next character. - This function processes a composed character (e.g., a flag emoji) as a single character. + This function processes an emoji sequence (see \ref fl_utf8_next_composed_char) as a single character. Returns length() if the end of the buffer is reached. \param pos index to the current character */ @@ -2128,7 +2128,7 @@ int Fl_Text_Buffer::next_char(int pos) const { IS_UTF8_ALIGNED2(this, (pos)) int l = fl_utf8len(byte_at(pos)); - if (l > 0) { // test for composed character except for bad bytes + if (l > 0) { // test for emoji sequence except for bad bytes int p = pos, ll, b, count_points = 0; char t[40]; // longest emoji sequences I know use 28 bytes in UTF8 (e.g., 🏴󠁧󠁒󠁷󠁬󠁳󠁿 "Wales flag") l = 0; @@ -2145,7 +2145,7 @@ int Fl_Text_Buffer::next_char(int pos) const break; } } - // length of possibly composed character starting at pos + // length of possibly emoji sequence starting at pos l = (l > 0 ? 
fl_utf8_next_composed_char(t, t + l) - t : 0); } else if (l == -1) { l = 1; diff --git a/src/fl_utf8.cxx b/src/fl_utf8.cxx index 62d393094..7ba8102a5 100644 --- a/src/fl_utf8.cxx +++ b/src/fl_utf8.cxx @@ -1638,11 +1638,15 @@ unsigned fl_utf8from_mb(char* dst, unsigned dstlen, const char* src, unsigned sr /** Returns pointer to beginning of character after given location in UTF8 string accounting for emoji sequences. - Unicode encodes some emojis (examples: 👩‍✈️ "woman pilot", 🇸🇲 "San Marino flag", 9️⃣ "keycap 9") - via an emoji sequence, that is, they are represented by sequences of consecutive unicode points. - An emoji sequence may pair two successive codepoints with "zero-width joiner" and may qualify - any component with "variation selectors" or "Fitzpatrick emoji modifiers". Most flag emojis are encoded with two successive - "regional indicator symbols". Keycap emojis are encoded with key + "emoji variation selector" + "combining enclosing keycap". + While Unicode encodes most characters as a single codepoint, some emojis (examples: 👩‍✈️ "woman pilot", + 🇸🇲 "San Marino flag", 9️⃣ "keycap 9") are encoded via an emoji sequence, that is, they are + represented by sequences of consecutive Unicode points. An emoji sequence may pair two codepoints with + "zero-width joiner" and may qualify any component with "variation selectors" or "Fitzpatrick emoji modifiers". + Most flag emojis are encoded with two consecutive "regional indicator symbols". Keycap emojis are encoded + with key + "emoji variation selector" + "combining enclosing keycap". + + Use this function to advance to the next character within a UTF8 string processing an entire emoji sequence + if present as a single character. \param from points to a location within a UTF8 string. If this location is inside the UTF8 encoding of a codepoint or is an invalid byte, this function returns \p from + 1. \param end points past last codepoint of the string. 
@@ -1689,6 +1693,8 @@ const char *fl_utf8_next_composed_char(const char *from, const char *end) { /** Returns pointer to beginning of character before given location in UTF8 string accounting for emoji sequences. See fl_utf8_next_composed_char() for a hint about what is an emoji sequence. + Use this function to step back to the previous character within a UTF8 string processing an entire emoji sequence + if present as a single character. \param from points to a location within a UTF8 string. If this location is inside the UTF8 encoding of a codepoint or is an invalid byte, this function returns \p from - 1. \param begin points to start of first codepoint of the string. -- cgit v1.2.3