summaryrefslogtreecommitdiff
path: root/src/fl_utf8.cxx
diff options
context:
space:
mode:
authorManoloFLTK <41016272+ManoloFLTK@users.noreply.github.com>2026-01-31 11:24:29 +0100
committerManoloFLTK <41016272+ManoloFLTK@users.noreply.github.com>2026-02-01 15:28:06 +0100
commit64064378309ae7ae92b7c53a7615cfa607cd7907 (patch)
treebc255bd86faa8761b2844f33bd80d33c4b706b9b /src/fl_utf8.cxx
parent38aaabb059c6ad959e91bc74acbee03db06d70ce (diff)
Improve documentation of functions dedicated to processing of emoji sequences.
Diffstat (limited to 'src/fl_utf8.cxx')
-rw-r--r--src/fl_utf8.cxx16
1 files changed, 11 insertions, 5 deletions
diff --git a/src/fl_utf8.cxx b/src/fl_utf8.cxx
index 62d393094..7ba8102a5 100644
--- a/src/fl_utf8.cxx
+++ b/src/fl_utf8.cxx
@@ -1638,11 +1638,15 @@ unsigned fl_utf8from_mb(char* dst, unsigned dstlen, const char* src, unsigned sr
/**
Returns pointer to beginning of character after given location in UTF8 string accounting for emoji sequences.
- Unicode encodes some emojis (examples: 👩‍✈️ "woman pilot", 🇸🇲 "San Marino flag", 9️⃣ "keycap 9")
- via an <b>emoji sequence</b>, that is, they are represented by sequences of consecutive unicode points.
- An emoji sequence may pair two successive codepoints with "zero-width joiner" and may qualify
- any component with "variation selectors" or "Fitzpatrick emoji modifiers". Most flag emojis are encoded with two successive
- "regional indicator symbols". Keycap emojis are encoded with key + "emoji variation selector" + "combining enclosing keycap".
+ While Unicode encodes most characters as a single codepoint, some emojis (examples: 👩‍✈️ "woman pilot",
+ 🇸🇲 "San Marino flag", 9️⃣ "keycap 9") are encoded via an <b>emoji sequence</b>, that is, they are
+ represented by sequences of consecutive Unicode codepoints. An emoji sequence may pair two codepoints with
+ "zero-width joiner" and may qualify any component with "variation selectors" or "Fitzpatrick emoji modifiers".
+ Most flag emojis are encoded with two consecutive "regional indicator symbols". Keycap emojis are encoded
+ with key + "emoji variation selector" + "combining enclosing keycap".
+
+ Use this function to advance to the next character within a UTF8 string, treating an entire emoji sequence,
+ if present, as a single character.
\param from points to a location within a UTF8 string. If this location is inside the UTF8
encoding of a codepoint or is an invalid byte, this function returns \p from + 1.
\param end points past last codepoint of the string.
@@ -1689,6 +1693,8 @@ const char *fl_utf8_next_composed_char(const char *from, const char *end) {
/**
Returns pointer to beginning of character before given location in UTF8 string accounting for emoji sequences.
See fl_utf8_next_composed_char() for an explanation of what an emoji sequence is.
+ Use this function to step back to the previous character within a UTF8 string, treating an entire emoji sequence,
+ if present, as a single character.
\param from points to a location within a UTF8 string. If this location is inside the UTF8
encoding of a codepoint or is an invalid byte, this function returns \p from - 1.
\param begin points to start of first codepoint of the string.