Emojis: add support of keycap emoji sequences.
- It is expected that all emojis listed in the Wikipedia "emoji" article
as of early 2026 are recognized as single glyphs by FLTK text widgets.
- Document functions fl_utf8_{next|previous}_composed_char()
relatively to the notion of "emoji sequence".
- Remove signed/unsigned comparison compilation warnings.
This commit is contained in:
parent
445d26bb71
commit
bed38ba3f5
@ -2127,7 +2127,7 @@ int Fl_Text_Buffer::prev_char(int pos) const
|
||||
int Fl_Text_Buffer::next_char(int pos) const
|
||||
{
|
||||
IS_UTF8_ALIGNED2(this, (pos))
|
||||
int l = fl_utf8len1(byte_at(pos));
|
||||
unsigned l = fl_utf8len1(byte_at(pos));
|
||||
if (l > 2) { // test for composed character only if pos is at long codepoint
|
||||
int p = pos, ll, b;
|
||||
char t[40]; // crazyest composed characters I know use 28 bytes in UTF8 (e.g., 🏴)
|
||||
|
||||
@ -1637,32 +1637,36 @@ unsigned fl_utf8from_mb(char* dst, unsigned dstlen, const char* src, unsigned sr
|
||||
|
||||
|
||||
/**
|
||||
Returns pointer to beginning of character after given location in UTF8 string accounting for composed characters.
|
||||
Some unicode characters (examples: 👩✈️ "woman pilot", 🇸🇲 "San Marino flag") are composed of several unicode points.
|
||||
They may pair two successive codepoints with U+200D (zero-width joiner) and may qualify any component with
|
||||
variation selectors or Fitzpatrick emoji modifiers. Most flag emojis are composed of two successive
|
||||
"regional indicator symbols", each in range [U+1F1E6 , U+1F1FF].
|
||||
Returns pointer to beginning of character after given location in UTF8 string accounting for emoji sequences.
|
||||
Unicode encodes some emojis (examples: 👩✈️ "woman pilot", 🇸🇲 "San Marino flag", 9️⃣ "keycap 9")
|
||||
via an <b>emoji sequence</b>, that is, they are represented by sequences of consecutive unicode points.
|
||||
An emoji sequence may pair two successive codepoints with "zero-width joiner" and may qualify
|
||||
any component with "variation selectors" or "Fitzpatrick emoji modifiers". Most flag emojis are encoded with two successive
|
||||
"regional indicator symbols". Keycap emojis are encoded with key + "emoji variation selector" + "combining enclosing keycap".
|
||||
\param from points to a location within a UTF8 string. If this location is inside the UTF8
|
||||
encoding of a codepoint or is an invalid byte, this function returns \p from + 1.
|
||||
\param end points past last codepoint of the string.
|
||||
\return pointer to beginning of first codepoint after possibly composed character that begins at \p from.
|
||||
\return pointer to beginning of first codepoint after character, possibly an emoji sequence, that begins at \p from.
|
||||
*/
|
||||
const char *fl_utf8_next_composed_char(const char *from, const char *end) {
|
||||
int skip = fl_utf8len1(*from);
|
||||
if (skip <= 2) return from + skip;
|
||||
unsigned u = fl_utf8decode(from, end, NULL);
|
||||
if (u >= 0x1F1E6 && u <= 0x1F1FF) { // a 1st regional indicator symbol can be a flag
|
||||
u = fl_utf8decode(from + skip, end, NULL);
|
||||
if (u >= 0x1F1E6 && u <= 0x1F1FF) { // a 2nd regional indicator symbol gives a flag
|
||||
return from + 2 * skip;
|
||||
int skip = fl_utf8len(*from);
|
||||
if (skip == -1) return from + 1;
|
||||
unsigned u;
|
||||
if (skip >= 4) {
|
||||
u = fl_utf8decode(from, end, NULL);
|
||||
if (u >= 0x1F1E6 && u <= 0x1F1FF) { // a 1st regional indicator symbol can be a flag
|
||||
u = fl_utf8decode(from + skip, end, NULL);
|
||||
if (u >= 0x1F1E6 && u <= 0x1F1FF) { // a 2nd regional indicator symbol gives a flag
|
||||
return from + 2 * skip;
|
||||
}
|
||||
} else if (u == 0x1F3F4) { // “waving black flag” may start subdivision flags (e.g. 🏴)
|
||||
const char *next = from + skip;
|
||||
do {
|
||||
u = fl_utf8decode(next, end, NULL);
|
||||
next += fl_utf8len1(*next);
|
||||
if (u == 0xE007F) return next; // ends with "cancel tag"
|
||||
} while (u >= 0xE0020 && u <= 0xE007E); // any series of "tag components"
|
||||
}
|
||||
} else if (u == 0x1F3F4) { // “waving black flag” may start subdivision flags (e.g. 🏴)
|
||||
const char *next = from + skip;
|
||||
do {
|
||||
u = fl_utf8decode(next, end, NULL);
|
||||
next += fl_utf8len1(*next);
|
||||
if (u == 0xE007F) return next; // ends with "cancel tag"
|
||||
} while (u >= 0xE0020 && u <= 0xE007E); // any series of "tag components"
|
||||
}
|
||||
from += skip; // skip 1st codepoint
|
||||
while (from < end) {
|
||||
@ -1674,6 +1678,8 @@ const char *fl_utf8_next_composed_char(const char *from, const char *end) {
|
||||
from += fl_utf8len(*from); // skip variation selector
|
||||
} else if (u >= 0x1F3FB && u <= 0x1F3FF) { // EMOJI MODIFIER FITZPATRICK
|
||||
from += fl_utf8len(*from); // skip modifier
|
||||
} else if (u == 0x20E3) { // combining enclosing keycap (e.g., 9️⃣*️⃣#️⃣9︎⃣)
|
||||
from += fl_utf8len(*from); // skip it
|
||||
} else break;
|
||||
}
|
||||
return from;
|
||||
@ -1681,12 +1687,12 @@ const char *fl_utf8_next_composed_char(const char *from, const char *end) {
|
||||
|
||||
|
||||
/**
|
||||
Returns pointer to beginning of character before given location in UTF8 string accounting for composed characters.
|
||||
See fl_utf8_next_composed_char() for a hint about what is a composed unicode character.
|
||||
Returns pointer to beginning of character before given location in UTF8 string accounting for emoji sequences.
|
||||
See fl_utf8_next_composed_char() for a hint about what is an emoji sequence.
|
||||
\param from points to a location within a UTF8 string. If this location is inside the UTF8
|
||||
encoding of a codepoint or is an invalid byte, this function returns \p from - 1.
|
||||
\param begin points to start of first codepoint of the string.
|
||||
\return pointer to beginning of first possibly composed character before the codepoint that begins at \p from.
|
||||
\return pointer to beginning of first character, possibly an emoji sequence, before the codepoint that begins at \p from.
|
||||
*/
|
||||
const char *fl_utf8_previous_composed_char(const char *from, const char *begin) {
|
||||
int l = fl_utf8len(*from);
|
||||
@ -1715,6 +1721,8 @@ const char *fl_utf8_previous_composed_char(const char *from, const char *begin)
|
||||
from = fl_utf8back(from - 1, begin, NULL);
|
||||
} else if (u >= 0x1F3FB && u <= 0x1F3FF) { // EMOJI MODIFIER FITZPATRICK
|
||||
from = fl_utf8back(from - 1, begin, NULL);
|
||||
} else if (u == 0x20E3) { // combining enclosing keycap
|
||||
from = fl_utf8back(from - 1, begin, NULL);
|
||||
} else if (from > begin) {
|
||||
keep = fl_utf8back(from - 1, begin, NULL);
|
||||
u = fl_utf8decode(keep, from, NULL);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user