Fixed crashes when Fl_Text_* detects illegal UTF-8 sequences. Widgets will not do any further processing but just jump over the character. Screen representation depends largely on whatever the underlying OS does with those sequences, but I feel that this is out of the scope of this library. (STR #2348)
git-svn-id: file:///fltk/svn/fltk/branches/branch-1.3@7965 ea41ed52-d2ee-0310-a9c1-e6b18d33e121
This commit is contained in:
parent
06e5a163cd
commit
1bac8a0cca
2
CHANGES
2
CHANGES
@ -1,5 +1,7 @@
|
||||
CHANGES IN FLTK 1.3.0
|
||||
|
||||
- Fixed crashes when detecting illegal UTF-8 sequences
|
||||
in Fl_Text_* widgets (STR #2348)
|
||||
- Fixed Fl_Text_Display Tabulator calculations (STR #2450)
|
||||
- Fixed file access code to use UTF-8 strings (STR #2440)
|
||||
- Fixed ARM Unicode cross compilation issue (STR #2432)
|
||||
|
||||
@ -34,7 +34,7 @@
|
||||
#define FL_TEXT_BUFFER_H
|
||||
|
||||
|
||||
#define ASSERT_UTF8
|
||||
#undef ASSERT_UTF8
|
||||
|
||||
#ifdef ASSERT_UTF8
|
||||
# include <assert.h>
|
||||
@ -47,22 +47,11 @@
|
||||
|
||||
|
||||
/*
|
||||
Suggested UTF-8 terminology for this file:
|
||||
|
||||
?? "length" is the number of characters in a string
|
||||
?? "size" is the number of bytes
|
||||
?? "index" is the position in a string in number of characters
|
||||
?? "offset" is the position in a string in bytes (and must be kept on a character boundary)
|
||||
(there seems to be no standard in Unicode documents, however "length" is commonly
|
||||
referencing the number of bytes. Maybe "bytes" and "glyphs" would be the most
|
||||
obvious way to describe sizes?)
|
||||
|
||||
"character size" is the size of a UTF-8 character in bytes
|
||||
"character width" is the width of a Unicode character in pixels
|
||||
|
||||
"column" was originally defined as a character offset from the left margin. It was
|
||||
identical to the byte offset. In UTF-8, we have neither a byte offset nor
|
||||
truly fixed width fonts (*). Column could be a pixel value multiplied with
|
||||
"character width" is the width of a Unicode character in pixels
|
||||
"column" was originally defined as a character offset from the left margin.
|
||||
It was identical to the byte offset. In UTF-8, we have neither a byte offset
|
||||
nor truly fixed width fonts (*). Column could be a pixel value multiplied with
|
||||
an average character width (which is a bearable approximation).
|
||||
|
||||
* in Unicode, there are no fixed width fonts! Even if the ASCII characters may
|
||||
|
||||
@ -99,13 +99,16 @@ FL_EXPORT int fl_utf8bytes(unsigned ucs);
|
||||
|
||||
/* OD: returns the byte length of the first UTF-8 char sequence (returns -1 if not valid) */
|
||||
FL_EXPORT int fl_utf8len(char c);
|
||||
|
||||
|
||||
/* OD: returns the byte length of the first UTF-8 char sequence (returns +1 if not valid) */
|
||||
FL_EXPORT int fl_utf8len1(char c);
|
||||
|
||||
/* OD: returns the number of Unicode chars in the UTF-8 string */
|
||||
FL_EXPORT int fl_utf_nb_char(const unsigned char *buf, int len);
|
||||
|
||||
/* F2: Convert the next UTF8 char-sequence into a Unicode value (and say how many bytes were used) */
|
||||
FL_EXPORT unsigned fl_utf8decode(const char* p, const char* end, int* len);
|
||||
|
||||
|
||||
/* F2: Encode a Unicode value into a UTF8 sequence, return the number of bytes used */
|
||||
FL_EXPORT int fl_utf8encode(unsigned ucs, char* buf);
|
||||
|
||||
|
||||
@ -1025,7 +1025,7 @@ int Fl_Text_Buffer::search_forward(int startPos, const char *searchString,
|
||||
*foundPos = startPos;
|
||||
return 1;
|
||||
}
|
||||
int l = fl_utf8len(c);
|
||||
int l = fl_utf8len1(c);
|
||||
if (memcmp(sp, address(bp), l))
|
||||
break;
|
||||
sp += l; bp += l;
|
||||
@ -1077,7 +1077,7 @@ int Fl_Text_Buffer::search_backward(int startPos, const char *searchString,
|
||||
*foundPos = startPos;
|
||||
return 1;
|
||||
}
|
||||
int l = fl_utf8len(c);
|
||||
int l = fl_utf8len1(c);
|
||||
if (memcmp(sp, address(bp), l))
|
||||
break;
|
||||
sp += l; bp += l;
|
||||
@ -1602,7 +1602,7 @@ int Fl_Text_Buffer::prev_char(int pos) const
|
||||
int Fl_Text_Buffer::next_char(int pos) const
|
||||
{
|
||||
IS_UTF8_ALIGNED2(this, (pos))
|
||||
int n = fl_utf8len(byte_at(pos));
|
||||
int n = fl_utf8len1(byte_at(pos));
|
||||
pos += n;
|
||||
if (pos>=mLength)
|
||||
return mLength;
|
||||
|
||||
@ -753,7 +753,7 @@ void Fl_Text_Display::overstrike(const char* text) {
|
||||
/* determine how many displayed character positions are covered */
|
||||
startIndent = mBuffer->count_displayed_characters( lineStart, startPos );
|
||||
indent = startIndent;
|
||||
for ( c = text; *c != '\0'; c += fl_utf8len(*c) )
|
||||
for ( c = text; *c != '\0'; c += fl_utf8len1(*c) )
|
||||
indent++;
|
||||
endIndent = indent;
|
||||
|
||||
@ -1735,7 +1735,7 @@ int Fl_Text_Display::handle_vline(
|
||||
style = position_style(lineStartPos, lineLen, 0);
|
||||
for (i=0; i<lineLen; ) {
|
||||
currChar = lineStr[i]; // one byte is enough to handle tabs and other cases
|
||||
int len = fl_utf8len(currChar);
|
||||
int len = fl_utf8len1(currChar);
|
||||
if (len<=0) len = 1; // OUCH!
|
||||
charStyle = position_style(lineStartPos, lineLen, i);
|
||||
if (charStyle!=style || currChar=='\t' || prevChar=='\t') {
|
||||
@ -1829,7 +1829,7 @@ int Fl_Text_Display::find_x(const char *s, int len, int style, int x) const {
|
||||
// TODO: use binary search which may be quicker.
|
||||
int i = 0;
|
||||
while (i<len) {
|
||||
int cl = fl_utf8len(s[i]);
|
||||
int cl = fl_utf8len1(s[i]);
|
||||
int w = int( string_width(s, i+cl, style) );
|
||||
if (w>x)
|
||||
return i;
|
||||
@ -3204,7 +3204,7 @@ double Fl_Text_Display::measure_proportional_character(const char *s, int xPix,
|
||||
return (((xPix/tab)+1)*tab) - xPix;
|
||||
}
|
||||
|
||||
int charLen = fl_utf8len(*s), style = 0;
|
||||
int charLen = fl_utf8len1(*s), style = 0;
|
||||
if (mStyleBuffer) {
|
||||
style = mStyleBuffer->byte_at(pos);
|
||||
}
|
||||
@ -3284,7 +3284,7 @@ int Fl_Text_Display::wrap_uses_character(int lineEndPos) const {
|
||||
|
||||
c = buffer()->char_at(lineEndPos);
|
||||
return c == '\n' || ((c == '\t' || c == ' ') &&
|
||||
lineEndPos + fl_utf8len(c) < buffer()->length());
|
||||
lineEndPos + fl_utf8len1(c) < buffer()->length());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -112,9 +112,11 @@ Toupper(
|
||||
}
|
||||
|
||||
/**
|
||||
return the byte length of the UTF-8 sequence with first byte \p c,
|
||||
or -1 if \p c is not valid.
|
||||
*/
|
||||
return the byte length of the UTF-8 sequence with first byte \p c,
|
||||
or -1 if \p c is not valid.
|
||||
This function is helpful for finding faulty UTF8 sequences.
|
||||
\see fl_utf8len1
|
||||
*/
|
||||
int fl_utf8len(char c)
|
||||
{
|
||||
if (!(c & 0x80)) return 1;
|
||||
@ -137,15 +139,34 @@ int fl_utf8len(char c)
|
||||
} // fl_utf8len
|
||||
|
||||
|
||||
#if 0
|
||||
int fl_utflen(
|
||||
const unsigned char *buf,
|
||||
int len)
|
||||
/**
|
||||
Return the byte length of the UTF-8 sequence with first byte \p c,
|
||||
or 1 if \p c is not valid.
|
||||
This function can be used to scan faulty UTF8 sequences, albeit ignoring invalid
|
||||
codes.
|
||||
\see fl_utf8len
|
||||
*/
|
||||
int fl_utf8len1(char c)
|
||||
{
|
||||
unsigned int ucs;
|
||||
return fl_utf2ucs(buf, len, &ucs);
|
||||
}
|
||||
#endif
|
||||
if (!(c & 0x80)) return 1;
|
||||
if (c & 0x40) {
|
||||
if (c & 0x20) {
|
||||
if (c & 0x10) {
|
||||
if (c & 0x08) {
|
||||
if (c & 0x04) {
|
||||
return 6;
|
||||
}
|
||||
return 5;
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
return 3;
|
||||
}
|
||||
return 2;
|
||||
}
|
||||
return 1;
|
||||
} // fl_utf8len1
|
||||
|
||||
|
||||
/**
|
||||
returns the number of Unicode chars in the UTF-8 string
|
||||
|
||||
Loading…
Reference in New Issue
Block a user