fltk/src/xutf8/utf8Input.c

447 lines
10 KiB
C
Raw Normal View History

/******************************************************************************
Copyright (c) 2000-2002 by O'ksi'D
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* Neither the name of O'ksi'D nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
#if !defined(WIN32) && !defined(__APPLE__)
#include "config.h"
#include "../../FL/Xutf8.h"
#include <X11/X.h>
#include <X11/Xlib.h>
#include <X11/Xutil.h>
#include <string.h>
#include <stdlib.h>
#if HAVE_LIBC_ICONV
#include <iconv.h>
#endif
/*
I haven't found much doc on the web about EUC encodings, so I've used
GNU libiconv source code as a reference.
http://clisp.cons.org/~haible/packages-libiconv.html
*/
#define RET_ILSEQ -1
#define RET_TOOFEW(x) (-10 - x)
#define RET_TOOSMALL -2
#define conv_t void*
#define ucs4_t unsigned int
typedef struct {
unsigned short indx;
unsigned short used;
} Summary16;
#include "lcUniConv/big5.h"
#include "lcUniConv/gb2312.h"
#include "lcUniConv/jisx0201.h"
#include "lcUniConv/jisx0208.h"
#include "lcUniConv/jisx0212.h"
#include "lcUniConv/ksc5601.h"
int
XConvertEucTwToUtf8(
char* buffer_return,
int len)
{
/* FIXME */
#if HAVE_LIBC_ICONV
iconv_t cd;
int cdl;
#else
int i = 0;
#endif
int l = 0;
char *buf, *b;
if (len < 1) return 0;
b = buf = (char*) malloc((unsigned)len);
memcpy(buf, buffer_return, (unsigned) len);
#if HAVE_LIBC_ICONV
l = cdl = len;
cd = iconv_open("EUC-TW", "UTF-8");
iconv(cd, &b, &len, &buffer_return, &cdl);
iconv_close(cd);
l -= cdl;
#else
while (i < len) {
unsigned int ucs;
unsigned char c;
c = (unsigned char) buf[i];
if (c < 0x80) {
ucs = c;
i++;
} else if (c >= 0xa1 && c < 0xff && len - i > 1 ) {
unsigned char b[2];
b[0] = (unsigned char) c - 0x80;
b[1] = (unsigned char) buf[i + 1] - 0x80;
ucs = ' '; i += 2;
} else if (c == 0x8e && len - i > 3) {
unsigned char b[2];
unsigned char c1 = buf[i + 1];
unsigned char c2 = buf[i + 2];
unsigned char c3 = buf[i + 3];
b[0] = (unsigned char) buf[i + 2] - 0x80;
b[1] = (unsigned char) buf[i + 3] - 0x80;
if (c1 >= 0xa1 && c1 <= 0xb0) {
if (c2 >= 0xa1 && c2 < 0xff && c3 >= 0xa1 &&
c3 < 0xff)
{
ucs = ' '; i += 4;
} else {
ucs = '?'; i++;
}
} else {
ucs = '?'; i++;
}
} else {
ucs = '?';
i++;
}
l += XConvertUcsToUtf8(ucs, buffer_return + l);
}
#endif
free(buf);
return l;
}
int
XConvertEucKrToUtf8(
char* buffer_return,
int len)
{
int i = 0, l = 0;
char *buf;
if (len < 1) return 0;
buf = (char*) malloc((unsigned)len);
memcpy(buf, buffer_return, (unsigned)len);
while (i < len) {
unsigned int ucs;
unsigned char c, c1;
c = (unsigned char) buf[i];
if (c < 0x80) {
ucs = c;
i++;
} else if (c >= 0xA1 && c < 0xFF && len - i > 1) {
c1 = (unsigned char) buf[i + 1];
if (c1 >= 0xa1 && c1 < 0xff) {
unsigned char b[2];
b[0] = c - 0x80;
b[1] = c1 - 0x80;
if (ksc5601_mbtowc(NULL, &ucs, b, 2) < 1) {
ucs = '?';
}
} else {
ucs = '?';
}
i += 2;
} else {
ucs = '?';
i++;
}
l += XConvertUcsToUtf8(ucs, buffer_return + l);
}
free(buf);
return l;
}
int
XConvertBig5ToUtf8(
char* buffer_return,
int len)
{
int i = 0, l = 0;
char *buf;
if (len < 1) return 0;
buf = (char*) malloc((unsigned)len);
memcpy(buf, buffer_return, (unsigned)len);
if (len == 1) {
l += XConvertUcsToUtf8((unsigned int)buf[i], buffer_return + l);
}
while (i + 1 < len) {
unsigned int ucs;
unsigned char b[2];
b[0] = (unsigned char) buf[i];
b[1] = (unsigned char) buf[i + 1];
if (big5_mbtowc(NULL, &ucs, b, 2) == 2) {
i += 2;
} else {
ucs = '?';
i++;
}
l += XConvertUcsToUtf8(ucs, buffer_return + l);
}
free(buf);
return l;
}
int
XConvertGb2312ToUtf8(
char* buffer_return,
int len)
{
int i = 0, l = 0;
char *buf;
if (len < 1) return 0;
buf = (char*) malloc((unsigned)len);
memcpy(buf, buffer_return, (unsigned)len);
if (len == 1) {
l += XConvertUcsToUtf8((unsigned int)buf[i], buffer_return + l);
}
while (i + 1 < len) {
unsigned int ucs;
unsigned char b[2];
b[0] = (unsigned char) buf[i];
b[1] = (unsigned char) buf[i + 1];
if (gb2312_mbtowc(NULL, &ucs, b, 2) == 2) {
i += 2;
} else {
ucs = '?';
i++;
}
l += XConvertUcsToUtf8(ucs, buffer_return + l);
}
free(buf);
return l;
}
int
XConvertEucCnToUtf8(
char* buffer_return,
int len)
{
int i = 0, l = 0;
char *buf;
if (len < 1) return 0;
buf = (char*) malloc((unsigned)len);
memcpy(buf, buffer_return, (unsigned)len);
while (i < len) {
unsigned int ucs;
unsigned char c, c1;
c = (unsigned char) buf[i];
if (c < 0x80) {
ucs = c;
i++;
} else if (c >= 0xA1 && c < 0xFF && len - i > 1) {
c1 = (unsigned char) buf[i + 1];
if (c1 >= 0xa1 && c1 < 0xff) {
unsigned char b[2];
b[0] = (unsigned char) c;
b[1] = (unsigned char) c1;
if (gb2312_mbtowc(NULL, &ucs, b, 2) < 1) {
ucs = '?';
}
} else {
ucs = '?';
}
i += 2;
} else {
ucs = '?';
i++;
}
l += XConvertUcsToUtf8(ucs, buffer_return + l);
}
free(buf);
return l;
}
int
XConvertEucJpToUtf8(
char* buffer_return,
int len)
{
int i = 0, l = 0;
char *buf;
if (len < 1) return 0;
buf = (char*) malloc((unsigned)len);
memcpy(buf, buffer_return, (unsigned)len);
while (i < len) {
unsigned int ucs;
unsigned char c, c1;
c = (unsigned char) buf[i];
if (c < 0x80) {
ucs = c;
i++;
} else if (c >= 0xA1 && c < 0xFF && len - i > 1) {
c1 = (unsigned char) buf[i + 1];
if (c < 0xF5 && c1 >= 0xa1) {
unsigned char b[2];
b[0] = c - 0x80;
b[1] = c1 - 0x80;
if (jisx0208_mbtowc(NULL, &ucs, b, 2) < 1) {
ucs = '?';
}
} else if (c1 >= 0xA1 && c1 < 0xFF) {
ucs = 0xE000 + 94 * (c - 0xF5) + (c1 - 0xA1);
} else {
ucs = '?';
}
i += 2;
} else if (c == 0x8E && len - i > 1) {
c1 = (unsigned char) buf[i + 1];
if (c1 >= 0xa1 && c1 <= 0xe0) {
if (jisx0201_mbtowc(NULL, &ucs, &c1, 1) != 1) {
ucs = '?';
}
} else {
ucs = '?';
}
i += 2;
} else if (c == 0x8F && len - i > 2) {
c = (unsigned char) buf[i + 1];
c1 = (unsigned char) buf[i + 2];
if (c >= 0xa1 && c < 0xff) {
if (c < 0xf5 && c1 >= 0xa1 && c1 < 0xff) {
unsigned char b[2];
b[0] = c - 0x80;
b[1] = c1 - 0x80;
if (jisx0212_mbtowc(NULL, &ucs, b, 2)
< 1)
{
ucs = '?';
}
} else {
ucs = '?';
}
} else {
if (c1 >= 0xa1 && c1 < 0xff) {
ucs = 0xe3ac + 94 * (c - 0xF5) +
(c1 - 0xA1);
} else {
ucs = '?';
}
}
i += 3;
} else {
ucs = '?';
i++;
}
l += XConvertUcsToUtf8(ucs, buffer_return + l);
}
free(buf);
return l;
}
int
XConvertEucToUtf8(
const char* locale,
char* buffer_return,
int len,
int bytes_buffer)
{
if (!locale/* || strstr(locale, "UTF") || strstr(locale, "utf")*/) {
return len;
}
if (strstr(locale, "ja")) {
return XConvertEucJpToUtf8(buffer_return, len);
} else if (strstr(locale, "Big5") || strstr(locale, "big5")) { // BIG5
return XConvertBig5ToUtf8(buffer_return, len);
} else if (strstr(locale, "zh") || strstr(locale, "chinese-")) {
if (strstr(locale, "TW") || strstr(locale, "chinese-t")) {
if (strstr(locale, "EUC") || strstr(locale, "euc") ||
strstr(locale, "chinese-t"))
{
return XConvertEucTwToUtf8(buffer_return, len);
}
return XConvertBig5ToUtf8(buffer_return, len);
}
if (strstr(locale, "EUC") || strstr(locale, "euc")) {
return XConvertEucCnToUtf8(buffer_return, len);
}
return XConvertGb2312ToUtf8(buffer_return, len);
} else if (strstr(locale, "ko")) {
return XConvertEucKrToUtf8(buffer_return, len);
}
return len;
}
int
XUtf8LookupString(
XIC ic,
XKeyPressedEvent* event,
char* buffer_return,
int bytes_buffer,
KeySym* keysym,
Status* status_return)
{
long ucs = -1;
int len;
len = XmbLookupString(ic, event, buffer_return, bytes_buffer / 5,
keysym, status_return);
if (*status_return == XBufferOverflow) {
return len * 5;
}
if (*keysym > 0 && *keysym < 0x100 && len == 1) {
if (*keysym < 0x80) {
ucs = (unsigned char)buffer_return[0];
} else {
ucs = *keysym;
}
} else if (((*keysym >= 0x100 && *keysym <= 0xf000) ||
(*keysym & 0xff000000U) == 0x01000000))
{
ucs = XKeysymToUcs(*keysym);
} else {
ucs = -2;
}
if (ucs > 0) {
len = XConvertUcsToUtf8((unsigned)ucs, (char *)buffer_return);
} else if (len > 0) {
XIM im;
if (!ic) return 0;
im = XIMOfIC(ic);
if (!im) return 0;
len = XConvertEucToUtf8(XLocaleOfIM(im),
buffer_return, len, bytes_buffer);
}
return len;
}
#endif // X11 only