Fri Dec 17 16:35:31 2004 Michael Jennings (mej)

Another attempt (and failure) at UTF-8 support.  Disabled until
someone can help me out.

If you know iconv(), please have a look at the FIXME_BLOCK starting at
line 3509 of src/command.c.
----------------------------------------------------------------------


SVN revision: 12495
This commit is contained in:
Michael Jennings 2004-12-17 21:37:39 +00:00
parent 19cae1a53d
commit d7f979b78b
4 changed files with 59 additions and 10 deletions

View File

@ -5271,3 +5271,11 @@ The benefits of being a passenger on a long road trip... The
beginnings of EWMH opacity (a la xcompmgr) support and (hopefully)
UTF-8 support. Both as yet untested.
----------------------------------------------------------------------
Fri Dec 17 16:35:31 2004 Michael Jennings (mej)
Another attempt (and failure) at UTF-8 support. Disabled until
someone can help me out.
If you know iconv(), please have a look at the FIXME_BLOCK starting at
line 3509 of src/command.c.
----------------------------------------------------------------------

View File

@ -111,6 +111,7 @@ static const char cvs_ident[] = "$Id$";
# include <locale.h> # include <locale.h>
# include <langinfo.h> # include <langinfo.h>
# include <iconv.h> # include <iconv.h>
# include <wchar.h>
#endif #endif
/* Eterm-specific Headers */ /* Eterm-specific Headers */
@ -3505,29 +3506,45 @@ main_loop(void)
} }
D_SCREEN(("Adding %d lines (%d chars); str == %8p, cmdbuf_ptr == %8p, cmdbuf_endp == %8p\n", D_SCREEN(("Adding %d lines (%d chars); str == %8p, cmdbuf_ptr == %8p, cmdbuf_endp == %8p\n",
nlines, cmdbuf_ptr - str, str, cmdbuf_ptr, cmdbuf_endp)); nlines, cmdbuf_ptr - str, str, cmdbuf_ptr, cmdbuf_endp));
#ifdef MULTI_CHARSET #if FIXME_BLOCK
/*
* iconv() is not my friend. :-( I've tried various things
* to make this work (including UCS2, SJIS, EUCJ, and
* WCHAR_T), but nothing has worked. I'm obviously
* missing something, so if you know what, kindly throw me
* a clue. :-) -- mej
*/
if (!strcmp(nl_langinfo(CODESET), "UTF-8")) { if (!strcmp(nl_langinfo(CODESET), "UTF-8")) {
iconv_t handle; iconv_t handle;
handle = iconv_open("UTF-8", "UCS2"); if (encoding_method != UCS2) {
set_multichar_encoding("utf8");
}
handle = iconv_open("UTF-8", "WCHAR_T");
if (handle == SPIF_CAST_C(iconv_t) -1) { if (handle == SPIF_CAST_C(iconv_t) -1) {
print_error("Unable to decode UTF-8 locale %s to UCS-2. Defaulting to portable C locale.\n", print_error("Unable to decode UTF-8 locale %s to WCHAR_T. Defaulting to portable C locale.\n",
setlocale(LC_ALL, "")); setlocale(LC_ALL, ""));
setlocale(LC_ALL, "C"); setlocale(LC_ALL, "C");
scr_add_lines(str, nlines, (cmdbuf_ptr - str));
} else { } else {
char *outbuff, *pinbuff, *poutbuff; char *outbuff, *pinbuff, *poutbuff;
wchar_t *wcbuff;
mbstate_t mbs;
size_t bufflen, outlen = 0, retval; size_t bufflen, outlen = 0, retval;
pinbuff = (char *) str; pinbuff = (char *) str;
bufflen = cmdbuf_ptr - str; bufflen = cmdbuf_ptr - str;
poutbuff = outbuff = SPIF_CAST_C(char *) MALLOC(bufflen * 6); outlen = bufflen * 6;
poutbuff = outbuff = SPIF_CAST_C(char *) MALLOC(outlen);
errno = 0; errno = 0;
D_VT(("Allocated output buffer of %lu chars at %010p against input buffer of %lu\n", bufflen * 6, outbuff, bufflen));
print_warning("Moo: %s\n", safe_print_string(str, bufflen));
retval = iconv(handle, &pinbuff, &bufflen, &poutbuff, &outlen); retval = iconv(handle, &pinbuff, &bufflen, &poutbuff, &outlen);
if (retval != (size_t) -1) { if (retval != (size_t) -1) {
errno = 0; errno = 0;
} }
if (errno == E2BIG) { if (errno == E2BIG) {
print_error("My UTF-8 decode buffer was too small by %lu bytes?!", bufflen); print_error("My UTF-8 decode buffer was too small by %lu bytes?!\n", bufflen);
} else if (errno == EILSEQ) { } else if (errno == EILSEQ) {
print_error("Illegal multibyte sequence encountered at \'%c\' (0x%02x); skipping.\n", print_error("Illegal multibyte sequence encountered at \'%c\' (0x%02x); skipping.\n",
*pinbuff, *pinbuff); *pinbuff, *pinbuff);
@ -3535,12 +3552,33 @@ main_loop(void)
pinbuff++; pinbuff++;
} else if (errno == EINVAL) { } else if (errno == EINVAL) {
D_VT(("Incomplete multibyte sequence encountered.\n")); D_VT(("Incomplete multibyte sequence encountered.\n"));
print_warning("Converted %lu input chars to %lu output chars before incomplete sequence.\n", (cmdbuf_ptr - str), outlen);
} else {
print_warning("Converted %lu input chars to %lu output chars.\n", (cmdbuf_ptr - str), outlen);
} }
if (pinbuff > (char *) str) { print_warning("Moo2: %s\n", safe_print_string(outbuff, outlen));
cmdbuf_ptr = (unsigned char *) pinbuff; MEMSET(outbuff + outlen, 0, sizeof(wchar_t));
scr_add_lines(str, nlines, (cmdbuf_ptr - str)); wcbuff = SPIF_CAST_C(wchar_t *) outbuff;
MEMSET(&mbs, 0, sizeof(mbstate_t));
outlen = wcsrtombs(NULL, &wcbuff, 0, &mbs) + 1;
if (outlen > 0) {
outbuff = SPIF_CAST_C(char *) MALLOC(outlen);
outlen = wcsrtombs(outbuff, &wcbuff, outlen, &mbs);
if ((long)outlen >= 0) {
FREE(wcbuff);
print_error("I win!\n");
} else {
print_error("wcsrtombs() returned %ld (errno is %d (%s))\n", (unsigned long) outlen, errno, strerror(errno));
}
if (pinbuff > (char *) str) {
cmdbuf_ptr = (unsigned char *) pinbuff;
scr_add_lines(outbuff, nlines, outlen);
}
} else {
print_error("wcsrtombs(NULL, %10p, 0) returned %ld (errno is %d (%s))\n", wcbuff, (unsigned long) outlen, errno, strerror(errno));
} }
FREE(outbuff);
} }
} else } else
#endif #endif

View File

@ -1450,7 +1450,10 @@ set_multichar_encoding(const char *str)
{ {
#ifdef MULTI_CHARSET #ifdef MULTI_CHARSET
if (str && *str) { if (str && *str) {
if (!strcasecmp(str, "sjis")) { if (!strcasecmp(str, "utf8") || !strcasecmp(str, "ucs2")) {
encoding_method = UCS2;
multichar_decode = latin1;
} else if (!strcasecmp(str, "sjis")) {
encoding_method = SJIS; encoding_method = SJIS;
multichar_decode = sjis2jis; multichar_decode = sjis2jis;
} else if (!strcasecmp(str, "eucj") || !strcasecmp(str, "euckr") || !strcasecmp(str, "gb")) { } else if (!strcasecmp(str, "eucj") || !strcasecmp(str, "euckr") || !strcasecmp(str, "gb")) {

View File

@ -165,7 +165,7 @@ typedef enum {
SELECTION_DONE SELECTION_DONE
} selection_op_t; } selection_op_t;
typedef enum { typedef enum {
LATIN1 = 0, EUCJ, EUCKR = EUCJ, GB = EUCJ, SJIS, BIG5 LATIN1 = 0, UCS2, EUCJ, EUCKR = EUCJ, GB = EUCJ, SJIS, BIG5
} encoding_t; } encoding_t;
typedef struct { typedef struct {
short row, col; short row, col;