Fri Dec 17 16:35:31 2004 Michael Jennings (mej)

Another attempt (and failure) at UTF-8 support.  The new conversion
code is disabled (guarded by FIXME_BLOCK) until someone can help me out.

If you know iconv(), please have a look at the FIXME_BLOCK starting at
line 3509 of src/command.c.
----------------------------------------------------------------------


SVN revision: 12495
This commit is contained in:
Michael Jennings 2004-12-17 21:37:39 +00:00
parent 19cae1a53d
commit d7f979b78b
4 changed files with 59 additions and 10 deletions

View File

@ -5271,3 +5271,11 @@ The benefits of being a passenger on a long road trip... The
beginnings of EWMH opacity (a la xcompmgr) support and (hopefully)
UTF-8 support. Both as yet untested.
----------------------------------------------------------------------
Fri Dec 17 16:35:31 2004 Michael Jennings (mej)
Another attempt (and failure) at UTF-8 support. Disabled until
someone can help me out.
If you know iconv(), please have a look at the FIXME_BLOCK starting at
line 3509 of src/command.c.
----------------------------------------------------------------------

View File

@ -111,6 +111,7 @@ static const char cvs_ident[] = "$Id$";
# include <locale.h>
# include <langinfo.h>
# include <iconv.h>
# include <wchar.h>
#endif
/* Eterm-specific Headers */
@ -3505,29 +3506,45 @@ main_loop(void)
}
D_SCREEN(("Adding %d lines (%d chars); str == %8p, cmdbuf_ptr == %8p, cmdbuf_endp == %8p\n",
nlines, cmdbuf_ptr - str, str, cmdbuf_ptr, cmdbuf_endp));
#ifdef MULTI_CHARSET
#if FIXME_BLOCK
/*
* iconv() is not my friend. :-( I've tried various things
* to make this work (including UCS2, SJIS, EUCJ, and
* WCHAR_T), but nothing has worked. I'm obviously
* missing something, so if you know what, kindly throw me
* a clue. :-) -- mej
*/
if (!strcmp(nl_langinfo(CODESET), "UTF-8")) {
iconv_t handle;
handle = iconv_open("UTF-8", "UCS2");
if (encoding_method != UCS2) {
set_multichar_encoding("utf8");
}
handle = iconv_open("UTF-8", "WCHAR_T");
if (handle == SPIF_CAST_C(iconv_t) -1) {
print_error("Unable to decode UTF-8 locale %s to UCS-2. Defaulting to portable C locale.\n",
print_error("Unable to decode UTF-8 locale %s to WCHAR_T. Defaulting to portable C locale.\n",
setlocale(LC_ALL, ""));
setlocale(LC_ALL, "C");
scr_add_lines(str, nlines, (cmdbuf_ptr - str));
} else {
char *outbuff, *pinbuff, *poutbuff;
wchar_t *wcbuff;
mbstate_t mbs;
size_t bufflen, outlen = 0, retval;
pinbuff = (char *) str;
bufflen = cmdbuf_ptr - str;
poutbuff = outbuff = SPIF_CAST_C(char *) MALLOC(bufflen * 6);
outlen = bufflen * 6;
poutbuff = outbuff = SPIF_CAST_C(char *) MALLOC(outlen);
errno = 0;
D_VT(("Allocated output buffer of %lu chars at %010p against input buffer of %lu\n", bufflen * 6, outbuff, bufflen));
print_warning("Moo: %s\n", safe_print_string(str, bufflen));
retval = iconv(handle, &pinbuff, &bufflen, &poutbuff, &outlen);
if (retval != (size_t) -1) {
errno = 0;
}
if (errno == E2BIG) {
print_error("My UTF-8 decode buffer was too small by %lu bytes?!", bufflen);
print_error("My UTF-8 decode buffer was too small by %lu bytes?!\n", bufflen);
} else if (errno == EILSEQ) {
print_error("Illegal multibyte sequence encountered at \'%c\' (0x%02x); skipping.\n",
*pinbuff, *pinbuff);
@ -3535,12 +3552,33 @@ main_loop(void)
pinbuff++;
} else if (errno == EINVAL) {
D_VT(("Incomplete multibyte sequence encountered.\n"));
print_warning("Converted %lu input chars to %lu output chars before incomplete sequence.\n", (cmdbuf_ptr - str), outlen);
} else {
print_warning("Converted %lu input chars to %lu output chars.\n", (cmdbuf_ptr - str), outlen);
}
if (pinbuff > (char *) str) {
cmdbuf_ptr = (unsigned char *) pinbuff;
scr_add_lines(str, nlines, (cmdbuf_ptr - str));
print_warning("Moo2: %s\n", safe_print_string(outbuff, outlen));
MEMSET(outbuff + outlen, 0, sizeof(wchar_t));
wcbuff = SPIF_CAST_C(wchar_t *) outbuff;
MEMSET(&mbs, 0, sizeof(mbstate_t));
outlen = wcsrtombs(NULL, &wcbuff, 0, &mbs) + 1;
if (outlen > 0) {
outbuff = SPIF_CAST_C(char *) MALLOC(outlen);
outlen = wcsrtombs(outbuff, &wcbuff, outlen, &mbs);
if ((long)outlen >= 0) {
FREE(wcbuff);
print_error("I win!\n");
} else {
print_error("wcsrtombs() returned %ld (errno is %d (%s))\n", (unsigned long) outlen, errno, strerror(errno));
}
if (pinbuff > (char *) str) {
cmdbuf_ptr = (unsigned char *) pinbuff;
scr_add_lines(outbuff, nlines, outlen);
}
} else {
print_error("wcsrtombs(NULL, %10p, 0) returned %ld (errno is %d (%s))\n", wcbuff, (unsigned long) outlen, errno, strerror(errno));
}
FREE(outbuff);
}
} else
#endif

View File

@ -1450,7 +1450,10 @@ set_multichar_encoding(const char *str)
{
#ifdef MULTI_CHARSET
if (str && *str) {
if (!strcasecmp(str, "sjis")) {
if (!strcasecmp(str, "utf8") || !strcasecmp(str, "ucs2")) {
encoding_method = UCS2;
multichar_decode = latin1;
} else if (!strcasecmp(str, "sjis")) {
encoding_method = SJIS;
multichar_decode = sjis2jis;
} else if (!strcasecmp(str, "eucj") || !strcasecmp(str, "euckr") || !strcasecmp(str, "gb")) {

View File

@ -165,7 +165,7 @@ typedef enum {
SELECTION_DONE
} selection_op_t;
typedef enum {
LATIN1 = 0, EUCJ, EUCKR = EUCJ, GB = EUCJ, SJIS, BIG5
LATIN1 = 0, UCS2, EUCJ, EUCKR = EUCJ, GB = EUCJ, SJIS, BIG5
} encoding_t;
typedef struct {
short row, col;