Fri Dec 17 16:35:31 2004 Michael Jennings (mej)

Another attempt (and failure) at UTF-8 support. Disabled until someone can help me out. If you know iconv(), please have a look at the FIXME_BLOCK starting at line 3509 of src/command.c.

SVN revision: 12495
2004-12-17 21:37:39 +00:00 · 2004-12-17 21:37:39 +00:00 · d7f979b78b
parent 19cae1a53d
commit d7f979b78b
4 changed files with 59 additions and 10 deletions
--- a/8
+++ b/8
@ -5271,3 +5271,11 @@ The benefits of being a passenger on a long road trip...  The
 beginnings of EWMH opacity (a la xcompmgr) support and (hopefully)
 UTF-8 support.  Both as yet untested.
 ----------------------------------------------------------------------
 Fri Dec 17 16:35:31 2004                        Michael Jennings (mej)
 Another attempt (and failure) at UTF-8 support.  Disabled until
 someone can help me out.
 If you know iconv(), please have a look at the FIXME_BLOCK starting at
 line 3509 of src/command.c.
 ----------------------------------------------------------------------
--- a/src/command.c
+++ b/src/command.c
@ -111,6 +111,7 @@ static const char cvs_ident[] = "$Id$";
 # include <locale.h>
 # include <langinfo.h>
 # include <iconv.h>
 # include <wchar.h>
 #endif
 /* Eterm-specific Headers */
@ -3505,29 +3506,45 @@ main_loop(void)
            }
            D_SCREEN(("Adding %d lines (%d chars); str == %8p, cmdbuf_ptr == %8p, cmdbuf_endp == %8p\n",
                      nlines, cmdbuf_ptr - str, str, cmdbuf_ptr, cmdbuf_endp));
-#ifdef MULTI_CHARSET
+#if FIXME_BLOCK
            /* 
             * iconv() is not my friend. :-( I've tried various things
             * to make this work (including UCS2, SJIS, EUCJ, and
             * WCHAR_T), but nothing has worked.  I'm obviously
             * missing something, so if you know what, kindly throw me
             * a clue.  :-)                                       -- mej
             */
            if (!strcmp(nl_langinfo(CODESET), "UTF-8")) {
                iconv_t handle;
-                handle = iconv_open("UTF-8", "UCS2");
+                if (encoding_method != UCS2) {
                    set_multichar_encoding("utf8");
                }
                handle = iconv_open("UTF-8", "WCHAR_T");
                if (handle == SPIF_CAST_C(iconv_t) -1) {
-                    print_error("Unable to decode UTF-8 locale %s to UCS-2.  Defaulting to portable C locale.\n",
+                    print_error("Unable to decode UTF-8 locale %s to WCHAR_T.  Defaulting to portable C locale.\n",
                                setlocale(LC_ALL, ""));
                    setlocale(LC_ALL, "C");
                    scr_add_lines(str, nlines, (cmdbuf_ptr - str));
                } else {
                    char *outbuff, *pinbuff, *poutbuff;
                    wchar_t *wcbuff;
                    mbstate_t mbs;
                    size_t bufflen, outlen = 0, retval;
                    pinbuff = (char *) str;
                    bufflen = cmdbuf_ptr - str;
-                    poutbuff = outbuff = SPIF_CAST_C(char *) MALLOC(bufflen * 6);
+                    outlen = bufflen * 6;
                    poutbuff = outbuff = SPIF_CAST_C(char *) MALLOC(outlen);
                    errno = 0;
                    D_VT(("Allocated output buffer of %lu chars at %010p against input buffer of %lu\n", bufflen * 6, outbuff, bufflen));
                    print_warning("Moo:  %s\n", safe_print_string(str, bufflen));
                    retval = iconv(handle, &pinbuff, &bufflen, &poutbuff, &outlen);
                    if (retval != (size_t) -1) {
                        errno = 0;
                    }
                    if (errno == E2BIG) {
-                        print_error("My UTF-8 decode buffer was too small by %lu bytes?!", bufflen);
+                        print_error("My UTF-8 decode buffer was too small by %lu bytes?!\n", bufflen);
                    } else if (errno == EILSEQ) {
                        print_error("Illegal multibyte sequence encountered at \'%c\' (0x%02x); skipping.\n",
                                    *pinbuff, *pinbuff);
@ -3535,12 +3552,33 @@ main_loop(void)
                        pinbuff++;
                    } else if (errno == EINVAL) {
                        D_VT(("Incomplete multibyte sequence encountered.\n"));
                        print_warning("Converted %lu input chars to %lu output chars before incomplete sequence.\n", (cmdbuf_ptr - str), outlen);
                    } else {
                        print_warning("Converted %lu input chars to %lu output chars.\n", (cmdbuf_ptr - str), outlen);
                    }
-                    if (pinbuff > (char *) str) {
+                    print_warning("Moo2:  %s\n", safe_print_string(outbuff, outlen));
-                        cmdbuf_ptr = (unsigned char *) pinbuff;
+                    MEMSET(outbuff + outlen, 0, sizeof(wchar_t));
-                        scr_add_lines(str, nlines, (cmdbuf_ptr - str));
+                    wcbuff = SPIF_CAST_C(wchar_t *) outbuff;
                    MEMSET(&mbs, 0, sizeof(mbstate_t));
                    outlen = wcsrtombs(NULL, &wcbuff, 0, &mbs) + 1;
                    if (outlen > 0) {
                        outbuff = SPIF_CAST_C(char *) MALLOC(outlen);
                        outlen = wcsrtombs(outbuff, &wcbuff, outlen, &mbs);
                        if ((long)outlen >= 0) {
                            FREE(wcbuff);
                            print_error("I win!\n");
                        } else {
                            print_error("wcsrtombs() returned %ld (errno is %d (%s))\n", (unsigned long) outlen, errno, strerror(errno));
                        }
                        if (pinbuff > (char *) str) {
                            cmdbuf_ptr = (unsigned char *) pinbuff;
                            scr_add_lines(outbuff, nlines, outlen);
                        }
                    } else {
                        print_error("wcsrtombs(NULL, %10p, 0) returned %ld (errno is %d (%s))\n", wcbuff, (unsigned long) outlen, errno, strerror(errno));
                    }
                    FREE(outbuff);
                }
            } else
 #endif
--- a/src/screen.c
+++ b/src/screen.c
@ -1450,7 +1450,10 @@ set_multichar_encoding(const char *str)
 {
 #ifdef MULTI_CHARSET
    if (str && *str) {
-        if (!strcasecmp(str, "sjis")) {
+        if (!strcasecmp(str, "utf8") || !strcasecmp(str, "ucs2")) {
            encoding_method = UCS2;
            multichar_decode = latin1;
        } else if (!strcasecmp(str, "sjis")) {
            encoding_method = SJIS;
            multichar_decode = sjis2jis;
        } else if (!strcasecmp(str, "eucj") || !strcasecmp(str, "euckr") || !strcasecmp(str, "gb")) {
--- a/src/screen.h
+++ b/src/screen.h
@ -165,7 +165,7 @@ typedef enum {
  SELECTION_DONE
 } selection_op_t;
 typedef enum {
-  LATIN1 = 0, EUCJ, EUCKR = EUCJ, GB = EUCJ, SJIS, BIG5
+  LATIN1 = 0, UCS2, EUCJ, EUCKR = EUCJ, GB = EUCJ, SJIS, BIG5
 } encoding_t;
 typedef struct {
  short row, col;