eterm/src/defaultfont.c

/*
 * Setting default font and encoding according to user's locale (LC_CTYPE).
 */

#include "config.h"
#include "feature.h"

#include "font.h"
#include "defaultfont.h"

#define ENCODINGBUFLEN 100

#ifdef AUTO_ENCODING

/*
 * n2e[]: maps a normalized encoding name to its enc_label.
 *
 * Encoding names come in many spellings (for example from
 * nl_langinfo(3) or from the part of a locale name after '.').
 * The names stored here are the "truncated" form: every letter is
 * uppercase and every '-' and '_' has been removed, so a candidate
 * name must be reduced to the same form before it is compared.
 *
 * The table is terminated by an entry whose name is NULL.
 */
const struct name2encoding n2e[] = {
#ifdef MULTI_CHARSET
  /* Japanese */
  { "EUCJP",     ENC_EUCJ },
  { "UJIS",      ENC_EUCJ },
  { "SHIFTJIS",  ENC_SJIS },
  { "SJIS",      ENC_SJIS },
  /* Korean */
  { "EUCKR",     ENC_EUCKR },
  /* Chinese */
  { "EUCCN",     ENC_GB },
  { "GB2312",    ENC_GB },
  { "GB",        ENC_GB },
  { "BIG5",      ENC_BIG5 },
  { "BIGFIVE",   ENC_BIG5 },
  { "BIG5HKSCS", ENC_BIG5 },
  /* Unicode */
  { "UTF8",      ENC_UTF8 },
#endif /* MULTI_CHARSET */
  /* KOI8 */
  { "KOI8R",     ENC_KOI8R },
  { "KOI8U",     ENC_KOI8U },
  /* ISO-8859 family */
  { "ISO88591",  ENC_ISO8859_1 },
  { "ISO88592",  ENC_ISO8859_2 },
  { "ISO88593",  ENC_ISO8859_3 },
  { "ISO88594",  ENC_ISO8859_4 },
  { "ISO88595",  ENC_ISO8859_5 },
  { "ISO88596",  ENC_ISO8859_6 },
  { "ISO88597",  ENC_ISO8859_7 },
  { "ISO88598",  ENC_ISO8859_8 },
  { "ISO88599",  ENC_ISO8859_9 },
  { "ISO885910", ENC_ISO8859_10 },
  { "ISO885911", ENC_ISO8859_11 },
  { "ISO885912", ENC_ISO8859_12 },
  { "ISO885913", ENC_ISO8859_13 },
  { "ISO885914", ENC_ISO8859_14 },
  { "ISO885915", ENC_ISO8859_15 },
  /* end-of-table marker */
  { NULL,        ENC_DUMMY }
};


/*
 * l2e[]: maps a locale name prefix to its enc_label.
 *
 * Users normally select their encoding through the LC_CTYPE locale
 * (the LANG / LC_CTYPE / LC_ALL variables; see locale(7)), so this
 * table supplies the default encoding for a locale when the encoding
 * could not be determined from nl_langinfo(3) or from the "encoding"
 * part of the locale name.
 *
 * Locale names whose "encoding" part is listed in n2e[] do not need an
 * entry here, because the "encoding" part is checked against n2e[]
 * before l2e[] is consulted.
 *
 * The table is searched from the top with strncmp(3), comparing only
 * as many characters as the table entry has, and the first match wins.
 * Consequently:
 *   - a longer name must appear before any shorter name that is a
 *     prefix of it (e.g. "ja_JP.EUC" before "ja_JP");
 *   - each prefix must appear only once; a repeated prefix further
 *     down can never be reached.
 *
 * The table is terminated by an entry whose name is NULL.
 */
const struct name2encoding l2e[] = {
#ifdef MULTI_CHARSET
  { "ja_JP.EUC",	ENC_EUCJ },
  { "ja_JP",		ENC_EUCJ },
  { "ko_KR.EUC",	ENC_EUCKR },
  { "ko_KR",		ENC_EUCKR },
  { "zh_CN.EUC",	ENC_GB },
  { "zh_CN",		ENC_GB },
  { "zh_TW",		ENC_BIG5 },
#endif				/* MULTI_CHARSET */
  { "da",		ENC_ISO8859_1 },
  { "de",		ENC_ISO8859_1 },
  { "en",		ENC_ISO8859_1 },
  { "fi",		ENC_ISO8859_1 },
  { "fr",		ENC_ISO8859_1 },
  { "is",		ENC_ISO8859_1 },
  { "it",		ENC_ISO8859_1 },
  { "la",		ENC_ISO8859_1 },
  { "nl",		ENC_ISO8859_1 },
  { "no",		ENC_ISO8859_1 },
  { "pt",		ENC_ISO8859_1 },
  { "sv",		ENC_ISO8859_1 },
  { "cs",		ENC_ISO8859_2 },
  { "hr",		ENC_ISO8859_2 },
  { "hu",		ENC_ISO8859_2 },
  { "pl",		ENC_ISO8859_2 },
  { "ro",		ENC_ISO8859_2 },
  { "sk",		ENC_ISO8859_2 },
  { "sl",		ENC_ISO8859_2 },
  { "ar",		ENC_ISO8859_6 },
  { "el",		ENC_ISO8859_7 },
  { "tr",		ENC_ISO8859_9 },
  /*
   * "lt" used to be listed three times (ISO-8859-1, -2 and -13); only
   * the first entry was ever reachable.  It is now listed once, with
   * the Baltic encoding ISO-8859-13.
   */
  { "lt",		ENC_ISO8859_13 },
  { "lv",		ENC_ISO8859_13 },
  { "mi",		ENC_ISO8859_13 },
  { "ru",		ENC_KOI8R },	/* ISO8859-5 ? */
  { "uk",		ENC_KOI8U },
#if 0
  /* encodings without an enc_label yet */
  { "vi",		ENC_VISCII },
  { "th",		ENC_TIS620 },
#endif
  { NULL,		ENC_DUMMY }
};


/*
 * defaultfont[]: default fonts for each encoding that needs its own
 * set.  Corrections to the font names from native speakers are welcome.
 *
 * Each row holds, in order: the enc_label it applies to, the name of
 * the encoding method, the default font index, the list of normal
 * fonts and the list of multi-character fonts (presumably matching the
 * field order of struct defaultfont -- confirm against defaultfont.h).
 *
 * The table is terminated by the row whose label is ENC_DUMMY.
 * Encodings without a row here are handled by the fallback tables.
 */
const struct defaultfont defaultfont[] = {
#ifdef MULTI_CHARSET
  /* multi-byte encodings */
  { ENC_EUCJ,      "eucj",      DEF_EUCJ,  { NFONT_LIST_EUCJ },  { MFONT_LIST_EUCJ }  },
  { ENC_SJIS,      "sjis",      DEF_EUCJ,  { NFONT_LIST_EUCJ },  { MFONT_LIST_EUCJ }  },
  { ENC_GB,        "gb",        DEF_GB,    { NFONT_LIST_GB },    { MFONT_LIST_GB }    },
  { ENC_BIG5,      "big5",      DEF_BIG5,  { NFONT_LIST_BIG5 },  { MFONT_LIST_BIG5 }  },
  { ENC_EUCKR,     "euckr",     DEF_EUCKR, { NFONT_LIST_EUCKR }, { MFONT_LIST_EUCKR } },
  { ENC_UTF8,      "iso-10646", DEF_10646, { NFONT_LIST_10646 }, { MFONT_LIST_10646 } },
#endif /* MULTI_CHARSET */
  /* single-byte encodings with dedicated font lists */
  { ENC_ISO8859_7, "noenc",     DEF_7,     { NFONT_LIST_7 },     { MFONT_LIST_NULL }  },
  { ENC_KOI8R,     "noenc",     DEF_KOI8R, { NFONT_LIST_KOI8R }, { MFONT_LIST_NULL }  },
  { ENC_KOI8U,     "noenc",     DEF_KOI8U, { NFONT_LIST_KOI8U }, { MFONT_LIST_NULL }  },
  /* end-of-table marker */
  { ENC_DUMMY,     "noenc",     DEF_DUMMY, { MFONT_LIST_NULL },  { MFONT_LIST_NULL }  }
};

/*
 * Common rule for all ISO-8859-x encodings: each entry is used as a
 * printf-style format into which the part number x is substituted.
 */
const char *const defaultfont_8859[] = { NFONT_LIST_ISO8859X };

/*
 * Fallback defaults, used for encodings that have no row in
 * defaultfont[]: the default font index, the normal font list and,
 * with MULTI_CHARSET, the multi-character font list.
 */
const int def_def_idx = DEF_10646;

const char *const def_fonts[] = { NFONT_LIST_10646 };

#ifdef MULTI_CHARSET
const char *const def_mfonts[] = { MFONT_LIST_10646 };
#endif

/*----------------------------------------------------------------------*/
/* EXTPROTO */
void
eterm_default_font_locale(char ***fonts, char ***mfonts, char **mencoding, int *def_idx)
{
  char           *locale;
  char           *encoding_str = NULL;
  char            encoding_buf[ENCODINGBUFLEN];
  char           *p, *p2;
  enum enc_label  encoding = ENC_DUMMY;
  int             j, k;

  locale = setlocale(LC_CTYPE, "");
  if (locale == NULL)
    if ((locale = getenv("LC_ALL")) == NULL)
      if ((locale = getenv("LC_CTYPE")) == NULL)
	if ((locale = getenv("LANG")) == NULL)
	  locale = "C";	/* failsafe */

  /* Obtain a "normalized" name of current encoding.
   * The current encoding is available via nl_langinfo().
   * Otherwise, it comes from locale name.
   */
#ifdef HAVE_NL_LANGINFO
  encoding_str = nl_langinfo(CODESET);
#else
  encoding_str = NULL;
#endif
  if (encoding_str && *encoding_str) {
    for (j = 0; n2e[j].name; j++) {
      if (!strcmp(encoding_str, n2e[j].name)) {
	encoding = n2e[j].encoding;
	break;
      }
    }
  }

  if (encoding == ENC_DUMMY) {
    p = strchr(locale, '.');
    if (p) {
      strncpy(encoding_buf, p + 1, ENCODINGBUFLEN);
      p = strchr(encoding_buf, '@');
      if (p) *p = 0;
    } else {
      strncpy(encoding_buf, locale, ENCODINGBUFLEN);
    }
    encoding_buf[ENCODINGBUFLEN - 1] = 0;
    for (p = p2 = encoding_buf; 1; p++, p2++) {
      while (*p2 == '_' || *p2 == '-') p2++;
      if (!*p2) break;
      *p = toupper(*p2);
    }
    *p = 0;
    for (j = 0; n2e[j].name; j++) {
      if (!strcmp(encoding_buf, n2e[j].name)) {
	encoding = n2e[j].encoding;
	break;
      }
    }
  }

  /* If the conversion fails, try using "language"/"country"
   * part of the locale name.
   */
  if (encoding == ENC_DUMMY) {
    for (j = 0; l2e[j].name; j++) {
      if (!strncmp(locale, l2e[j].name, strlen(l2e[j].name))) {
	encoding = l2e[j].encoding;
	break;
      }
    }
  }

  /* Now, the encoding to be used has been determined.
   * Fonts and encoding will be determined according to the encoding.
   */
  for (j = 0; defaultfont[j].enc_label != ENC_DUMMY; j++) {
    if (encoding == defaultfont[j].enc_label) {
      *def_idx = defaultfont[j].def_idx;
#ifdef MULTI_CHARSET
      *mencoding = STRDUP(defaultfont[j].encoding_method);
#endif
      for (k = 0; k < NFONTS; k++) {
	eterm_font_add(fonts, defaultfont[j].font[k], k);
#ifdef MULTI_CHARSET
	eterm_font_add(mfonts, defaultfont[j].mfont[k], k);
#endif
      }
      return;
    }
  }

/*
 * fallback for unknown encodings.  ISO-8559-* gets special treatment
 */

#ifdef MULTI_CHARSET
  *mencoding = STRDUP("noenc");
#endif
  if (encoding >= ENC_ISO8859_1 && encoding <= ENC_ISO8859_LAST) {
    /* fallback for ISO-8859-* encodings */
    k = encoding - ENC_ISO8859_1 + 1;
    *def_idx = DEF_8859;
  } else {
    /* fallback for "C", "POSIX", and invalid locales */
    k = 0;
    *def_idx = def_def_idx;
  }

  for (j = 0; j < NFONTS; j++) {
    if (k == 0) eterm_font_add(fonts, def_fonts[j], j);
    else {
      /* couple of wasted bytes each but lots of future expansion */
      sprintf(encoding_buf, defaultfont_8859[j], k);
      eterm_font_add(fonts, encoding_buf, j);
    }
#ifdef MULTI_CHARSET
    eterm_font_add(mfonts, def_mfonts[j], j);
#endif
  }
}
#endif /* AUTO_ENCODING */