more files!

SVN revision: 39974
This commit is contained in:
Carsten Haitzler 2009-04-11 08:11:47 +00:00
parent 02f87aeaf5
commit e279ba4bbe
2 changed files with 321 additions and 0 deletions

View File

@ -0,0 +1,314 @@
/* Authors:
* Tom Hacohen (tom@stsob.com)
*/
#include "../evas_intl_utils.h"
#ifdef ARABIC_SUPPORT
/* Arabic contextualizing */
/* Arabic input forms: the general code points as they appear in the
 * text handed to this file, before any contextual shaping. */
enum {
   ARABIC_ALEPH_MADDA   = 0x0622,
   ARABIC_ALEPH         = 0x0627,
   ARABIC_BET           = 0x0628,
   ARABIC_TA_MARBUTA    = 0x0629,
   ARABIC_TAW           = 0x062A,
   ARABIC_TA            = 0x062B,
   ARABIC_GIMEL         = 0x062C,
   ARABIC_HETH          = 0x062D,
   ARABIC_HA            = 0x062E,
   ARABIC_DALET         = 0x062F,
   ARABIC_DAL           = 0x0630,
   ARABIC_RESH          = 0x0631,
   ARABIC_ZAYIN         = 0x0632,
   ARABIC_SHIN          = 0x0633,
   ARABIC_SH            = 0x0634,
   ARABIC_TSADE         = 0x0635,
   ARABIC_DAD           = 0x0636,
   ARABIC_TETH          = 0x0637,
   ARABIC_ZA            = 0x0638,
   ARABIC_AYIN          = 0x0639,
   ARABIC_GHAIN         = 0x063A,
   /* 0x063B - 0x0640 are not mapped by this file */
   ARABIC_PE            = 0x0641,
   ARABIC_QOPH          = 0x0642,
   ARABIC_KAPH          = 0x0643,
   ARABIC_LAMED         = 0x0644,
   ARABIC_MEM           = 0x0645,
   ARABIC_NUN           = 0x0646,
   ARABIC_HE            = 0x0647,
   ARABIC_WAW           = 0x0648,
   ARABIC_ALEPH_MAQSURA = 0x0649,
   ARABIC_YODH          = 0x064A
};
/* Arabic contextual forms: the isolated presentation form of every
 * supported letter. The other forms of a letter are reached by adding an
 * ArabicContext offset to its isolated form. */
enum {
   ARABIC_ISOLATED_ALEPH_MADDA   = 0xFE81,
   ARABIC_ISOLATED_ALEPH         = 0xFE8D,
   ARABIC_ISOLATED_TA_MARBUTA    = 0xFE93,
   ARABIC_ISOLATED_BET           = 0xFE8F,
   ARABIC_ISOLATED_TAW           = 0xFE95,
   ARABIC_ISOLATED_TA            = 0xFE99,
   ARABIC_ISOLATED_GIMEL         = 0xFE9D,
   ARABIC_ISOLATED_HETH          = 0xFEA1,
   ARABIC_ISOLATED_HA            = 0xFEA5,
   ARABIC_ISOLATED_DALET         = 0xFEA9,
   ARABIC_ISOLATED_DAL           = 0xFEAB,
   ARABIC_ISOLATED_RESH          = 0xFEAD,
   ARABIC_ISOLATED_ZAYIN         = 0xFEAF,
   ARABIC_ISOLATED_SHIN          = 0xFEB1,
   ARABIC_ISOLATED_SH            = 0xFEB5,
   ARABIC_ISOLATED_TSADE         = 0xFEB9,
   ARABIC_ISOLATED_DAD           = 0xFEBD,
   ARABIC_ISOLATED_TETH          = 0xFEC1,
   ARABIC_ISOLATED_ZA            = 0xFEC5,
   ARABIC_ISOLATED_AYIN          = 0xFEC9,
   ARABIC_ISOLATED_GHAIN         = 0xFECD,
   ARABIC_ISOLATED_PE            = 0xFED1,
   ARABIC_ISOLATED_QOPH          = 0xFED5,
   ARABIC_ISOLATED_KAPH          = 0xFED9,
   ARABIC_ISOLATED_LAMED         = 0xFEDD,
   ARABIC_ISOLATED_MEM           = 0xFEE1,
   ARABIC_ISOLATED_NUN           = 0xFEE5,
   ARABIC_ISOLATED_HE            = 0xFEE9,
   ARABIC_ISOLATED_WAW           = 0xFEED,
   ARABIC_ISOLATED_ALEPH_MAQSURA = 0xFEEF,
   ARABIC_ISOLATED_YODH          = 0xFEF1
};

/* True when c is the isolated form of a "special" letter: one that never
 * connects to the letter following it, so only its isolated and final
 * forms are ever used and it always ends the current joining run.
 *
 * Aleph madda belongs here as well: only 0xFE81 (isolated) and 0xFE82
 * (final) are its own forms. Treating it as a regular letter added the
 * initial/medial offsets to it and produced 0xFE83/0xFE84, which are the
 * forms of a different letter.
 *
 * NOTE: c is evaluated several times; do not pass an expression with side
 * effects. */
#define ARABIC_IS_SPECIAL_LETTER(c) ((c) == ARABIC_ISOLATED_ALEPH || \
                                     (c) == ARABIC_ISOLATED_ALEPH_MADDA || \
                                     (c) == ARABIC_ISOLATED_DALET || \
                                     (c) == ARABIC_ISOLATED_DAL || \
                                     (c) == ARABIC_ISOLATED_RESH || \
                                     (c) == ARABIC_ISOLATED_ZAYIN || \
                                     (c) == ARABIC_ISOLATED_WAW || \
                                     (c) == ARABIC_ISOLATED_TA_MARBUTA)
/* Inclusive range test; c is evaluated twice. */
#define ARABIC_IN_RANGE(c, first, last) ((c) >= (first) && (c) <= (last))

/* True for every presentation form this file deals with:
 *  - everything from the isolated aleph up to the last form of yodh
 *    (isolated yodh + 3),
 *  - the four code points starting at the isolated aleph madda,
 *  - 0xFBE8 and 0xFBE9, the special-cased forms of aleph maqsura.
 * c is evaluated several times; do not pass an expression with side
 * effects. */
#define ARABIC_IS_CONTEXT(c) \
   (ARABIC_IN_RANGE(c, ARABIC_ISOLATED_ALEPH, ARABIC_ISOLATED_YODH + 3) || \
    ARABIC_IN_RANGE(c, ARABIC_ISOLATED_ALEPH_MADDA, ARABIC_ISOLATED_ALEPH_MADDA + 3) || \
    (c) == 0xFBE8 || \
    (c) == 0xFBE9)

/* A character counts as an Arabic letter exactly when it is one of the
 * presentation forms above. */
#define ARABIC_IS_LETTER(c) ARABIC_IS_CONTEXT(c)
/* Position of a letter inside a joining run, used by the context logic.
 * The numeric value matters: it is added to the isolated presentation
 * form of a letter to obtain the form for that position. */
typedef enum _ArabicContext {
   ARABIC_CONTEXT_ISOLATED = 0, /* stands alone */
   ARABIC_CONTEXT_FINAL    = 1, /* last letter of a run */
   ARABIC_CONTEXT_INITIAL  = 2, /* first letter of a run */
   ARABIC_CONTEXT_MEDIAL   = 3  /* inside a run */
} ArabicContext;
/* Forward declarations of the file-local helpers. */

/* Replace every general letter in a NUL-terminated string by its isolated
 * form; returns the number of characters walked. */
static int _evas_intl_arabic_text_to_isolated(FriBidiChar *text);

/* Map one general letter to its isolated form (other values unchanged). */
static FriBidiChar _evas_intl_arabic_general_to_isolated(FriBidiChar chr);

/* Turn an isolated form into the form required by the given context. */
static FriBidiChar _evas_intl_arabic_isolated_to_context(FriBidiChar chr, ArabicContext context);
/* Shape the Arabic letters of a string in place.
 *
 * Every general letter is first replaced by its isolated presentation
 * form; each letter is then given the isolated, initial, medial or final
 * form that matches its position inside a run of consecutive letters.
 * Characters that are not Arabic letters are left untouched and end the
 * current run.
 *
 * text:    NUL-terminated string, modified in place. NULL is treated like
 *          an empty string.
 * returns: the number of characters in text (terminator not counted),
 *          0 for an empty or NULL string.
 *
 * FIXME: text that is already in contextual form is not handled properly.
 * FIXME: vowel support is needed (vowels have to be skipped when the
 *        context is analysed). */
int
evas_intl_arabic_to_context(FriBidiChar *text)
{
   int i;
   int len;
   int start_of_context = 1; /* assume the first is special/non arabic */
   int last_is_first = 0;    /* previous letter was shaped as initial */
   FriBidiChar last_letter = 0; /* isolated form of the previous letter,
                                 * 0 when the previous char is no letter */

   /* Nothing to shape. This has to return a value: the function is
    * declared to return the length of the text. */
   if (!text || !*text)
     return 0;

   len = _evas_intl_arabic_text_to_isolated(text);

   /* FIXME: make it skip vowels */
   for (i = 0; i < len; i++) {
      if (!ARABIC_IS_LETTER(text[i])) {
         /* A non letter is left alone, but it closes the run before it:
          * the previous letter was shaped as if another letter followed,
          * so it becomes final, or isolated when it was alone in its run. */
         if (last_letter && !start_of_context) {
            ArabicContext closing = last_is_first ? ARABIC_CONTEXT_ISOLATED
                                                  : ARABIC_CONTEXT_FINAL;

            text[i - 1] = _evas_intl_arabic_isolated_to_context(last_letter,
                                                                closing);
         }
         last_is_first = 0;
         start_of_context = 1;
         last_letter = 0;
         continue;
      }

      /* Remember the isolated form before text[i] is overwritten below. */
      last_letter = text[i];

      if (ARABIC_IS_SPECIAL_LETTER(text[i])) {
         /* Special letters join the letter before them but never the one
          * after: final inside a run, isolated otherwise, and the next
          * letter starts a new run. */
         if (!start_of_context)
           text[i] = _evas_intl_arabic_isolated_to_context(text[i],
                                                           ARABIC_CONTEXT_FINAL);
         start_of_context = 1;
         last_is_first = 0;
         continue;
      }

      if (start_of_context) {
         text[i] = _evas_intl_arabic_isolated_to_context(text[i],
                                                         ARABIC_CONTEXT_INITIAL);
         last_is_first = 1;
      }
      else {
         text[i] = _evas_intl_arabic_isolated_to_context(text[i],
                                                         ARABIC_CONTEXT_MEDIAL);
         last_is_first = 0;
      }
      /* special chars don't get here. */
      start_of_context = 0;
   }

   /* The text ended inside a run: the last letter is initial or medial at
    * the moment and has to become isolated or final. len is at least 1
    * here because the empty string returned early. */
   if (last_letter && !start_of_context) {
      ArabicContext closing = last_is_first ? ARABIC_CONTEXT_ISOLATED
                                            : ARABIC_CONTEXT_FINAL;

      text[len - 1] = _evas_intl_arabic_isolated_to_context(last_letter,
                                                            closing);
   }
   return len;
}
/* Map a general Arabic letter to its isolated presentation form.
 * Every supported letter is simply matched with its isolated form; any
 * other value (non Arabic, unsupported, or already a presentation form)
 * is returned unchanged. */
static FriBidiChar
_evas_intl_arabic_general_to_isolated(FriBidiChar chr)
{
   /* Lookup table indexed by the distance from ARABIC_ALEPH_MADDA, the
    * lowest supported letter. Slots left at zero have no mapping. */
#define ARABIC_TABLE_SLOT(letter) ((letter) - ARABIC_ALEPH_MADDA)
   static const FriBidiChar isolated_of[ARABIC_TABLE_SLOT(ARABIC_YODH) + 1] = {
      [ARABIC_TABLE_SLOT(ARABIC_ALEPH_MADDA)]   = ARABIC_ISOLATED_ALEPH_MADDA,
      [ARABIC_TABLE_SLOT(ARABIC_ALEPH)]         = ARABIC_ISOLATED_ALEPH,
      [ARABIC_TABLE_SLOT(ARABIC_BET)]           = ARABIC_ISOLATED_BET,
      [ARABIC_TABLE_SLOT(ARABIC_TA_MARBUTA)]    = ARABIC_ISOLATED_TA_MARBUTA,
      [ARABIC_TABLE_SLOT(ARABIC_TAW)]           = ARABIC_ISOLATED_TAW,
      [ARABIC_TABLE_SLOT(ARABIC_TA)]            = ARABIC_ISOLATED_TA,
      [ARABIC_TABLE_SLOT(ARABIC_GIMEL)]         = ARABIC_ISOLATED_GIMEL,
      [ARABIC_TABLE_SLOT(ARABIC_HETH)]          = ARABIC_ISOLATED_HETH,
      [ARABIC_TABLE_SLOT(ARABIC_HA)]            = ARABIC_ISOLATED_HA,
      [ARABIC_TABLE_SLOT(ARABIC_DALET)]         = ARABIC_ISOLATED_DALET,
      [ARABIC_TABLE_SLOT(ARABIC_DAL)]           = ARABIC_ISOLATED_DAL,
      [ARABIC_TABLE_SLOT(ARABIC_RESH)]          = ARABIC_ISOLATED_RESH,
      [ARABIC_TABLE_SLOT(ARABIC_ZAYIN)]         = ARABIC_ISOLATED_ZAYIN,
      [ARABIC_TABLE_SLOT(ARABIC_SHIN)]          = ARABIC_ISOLATED_SHIN,
      [ARABIC_TABLE_SLOT(ARABIC_SH)]            = ARABIC_ISOLATED_SH,
      [ARABIC_TABLE_SLOT(ARABIC_TSADE)]         = ARABIC_ISOLATED_TSADE,
      [ARABIC_TABLE_SLOT(ARABIC_DAD)]           = ARABIC_ISOLATED_DAD,
      [ARABIC_TABLE_SLOT(ARABIC_TETH)]          = ARABIC_ISOLATED_TETH,
      [ARABIC_TABLE_SLOT(ARABIC_ZA)]            = ARABIC_ISOLATED_ZA,
      [ARABIC_TABLE_SLOT(ARABIC_AYIN)]          = ARABIC_ISOLATED_AYIN,
      [ARABIC_TABLE_SLOT(ARABIC_GHAIN)]         = ARABIC_ISOLATED_GHAIN,
      [ARABIC_TABLE_SLOT(ARABIC_PE)]            = ARABIC_ISOLATED_PE,
      [ARABIC_TABLE_SLOT(ARABIC_QOPH)]          = ARABIC_ISOLATED_QOPH,
      [ARABIC_TABLE_SLOT(ARABIC_KAPH)]          = ARABIC_ISOLATED_KAPH,
      [ARABIC_TABLE_SLOT(ARABIC_LAMED)]         = ARABIC_ISOLATED_LAMED,
      [ARABIC_TABLE_SLOT(ARABIC_MEM)]           = ARABIC_ISOLATED_MEM,
      [ARABIC_TABLE_SLOT(ARABIC_NUN)]           = ARABIC_ISOLATED_NUN,
      [ARABIC_TABLE_SLOT(ARABIC_HE)]            = ARABIC_ISOLATED_HE,
      [ARABIC_TABLE_SLOT(ARABIC_WAW)]           = ARABIC_ISOLATED_WAW,
      [ARABIC_TABLE_SLOT(ARABIC_ALEPH_MAQSURA)] = ARABIC_ISOLATED_ALEPH_MAQSURA,
      [ARABIC_TABLE_SLOT(ARABIC_YODH)]          = ARABIC_ISOLATED_YODH
   };
#undef ARABIC_TABLE_SLOT
   FriBidiChar isolated;

   /* outside the table: not a letter we know about */
   if (chr < ARABIC_ALEPH_MADDA || chr > ARABIC_YODH)
     return chr;

   isolated = isolated_of[chr - ARABIC_ALEPH_MADDA];
   /* an empty slot means the code point has no mapping */
   return isolated ? isolated : chr;
}
/* Turn the isolated presentation form of a letter into the form that
 * belongs to the given context.
 *
 * chr:     isolated form of a letter (an ARABIC_ISOLATED_* value)
 * context: the position the letter takes in its run
 * returns: the code point of the requested form
 *
 * For a regular letter the requested form is its isolated form plus the
 * numeric value of the context. */
static FriBidiChar
_evas_intl_arabic_isolated_to_context(FriBidiChar chr, ArabicContext context)
{
   if (ARABIC_IS_SPECIAL_LETTER(chr)) {
      /* Special letters only use two forms. Contexts in which the letter
       * is not joined to the one before it (isolated, initial) keep the
       * isolated form; the others (final, medial) get the final form.
       * An isolated request used to yield the final form by mistake. */
      if (context == ARABIC_CONTEXT_ISOLATED ||
          context == ARABIC_CONTEXT_INITIAL)
        return chr;
      return chr + ARABIC_CONTEXT_FINAL;
   }

   /* HACK AROUND ALIF MAQSURA: its initial and medial forms do not follow
    * its isolated/final forms, they sit at 0xFBE8 (initial) and 0xFBE9. */
   if (chr == ARABIC_ISOLATED_ALEPH_MAQSURA &&
       context >= ARABIC_CONTEXT_INITIAL)
     return 0xFBE8 + (context - ARABIC_CONTEXT_INITIAL);

   return chr + context;
}
/* Replace, in place, every general Arabic letter of a NUL-terminated
 * string by its isolated form. Characters that are not Arabic, or that
 * are already in context, come back unchanged from the per-character
 * helper and so stay as they are.
 *
 * returns: the number of characters before the terminator */
static int
_evas_intl_arabic_text_to_isolated(FriBidiChar *text)
{
   int count;

   for (count = 0; text[count]; count++)
     text[count] = _evas_intl_arabic_general_to_isolated(text[count]);

   return count;
}
#endif

View File

@ -0,0 +1,7 @@
#ifndef _EVAS_INTL_ARABIC
#define _EVAS_INTL_ARABIC

/* Convert the Arabic letters of the NUL-terminated string text, in place,
 * to their contextual forms. Declared to return int.
 * NOTE(review): FriBidiChar is not declared by this header; whoever
 * includes it presumably has to pull in the FriBidi types first. */
int evas_intl_arabic_to_context(FriBidiChar *text);

#endif /* _EVAS_INTL_ARABIC */