forked from enlightenment/efl
parent
02f87aeaf5
commit
e279ba4bbe
|
@ -0,0 +1,314 @@
|
||||||
|
/* Authors:
|
||||||
|
* Tom Hacohen (tom@stsob.com)
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "../evas_intl_utils.h"
|
||||||
|
|
||||||
|
#ifdef ARABIC_SUPPORT
|
||||||
|
/* arabic contextualizing */
|
||||||
|
|
||||||
|
/* Arabic input forms: the code points the letters have in ordinary,
 * not yet contextualized text. _evas_intl_arabic_general_to_isolated()
 * maps each of them onto its ARABIC_ISOLATED_* counterpart. */
#define ARABIC_ALEPH_MADDA     0x0622
#define ARABIC_ALEPH           0x0627
#define ARABIC_BET             0x0628
#define ARABIC_TA_MARBUTA      0x0629
#define ARABIC_TAW             0x062A
#define ARABIC_TA              0x062B
#define ARABIC_GIMEL           0x062C
#define ARABIC_HETH            0x062D
#define ARABIC_HA              0x062E
#define ARABIC_DALET           0x062F
#define ARABIC_DAL             0x0630
#define ARABIC_RESH            0x0631
#define ARABIC_ZAYIN           0x0632
#define ARABIC_SHIN            0x0633
#define ARABIC_SH              0x0634
#define ARABIC_TSADE           0x0635
#define ARABIC_DAD             0x0636
#define ARABIC_TETH            0x0637
#define ARABIC_ZA              0x0638
#define ARABIC_AYIN            0x0639
#define ARABIC_GHAIN           0x063A
#define ARABIC_PE              0x0641
#define ARABIC_QOPH            0x0642
#define ARABIC_KAPH            0x0643
#define ARABIC_LAMED           0x0644
#define ARABIC_MEM             0x0645
#define ARABIC_NUN             0x0646
#define ARABIC_HE              0x0647
#define ARABIC_WAW             0x0648
#define ARABIC_ALEPH_MAQSURA   0x0649
#define ARABIC_YODH            0x064A
|
||||||
|
/* Arabic contextual forms: the isolated form of every supported letter,
 * listed in code point order. The other forms of a letter are reached by
 * adding an ArabicContext value to its isolated form. */
#define ARABIC_ISOLATED_ALEPH_MADDA     0xFE81
#define ARABIC_ISOLATED_ALEPH           0xFE8D
#define ARABIC_ISOLATED_BET             0xFE8F
#define ARABIC_ISOLATED_TA_MARBUTA      0xFE93
#define ARABIC_ISOLATED_TAW             0xFE95
#define ARABIC_ISOLATED_TA              0xFE99
#define ARABIC_ISOLATED_GIMEL           0xFE9D
#define ARABIC_ISOLATED_HETH            0xFEA1
#define ARABIC_ISOLATED_HA              0xFEA5
#define ARABIC_ISOLATED_DALET           0xFEA9
#define ARABIC_ISOLATED_DAL             0xFEAB
#define ARABIC_ISOLATED_RESH            0xFEAD
#define ARABIC_ISOLATED_ZAYIN           0xFEAF
#define ARABIC_ISOLATED_SHIN            0xFEB1
#define ARABIC_ISOLATED_SH              0xFEB5
#define ARABIC_ISOLATED_TSADE           0xFEB9
#define ARABIC_ISOLATED_DAD             0xFEBD
#define ARABIC_ISOLATED_TETH            0xFEC1
#define ARABIC_ISOLATED_ZA              0xFEC5
#define ARABIC_ISOLATED_AYIN            0xFEC9
#define ARABIC_ISOLATED_GHAIN           0xFECD
#define ARABIC_ISOLATED_PE              0xFED1
#define ARABIC_ISOLATED_QOPH            0xFED5
#define ARABIC_ISOLATED_KAPH            0xFED9
#define ARABIC_ISOLATED_LAMED           0xFEDD
#define ARABIC_ISOLATED_MEM             0xFEE1
#define ARABIC_ISOLATED_NUN             0xFEE5
#define ARABIC_ISOLATED_HE              0xFEE9
#define ARABIC_ISOLATED_WAW             0xFEED
#define ARABIC_ISOLATED_ALEPH_MAQSURA   0xFEEF
#define ARABIC_ISOLATED_YODH            0xFEF1
|
||||||
|
|
||||||
|
/* True when c is the isolated form of a "special" letter: one that never
 * connects to the letter following it. Such a letter only has an isolated
 * and a final form, and it always ends the connected run it is part of.
 *
 * Aleph with madda belongs here as well: it has no initial or medial form,
 * so treating it as a regular letter (isolated form + 2 / + 3) would produce
 * the code points of a different letter.
 *
 * NOTE: c is evaluated several times, do not pass an expression with side
 * effects. */
#define ARABIC_IS_SPECIAL_LETTER(c) ((c) == ARABIC_ISOLATED_ALEPH_MADDA || \
                                     (c) == ARABIC_ISOLATED_ALEPH || \
                                     (c) == ARABIC_ISOLATED_DALET || \
                                     (c) == ARABIC_ISOLATED_DAL || \
                                     (c) == ARABIC_ISOLATED_RESH || \
                                     (c) == ARABIC_ISOLATED_ZAYIN || \
                                     (c) == ARABIC_ISOLATED_WAW || \
                                     (c) == ARABIC_ISOLATED_TA_MARBUTA)
|
||||||
|
/* True when c is a contextual form handled here: anything from the isolated
 * aleph up to the last form of yodh, the four code points starting at the
 * isolated aleph with madda, and the two extra forms of aleph maqsura
 * (0xFBE8 and 0xFBE9) that live outside the main range.
 *
 * NOTE: c is evaluated several times, do not pass an expression with side
 * effects. */
#define ARABIC_IS_CONTEXT(c) \
   (((c) >= ARABIC_ISOLATED_ALEPH && \
     (c) <= ARABIC_ISOLATED_YODH + 3) || \
    ((c) >= ARABIC_ISOLATED_ALEPH_MADDA && \
     (c) <= ARABIC_ISOLATED_ALEPH_MADDA + 3) || \
    (c) == 0xFBE8 || \
    (c) == 0xFBE9)

/* Once the text has been converted to isolated forms, "is a letter" and
 * "is a contextual form" are the same question. */
#define ARABIC_IS_LETTER(c) ARABIC_IS_CONTEXT(c)
|
||||||
|
/* Shape a letter takes depending on its neighbours. The numeric values are
 * load-bearing: each one is the offset that gets added to a letter's
 * isolated form to obtain the matching contextual form. */
typedef enum _ArabicContext
{
   ARABIC_CONTEXT_ISOLATED = 0, /* stands alone */
   ARABIC_CONTEXT_FINAL    = 1, /* last letter of a connected run */
   ARABIC_CONTEXT_INITIAL  = 2, /* first letter of a connected run */
   ARABIC_CONTEXT_MEDIAL   = 3  /* in the middle of a connected run */
} ArabicContext;
|
||||||
|
|
||||||
|
static FriBidiChar
|
||||||
|
_evas_intl_arabic_isolated_to_context(FriBidiChar chr, ArabicContext context);
|
||||||
|
|
||||||
|
static int
|
||||||
|
_evas_intl_arabic_text_to_isolated(FriBidiChar *text);
|
||||||
|
|
||||||
|
static FriBidiChar
|
||||||
|
_evas_intl_arabic_general_to_isolated(FriBidiChar chr);
|
||||||
|
|
||||||
|
/* FIXME: there are issues with text that's already in context
|
||||||
|
* vowels support is needed (skip them when analysing context)*/
|
||||||
|
int
|
||||||
|
evas_intl_arabic_to_context(FriBidiChar *text)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
int len;
|
||||||
|
int start_of_context = 1; /* assume the first is special/non arabic */
|
||||||
|
int last_is_first = 0;
|
||||||
|
int last_letter = 0;
|
||||||
|
|
||||||
|
/* check for empty string */
|
||||||
|
if (!*text)
|
||||||
|
return;
|
||||||
|
|
||||||
|
len = _evas_intl_arabic_text_to_isolated(text);
|
||||||
|
/*FIXME: make it skip vowels */
|
||||||
|
for (i = 0 ; i < len ; i++) {
|
||||||
|
|
||||||
|
if (! ARABIC_IS_LETTER(text[i])) {
|
||||||
|
/* mark so it won't be touched,
|
||||||
|
* though start formating */
|
||||||
|
if (last_letter && !start_of_context) {
|
||||||
|
ArabicContext tmp = (last_is_first) ?
|
||||||
|
ARABIC_CONTEXT_ISOLATED
|
||||||
|
:
|
||||||
|
ARABIC_CONTEXT_FINAL;
|
||||||
|
text[i-1] = _evas_intl_arabic_isolated_to_context(
|
||||||
|
last_letter,
|
||||||
|
tmp);
|
||||||
|
|
||||||
|
}
|
||||||
|
last_is_first = 0;
|
||||||
|
start_of_context = 1;
|
||||||
|
last_letter = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
/* adjust the last letter */
|
||||||
|
last_letter = text[i];
|
||||||
|
if (ARABIC_IS_SPECIAL_LETTER(text[i])) {
|
||||||
|
if (!start_of_context)
|
||||||
|
text[i] = _evas_intl_arabic_isolated_to_context(text[i], ARABIC_CONTEXT_FINAL);
|
||||||
|
/* else: leave isolated */
|
||||||
|
|
||||||
|
start_of_context = 1;
|
||||||
|
last_is_first = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (start_of_context) {
|
||||||
|
text[i] = _evas_intl_arabic_isolated_to_context(text[i], ARABIC_CONTEXT_INITIAL);
|
||||||
|
last_is_first = 1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
text[i] = _evas_intl_arabic_isolated_to_context(text[i], ARABIC_CONTEXT_MEDIAL);
|
||||||
|
last_is_first = 0;
|
||||||
|
}
|
||||||
|
/* spceial chars don't get here. */
|
||||||
|
start_of_context = 0;
|
||||||
|
|
||||||
|
}
|
||||||
|
/* if it's arabic and not isolated, the last is always final */
|
||||||
|
i--;
|
||||||
|
if (last_letter && !start_of_context) {
|
||||||
|
ArabicContext tmp = (last_is_first) ? ARABIC_CONTEXT_ISOLATED : ARABIC_CONTEXT_FINAL;
|
||||||
|
/* because it's medial atm, and should be isolated */
|
||||||
|
text[i] = _evas_intl_arabic_isolated_to_context(
|
||||||
|
last_letter,
|
||||||
|
tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* I wish I could think about a simpler way to do it.
|
||||||
|
* Just match every word with it's isolated form */
|
||||||
|
static FriBidiChar
|
||||||
|
_evas_intl_arabic_general_to_isolated(FriBidiChar chr)
|
||||||
|
{
|
||||||
|
switch (chr) {
|
||||||
|
case ARABIC_ALEPH_MADDA:
|
||||||
|
return ARABIC_ISOLATED_ALEPH_MADDA;
|
||||||
|
|
||||||
|
case ARABIC_ALEPH:
|
||||||
|
return ARABIC_ISOLATED_ALEPH;
|
||||||
|
|
||||||
|
case ARABIC_TA_MARBUTA:
|
||||||
|
return ARABIC_ISOLATED_TA_MARBUTA;
|
||||||
|
case ARABIC_BET:
|
||||||
|
return ARABIC_ISOLATED_BET;
|
||||||
|
|
||||||
|
case ARABIC_TAW:
|
||||||
|
return ARABIC_ISOLATED_TAW;
|
||||||
|
|
||||||
|
case ARABIC_TA:
|
||||||
|
return ARABIC_ISOLATED_TA;
|
||||||
|
|
||||||
|
case ARABIC_GIMEL:
|
||||||
|
return ARABIC_ISOLATED_GIMEL;
|
||||||
|
|
||||||
|
case ARABIC_HETH:
|
||||||
|
return ARABIC_ISOLATED_HETH;
|
||||||
|
|
||||||
|
case ARABIC_HA:
|
||||||
|
return ARABIC_ISOLATED_HA;
|
||||||
|
|
||||||
|
case ARABIC_DALET:
|
||||||
|
return ARABIC_ISOLATED_DALET;
|
||||||
|
|
||||||
|
case ARABIC_DAL:
|
||||||
|
return ARABIC_ISOLATED_DAL;
|
||||||
|
|
||||||
|
case ARABIC_RESH:
|
||||||
|
return ARABIC_ISOLATED_RESH;
|
||||||
|
|
||||||
|
case ARABIC_ZAYIN:
|
||||||
|
return ARABIC_ISOLATED_ZAYIN;
|
||||||
|
|
||||||
|
case ARABIC_SHIN:
|
||||||
|
return ARABIC_ISOLATED_SHIN;
|
||||||
|
|
||||||
|
case ARABIC_SH:
|
||||||
|
return ARABIC_ISOLATED_SH;
|
||||||
|
|
||||||
|
case ARABIC_TSADE:
|
||||||
|
return ARABIC_ISOLATED_TSADE;
|
||||||
|
|
||||||
|
case ARABIC_DAD:
|
||||||
|
return ARABIC_ISOLATED_DAD;
|
||||||
|
|
||||||
|
case ARABIC_TETH:
|
||||||
|
return ARABIC_ISOLATED_TETH;
|
||||||
|
|
||||||
|
case ARABIC_ZA:
|
||||||
|
return ARABIC_ISOLATED_ZA;
|
||||||
|
|
||||||
|
case ARABIC_AYIN:
|
||||||
|
return ARABIC_ISOLATED_AYIN;
|
||||||
|
|
||||||
|
case ARABIC_GHAIN:
|
||||||
|
return ARABIC_ISOLATED_GHAIN;
|
||||||
|
|
||||||
|
case ARABIC_PE:
|
||||||
|
return ARABIC_ISOLATED_PE;
|
||||||
|
|
||||||
|
case ARABIC_QOPH:
|
||||||
|
return ARABIC_ISOLATED_QOPH;
|
||||||
|
|
||||||
|
case ARABIC_KAPH:
|
||||||
|
return ARABIC_ISOLATED_KAPH;
|
||||||
|
|
||||||
|
case ARABIC_LAMED:
|
||||||
|
return ARABIC_ISOLATED_LAMED;
|
||||||
|
|
||||||
|
case ARABIC_MEM:
|
||||||
|
return ARABIC_ISOLATED_MEM;
|
||||||
|
|
||||||
|
case ARABIC_NUN:
|
||||||
|
return ARABIC_ISOLATED_NUN;
|
||||||
|
|
||||||
|
case ARABIC_HE:
|
||||||
|
return ARABIC_ISOLATED_HE;
|
||||||
|
|
||||||
|
case ARABIC_WAW:
|
||||||
|
return ARABIC_ISOLATED_WAW;
|
||||||
|
|
||||||
|
case ARABIC_ALEPH_MAQSURA:
|
||||||
|
return ARABIC_ISOLATED_ALEPH_MAQSURA;
|
||||||
|
|
||||||
|
case ARABIC_YODH:
|
||||||
|
return ARABIC_ISOLATED_YODH;
|
||||||
|
default:
|
||||||
|
return chr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static FriBidiChar
|
||||||
|
_evas_intl_arabic_isolated_to_context(FriBidiChar chr, ArabicContext context)
|
||||||
|
{
|
||||||
|
if (ARABIC_IS_SPECIAL_LETTER(chr)) {
|
||||||
|
if (context == ARABIC_CONTEXT_INITIAL)
|
||||||
|
return chr;
|
||||||
|
else
|
||||||
|
return chr + ARABIC_CONTEXT_FINAL;
|
||||||
|
}
|
||||||
|
/* HACK AROUND ALIF MAQSURA */
|
||||||
|
else if (chr == ARABIC_ISOLATED_ALEPH_MAQSURA && context > 1) {
|
||||||
|
chr = 0xFBE8; /* the initial form */
|
||||||
|
context -= 2;
|
||||||
|
}
|
||||||
|
return chr + context;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
_evas_intl_arabic_text_to_isolated(FriBidiChar *text)
|
||||||
|
{
|
||||||
|
int i=0;
|
||||||
|
while (*text) {
|
||||||
|
/* if it's not arabic/it's already in context
|
||||||
|
* it's just returned the same */
|
||||||
|
*text = _evas_intl_arabic_general_to_isolated(*text);
|
||||||
|
text++;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
#endif
|
|
@ -0,0 +1,7 @@
|
||||||
|
#ifndef _EVAS_INTL_ARABIC
|
||||||
|
#define _EVAS_INTL_ARABIC
|
||||||
|
|
||||||
|
int
|
||||||
|
evas_intl_arabic_to_context(FriBidiChar *text);
|
||||||
|
|
||||||
|
#endif
|
Loading…
Reference in New Issue