evas textblock: add/apply cursor cluster APIs based on grapheme cluster

Summary:
Add a feature for moving cursor over a grapheme cluster.
It is applied to edje_entry.c and elm_entry.c for improving
cursor handling just like other modern text editors. ex) gedit
The patch on Evas needs to update libunibreak library.
So, the patch will update libunibreak, too.
@feature

Test Plan:
1. Put "ഹലോ" in your entry.
2. Your cursor can reach the end of the text from the beginning
   in only 2 right-key events with this feature.

Reviewers: raster, cedric, jpeg, herdsman, zmike, devilhorns

Reviewed By: herdsman, zmike

Subscribers: #reviewers, #committers, zmike, bowonryu, woohyun

Tags: #efl

Differential Revision: https://phab.enlightenment.org/D5490
This commit is contained in:
Youngbok Shin 2018-08-20 07:21:53 -04:00 committed by Mike Blumenkrantz
parent 8da56ac873
commit 517018e008
26 changed files with 2295 additions and 103 deletions

View File

@ -193,7 +193,10 @@ static_libs/libunibreak/linebreak.h \
static_libs/libunibreak/linebreakdef.h \
static_libs/libunibreak/wordbreakdef.h \
static_libs/libunibreak/wordbreak.h \
static_libs/libunibreak/wordbreakdata.c
static_libs/libunibreak/wordbreakdata.c \
static_libs/libunibreak/graphemebreak.h \
static_libs/libunibreak/graphemebreakdef.h \
static_libs/libunibreak/graphemebreakdata.c
# Linebreak
lib_evas_libevas_la_SOURCES = \
@ -202,7 +205,8 @@ static_libs/libunibreak/unibreakdef.c \
static_libs/libunibreak/linebreak.c \
static_libs/libunibreak/linebreakdata.c \
static_libs/libunibreak/linebreakdef.c \
static_libs/libunibreak/wordbreak.c
static_libs/libunibreak/wordbreak.c \
static_libs/libunibreak/graphemebreak.c
# Main
lib_evas_libevas_la_SOURCES += \

View File

@ -536,7 +536,7 @@ _curs_jump_line(Evas_Textblock_Cursor *c, Evas_Object *o, Entry *en, int ln)
if (!evas_object_textblock_line_number_geometry_get(o, ln, &lx, &ly, &lw, &lh))
return EINA_FALSE;
if (evas_textblock_cursor_char_coord_set(c, cx, ly + (lh / 2)))
if (evas_textblock_cursor_cluster_coord_set(c, cx, ly + (lh / 2)))
return EINA_TRUE;
evas_textblock_cursor_line_set(c, ln);
if (cx < (lx + (lw / 2)))
@ -1607,24 +1607,33 @@ _delete_emit(Edje *ed, Evas_Textblock_Cursor *c, Entry *en, size_t pos,
ERR("Running very low on memory");
return;
}
char *tmp = evas_textblock_cursor_content_get(c);
char *tmp = NULL;
info->insert = EINA_FALSE;
if (backspace)
{
info->change.del.start = pos - 1;
info->change.del.end = pos;
tmp = evas_textblock_cursor_content_get(c);
evas_textblock_cursor_char_delete(c);
}
else
{
info->change.del.start = pos + 1;
Evas_Textblock_Cursor *cc = evas_object_textblock_cursor_new(en->rp->object);
evas_textblock_cursor_copy(c, cc);
evas_textblock_cursor_cluster_next(cc);
info->change.del.start = evas_textblock_cursor_pos_get(cc);
info->change.del.end = pos;
tmp = evas_textblock_cursor_range_text_get(c, cc, EVAS_TEXTBLOCK_TEXT_MARKUP);
evas_textblock_cursor_range_delete(c, cc);
evas_textblock_cursor_free(cc);
}
info->change.del.content = eina_stringshare_add(tmp);
if (tmp) free(tmp);
evas_textblock_cursor_char_delete(c);
_edje_emit(ed, "entry,changed", en->rp->part->name);
_edje_emit_full(ed, "entry,changed,user", en->rp->part->name,
info, _free_entry_change_info);
@ -1855,7 +1864,7 @@ _edje_key_down_cb(void *data, Evas *e EINA_UNUSED, Evas_Object *obj EINA_UNUSED,
}
}
}
if (evas_textblock_cursor_char_prev(en->cursor))
if (evas_textblock_cursor_cluster_prev(en->cursor))
ev->event_flags |= EVAS_EVENT_FLAG_ON_HOLD;
#if defined(__APPLE__) && defined(__MACH__)
if (altgr) evas_textblock_cursor_word_start(en->cursor);
@ -1903,7 +1912,7 @@ _edje_key_down_cb(void *data, Evas *e EINA_UNUSED, Evas_Object *obj EINA_UNUSED,
#else
if (control) evas_textblock_cursor_word_end(en->cursor);
#endif
if (evas_textblock_cursor_char_next(en->cursor))
if (evas_textblock_cursor_cluster_next(en->cursor))
ev->event_flags |= EVAS_EVENT_FLAG_ON_HOLD;
if (en->select_allow)
{
@ -1921,7 +1930,7 @@ _edje_key_down_cb(void *data, Evas *e EINA_UNUSED, Evas_Object *obj EINA_UNUSED,
// del to start of previous word
_sel_start(en->cursor, rp->object, en);
evas_textblock_cursor_char_prev(en->cursor);
evas_textblock_cursor_cluster_prev(en->cursor);
evas_textblock_cursor_word_start(en->cursor);
_sel_preextend(ed, en->cursor, rp->object, en);
@ -1961,7 +1970,7 @@ _edje_key_down_cb(void *data, Evas *e EINA_UNUSED, Evas_Object *obj EINA_UNUSED,
_sel_start(en->cursor, rp->object, en);
evas_textblock_cursor_word_end(en->cursor);
evas_textblock_cursor_char_next(en->cursor);
evas_textblock_cursor_cluster_next(en->cursor);
_sel_extend(ed, en->cursor, rp->object, en);
@ -2400,7 +2409,7 @@ _edje_key_up_cb(void *data, Evas *e EINA_UNUSED, Evas_Object *obj EINA_UNUSED, v
}
static Evas_Textblock_Cursor *
_edje_cursor_char_coord_set(Edje_Real_Part *rp, Evas_Coord canvasx, Evas_Coord canvasy, Evas_Coord *cx, Evas_Coord *cy)
_edje_cursor_cluster_coord_set(Edje_Real_Part *rp, Evas_Coord canvasx, Evas_Coord canvasy, Evas_Coord *cx, Evas_Coord *cy)
{
Entry *en;
Evas_Coord x, y, lh = 0, cly = 0;
@ -2432,7 +2441,7 @@ _edje_cursor_char_coord_set(Edje_Real_Part *rp, Evas_Coord canvasx, Evas_Coord c
evas_textblock_cursor_free(line_cur);
/* No need to check return value if not able to set the char coord Textblock
* will take care */
evas_textblock_cursor_char_coord_set(en->cursor, *cx, *cy);
evas_textblock_cursor_cluster_coord_set(en->cursor, *cx, *cy);
return tc;
}
@ -2530,7 +2539,7 @@ _edje_part_mouse_down_cb(void *data, Evas *e EINA_UNUSED, Evas_Object *obj EINA_
else
{
evas_textblock_cursor_word_end(en->cursor);
evas_textblock_cursor_char_next(en->cursor);
evas_textblock_cursor_cluster_next(en->cursor);
}
_sel_extend(en->ed, en->cursor, rp->object, en);
}
@ -2544,13 +2553,13 @@ _edje_part_mouse_down_cb(void *data, Evas *e EINA_UNUSED, Evas_Object *obj EINA_
evas_textblock_cursor_word_start(en->cursor);
_sel_start(en->cursor, rp->object, en);
evas_textblock_cursor_word_end(en->cursor);
evas_textblock_cursor_char_next(en->cursor);
evas_textblock_cursor_cluster_next(en->cursor);
_sel_extend(en->ed, en->cursor, rp->object, en);
}
goto end;
}
}
tc = _edje_cursor_char_coord_set(rp, ev->canvas.x, ev->canvas.y, &cx, &cy);
tc = _edje_cursor_cluster_coord_set(rp, ev->canvas.x, ev->canvas.y, &cx, &cy);
if (dosel)
{
@ -2670,7 +2679,7 @@ _edje_part_mouse_up_cb(void *data, Evas *e EINA_UNUSED, Evas_Object *obj EINA_UN
#endif
/* cx cy are unused but needed in mouse down, please bear with it */
tc = _edje_cursor_char_coord_set(rp, ev->canvas.x, ev->canvas.y, &cx, &cy);
tc = _edje_cursor_cluster_coord_set(rp, ev->canvas.x, ev->canvas.y, &cx, &cy);
if (en->select_allow)
{
@ -2748,7 +2757,7 @@ _edje_part_mouse_move_cb(void *data, Evas *e EINA_UNUSED, Evas_Object *obj EINA_
evas_object_geometry_get(rp->object, &x, &y, &w, &h);
cx = ev->cur.canvas.x - x;
cy = ev->cur.canvas.y - y;
if (!evas_textblock_cursor_char_coord_set(en->cursor, cx, cy))
if (!evas_textblock_cursor_cluster_coord_set(en->cursor, cx, cy))
{
Evas_Coord lx, ly, lw, lh;
@ -2760,7 +2769,7 @@ _edje_part_mouse_move_cb(void *data, Evas *e EINA_UNUSED, Evas_Object *obj EINA_
{
evas_textblock_cursor_paragraph_first(en->cursor);
evas_textblock_cursor_line_geometry_get(en->cursor, &lx, &ly, &lw, &lh);
if (!evas_textblock_cursor_char_coord_set(en->cursor, cx, ly + (lh / 2)))
if (!evas_textblock_cursor_cluster_coord_set(en->cursor, cx, ly + (lh / 2)))
_curs_end(en->cursor, rp->object, en);
}
}
@ -4029,7 +4038,7 @@ _edje_text_cursor_next(Edje_Real_Part *rp, Efl_Text_Cursor_Cursor *c)
_edje_entry_imf_context_reset(rp);
if (!evas_textblock_cursor_char_next(c))
if (!evas_textblock_cursor_cluster_next(c))
{
return EINA_FALSE;
}
@ -4062,7 +4071,7 @@ _edje_text_cursor_prev(Edje_Real_Part *rp, Efl_Text_Cursor_Cursor *c)
_edje_entry_imf_context_reset(rp);
if (!evas_textblock_cursor_char_prev(c))
if (!evas_textblock_cursor_cluster_prev(c))
{
if (evas_textblock_cursor_paragraph_prev(c)) goto ok;
else return EINA_FALSE;
@ -4106,7 +4115,7 @@ _edje_text_cursor_up(Edje_Real_Part *rp, Efl_Text_Cursor_Cursor *c)
&lx, &ly, &lw, &lh))
return EINA_FALSE;
evas_textblock_cursor_char_geometry_get(c, &cx, &cy, &cw, &ch);
if (!evas_textblock_cursor_char_coord_set(c, cx, ly + (lh / 2)))
if (!evas_textblock_cursor_cluster_coord_set(c, cx, ly + (lh / 2)))
evas_textblock_cursor_line_char_last(c);
_sel_update(en->ed, c, rp->object, rp->typedata.text->entry_data);
@ -4145,7 +4154,7 @@ _edje_text_cursor_down(Edje_Real_Part *rp, Efl_Text_Cursor_Cursor *c)
&lx, &ly, &lw, &lh))
return EINA_FALSE;
evas_textblock_cursor_char_geometry_get(c, &cx, &cy, &cw, &ch);
if (!evas_textblock_cursor_char_coord_set(c, cx, ly + (lh / 2)))
if (!evas_textblock_cursor_cluster_coord_set(c, cx, ly + (lh / 2)))
evas_textblock_cursor_line_char_last(c);
_sel_update(en->ed, c, rp->object, rp->typedata.text->entry_data);
@ -4347,7 +4356,7 @@ _edje_text_cursor_coord_set(Edje_Real_Part *rp, Efl_Text_Cursor_Cursor *c,
_edje_emit(en->ed, "selection,changed", rp->part->name);
}
}
return evas_textblock_cursor_char_coord_set(c, x, y);
return evas_textblock_cursor_cluster_coord_set(c, x, y);
}
Eina_Bool

View File

@ -145,6 +145,20 @@ interface Efl.Text_Cursor {
/* @inout */ cur: ptr(Efl.Text_Cursor_Cursor); [[Cursor object]]
}
}
cursor_cluster_next {
[[Advances to the next grapheme cluster]]
legacy: null;
params {
/* @inout */ cur: ptr(Efl.Text_Cursor_Cursor); [[Cursor object]]
}
}
cursor_cluster_prev {
[[Advances to the previous grapheme cluster]]
legacy: null;
params {
/* @inout */ cur: ptr(Efl.Text_Cursor_Cursor); [[Cursor object]]
}
}
cursor_paragraph_char_first {
[[Advances to the first character in this paragraph]]
legacy: null;
@ -232,6 +246,17 @@ interface Efl.Text_Cursor {
@in y: int; [[Y coord to set by.]]
}
}
cursor_cluster_coord_set {
[[Set cursor coordinates according to grapheme clusters.
It does not allow placing the cursor in the middle of a grapheme cluster.
]]
legacy: null;
params {
/* @inout */ cur: ptr(Efl.Text_Cursor_Cursor); [[Cursor object]]
@in x: int; [[X coord to set by.]]
@in y: int; [[Y coord to set by.]]
}
}
cursor_text_insert {
[[Adds text to the current cursor position and set the cursor to
*after* the start of the text just added.

View File

@ -327,6 +327,8 @@ class Efl.Canvas.Text (Efl.Canvas.Object, Efl.Text,
Efl.Text_Cursor.cursor_copy;
Efl.Text_Cursor.cursor_char_next;
Efl.Text_Cursor.cursor_char_prev;
Efl.Text_Cursor.cursor_cluster_next;
Efl.Text_Cursor.cursor_cluster_prev;
Efl.Text_Cursor.cursor_paragraph_char_first;
Efl.Text_Cursor.cursor_paragraph_char_last;
Efl.Text_Cursor.cursor_word_start;
@ -339,6 +341,7 @@ class Efl.Canvas.Text (Efl.Canvas.Object, Efl.Text,
Efl.Text_Cursor.cursor_paragraph_prev;
Efl.Text_Cursor.cursor_line_jump_by;
Efl.Text_Cursor.cursor_coord_set;
Efl.Text_Cursor.cursor_cluster_coord_set;
Efl.Text_Cursor.cursor_text_insert;
Efl.Text_Cursor.cursor_char_delete;
Efl.Text_Annotate.annotation { set; get; }

View File

@ -77,6 +77,7 @@
#include "linebreak.h"
#include "wordbreak.h"
#include "graphemebreak.h"
#include "evas_filter.h"
#include "efl_canvas_filter_internal.eo.h"
@ -9271,20 +9272,122 @@ _efl_canvas_text_efl_text_cursor_cursor_word_end(Eo *eo_obj, Efl_Canvas_Text_Dat
efl_event_callback_legacy_call(eo_obj, EFL_CANVAS_TEXT_EVENT_CURSOR_CHANGED, NULL);
}
EAPI Eina_Bool
evas_textblock_cursor_char_next(Efl_Text_Cursor_Cursor *cur)
/**
 * @internal
 * Allocates a grapheme-break table for the text node of an item and fills
 * it by running set_graphemebreaks_utf32() over the node's unicode buffer.
 *
 * @param it  item whose text_node->unicode buffer is analysed; the language
 *            hint is taken from it->format->font.fdesc when that is set,
 *            otherwise an empty string is used.
 * @param len number of code points to analyse; also the size in bytes of
 *            the returned table.
 * @return a malloc()ed table of @p len entries that the caller must free(),
 *         or NULL when @p len is 0 or the allocation fails.
 */
static char *
_evas_textblock_grapheme_breaks_new(Evas_Object_Textblock_Item *it, size_t len)
{
   char *grapheme_breaks;
   const char *lang = (it->format->font.fdesc) ? it->format->font.fdesc->lang : "";

   /* An empty text has no break positions to report.  Bail out before
    * malloc(0) (whose result is implementation-defined) and before handing
    * a zero-sized buffer to the break routine. */
   if (len == 0) return NULL;

   grapheme_breaks = malloc(len);
   if (!grapheme_breaks) return NULL;

   set_graphemebreaks_utf32((const utf32_t *)
                            eina_ustrbuf_string_get(
                               it->text_node->unicode),
                            len, lang, grapheme_breaks);

   return grapheme_breaks;
}
/**
 * @internal
 * Finds the cursor position one grapheme cluster away from @p cur without
 * moving the cursor.
 *
 * The logical item covering the probed position is looked up in the
 * paragraph of cur->node.  If it is a text item, a grapheme-break table is
 * built for the node's text and scanned:
 *  - forward (@p inc true): from cur->pos up to the first code point flagged
 *    GRAPHEMEBREAK_BREAK; the result is the index right after it.
 *  - backward (@p inc false): from cur->pos - 1 down to the nearest index
 *    whose preceding code point is flagged GRAPHEMEBREAK_BREAK, or 0.
 *
 * @param cur cursor to probe; cur->node is dereferenced unconditionally.
 * @param inc EINA_TRUE to search forward, EINA_FALSE to search backward.
 * @return the computed position, or cur->pos unchanged when there is no
 *         paragraph, no matching item, the item is not a text item, the
 *         break table could not be created, or a backward search is
 *         requested at position 0.
 */
static size_t
_evas_textblock_cursor_cluster_pos_get(Evas_Textblock_Cursor *cur, Eina_Bool inc)
{
   Evas_Object_Textblock_Paragraph *par;
   Efl_Canvas_Text_Data *o;
   size_t cur_pos = cur->pos;
   size_t ret = cur->pos;

   if (!inc)
     {
        /* Nothing precedes position 0.  Decrementing would wrap the
         * unsigned index and later read far outside the break table. */
        if (cur_pos == 0) return ret;
        cur_pos--;
     }

   if (!cur->node->par)
     {
        /* The node has no paragraph attached yet; request a relayout and
         * re-read cur->node->par below. */
        o = efl_data_scope_get(cur->obj, MY_CLASS);
        if (o) _relayout_if_needed(cur->obj, o);
     }

   par = cur->node->par;

   if (par)
     {
        Eina_List *l;
        Evas_Object_Textblock_Item *it, *last_it = NULL;

        /* Pick the last item that starts at or before cur_pos; if even the
         * first item starts after it, fall back to that first item. */
        EINA_LIST_FOREACH(par->logical_items, l, it)
          {
             if (it->text_pos > cur_pos)
               {
                  if (!last_it) last_it = it;
                  break;
               }
             last_it = it;
          }

        if (last_it)
          {
             it = last_it;
             if (it->type == EVAS_TEXTBLOCK_ITEM_TEXT)
               {
                  size_t len = eina_ustrbuf_length_get(it->text_node->unicode);
                  char *grapheme_breaks = _evas_textblock_grapheme_breaks_new(it, len);

                  if (grapheme_breaks)
                    {
                       size_t grapheme_breaks_index = cur_pos;

                       if (inc)
                         {
                            /* Walk right to the code point that ends the
                             * current cluster. */
                            while ((grapheme_breaks_index < len) &&
                                   (grapheme_breaks[grapheme_breaks_index] != GRAPHEMEBREAK_BREAK))
                              {
                                 grapheme_breaks_index++;
                              }

                            ret = grapheme_breaks_index + 1;
                         }
                       else
                         {
                            /* Walk left until the previous code point ends
                             * a cluster (or the text starts). */
                            while ((grapheme_breaks_index > 0) &&
                                   (grapheme_breaks[grapheme_breaks_index - 1] != GRAPHEMEBREAK_BREAK))
                              {
                                 grapheme_breaks_index--;
                              }

                            ret = grapheme_breaks_index;
                         }

                       free(grapheme_breaks);
                    }
               }
          }
     }

   return ret;
}
static Eina_Bool
_evas_textblock_cursor_next(Evas_Textblock_Cursor *cur, Eina_Bool per_cluster)
{
Evas_Object_Protected_Data *obj;
const Eina_Unicode *text;
int ind;
if (!cur) return EINA_FALSE;
Evas_Object_Protected_Data *obj = efl_data_scope_get(cur->obj, EFL_CANVAS_OBJECT_CLASS);
evas_object_async_block(obj);
TB_NULL_CHECK(cur->node, EINA_FALSE);
obj = efl_data_scope_get(cur->obj, EFL_CANVAS_OBJECT_CLASS);
evas_object_async_block(obj);
ind = cur->pos;
text = eina_ustrbuf_string_get(cur->node->unicode);
if (text[ind]) ind++;
if (text[ind])
{
if (per_cluster)
ind = _evas_textblock_cursor_cluster_pos_get(cur, EINA_TRUE);
if (ind <= (int)cur->pos)
ind = cur->pos + 1;
}
/* Only allow pointing a null if it's the last paragraph.
* because we don't have a PS there. */
if (text[ind])
@ -9311,42 +9414,90 @@ evas_textblock_cursor_char_next(Efl_Text_Cursor_Cursor *cur)
}
}
EOLIAN static void
_efl_canvas_text_efl_text_cursor_cursor_char_next(Eo *eo_obj, Efl_Canvas_Text_Data *o EINA_UNUSED, Efl_Text_Cursor_Cursor *cur)
{
ASYNC_BLOCK;
evas_textblock_cursor_char_next(cur);
efl_event_callback_legacy_call(eo_obj, EFL_CANVAS_TEXT_EVENT_CURSOR_CHANGED, NULL);
}
static Eina_Bool
_evas_textblock_cursor_char_prev(Efl_Text_Cursor_Cursor *cur)
_evas_textblock_cursor_prev(Evas_Textblock_Cursor *cur, Eina_Bool per_cluster)
{
Evas_Object_Protected_Data *obj;
if (!cur) return EINA_FALSE;
TB_NULL_CHECK(cur->node, EINA_FALSE);
obj = efl_data_scope_get(cur->obj, EFL_CANVAS_OBJECT_CLASS);
evas_object_async_block(obj);
if (cur->pos != 0)
{
if (per_cluster)
{
size_t ret = _evas_textblock_cursor_cluster_pos_get(cur, EINA_FALSE);
if (ret != cur->pos)
{
cur->pos = ret;
return EINA_TRUE;
}
}
cur->pos--;
return EINA_TRUE;
}
return evas_textblock_cursor_paragraph_prev(cur);
}
/* Legacy entry point: steps the cursor forward by delegating to
 * _evas_textblock_cursor_next() with per_cluster set to EINA_FALSE and
 * returns that call's result unchanged. */
EAPI Eina_Bool
evas_textblock_cursor_char_next(Efl_Text_Cursor_Cursor *cur)
{
return _evas_textblock_cursor_next(cur, EINA_FALSE);
}
/* Eo implementation of Efl.Text_Cursor.cursor_char_next: steps the cursor
 * forward with per_cluster set to EINA_FALSE and emits
 * EFL_CANVAS_TEXT_EVENT_CURSOR_CHANGED on eo_obj only when the step
 * reported success. */
EOLIAN static void
_efl_canvas_text_efl_text_cursor_cursor_char_next(Eo *eo_obj, Efl_Canvas_Text_Data *o EINA_UNUSED, Efl_Text_Cursor_Cursor *cur)
{
ASYNC_BLOCK;
if (_evas_textblock_cursor_next(cur, EINA_FALSE))
efl_event_callback_legacy_call(eo_obj, EFL_CANVAS_TEXT_EVENT_CURSOR_CHANGED, NULL);
}
EAPI Eina_Bool
evas_textblock_cursor_char_prev(Efl_Text_Cursor_Cursor *cur)
{
if (!cur) return EINA_FALSE;
return _evas_textblock_cursor_char_prev(cur);
return _evas_textblock_cursor_prev(cur, EINA_FALSE);
}
EOLIAN static void
_efl_canvas_text_efl_text_cursor_cursor_char_prev(Eo *eo_obj EINA_UNUSED, Efl_Canvas_Text_Data *o EINA_UNUSED, Efl_Text_Cursor_Cursor *cur)
{
ASYNC_BLOCK;
_evas_textblock_cursor_char_prev(cur);
efl_event_callback_legacy_call(eo_obj, EFL_CANVAS_TEXT_EVENT_CURSOR_CHANGED, NULL);
if (_evas_textblock_cursor_prev(cur, EINA_FALSE))
efl_event_callback_legacy_call(eo_obj, EFL_CANVAS_TEXT_EVENT_CURSOR_CHANGED, NULL);
}
/* Legacy entry point: steps the cursor forward by delegating to
 * _evas_textblock_cursor_next() with per_cluster set to EINA_TRUE and
 * returns that call's result unchanged. */
EAPI Eina_Bool
evas_textblock_cursor_cluster_next(Efl_Text_Cursor_Cursor *cur)
{
return _evas_textblock_cursor_next(cur, EINA_TRUE);
}
/* Eo implementation of Efl.Text_Cursor.cursor_cluster_next: steps the cursor
 * forward with per_cluster set to EINA_TRUE and emits
 * EFL_CANVAS_TEXT_EVENT_CURSOR_CHANGED on eo_obj only when the step
 * reported success. */
EOLIAN static void
_efl_canvas_text_efl_text_cursor_cursor_cluster_next(Eo *eo_obj, Efl_Canvas_Text_Data *o EINA_UNUSED, Efl_Text_Cursor_Cursor *cur)
{
ASYNC_BLOCK;
if (_evas_textblock_cursor_next(cur, EINA_TRUE))
efl_event_callback_legacy_call(eo_obj, EFL_CANVAS_TEXT_EVENT_CURSOR_CHANGED, NULL);
}
/* Legacy entry point: steps the cursor backward by delegating to
 * _evas_textblock_cursor_prev() with per_cluster set to EINA_TRUE and
 * returns that call's result unchanged. */
EAPI Eina_Bool
evas_textblock_cursor_cluster_prev(Efl_Text_Cursor_Cursor *cur)
{
return _evas_textblock_cursor_prev(cur, EINA_TRUE);
}
/* Eo implementation of Efl.Text_Cursor.cursor_cluster_prev: steps the cursor
 * backward with per_cluster set to EINA_TRUE and emits
 * EFL_CANVAS_TEXT_EVENT_CURSOR_CHANGED on eo_obj only when the step
 * reported success. */
EOLIAN static void
_efl_canvas_text_efl_text_cursor_cursor_cluster_prev(Eo *eo_obj, Efl_Canvas_Text_Data *o EINA_UNUSED, Efl_Text_Cursor_Cursor *cur)
{
ASYNC_BLOCK;
if (_evas_textblock_cursor_prev(cur, EINA_TRUE))
efl_event_callback_legacy_call(eo_obj, EFL_CANVAS_TEXT_EVENT_CURSOR_CHANGED, NULL);
}
EAPI void
@ -12031,15 +12182,16 @@ _efl_canvas_text_visible_range_get(Eo *eo_obj EINA_UNUSED,
return EINA_TRUE;
}
EAPI Eina_Bool
evas_textblock_cursor_char_coord_set(Evas_Textblock_Cursor *cur, Evas_Coord x, Evas_Coord y)
static Eina_Bool
_evas_textblock_cursor_coord_set(Evas_Textblock_Cursor *cur, Evas_Coord x, Evas_Coord y, Eina_Bool per_cluster)
{
Evas_Object_Textblock_Paragraph *found_par;
Evas_Object_Textblock_Line *ln;
Evas_Object_Textblock_Item *it = NULL;
Eina_Bool ret = EINA_FALSE;
if (!cur) return ret;
Evas_Object_Protected_Data *obj = efl_data_scope_get(cur->obj, EFL_CANVAS_OBJECT_CLASS);
evas_object_async_block(obj);
Efl_Canvas_Text_Data *o = efl_data_scope_get(cur->obj, MY_CLASS);
@ -12112,6 +12264,63 @@ evas_textblock_cursor_char_coord_set(Evas_Textblock_Cursor *cur, Evas_Coord x,
&cx, &cy, &cw, &ch);
if (pos < 0)
goto end;
if ((pos > 0) && per_cluster)
{
size_t len = eina_ustrbuf_length_get(it->text_node->unicode);
char *grapheme_breaks = _evas_textblock_grapheme_breaks_new(it, len);
/* If current position is not breakable,
* try to move cursor to a nearest breakable position. */
if (grapheme_breaks && (grapheme_breaks[pos + it->text_pos - 1] != GRAPHEMEBREAK_BREAK))
{
size_t left_index = pos + it->text_pos - 1;
size_t right_index = pos + it->text_pos - 1;
int lx, rx;
/* To the left */
while ((left_index > 0) &&
(grapheme_breaks[left_index] != GRAPHEMEBREAK_BREAK))
{
left_index--;
}
ENFN->font_pen_coords_get(ENC,
ti->parent.format->font.font,
&ti->text_props,
left_index - it->text_pos + 1,
&lx, NULL, NULL, NULL);
/* To the right */
while ((right_index < len) &&
(grapheme_breaks[right_index] != GRAPHEMEBREAK_BREAK))
{
right_index++;
}
ENFN->font_pen_coords_get(ENC,
ti->parent.format->font.font,
&ti->text_props,
right_index - it->text_pos + 1,
&rx, NULL, NULL, NULL);
/* Decide a nearest position by checking its geometry. */
if (((ti->text_props.bidi_dir != EVAS_BIDI_DIRECTION_RTL) &&
((ln->x + it->x + rx - x) >= (x - (lx + ln->x + it->x)))) ||
((ti->text_props.bidi_dir == EVAS_BIDI_DIRECTION_RTL) &&
((ln->x + it->x + lx - x) >= (x - (rx + ln->x + it->x)))))
{
pos = left_index - it->text_pos + 1;
}
else
{
pos = right_index - it->text_pos + 1;
}
}
free(grapheme_breaks);
}
cur->pos = pos + it->text_pos;
cur->node = it->text_node;
ret = EINA_TRUE;
@ -12167,6 +12376,18 @@ end:
return ret;
}
/* Legacy entry point: positions the cursor from canvas-relative x/y by
 * delegating to _evas_textblock_cursor_coord_set() with per_cluster set to
 * EINA_FALSE, i.e. without snapping to grapheme cluster boundaries.
 * Returns that call's result unchanged. */
EAPI Eina_Bool
evas_textblock_cursor_char_coord_set(Evas_Textblock_Cursor *cur, Evas_Coord x, Evas_Coord y)
{
return _evas_textblock_cursor_coord_set(cur, x, y, EINA_FALSE);
}
/* Legacy entry point: positions the cursor from x/y by delegating to
 * _evas_textblock_cursor_coord_set() with per_cluster set to EINA_TRUE, so
 * a hit inside a grapheme cluster is moved to a nearby cluster boundary.
 * Returns that call's result unchanged. */
EAPI Eina_Bool
evas_textblock_cursor_cluster_coord_set(Evas_Textblock_Cursor *cur, Evas_Coord x, Evas_Coord y)
{
return _evas_textblock_cursor_coord_set(cur, x, y, EINA_TRUE);
}
EOLIAN static void
_efl_canvas_text_efl_text_cursor_cursor_coord_set(Eo *eo_obj EINA_UNUSED, Efl_Canvas_Text_Data *o EINA_UNUSED, Efl_Text_Cursor_Cursor *cur EINA_UNUSED,
Evas_Coord x, Evas_Coord y)
@ -12175,6 +12396,14 @@ _efl_canvas_text_efl_text_cursor_cursor_coord_set(Eo *eo_obj EINA_UNUSED, Efl_Ca
evas_textblock_cursor_char_coord_set(cur, x, y);
}
/* Eo implementation of Efl.Text_Cursor.cursor_cluster_coord_set: forwards
 * the cursor and coordinates to evas_textblock_cursor_cluster_coord_set().
 * The Eina_Bool result of that call is discarded because the Eo method
 * returns void.
 *
 * `cur` is intentionally not marked EINA_UNUSED: it is passed to the callee. */
EOLIAN static void
_efl_canvas_text_efl_text_cursor_cursor_cluster_coord_set(Eo *eo_obj EINA_UNUSED, Efl_Canvas_Text_Data *o EINA_UNUSED, Efl_Text_Cursor_Cursor *cur,
                                                          Evas_Coord x, Evas_Coord y)
{
   ASYNC_BLOCK;
   evas_textblock_cursor_cluster_coord_set(cur, x, y);
}
EAPI int
evas_textblock_cursor_line_coord_set(Evas_Textblock_Cursor *cur, Evas_Coord y)
{
@ -13279,6 +13508,7 @@ evas_object_textblock_init(Evas_Object *eo_obj)
linebreak_init = EINA_TRUE;
init_linebreak();
init_wordbreak();
init_graphemebreak();
}
o = obj->private_data;
@ -15194,7 +15424,7 @@ _efl_canvas_text_efl_text_annotate_range_annotations_get(const Eo *eo_obj, Efl_C
if (!it->start_node || !it->end_node) continue;
_textblock_cursor_pos_at_fnode_set(eo_obj, &start2, it->start_node);
_textblock_cursor_pos_at_fnode_set(eo_obj, &end2, it->end_node);
_evas_textblock_cursor_char_prev(&end2);
evas_textblock_cursor_char_prev(&end2);
if (!((evas_textblock_cursor_compare(&start2, end) > 0) ||
(evas_textblock_cursor_compare(&end2, start) < 0)))
{

View File

@ -604,6 +604,22 @@ EAPI Eina_Bool evas_textblock_cursor_char_prev(Evas_Textblock_Cursor *obj);
*/
EAPI Eina_Bool evas_textblock_cursor_char_next(Evas_Textblock_Cursor *obj);
/**
 * @brief Advances the cursor one grapheme cluster backwards.
 *
 * @return @c true on success, @c false otherwise.
 *
 * @ingroup Evas_Textblock_Cursor
 */
EAPI Eina_Bool evas_textblock_cursor_cluster_prev(Evas_Textblock_Cursor *obj);
/**
* @brief Advances the cursor one grapheme cluster forward.
*
* @return @c true on success, @c false otherwise.
*
* @ingroup Evas_Textblock_Cursor
*/
EAPI Eina_Bool evas_textblock_cursor_cluster_next(Evas_Textblock_Cursor *obj);
/**
* @brief Advances to the start of the next text node
*
@ -859,6 +875,16 @@ EAPI Evas_Textblock_Cursor *evas_object_textblock_cursor_new(const Evas_Object *
*/
EAPI Eina_Bool evas_textblock_cursor_char_coord_set(Evas_Textblock_Cursor *obj, Evas_Coord x, Evas_Coord y);
/**
 * @brief Sets the position of the cursor according to the X and Y coordinates and
 * grapheme clusters of text.
 *
 * @param[in] obj Cursor object.
 * @param[in] x x coord to set by.
 * @param[in] y y coord to set by.
 *
 * @return @c true on success, @c false otherwise.
 */
EAPI Eina_Bool evas_textblock_cursor_cluster_coord_set(Evas_Textblock_Cursor *obj, Evas_Coord x, Evas_Coord y);
/**
* Free the cursor and unassociate it from the object.
* @note do not use it to free unassociated cursors.

View File

@ -1,3 +1,95 @@
2016-12-15 Wu Yongwei <wuyongwei@gmail.com>
* src/Makefile.am (include_HEADERS): Move graphemebreakdef.h to
EXTRA_DIST.
(EXTRA_DIST): Add graphemebreakdef.h and test_skips.h.
2016-12-14 Wu Yongwei <wuyongwei@gmail.com>
* src/linebreak.c: Adjust documentation comment.
* src/wordbreak.c: Ditto.
* src/graphemebreak.c: Ditto.
2016-12-14 Wu Yongwei <wuyongwei@gmail.com>
* Doxyfile (FULL_PATH_NAMES): Set to `NO'.
(DOT_IMAGE_FORMAT): Set to `svg'.
(SEARCHENGINE): Set to `YES'.
2016-12-10 Wu Yongwei <wuyongwei@gmail.com>
Update for the libunibreak 4.0 release.
* NEWS: Add information about libunibreak 4.0.
* Doxyfile (PROJECT_NUMBER): Change to `4.0'.
* configure.ac (AC_INIT): Change the library version to `4.0'.
* src/Makefile.am (libunibreak_la_LDFLAGS): Set the version-info to
`4:0:1'.
* src/unibreakbase.h (UNIBREAK_VERSION): Set to 0x0400.
2016-12-10 Wu Yongwei <wuyongwei@gmail.com>
* src/Makefile.am (include_HEADERS): Add a missing file
graphemebreakdef.h.
2016-12-10 Wu Yongwei <wuyongwei@gmail.com>
* bootstrap: Add a missing `--copy' argument to glibtoolize.
2016-12-10 Wu Yongwei <wuyongwei@gmail.com>
* README.md: Update for grapheme break and links.
* LICENCE: Add Andreas Röver and update copyright information.
2016-12-10 Wu Yongwei <wuyongwei@gmail.com>
* Doxyfile (EXCLUDE): Add `src/tests.c'.
2016-12-10 Wu Yongwei <wuyongwei@gmail.com>
* src/wordbreak.c: Update Unicode version and link information.
* src/wordbreak.h: Ditto.
* src/wordbreakdef.h: Ditto.
* src/graphemebreak.c: Ditto.
* src/graphemebreak.h: Ditto.
* src/graphemebreakdef.h: Ditto.
2016-12-10 Wu Yongwei <wuyongwei@gmail.com>
* src/linebreak.c: Remove `@version' and update copyright header.
* src/linebreak.h: Ditto.
* src/linebreakdef.c: Ditto.
* src/linebreakdef.h: Ditto.
* src/wordbreak.c: Ditto.
* src/wordbreak.h: Ditto.
* src/wordbreakdef.h: Ditto.
* src/graphemebreak.c: Ditto.
* src/graphemebreak.h: Ditto.
* src/graphemebreakdef.h: Ditto.
* src/unibreakbase.c: Ditto.
* src/unibreakbase.h: Ditto.
* src/unibreakdef.c: Ditto.
* src/unibreakdef.h: Ditto.
2016-12-10 Wu Yongwei <wuyongwei@gmail.com>
* src/Makefile.msvc: Add graphemebreak.c.
* src/graphemebreak.c: Add a workaround of stdbool.h for MSVC
versions earlier than 2013.
* src/graphemebreak.h: Make include order consistent.
* src/linebreak.c (ends_with): Make the code compile under C89.
2016-12-10 Wu Yongwei <wuyongwei@gmail.com>
* src/Makefile.gcc (CFILES): Add graphemebreak.c.
(graphemebreakdata): New phony target.
(GraphemeBreakProperty.txt): New target.
(distclean): Add WordBreakProperty.txt and GraphemeBreakProperty.txt
as well.
2016-12-05 Tom Hacohen <tom@stosb.com>
* src/test_skips.h: New file.
2016-12-04 Wu Yongwei <wuyongwei@gmail.com>
Simplify implementation about RI pairing.

View File

@ -1,6 +1,7 @@
Copyright (C) 2008-2015 Wu Yongwei <wuyongwei at gmail dot com>
Copyright (C) 2012-2015 Tom Hacohen <tom at stosb dot com>
Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com>
Copyright (C) 2012-2016 Tom Hacohen <tom at stosb dot com>
Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net>
This software is provided 'as-is', without any express or implied
warranty. In no event will the author be held liable for any damages

View File

@ -1,3 +1,10 @@
New in libunibreak 4.0
- Update the code and data to conform to Unicode 9.0.0
- Add grapheme breaking support
- Tested and enhanced according to the Unicode test suite
- Make bug fixes
New in libunibreak 3.0
- Update the code and data to conform to Unicode 7.0.0

View File

@ -9,8 +9,8 @@ breaking and word breaking algorithms as described in [Unicode Standard
Annex 14] [1] and [Unicode Standard Annex 29] [2]. Check the project's
[home page] [3] for up-to-date information.
[1]: http://www.unicode.org/reports/tr14/tr14-30.html
[2]: http://www.unicode.org/reports/tr29/tr29-21.html
[1]: http://www.unicode.org/reports/tr14/tr14-37.html
[2]: http://www.unicode.org/reports/tr29/tr29-29.html
[3]: https://github.com/adah1972/libunibreak
@ -46,6 +46,8 @@ There are three ways to build the library:
*LineBreak.txt*.
- type `make wordbreakdata` to regenerate *wordbreakdata.c* from
*WordBreakProperty.txt*.
- type `make graphemebreakdata` to regenerate *graphemebreakdata.c*
from *GraphemeBreakProperty.txt*.
2. On systems where GCC and Binutils are supported, one can type
@ -61,6 +63,8 @@ There are three ways to build the library:
*LineBreak.txt*.
- type `make wordbreakdata` to regenerate *wordbreakdata.c* from
*WordBreakProperty.txt*.
- type `make graphemebreakdata` to regenerate *graphemebreakdata.c*
from *GraphemeBreakProperty.txt*.
3. On Windows, apart from using method 1 (Cygwin/MSYS) and method 2
(MinGW), MSVC can also be used. Type
@ -77,9 +81,9 @@ There are three ways to build the library:
Documentation
-------------
Check the generated document *doc/html/linebreak\_8h.html* and
*doc/html/wordbreak\_8h.html* in the downloaded file for the public
interfaces exposed to applications.
Check the generated document *doc/html/linebreak\_8h.html*,
*doc/html/wordbreak\_8h.html*, and *doc/html/graphemebreak\_8h.html* in
the downloaded file for the public interfaces exposed to applications.
<!--

View File

@ -0,0 +1,283 @@
/*
* Grapheme breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute
* it freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must
* not claim that you wrote the original software. If you use this
* software in a product, an acknowledgement in the product
* documentation would be appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must
* not be misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source
* distribution.
*
* The main reference is Unicode Standard Annex 29 (UAX #29):
* <URL:http://unicode.org/reports/tr29>
*
* When this library was designed, this annex was at Revision 29, for
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
*/
/**
* @file graphemebreak.c
*
* Implementation of the grapheme breaking algorithm as described in Unicode
* Standard Annex 29.
*
* @author Andreas Roever
*/
#if defined(_MSC_VER) && _MSC_VER < 1800
typedef int bool;
#define false 0
#define true 1
#else
#include <stdbool.h>
#endif
#include <string.h>
#include "graphemebreak.h"
#include "graphemebreakdata.c"
#include "unibreakdef.h"
#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
/**
 * Initializes the grapheme break internals.  It currently does nothing, but
 * it may in the future.
 */
void init_graphemebreak(void)
{
}
/**
 * Looks up the grapheme breaking class of a code point.
 *
 * The lookup is a binary search over the range table gb_prop_default; each
 * entry is compared through its start and end bounds.
 *
 * @param ch  code point to classify
 * @return    the class of the entry whose range contains \a ch, or
 *            \c GBP_Other when no entry contains it
 */
static enum GraphemeBreakClass get_char_gb_class(utf32_t ch)
{
    int lo = 0;
    int hi = ARRAY_LEN(gb_prop_default) - 1;

    /* The table is probed at least once before the bounds are compared. */
    for (;;)
    {
        int probe = (lo + hi) / 2;

        if (ch < gb_prop_default[probe].start)
        {
            hi = probe - 1;
        }
        else if (ch > gb_prop_default[probe].end)
        {
            lo = probe + 1;
        }
        else
        {
            return gb_prop_default[probe].prop;
        }

        if (lo > hi)
        {
            break;
        }
    }

    return GBP_Other;
}
/**
 * Sets the grapheme breaking information for a generic input string.
 *
 * Every output element is first set to #GRAPHEMEBREAK_INSIDEACHAR; the
 * element at the last code unit of each character is then overwritten with
 * #GRAPHEMEBREAK_BREAK or #GRAPHEMEBREAK_NOBREAK, describing the boundary
 * between that character and the next one.  If no character can be read
 * from the input at all, the output is left in its initialized state.
 *
 * @param[in]  s             input string
 * @param[in]  len           length of the input
 * @param[out] brks          pointer to the output breaking data, containing
 *                           #GRAPHEMEBREAK_BREAK or #GRAPHEMEBREAK_NOBREAK
 * @param[in] get_next_char  function to get the next UTF-32 character
 */
static void set_graphemebreaks(const void *s, size_t len, char *brks,
                               get_next_char_t get_next_char)
{
    size_t posNext = 0;
    bool rule10Left = false; // is the left side of rule 10 fulfilled?
    bool evenRegionalIndicators = true; // is the number of preceding
                                        // GBP_RegionalIndicator characters
                                        // even

    utf32_t ch = get_next_char(s, len, &posNext);
    enum GraphemeBreakClass current_class;

    // initialize whole output to inside char
    memset(brks, GRAPHEMEBREAK_INSIDEACHAR, len);

    // No first character (e.g. empty input): posNext is still 0, so the
    // "posNext - 1" below would wrap and the final break would be written
    // outside of brks.
    if (ch == EOS)
    {
        return;
    }

    current_class = get_char_gb_class(ch);

    while (true)
    {
        enum GraphemeBreakClass prev_class = current_class;

        // save position of current character so that we can store the
        // result there later on
        size_t brksPos = posNext - 1;

        // get next character
        ch = get_next_char(s, len, &posNext);

        if (ch == EOS)
        {
            // done, place one final break after the last character as per
            // algorithm rule GB1
            brks[brksPos] = GRAPHEMEBREAK_BREAK;
            break;
        }

        // get class of current character
        current_class = get_char_gb_class(ch);

        // update some helper variables
        if ((prev_class == GBP_E_Base) || (prev_class == GBP_E_Base_GAZ))
        {
            rule10Left = true;
        }
        else if (prev_class != GBP_Extend)
        {
            rule10Left = false;
        }

        if (prev_class == GBP_Regional_Indicator)
        {
            evenRegionalIndicators = !evenRegionalIndicators;
        }
        else
        {
            evenRegionalIndicators = true;
        }

        // check all rules
        if (prev_class == GBP_CR && current_class == GBP_LF)
        {
            brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB3
        }
        else if ((prev_class == GBP_CR) || (prev_class == GBP_LF) ||
                 (prev_class == GBP_Control) || (current_class == GBP_CR) ||
                 (current_class == GBP_LF) ||
                 (current_class == GBP_Control))
        {
            brks[brksPos] = GRAPHEMEBREAK_BREAK; // Rule: GB4 + GB5
        }
        else if ((prev_class == GBP_L) &&
                 ((current_class == GBP_L) || (current_class == GBP_V) ||
                  (current_class == GBP_LV) || (current_class == GBP_LVT)))
        {
            brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB6
        }
        else if (((prev_class == GBP_LV) || (prev_class == GBP_V)) &&
                 ((current_class == GBP_V) || (current_class == GBP_T)))
        {
            brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB7
        }
        else if (((prev_class == GBP_LVT) || (prev_class == GBP_T)) &&
                 (current_class == GBP_T))
        {
            brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB8
        }
        else if ((current_class == GBP_Extend) ||
                 (current_class == GBP_ZWJ))
        {
            brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB9
        }
        else if (current_class == GBP_SpacingMark)
        {
            brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB9a
        }
        else if (prev_class == GBP_Prepend)
        {
            brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB9b
        }
        else if (rule10Left && (current_class == GBP_E_Modifier))
        {
            brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB10
        }
        else if ((prev_class == GBP_ZWJ) &&
                 ((current_class == GBP_Glue_After_Zwj) ||
                  (current_class == GBP_E_Base_GAZ)))
        {
            brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB11
        }
        else if (!evenRegionalIndicators &&
                 (current_class == GBP_Regional_Indicator))
        {
            brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB12 + GB13
        }
        else
        {
            brks[brksPos] = GRAPHEMEBREAK_BREAK; // Rule: GB999
        }
    }
}
/**
* Sets the grapheme breaking information for a UTF-8 input string.
*
* @param[in] s input UTF-8 string
* @param[in] len length of the input
* @param[in] lang language of the input (reserved for future use)
* @param[out] brks pointer to the output breaking data, containing
* #GRAPHEMEBREAK_BREAK or #GRAPHEMEBREAK_NOBREAK.
* First element in output array is for the break behind
* the first character the pointer must point to an
* array with at least as many elements as there
* are characters in the string
*/
void set_graphemebreaks_utf8(const utf8_t *s, size_t len, const char *lang,
char *brks)
{
(void)lang;
set_graphemebreaks(s, len, brks,
(get_next_char_t)ub_get_next_char_utf8);
}
/**
* Sets the grapheme breaking information for a UTF-16 input string.
*
* @param[in] s input UTF-16 string
* @param[in] len length of the input
* @param[in] lang language of the input (reserved for future use)
* @param[out] brks pointer to the output breaking data, containing
* #GRAPHEMEBREAK_BREAK or #GRAPHEMEBREAK_NOBREAK.
* First element in output array is for the break behind
* the first character the pointer must point to an
* array with at least as many elements as there
* are characters in the string
*/
void set_graphemebreaks_utf16(const utf16_t *s, size_t len,
const char *lang, char *brks)
{
(void)lang;
set_graphemebreaks(s, len, brks,
(get_next_char_t)ub_get_next_char_utf16);
}
/**
* Sets the grapheme breaking information for a UTF-32 input string.
*
* @param[in] s input UTF-32 string
* @param[in] len length of the input
* @param[in] lang language of the input (reserved for future use)
* @param[out] brks pointer to the output breaking data, containing
* #GRAPHEMEBREAK_BREAK or #GRAPHEMEBREAK_NOBREAK.
* First element in output array is for the break behind
* the first character the pointer must point to an
* array with at least as many elements as there
* are characters in the string
*/
void set_graphemebreaks_utf32(const utf32_t *s, size_t len,
const char *lang, char *brks)
{
(void)lang;
set_graphemebreaks(s, len, brks,
(get_next_char_t)ub_get_next_char_utf32);
}

View File

@ -0,0 +1,69 @@
/*
* Grapheme breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute
* it freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must
* not claim that you wrote the original software. If you use this
* software in a product, an acknowledgement in the product
* documentation would be appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must
* not be misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source
* distribution.
*
* The main reference is Unicode Standard Annex 29 (UAX #29):
* <URL:http://unicode.org/reports/tr29>
*
* When this library was designed, this annex was at Revision 29, for
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
*/
/**
* @file graphemebreak.h
*
* Header file for the grapheme breaking algorithm.
*
* @author Andreas Röver
*/
#ifndef GRAPHEMEBREAK_H
#define GRAPHEMEBREAK_H
#include <stddef.h>
#include "unibreakbase.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Values stored in the output array (brks) of the set_graphemebreaks_*
 * functions.
 */
#define GRAPHEMEBREAK_BREAK 0 /**< Between 2 graphemes */
#define GRAPHEMEBREAK_NOBREAK 1 /**< Inside a grapheme */
#define GRAPHEMEBREAK_INSIDEACHAR 2 /**< Inside a unicode character */
/*
 * NOTE(review): the definition of init_graphemebreak() is not visible
 * here; presumably an initialization hook to be called before the
 * set_graphemebreaks_* functions -- confirm against graphemebreak.c.
 */
void init_graphemebreak(void);
/*
 * Set the grapheme breaking information for a UTF-8 / UTF-16 / UTF-32
 * input string s of length len.  lang is reserved for future use.  The
 * results are written to brks as GRAPHEMEBREAK_* values.
 */
void set_graphemebreaks_utf8(const utf8_t *s, size_t len, const char *lang,
char *brks);
void set_graphemebreaks_utf16(const utf16_t *s, size_t len,
const char *lang, char *brks);
void set_graphemebreaks_utf32(const utf32_t *s, size_t len,
const char *lang, char *brks);
#ifdef __cplusplus
}
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,82 @@
/*
* Grapheme breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute
* it freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must
* not claim that you wrote the original software. If you use this
* software in a product, an acknowledgement in the product
* documentation would be appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must
* not be misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source
* distribution.
*
* The main reference is Unicode Standard Annex 29 (UAX #29):
* <URL:http://unicode.org/reports/tr29>
*
* When this library was designed, this annex was at Revision 29, for
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
*/
/**
* @file graphemebreakdef.h
*
* Definitions of internal data structures, declarations of global
* variables, and function prototypes for the grapheme breaking algorithm.
*
* @author Andreas Röver
*/
#include "unibreakdef.h"
/**
* Grapheme break classes. This is a direct mapping of Table 2 of Unicode
* Standard Annex 29.
*
* The rule numbers in the notes below refer to the rules as labelled in
* set_graphemebreaks() in graphemebreak.c.
*/
enum GraphemeBreakClass
{
GBP_CR, /**< No break before a following GBP_LF (GB3) */
GBP_LF, /**< Otherwise CR/LF always break (GB4 + GB5) */
GBP_Control, /**< Always breaks before and after (GB4 + GB5) */
GBP_Extend, /**< No break before it (GB9) */
GBP_ZWJ, /**< No break before it (GB9); left side of GB11 */
GBP_Regional_Indicator, /**< Kept together in pairs (GB12 + GB13) */
GBP_Prepend, /**< No break after it (GB9b) */
GBP_SpacingMark, /**< No break before it (GB9a) */
GBP_L, /**< Used by rule GB6 */
GBP_V, /**< Used by rules GB6 and GB7 */
GBP_T, /**< Used by rules GB7 and GB8 */
GBP_LV, /**< Used by rules GB6 and GB7 */
GBP_LVT, /**< Used by rules GB6 and GB8 */
GBP_E_Base, /**< Left side of rule GB10 */
GBP_E_Modifier, /**< Right side of rule GB10 */
GBP_Glue_After_Zwj, /**< Right side of rule GB11 */
GBP_E_Base_GAZ, /**< Left side of GB10, right side of GB11 */
GBP_Other, /**< Returned by the lookup when no table entry matches */
GBP_Undefined /**< NOTE(review): not referenced by the visible break
* code -- confirm intended use */
};
/**
* Struct for entries of grapheme break properties. Each entry assigns one
* break class to the code point range [start, end]. The array of the
* entries \e must be sorted, because the lookup in graphemebreak.c
* performs a binary search over it.
*/
struct GraphemeBreakProperties
{
utf32_t start; /**< First code point of the range */
utf32_t end; /**< Last code point of the range (inclusive) */
enum GraphemeBreakClass prop; /**< The grapheme breaking property */
};

View File

@ -45,7 +45,6 @@
* Implementation of the line breaking algorithm as described in Unicode
* Standard Annex 14.
*
* @version 3.2, 2016/12/04
* @author Wu Yongwei
* @author Petr Filipsky
*/
@ -68,7 +67,7 @@
/**
* Enumeration of break actions. They are used in the break action
* pair table below.
* pair table #baTable.
*/
enum BreakAction
{
@ -81,7 +80,7 @@ enum BreakAction
/**
* Break action pair table. This is a direct mapping of Table 2 of
* Unicode Standard Annex 14, Revision 37.
* Unicode Standard Annex 14, Revision 37, except the "CB" part.
*/
static enum BreakAction baTable[LBP_CB][LBP_CB] = {
{ /* OP */
@ -313,11 +312,12 @@ static struct LineBreakPropertiesIndex lb_prop_index[LINEBREAK_INDEX_SIZE] =
static __inline int ends_with(const char *str, const char *suffix,
unsigned suffixLen)
{
unsigned len;
if (str == NULL)
{
return 0;
}
unsigned len = strlen(str);
len = strlen(str);
if (len >= suffixLen &&
memcmp(str + len - suffixLen, suffix, suffixLen) == 0)
{
@ -706,6 +706,23 @@ int lb_process_next_char(
/**
* Sets the line breaking information for a generic input string.
*
* Currently, this implementation has customization for the following
* ISO 639-1 language codes (for \a lang):
*
* - de (German)
* - en (English)
* - es (Spanish)
* - fr (French)
* - ja (Japanese)
* - ko (Korean)
* - ru (Russian)
* - zh (Chinese)
*
* In addition, a suffix <code>"-strict"</code> may be added to indicate
* strict (as versus normal) line-breaking behaviour. See the <a
* href="http://www.unicode.org/reports/tr14/#CJ">Conditional Japanese
* Starter section of UAX #14</a> for more details.
*
* @param[in] s input string
* @param[in] len length of the input
* @param[in] lang language of the input
@ -766,6 +783,7 @@ void set_linebreaks(
* @param[out] brks pointer to the output breaking data, containing
* #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
* #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
* @see #set_linebreaks for a note about \a lang.
*/
void set_linebreaks_utf8(
const utf8_t *s,
@ -786,6 +804,7 @@ void set_linebreaks_utf8(
* @param[out] brks pointer to the output breaking data, containing
* #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
* #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
* @see #set_linebreaks for a note about \a lang.
*/
void set_linebreaks_utf16(
const utf16_t *s,
@ -806,6 +825,7 @@ void set_linebreaks_utf16(
* @param[out] brks pointer to the output breaking data, containing
* #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
* #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
* @see #set_linebreaks for a note about \a lang.
*/
void set_linebreaks_utf32(
const utf32_t *s,

View File

@ -43,7 +43,6 @@
*
* Header file for the line breaking algorithm.
*
* @version 3.1, 2016/09/10
* @author Wu Yongwei
*/

View File

@ -43,7 +43,6 @@
*
* Definition of language-specific data.
*
* @version 3.1, 2016/09/10
* @author Wu Yongwei
*/

View File

@ -45,7 +45,6 @@
* Definitions of internal data structures, declarations of global
* variables, and function prototypes for the line breaking algorithm.
*
* @version 3.2, 2016/12/03
* @author Wu Yongwei
* @author Petr Filipsky
*/

View File

@ -4,7 +4,7 @@
* Break processing in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2015 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2015-2016 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -29,7 +29,6 @@
*
* Definition of basic libunibreak information.
*
* @version 3.0, 2015/05/10
* @author Wu Yongwei
*/

View File

@ -4,7 +4,7 @@
* Break processing in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2015 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2015-2016 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -29,7 +29,6 @@
*
* Header file for common definitions in the libunibreak library.
*
* @version 3.0, 2015/05/10
* @author Wu Yongwei
*/
@ -40,7 +39,7 @@
extern "C" {
#endif
#define UNIBREAK_VERSION 0x0300 /**< Version of the library linebreak */
#define UNIBREAK_VERSION 0x0400 /**< Version of the library linebreak */
extern const int unibreak_version;
#ifndef UNIBREAK_UTF_TYPES_DEFINED

View File

@ -4,7 +4,7 @@
* Break processing in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2015 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2015-2016 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -29,7 +29,6 @@
*
* Definition of utility functions used by the libunibreak library.
*
* @version 3.0, 2015/05/10
* @author Wu Yongwei
*/

View File

@ -4,7 +4,7 @@
* Break processing in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2015 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2015-2016 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -29,7 +29,6 @@
*
* Header file for private definitions in the libunibreak library.
*
* @version 3.0, 2015/05/10
* @author Wu Yongwei
*/

View File

@ -4,7 +4,7 @@
* Word breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2013-2015 Tom Hacohen <tom at stosb dot com>
* Copyright (C) 2013-2016 Tom Hacohen <tom at stosb dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -30,9 +30,9 @@
* Unicode 6.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
*
* This library has been updated according to Revision 27, for
* Unicode 8.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-27.html>
* This library has been updated according to Revision 29, for
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@ -44,7 +44,6 @@
* Implementation of the word breaking algorithm as described in Unicode
* Standard Annex 29.
*
* @version 3.1, 2015/05/18
* @author Tom Hacohen
*/
@ -149,7 +148,7 @@ static void set_brks_to(
*
* @param[in] s input string
* @param[in] len length of the input
* @param[in] lang language of the input
* @param[in] lang language of the input (reserved for future use)
* @param[out] brks pointer to the output breaking data, containing
* #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or
* #WORDBREAK_INSIDEACHAR
@ -210,13 +209,7 @@ static void set_wordbreaks(
posLast = posCur;
break;
}
#ifndef __has_attribute
# define __has_attribute(x) 0
#endif
#if __has_attribute(fallthrough)
__attribute__((fallthrough));
#endif
/* Fall off */
/* Fall off */
case WBP_Newline:
/* WB3a,3b */
@ -329,13 +322,7 @@ static void set_wordbreaks(
wbcSeqStart = wbcCur;
posLast = posCur;
}
#ifndef __has_attribute
# define __has_attribute(x) 0
#endif
#if __has_attribute(fallthrough)
__attribute__((fallthrough));
#endif
/* No break on purpose */
/* No break on purpose */
case WBP_MidNumLet:
if (((wbcLast == WBP_ALetter) ||
(wbcLast == WBP_Hebrew_Letter)) || /* WB6,7 */
@ -511,7 +498,7 @@ static void set_wordbreaks(
*
* @param[in] s input UTF-8 string
* @param[in] len length of the input
* @param[in] lang language of the input
* @param[in] lang language of the input (reserved for future use)
* @param[out] brks pointer to the output breaking data, containing
* #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or
* #WORDBREAK_INSIDEACHAR
@ -531,7 +518,7 @@ void set_wordbreaks_utf8(
*
* @param[in] s input UTF-16 string
* @param[in] len length of the input
* @param[in] lang language of the input
* @param[in] lang language of the input (reserved for future use)
* @param[out] brks pointer to the output breaking data, containing
* #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or
* #WORDBREAK_INSIDEACHAR
@ -551,7 +538,7 @@ void set_wordbreaks_utf16(
*
* @param[in] s input UTF-32 string
* @param[in] len length of the input
* @param[in] lang language of the input
* @param[in] lang language of the input (reserved for future use)
* @param[out] brks pointer to the output breaking data, containing
* #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or
* #WORDBREAK_INSIDEACHAR

View File

@ -4,7 +4,7 @@
* Word breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2013-2015 Tom Hacohen <tom at stosb dot com>
* Copyright (C) 2013-2016 Tom Hacohen <tom at stosb dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -30,9 +30,9 @@
* Unicode 6.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
*
* This library has been updated according to Revision 25, for
* Unicode 7.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-25.html>
* This library has been updated according to Revision 29, for
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@ -43,7 +43,6 @@
*
* Header file for the word breaking (segmentation) algorithm.
*
* @version 3.0, 2015/05/10
* @author Tom Hacohen
*/

View File

@ -4,7 +4,7 @@
* Word breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2013-15 Tom Hacohen <tom at stosb dot com>
* Copyright (C) 2013-16 Tom Hacohen <tom at stosb dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -30,9 +30,9 @@
* Unicode 6.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
*
* This library has been updated according to Revision 25, for
* Unicode 7.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-25.html>
* This library has been updated according to Revision 29, for
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@ -44,7 +44,6 @@
* Definitions of internal data structures, declarations of global
* variables, and function prototypes for the word breaking algorithm.
*
* @version 3.0, 2015/05/10
* @author Tom Hacohen
*/

View File

@ -105,8 +105,9 @@ while (0)
EFL_START_TEST(evas_textblock_cursor)
{
START_TB_TEST();
Evas_Textblock_Cursor *cur2;
Evas_Coord x, y, w, h;
size_t i, len;
size_t i, j, len;
Evas_Coord nw, nh;
Evas_BiDi_Direction dir;
const char *buf = "This is a<br/> test.<ps/>Lets see if this works.<ps/>עוד פסקה.";
@ -973,6 +974,27 @@ EFL_START_TEST(evas_textblock_cursor)
}
evas_textblock_cursor_free(cur2);
}
/* Testing for grapheme cluster */
cur2 = evas_object_textblock_cursor_new(tb);
evas_object_textblock_text_markup_set(tb, "ഹലോ");
evas_textblock_cursor_pos_set(cur, 0);
evas_textblock_cursor_pos_set(cur2, 0);
i = j = 0;
while (evas_textblock_cursor_cluster_next(cur)) i++;
ck_assert_int_eq(i, 2);
while (evas_textblock_cursor_char_next(cur2)) j++;
ck_assert_int_eq(j, 4);
i = j = 0;
while (evas_textblock_cursor_cluster_prev(cur)) i++;
ck_assert_int_eq(i, 2);
while (evas_textblock_cursor_char_prev(cur2)) j++;
ck_assert_int_eq(j, 4);
END_TB_TEST();
}
EFL_END_TEST