forked from enlightenment/efl
Evas textblock: fix wrong hyphenation with non-UTF-8 encoded dictionaries
Summary: hnj_hyphen_hyphenate2() expects text in the encoding used by the given dictionary. Each dictionary declares its encoding at the head of the file, so the text is now converted to that encoding before the function is called. It fixes T3221. @fix Test Plan: Included in Evas test suite. Reviewers: z-wony, tasn, woohyun, herdsman, Blackmole, minudf Subscribers: zmike, stefan_schmidt, raster, cedric, jpeg Tags: #efl Maniphest Tasks: T3221 Differential Revision: https://phab.enlightenment.org/D3863
This commit is contained in:
parent
c33ef15d5d
commit
73c39bcf12
|
@ -2524,7 +2524,9 @@ tests/evas/images/bg_gray.png \
|
|||
tests/evas/images/flower.jp2 \
|
||||
tests/evas/images/flower.jp2.png \
|
||||
tests/evas/images/train.j2k \
|
||||
tests/evas/images/train.j2k.png
|
||||
tests/evas/images/train.j2k.png \
|
||||
tests/evas/dicts/hyph_en_US.dic \
|
||||
tests/evas/dicts/hyph_de_DE.dic
|
||||
|
||||
if HAVE_ELUA
|
||||
|
||||
|
|
|
@ -26,14 +26,19 @@ _dicts_hyphen_init(Eo *eo_obj)
|
|||
}
|
||||
}
|
||||
|
||||
static void *
|
||||
static HyphenDict *
|
||||
_dict_hyphen_load(const char *lang)
|
||||
{
|
||||
Eina_Iterator *it;
|
||||
Eina_Iterator *it = NULL;
|
||||
Eina_File_Direct_Info *dir;
|
||||
void *dict = NULL;
|
||||
HyphenDict *dict = NULL;
|
||||
const char *env_dir = getenv("EVAS_DICTS_HYPHEN_DIR");
|
||||
|
||||
if (env_dir && strlen(env_dir) > 0)
|
||||
it = eina_file_direct_ls(env_dir);
|
||||
|
||||
if (!it) it = eina_file_direct_ls(EVAS_DICTS_HYPHEN_DIR);
|
||||
|
||||
it = eina_file_direct_ls(EVAS_DICTS_HYPHEN_DIR);
|
||||
if (!it)
|
||||
{
|
||||
ERR("Couldn't list files in hyphens path: %s\n", EVAS_DICTS_HYPHEN_DIR);
|
||||
|
@ -103,7 +108,7 @@ _dicts_hyphen_detach(Eo *eo_obj)
|
|||
|
||||
/* Returns the hyphen dictionary that matches the given language
|
||||
* string. The string should be in the format xx_XX e.g. en_US */
|
||||
static inline void *
|
||||
static inline HyphenDict *
|
||||
_hyphen_dict_get_from_lang(const char *lang)
|
||||
{
|
||||
if (!lang || !(*lang))
|
||||
|
@ -127,13 +132,14 @@ static char *
|
|||
_layout_wrap_hyphens_get(const Eina_Unicode *text, const char *lang,
|
||||
int word_start, int word_len)
|
||||
{
|
||||
char *utf8;
|
||||
int utf8_len; /* length of word */
|
||||
char *hyphens;
|
||||
char *hyphens = NULL;
|
||||
char **rep = NULL;
|
||||
int *pos = NULL;
|
||||
int *cut = NULL;
|
||||
void *dict;
|
||||
HyphenDict *dict;
|
||||
char *converted_text = NULL;
|
||||
size_t converted_text_offset = 0;
|
||||
size_t converted_len = 0;
|
||||
|
||||
dict = _hyphen_dict_get_from_lang(lang);
|
||||
if (!dict)
|
||||
|
@ -142,11 +148,44 @@ _layout_wrap_hyphens_get(const Eina_Unicode *text, const char *lang,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
utf8 = eina_unicode_unicode_to_utf8_range(
|
||||
text + word_start, word_len, &utf8_len);
|
||||
hyphens = malloc(sizeof(char) * (word_len + 5));
|
||||
hnj_hyphen_hyphenate2(dict, utf8, word_len, hyphens, NULL, &rep, &pos, &cut);
|
||||
free(utf8);
|
||||
/* Convert the UTF-32 encoded text to the encoding
|
||||
* declared by the hyphen dictionary (dict->cset). */
|
||||
if (dict->cset && strcmp(dict->cset, "UTF-32"))
|
||||
{
|
||||
converted_text = eina_str_convert_len("UTF-32", dict->cset,
|
||||
(char *)(text + word_start),
|
||||
word_len * sizeof(Eina_Unicode),
|
||||
&converted_len);
|
||||
|
||||
if (!converted_text) goto hyphens_done;
|
||||
|
||||
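/* Skip the byte order mark (bytes 0xFF 0xFE) at the start of the converted text. NOTE(review): converted_text is a plain char *, so where char is signed these comparisons against 0xff/0xfe can never be true -- confirm. */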
|
||||
if ((converted_len >= 2) &&
|
||||
(converted_text[0] == 0xff) &&
|
||||
(converted_text[1] == 0xfe))
|
||||
converted_text_offset = 2;
|
||||
|
||||
/* If there is only a BOM character, return NULL */
|
||||
if (converted_len == converted_text_offset)
|
||||
goto hyphens_done;
|
||||
}
|
||||
|
||||
if (converted_text)
|
||||
{
|
||||
hyphens = malloc(sizeof(char) * (converted_len + 5));
|
||||
hnj_hyphen_hyphenate2(dict, converted_text + converted_text_offset,
|
||||
(int)(converted_len - converted_text_offset), hyphens, NULL, &rep, &pos, &cut);
|
||||
}
|
||||
else
|
||||
{
|
||||
hyphens = malloc(sizeof(char) * (word_len + 5));
|
||||
hnj_hyphen_hyphenate2(dict, (char *)(text + word_start),
|
||||
word_len, hyphens, NULL, &rep, &pos, &cut);
|
||||
}
|
||||
|
||||
hyphens_done:
|
||||
if (converted_text) free(converted_text);
|
||||
|
||||
return hyphens;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
Hyphenation dictionary
|
||||
----------------------
|
||||
|
||||
Language: German (de DE).
|
||||
Origin: Based on the TeX hyphenation tables
|
||||
http://www.tug.org/tex-archive/language/hyphenation/dehyphn.tex
|
||||
License: GNU LGPL license.
|
||||
Author: conversion author is Marco Huggenberger<marco@by-night.ch>
|
||||
revised conversion and extensions: Daniel Naber
|
||||
http://qa.openoffice.org/issues/show_bug.cgi?id=26355
|
||||
|
||||
Please note, this dictionary is based on syllable matching patterns
|
||||
and thus should be suitable under other variations of German
|
||||
|
||||
HYPH de DE hyph_de_DE
|
||||
HYPH de CH hyph_de_CH
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
Trennmuster (hyph_de_DE.dic):
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Die Trennmuster (hyph_de_DE.dic) basieren auf den TeX Trennmustern
|
||||
"dehyphn.tex", revision level 31.
|
||||
Lizenz der Trennmuster: GNU LGPL. Die Anpassung der Trennmuster an
|
||||
den in OpenOffice.org benutzten "ALTLinux LibHnj Hyphenator" wurde
|
||||
mit dem Script substrings.pl durchgeführt, das unter
|
||||
http://lingucomponent.openoffice.org/hyphenator.html als Teil der
|
||||
Datei altlinux_Hyph.zip heruntergeladen werden kann.
|
||||
Die Original-Trennmuster können hier heruntergeladen werden:
|
||||
http://www.tug.org/tex-archive/language/hyphenation/dehyphn.tex
|
|
@ -0,0 +1,53 @@
|
|||
hyph_en_US.dic - American English hyphenation patterns for OpenOffice.org
|
||||
|
||||
version 2010-02-23
|
||||
|
||||
Changes
|
||||
|
||||
- set correct LEFTHYPHENMIN = 2, RIGHTHYPHENMIN = 3
|
||||
- handle apostrophes (forbid *o'=clock etc.)
|
||||
- set COMPOUNDLEFTHYPHENMIN, COMPOUNDRIGHTHYPHENMIN values
|
||||
- UTF-8 encoding
|
||||
- Unicode ligature support
|
||||
|
||||
License
|
||||
|
||||
BSD-style. Unlimited copying, redistribution and modification of this file
|
||||
is permitted with this copyright and license information.
|
||||
|
||||
See original license in this file.
|
||||
|
||||
Conversion and modifications by László Németh (nemeth at OOo).
|
||||
|
||||
Based on the plain TeX hyphenation table
|
||||
(http://tug.ctan.org/text-archive/macros/plain/base/hyphen.tex) and
|
||||
the TugBoat hyphenation exceptions log in
|
||||
http://www.ctan.org/tex-archive/info/digests/tugboat/tb0hyf.tex, processed
|
||||
by the hyphenex.sh script (see in the same directory).
|
||||
|
||||
Originally developed and distributed with the Hyphen hyphenation library,
|
||||
see http://hunspell.sourceforge.net/ for the source files and the conversion
|
||||
scripts.
|
||||
|
||||
Licenses
|
||||
|
||||
hyphen.tex:
|
||||
% The Plain TeX hyphenation tables [NOT TO BE CHANGED IN ANY WAY!]
|
||||
% Unlimited copying and redistribution of this file are permitted as long
|
||||
% as this file is not modified. Modifications are permitted, but only if
|
||||
% the resulting file is not named hyphen.tex.
|
||||
|
||||
output of hyphenex.sh:
|
||||
% Hyphenation exceptions for US English, based on hyphenation exception
|
||||
% log articles in TUGboat.
|
||||
%
|
||||
% Copyright 2007 TeX Users Group.
|
||||
% You may freely use, modify and/or distribute this file.
|
||||
%
|
||||
% This is an automatically generated file. Do not edit!
|
||||
%
|
||||
% Please contact the TUGboat editorial staff <tugboat@tug.org>
|
||||
% for corrections and omissions.
|
||||
|
||||
hyph_en_US.txt:
|
||||
See the previous licenses.
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -16,6 +16,8 @@
|
|||
#include "evas_suite.h"
|
||||
#include "evas_tests_helpers.h"
|
||||
|
||||
#define TESTS_DIC_DIR TESTS_SRC_DIR"/dicts"
|
||||
|
||||
/* Functions defined in evas_object_textblock.c */
|
||||
EAPI Eina_Bool
|
||||
_evas_textblock_check_item_node_link(Evas_Object *obj);
|
||||
|
@ -4150,6 +4152,21 @@ EFL_START_TEST(evas_textblock_hyphenation)
|
|||
evas_object_textblock_text_markup_set(tb, buf);
|
||||
_hyphenation_width_stress(tb, cur);
|
||||
|
||||
setenv("EVAS_DICTS_HYPHEN_DIR", TESTS_DIC_DIR, 1);
|
||||
|
||||
buf = "europäi-";
|
||||
evas_object_textblock_text_markup_set(tb, buf);
|
||||
evas_object_textblock_size_formatted_get(tb, &w, NULL);
|
||||
|
||||
buf = "europäischen";
|
||||
evas_object_textblock_text_markup_set(tb, buf);
|
||||
evas_textblock_cursor_format_prepend(cur, "<wrap=hyphenation lang=de_DE>");
|
||||
evas_object_resize(tb, w, 100);
|
||||
evas_object_textblock_size_formatted_get(tb, &fw, NULL);
|
||||
ck_assert_int_eq(w, fw);
|
||||
|
||||
unsetenv("EVAS_DICTS_HYPHEN_DIR");
|
||||
|
||||
END_TB_TEST();
|
||||
}
|
||||
EFL_END_TEST;
|
||||
|
|
Loading…
Reference in New Issue