summaryrefslogtreecommitdiff
path: root/src/lib/eina/eina_inline_unicode.x
blob: 7b7f7592b43b34b35f8aa1fe2d5e766df4e6add6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/* EINA - EFL data type library
 * Copyright (C) 2013 Cedric Bail
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library;
 * if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef EINA_INLINE_UNICODE_
# define EINA_INLINE_UNICODE_

/**
 * @internal
 * Out-of-line helper that eina_unicode_utf8_next_get() delegates to for
 * every lead byte it does not decode itself, i.e. any non-NUL byte with the
 * high bit set that is not a two-byte lead (110xxxxx).
 *
 * @param ind    Index of the byte following the lead byte in @p buf.
 * @param d      The lead byte that was already read from @p buf.
 * @param buf    The UTF-8 buffer being decoded.
 * @param iindex The caller's index pointer; at the time of the call it still
 *               holds the index of the lead byte.
 * @return The value the inline wrapper hands back to its caller unchanged.
 *
 * NOTE(review): the implementation is not visible in this file; presumably it
 * decodes the longer sequences and updates @p iindex itself - confirm against
 * the definition.
 */
EAPI Eina_Unicode _eina_unicode_utf8_next_get(int ind,
                                              unsigned char d,
                                              const char *buf,
                                              int *iindex);

/* Base value used to report an undecodable byte: the offending byte is
 * OR-ed into it, so the low 8 bits of the result carry the original byte. */
#define ERROR_REPLACEMENT_BASE  0xDC80

/* Non-zero when byte value x is one that is never part of well-formed UTF-8
 * (0xC0, 0xC1 and 0xF5..0xFF).
 * The argument is fully parenthesized so expressions can be passed safely;
 * it may be evaluated up to three times, so it must have no side effects. */
#define EINA_IS_INVALID_BYTE(x)      (((x) == 192) || ((x) == 193) || ((x) >= 245))

/* Non-zero when byte value x has the UTF-8 continuation form 10xxxxxx. */
#define EINA_IS_CONTINUATION_BYTE(x) (((x) & 0xC0) == 0x80)

/**
 * Decode the UTF-8 sequence that starts at buf[*iindex].
 *
 * One-byte (ASCII) and two-byte sequences are decoded inline; every other
 * lead byte is forwarded to _eina_unicode_utf8_next_get().
 *
 * @param buf    The UTF-8 buffer to read from.
 * @param iindex In: index of the first byte to decode. Out: advanced past the
 *               bytes consumed by the inline paths (left untouched when 0 is
 *               returned here).
 * @return 0 if @p buf or @p iindex is NULL or the byte at *iindex is NUL;
 *         the decoded code point for a valid one- or two-byte sequence;
 *         ERROR_REPLACEMENT_BASE OR-ed with the lead byte (and *iindex
 *         advanced by one) for a malformed two-byte sequence; otherwise
 *         whatever _eina_unicode_utf8_next_get() returns.
 */
static inline Eina_Unicode
eina_unicode_utf8_next_get(const char *buf, int *iindex)
{
   int pos;
   unsigned char lead;

   if (!(buf && iindex)) return 0;

   pos = *iindex;
   lead = buf[pos];
   pos++;

   /* NUL terminator: nothing to decode, index stays where it was */
   if (lead == 0) return 0;

   /* 1 byte (7bit) - 0xxxxxxx */
   if (lead < 0x80)
     {
        *iindex = pos;
        return lead;
     }

   /* 2 byte (11bit) - 110xxxxx 10xxxxxx */
   if ((lead >> 5) == 0x06)
     {
        unsigned char tail = buf[pos];

        pos++;
        if ((tail != 0) && !EINA_IS_INVALID_BYTE(tail) &&
            EINA_IS_CONTINUATION_BYTE(tail))
          {
             Eina_Unicode cp = ((lead & 0x1f) << 6) | (tail & 0x3f);

             /* values up to 0x7F here are overlong encodings: reject them */
             if (cp > 0x7F)
               {
                  *iindex = pos;
                  return cp;
               }
          }

        /* Malformed sequence: consume only the lead byte and report it as
         * an invalid code point whose low 8 bits hold the original byte. */
        (*iindex)++;
        return ERROR_REPLACEMENT_BASE | lead;
     }

   /* every other lead byte is handled out of line */
   return _eina_unicode_utf8_next_get(pos, lead, buf, iindex);
}

#endif