summaryrefslogtreecommitdiff
path: root/src/lib/eina/eina_simple_xml_parser.h
blob: 19c0bca44f94668597b5331e33ab8c34cd15233b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
/* EINA - EFL data type library
 * Copyright (C) 2011 Gustavo Sverzut Barbieri
 *                    Cedric Bail
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library;
 * if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef EINA_SIMPLE_XML_H_
#define EINA_SIMPLE_XML_H_

#include "eina_config.h"

#include "eina_types.h"
#include "eina_magic.h"
#include "eina_inlist.h"

/**
 * @page eina_simple_xml_parser_example_01_page
 * @dontinclude eina_simple_xml_parser_01.c
 *
 * We are going to parse an XML sample file and print the data to stdout.
 *
 * Like all examples we start by including Eina:
 * @skipline #include
 *
 * We declare 2 booleans to keep track of tags:
 * @skipline tag_login
 * @skipline tag_message
 *
 * Here we declare some variables and initialize eina:
 * @until eina_init
 *
 * We fill buffer with the XML data from chat.xml:
 * @until fread
 *
 * We will use an Eina_Array to store the data:
 * @skipline array
 *
 * Here we call eina_simple_xml_parse(). We pass the buffer with data, its size,
 * we ask to strip leading and trailing whitespace, we give the callback
 * function and the array to store the formatted data:
 * @until _xml_tag_cb
 *
 * This will loop over the array and print the data using _print callback:
 * @skipline foreach
 *
 * This is the main XML parser callback, it will check for known tags and get
 * the corresponding values:
 * @skip static
 * @until str
 *
 * We first check for opening tag:
 * @skipline type
 *
 * If we know the tag should have attributes, then we find them using
 * eina_simple_xml_tag_attributes_find() and give them to another parsing
 * function using eina_simple_xml_attributes_parse():
 * @until _xml_attr_cb
 *
 * We check for other known tags:
 * @until tag_message
 *
 * We then check data for corresponding tag:
 * @until EINA_FALSE
 *
 * We do the formatting at the same time and put all the \<post\> children
 * in str.
 * @until EINA_FALSE
 *
 * Finally, we store our string in the array:
 * @skipline push
 *
 * This is the callback to parse the attributes, we check for key name and keep
 * the value:
 * @skip static
 * @until snprintf
 *
 * This is the function that simply prints the items of the array:
 * @until EINA_TRUE
 *
 * You can see the full source code
 * @ref eina_simple_xml_parser_example_01 "here".
 */

/**
 * @page eina_simple_xml_parser_example_01
 * @include eina_simple_xml_parser_01.c
 * @example eina_simple_xml_parser_01.c
 */

/**
 * @defgroup Eina_Simple_XML_Group Simple_XML
 *
 * Simplistic relaxed SAX-like XML parser.
 *
 * This parser is far from being compliant with the XML standard, but it
 * will do for most XML documents out there. If you know that your format
 * is simple and will not grow strange corner cases in the future, then
 * you can use it safely.
 *
 * The parser is SAX-like, that is, it tokenizes the contents and calls
 * you back so you can take some action. No contents are allocated
 * while parsing and the parser is not recursive, so you can use it
 * with a very large document without worries.
 *
 * It will not validate the document in any way, nor will it create a
 * tree hierarchy. That's up to you.
 *
 * According to XML, open tags may contain attributes. This parser
 * will not tokenize them. If you want to, you can use
 * eina_simple_xml_tag_attributes_find() and then
 * eina_simple_xml_attributes_parse().
 *
 * For more information, see
 * @ref eina_simple_xml_parser_example_01_page "this example".
 */

/**
 * @addtogroup Eina_Tools_Group Tools
 *
 * @{
 */

/**
 * @defgroup Eina_Simple_XML_Group Simple_XML
 *
 * @{
 */

typedef struct _Eina_Simple_XML_Node      Eina_Simple_XML_Node;
typedef struct _Eina_Simple_XML_Node_Tag  Eina_Simple_XML_Node_Root;
typedef struct _Eina_Simple_XML_Node_Tag  Eina_Simple_XML_Node_Tag;
typedef struct _Eina_Simple_XML_Node_Data Eina_Simple_XML_Node_Data;
typedef struct _Eina_Simple_XML_Node_Data Eina_Simple_XML_Node_CData;
typedef struct _Eina_Simple_XML_Node_Data Eina_Simple_XML_Node_Processing;
typedef struct _Eina_Simple_XML_Node_Data Eina_Simple_XML_Node_Doctype;
typedef struct _Eina_Simple_XML_Node_Data Eina_Simple_XML_Node_Doctype_Child; /**< @since 1.8 */
typedef struct _Eina_Simple_XML_Node_Data Eina_Simple_XML_Node_Comment;
typedef struct _Eina_Simple_XML_Attribute Eina_Simple_XML_Attribute;

struct _Eina_Simple_XML_Attribute
{
   EINA_INLIST;
   EINA_MAGIC;

   Eina_Simple_XML_Node_Tag *parent;
   const char *key;
   const char *value;
};

typedef enum _Eina_Simple_XML_Node_Type
{
  EINA_SIMPLE_XML_NODE_ROOT = 0,
  EINA_SIMPLE_XML_NODE_TAG,
  EINA_SIMPLE_XML_NODE_DATA,
  EINA_SIMPLE_XML_NODE_CDATA,
  EINA_SIMPLE_XML_NODE_PROCESSING,
  EINA_SIMPLE_XML_NODE_DOCTYPE,
  EINA_SIMPLE_XML_NODE_COMMENT,
  EINA_SIMPLE_XML_NODE_DOCTYPE_CHILD, /**< @since 1.8 */
} Eina_Simple_XML_Node_Type;

struct _Eina_Simple_XML_Node
{
   EINA_INLIST;
   EINA_MAGIC;

   Eina_Simple_XML_Node_Tag *parent;
   Eina_Simple_XML_Node_Type type;
};

struct _Eina_Simple_XML_Node_Tag
{
   Eina_Simple_XML_Node base;
   Eina_Inlist *children;
   Eina_Inlist *attributes;
   const char *name;
};

struct _Eina_Simple_XML_Node_Data
{
   Eina_Simple_XML_Node base;
   size_t length;
   char data[];
};
/**
 * @typedef _Eina_Simple_XML_Type
 * a simple XML type.
 */
typedef enum _Eina_Simple_XML_Type
{
  EINA_SIMPLE_XML_OPEN = 0, /*!< \<tag attribute="value"\> */
  EINA_SIMPLE_XML_OPEN_EMPTY, /*!< \<tag attribute="value" /\> */
  EINA_SIMPLE_XML_CLOSE, /*!< \</tag\> */
  EINA_SIMPLE_XML_DATA, /*!< tag text data */
  EINA_SIMPLE_XML_CDATA, /*!< \<![CDATA[something]]\> */
  EINA_SIMPLE_XML_ERROR, /*!< error contents */
  EINA_SIMPLE_XML_PROCESSING, /*!< \<?xml ... ?\> \<?php .. ?\> */
  EINA_SIMPLE_XML_DOCTYPE, /*!< \<!DOCTYPE html */
  EINA_SIMPLE_XML_COMMENT, /*!< \<!-- something --\> */
  EINA_SIMPLE_XML_IGNORED, /*!< whatever is ignored by parser, like whitespace */
  EINA_SIMPLE_XML_DOCTYPE_CHILD /*!< \<!DOCTYPE_CHILD @since 1.8 */
} Eina_Simple_XML_Type;

typedef Eina_Bool (*Eina_Simple_XML_Cb)(void *data, Eina_Simple_XML_Type type, const char *content, unsigned offset, unsigned length);
typedef Eina_Bool (*Eina_Simple_XML_Attribute_Cb)(void *data, const char *key, const char *value);


/**
 * @brief Parses a section of XML string text
 *
 * @param[in] buf The input string. May not contain \0 terminator.
 * @param[in] buflen The input string size.
 * @param[in] strip Whenever this parser should strip leading and trailing
 *            whitespace. These whitespace will still be issued, but as type
 *            #EINA_SIMPLE_XML_IGNORED.
 * @param[in] func What to call back while parse to do some action.  The
 *            first parameter is the given user @a data, the second is the
 *            token type, the third is the pointer to content start (it's
 *            not a NULL terminated string!), the fourth is where this
 *            content is located inside @a buf (does not include tag
 *            start, for instance "<!DOCTYPE value>" the offset points at
 *            "value"), the fifth is the size of the content. Whenever this
 *            function return #EINA_FALSE the parser will abort.
 * @param[in] data What to give as context to @a func.
 * @return #EINA_TRUE on success, or #EINA_FALSE if it was aborted by user or
 * parsing error.
 */
EAPI Eina_Bool eina_simple_xml_parse(const char *buf, unsigned buflen,
                                     Eina_Bool strip,
                                     Eina_Simple_XML_Cb func, const void *data);


/**
 * @brief Given the contents of a tag, find where the attributes start.
 *
 * @param[in] buf The input string. May not contain \0 terminator.
 * @param[in] buflen The input string size.
 * @return Pointer to the start of attributes, it can be used
 *         to feed eina_simple_xml_attributes_parse(). @c NULL is returned
 *         if no attributes were found.
 *
 * The tag contents is returned by eina_simple_xml_parse() when
 * type is #EINA_SIMPLE_XML_OPEN or #EINA_SIMPLE_XML_OPEN_EMPTY.
 *
 */
EAPI const char * eina_simple_xml_tag_attributes_find(const char *buf, unsigned buflen);

/**
 * @brief Given a buffer with xml attributes, parse them to key=value pairs.
 *
 * @param[in] buf The input string. May not contain \0 terminator.
 * @param[in] buflen The input string size.
 * @param[in] func What to call back while parse to do some action. The
 *            first parameter is the given user @a data, the second is the
 *            key (null-terminated) and the last is the value (null
 *            terminated). These strings should not be modified and
 *            reference is just valid until the function return.
 * @param[in] data Data to pass to the callback function.
 *
 * @return #EINA_TRUE on success, or #EINA_FALSE if it was aborted by user or
 *         parsing error.
 */
EAPI Eina_Bool eina_simple_xml_attributes_parse(const char *buf, unsigned buflen,
						Eina_Simple_XML_Attribute_Cb func, const void *data);

/**
 * @brief Given a buffer with the xml value of an attributes, parse them to key:value pairs.
 *
 * @param[in] buf the input string. Need to contain \0 terminator.
 * @param[in] func what to call back while parse to do some action. The
 *            first parameter is the given user @a data, the second is the
 *            key (null-terminated) and the last is the value (null
 *            terminated). These strings should not be modified and
 *            reference is just valid until the function return.
 * @param[in] data data to pass to the callback function.
 *
 * @return #EINA_TRUE on success or #EINA_FALSE if it was aborted by user or
 *          parsing error.
 *
 * @since 1.14
 */
EAPI Eina_Bool
eina_simple_xml_attribute_w3c_parse(const char *buf, Eina_Simple_XML_Attribute_Cb func, const void *data);

/**
 * @brief Creates (and appends) new attribute to tag.
 *
 * @param[in,out] parent If provided, will be set in the resulting
 *                structure as well as the attribute will be appended to
 *                attributes list.
 * @param[in] key Null-terminated string. Must not be @c NULL.
 * @param[in] value Null-terminated string. If @c NULL, the empty string will be used.
 *
 * @return Newly allocated memory or @c NULL on error. This memory should be
 *         released with eina_simple_xml_attribute_free() or indirectly
 *         with eina_simple_xml_node_tag_free().
 */
EAPI Eina_Simple_XML_Attribute * eina_simple_xml_attribute_new(Eina_Simple_XML_Node_Tag *parent, const char *key, const char *value);

/**
 * @brief Removes attribute from parent and deletes it.
 *
 * @param[in] attr attribute to release memory.
 */
EAPI void eina_simple_xml_attribute_free(Eina_Simple_XML_Attribute *attr);

/**
 * @brief Creates new tag. If parent is provided, it is automatically appended.
 *
 * @param[in] parent If provided, will be set in the resulting structure
 *            as well as the tag will be appended to children list.
 * @param[in] name Null-terminated string. Must not be @c NULL.
 *
 * @return Newly allocated memory or @c NULL on error. This memory should be
 *         released with eina_simple_xml_node_tag_free() or indirectly
 *         with eina_simple_xml_node_tag_free() of the parent.
 */
EAPI Eina_Simple_XML_Node_Tag * eina_simple_xml_node_tag_new(Eina_Simple_XML_Node_Tag *parent, const char *name);

/**
 * @brief Removes tag from parent and deletes it.
 *
 * @param[in] tag to release memory.
 */
EAPI void eina_simple_xml_node_tag_free(Eina_Simple_XML_Node_Tag *tag);


/**
 * @brief Creates new data. If parent is provided, it is automatically appended.
 *
 * @param[in,out] parent If provided, will be set in the resulting structure
 *                as well as the data will be appended to children list.
 * @param[in] contents String to be used. Must not be @c NULL.
 * @param[in] length Size in bytes of @a contents.
 *
 * @return Newly allocated memory or NULL on error. This memory should be
 *         released with eina_simple_xml_node_data_free() or indirectly
 *         with eina_simple_xml_node_tag_free() of the parent.
 */
EAPI Eina_Simple_XML_Node_Data * eina_simple_xml_node_data_new(Eina_Simple_XML_Node_Tag *parent, const char *contents, size_t length);

/**
 * @brief Removes data from parent and deletes it.
 *
 * @param[in] node to release memory.
 */
EAPI void eina_simple_xml_node_data_free(Eina_Simple_XML_Node_Data *node);


/**
 * @brief Creates new cdata. If parent is provided, it is automatically appended.
 *
 * @param[in,out] parent If provided, will be set in the resulting structure
 *        as well as the cdata will be appended to children list.
 * @param[in] contents String to be used. Must not be @c NULL.
 * @param[in] length Size in bytes of @a content.
 *
 * @return Newly allocated memory or @c NULL on error. This memory should be
 *         released with eina_simple_xml_node_cdata_free() or indirectly
 *         with eina_simple_xml_node_tag_free() of the parent.
 */
EAPI Eina_Simple_XML_Node_CData * eina_simple_xml_node_cdata_new(Eina_Simple_XML_Node_Tag *parent, const char *contents, size_t length);

/**
 * @brief Removes cdata from parent and deletes it.
 *
 * @param[in] node to release memory.
 */
EAPI void eina_simple_xml_node_cdata_free(Eina_Simple_XML_Node_Data *node);


/**
 * @brief Creates new doctype child. If parent is provided, it is automatically appended.
 *
 * @param[in,out] parent If provided, will be set in the resulting structure
 *                as well as the doctype child will be appended to children list.
 * @param[in] contents String to be used. Must not be @c NULL.
 * @param[in] length size in bytes of @a content.
 *
 * @return Newly allocated memory or @c NULL on error. This memory should be
 *         released with eina_simple_xml_node_doctype_child_free() or indirectly
 *         with eina_simple_xml_node_tag_free() of the parent.
 *
 * @since 1.8
 */
EAPI Eina_Simple_XML_Node_Doctype_Child * eina_simple_xml_node_doctype_child_new(Eina_Simple_XML_Node_Tag *parent, const char *contents, size_t length);

/**
 * @brief Removes doctype child from parent and deletes it.
 *
 * @param[in] node to release memory.
 *
 * @since 1.8
 */
EAPI void eina_simple_xml_node_doctype_child_free(Eina_Simple_XML_Node_Data *node);


/**
 * @brief Creates new processing. If parent is provided, it is automatically appended.
 *
 * @param[in,out] parent If provided, will be set in the resulting structure
 *                as well as the processing will be appended to children list.
 * @param[in] contents String to be used. Must not be @c NULL.
 * @param[in] length Size in bytes of @a contents.
 *
 * @return Newly allocated memory or @c NULL on error. This memory should be
 *         released with eina_simple_xml_node_processing_free() or indirectly
 *         with eina_simple_xml_node_tag_free() of the parent.
 */
EAPI Eina_Simple_XML_Node_Processing * eina_simple_xml_node_processing_new(Eina_Simple_XML_Node_Tag *parent, const char *contents, size_t length);

/**
 * @brief Removes processing from parent and deletes it.
 *
 * @param[in] node processing to release memory.
 */
EAPI void eina_simple_xml_node_processing_free(Eina_Simple_XML_Node_Data *node);


/**
 * @brief Creates new doctype. If parent is provided, it is automatically appended.
 *
 * @param[in,out] parent If provided, will be set in the resulting structure
 *                as well as the doctype will be appended to children list.
 * @param[in] contents String to be used. Must not be @c NULL.
 * @param[in] length Size in bytes of @a contents.
 *
 * @return Newly allocated memory or @c NULL on error. This memory should be
 *         released with eina_simple_xml_node_doctype_free() or indirectly
 *         with eina_simple_xml_node_tag_free() of the parent.
 */
EAPI Eina_Simple_XML_Node_Doctype * eina_simple_xml_node_doctype_new(Eina_Simple_XML_Node_Tag *parent, const char *contents, size_t length);

/**
 * @brief Removes doctype from parent and deletes it.
 *
 * @param[in] node doctype to release memory.
 */
EAPI void eina_simple_xml_node_doctype_free(Eina_Simple_XML_Node_Data *node);


/**
 * @brief Creates new comment. If parent is provided, it is automatically appended.
 *
 * @param[in,out] parent If provided, will be set in the resulting structure
 *                as well as the comment will be appended to children list.
 * @param[in] contents String to be used. Must not be @c NULL.
 * @param[in] length Size in bytes of @a contents.
 *
 * @return Newly allocated memory or @c NULL on error. This memory should be
 *         released with eina_simple_xml_node_comment_free() or indirectly
 *         with eina_simple_xml_node_tag_free() of the parent.
 */
EAPI Eina_Simple_XML_Node_Comment * eina_simple_xml_node_comment_new(Eina_Simple_XML_Node_Tag *parent, const char *contents, size_t length);

/**
 * @brief Removes comment from parent and deletes it.
 *
 * @param[in] node comment to release memory.
 */
EAPI void eina_simple_xml_node_comment_free(Eina_Simple_XML_Node_Data *node);


/**
 * @brief Loads a XML node tree based on the given string.
 *
 * @param[in] buf The input string. May not contain \0 terminator.
 * @param[in] buflen The input string size.
 * @param[in] strip Whenever this parser should strip leading and trailing
 *            whitespace.
 *
 * @return Document root with children tags, or @c NULL on errors.
 *         Document with errors may return partial tree instead of @c NULL,
 *         we'll do our best to avoid returning nothing.
 */
EAPI Eina_Simple_XML_Node_Root * eina_simple_xml_node_load(const char *buf, unsigned buflen, Eina_Bool strip);

/**
 * @brief Frees node tree build with eina_simple_xml_node_load()
 *
 * @param[in] root Memory returned by eina_simple_xml_node_load()
 */
EAPI void eina_simple_xml_node_root_free(Eina_Simple_XML_Node_Root *root);

/**
 * @brief Converts the node tree under the given element to a XML string.
 *
 * @param[in,out] node The base node to convert.
 * @param[in] indent Indentation string, or @c NULL to disable it.
 *
 * @return @c NULL on errors, or a newly allocated string on success.
 */
EAPI char * eina_simple_xml_node_dump(Eina_Simple_XML_Node *node, const char *indent);


/**
 * @}
 */

/**
 * @}
 */

#endif /* EINA_SIMPLE_XML_H_ */