summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordiscomfitor <michael.blumenkrantz@gmail.com>2013-09-08 15:34:58 +0100
committerdiscomfitor <michael.blumenkrantz@gmail.com>2013-09-08 16:04:22 +0100
commit5eda1a5383a4134fd843debee4ee3d278575965b (patch)
treefd8d0e2c07671c78f362433309e8d7fb08c693a6
parent837507cf3c7f6166d54c6a04dcad4e8df93cad6e (diff)
add enchilada module
-rw-r--r--Makefile.am1
-rw-r--r--configure.ac57
-rw-r--r--src/bin/Makefile.mk6
-rw-r--r--src/modules/Makefile.mk45
-rw-r--r--src/modules/e_mod_main.c89
-rw-r--r--src/modules/e_mod_main.h50
-rw-r--r--src/modules/module.desktop.in6
-rw-r--r--src/modules/pugiconfig.hpp69
-rw-r--r--src/modules/pugixml.cpp10250
-rw-r--r--src/modules/pugixml.hpp1265
-rw-r--r--src/modules/virt.c349
-rw-r--r--src/modules/virt_xml.cpp50
12 files changed, 12229 insertions, 8 deletions
diff --git a/Makefile.am b/Makefile.am
index c1d3650..a4a3d0a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -25,6 +25,7 @@ ylwrap
25 25
26include src/bin/Makefile.mk 26include src/bin/Makefile.mk
27include src/lib/Makefile.mk 27include src/lib/Makefile.mk
28include src/modules/Makefile.mk
28 29
29maintainer-clean-local: 30maintainer-clean-local:
30 rm -rf autom4te.cache 31 rm -rf autom4te.cache
diff --git a/configure.ac b/configure.ac
index 25a973f..cb46587 100644
--- a/configure.ac
+++ b/configure.ac
@@ -47,19 +47,14 @@ AC_PROG_LIBTOOL
47AC_PROG_INSTALL 47AC_PROG_INSTALL
48AC_PROG_CC 48AC_PROG_CC
49AM_PROG_CC_C_O 49AM_PROG_CC_C_O
50AC_PROG_CXX
50AC_PROG_MAKE_SET 51AC_PROG_MAKE_SET
51AC_HEADER_STDC
52
53AC_C_CONST
54AC_C_INLINE
55AC_TYPE_SIZE_T
56 52
57AC_FUNC_ALLOCA 53AC_FUNC_ALLOCA
58AC_CHECK_FUNCS([strdup strndup]) 54AC_CHECK_FUNCS([strdup strndup])
59 55
60PKG_PROG_PKG_CONFIG 56PKG_PROG_PKG_CONFIG
61 57
62PKG_CHECK_MODULES([EFL], [ecore-evas])
63PKG_CHECK_MODULES([SPICE], [spice-client-gtk-2.0 ecore]) 58PKG_CHECK_MODULES([SPICE], [spice-client-gtk-2.0 ecore])
64 59
65if test -z "$($PKG_CONFIG --variable=evas spice-client-gtk-2.0)" ; then 60if test -z "$($PKG_CONFIG --variable=evas spice-client-gtk-2.0)" ; then
@@ -68,6 +63,55 @@ fi
68 63
69############################## 64##############################
70 65
# Demo applications: requested by default, built only when ecore-evas exists.
AC_ARG_ENABLE([demos],
   [AS_HELP_STRING([--disable-demos], [disable demo apps @<:@default=enabled@:>@])],
   [want_demos=$enableval],
   [want_demos="yes"]
)

# Start from a known value: without this, a build_demos variable inherited
# from the environment would enable the demos even with --disable-demos.
build_demos="no"
if test "x$want_demos" = "xyes" ; then
   PKG_CHECK_EXISTS([ecore-evas], [build_demos="yes"], [build_demos="no"])
fi

# Only fetch DEMO_CFLAGS/DEMO_LIBS once the dependency is known to exist.
if test "x$build_demos" = "xyes" ; then
   PKG_CHECK_MODULES([DEMO], [ecore-evas])
fi
AM_CONDITIONAL([BUILD_DEMOS], [test "x${build_demos}" = "xyes"])
80
81
# Enchilada module for Enlightenment: requested by default, built only when
# both the enlightenment and libvirt pkg-config modules are found.
AC_ARG_ENABLE([enchilada],
   [AS_HELP_STRING([--disable-enchilada], [disable enchilada module for enlightenment. @<:@default=enabled@:>@])],
   [want_enchilada=$enableval],
   [want_enchilada="yes"]
)

# MODULE_ARCH is emitted as a quoted C string: its value contains dashes and
# dots, so an unquoted definition could not be used from C code.
build_enchilada="no"
if test "x$want_enchilada" = "xyes" ; then
   PKG_CHECK_MODULES([E], [enlightenment libvirt],
      [
       build_enchilada="yes"
       MODULE_DIR=$(${PKG_CONFIG} --variable=modules enlightenment)
       release=$(${PKG_CONFIG} --variable=release enlightenment)
       MODULE_ARCH="$host_os-$host_cpu-$release"
       AC_DEFINE_UNQUOTED([MODULE_ARCH], ["$MODULE_ARCH"], [Module architecture])
       enchilada_desktop="src/modules/module.desktop"
      ],
      [build_enchilada="no"])
fi

AC_SUBST([MODULE_ARCH])

AM_CONDITIONAL([BUILD_ENCHILADA], [test "x$build_enchilada" = "xyes"])
105
# Optionally install the module below the user's home directory.
# The action-if-given branch also runs for --disable-enchilada-homedir-install,
# so the value of enableval has to be checked before MODULE_DIR is overridden.
AC_ARG_ENABLE([enchilada-homedir-install],
   [AS_HELP_STRING([--enable-enchilada-homedir-install], [Install modules in homedir])],
   [AS_IF([test "x$enableval" != "xno"], [MODULE_DIR="${HOME}/.e/e/modules"])]
)

AC_SUBST([MODULE_DIR])
112
113##############################
114
71m4_ifdef([v_rev], 115m4_ifdef([v_rev],
72 [ 116 [
73 EFL_COMPILER_FLAG([-Wshadow]) 117 EFL_COMPILER_FLAG([-Wshadow])
@@ -77,6 +121,7 @@ m4_ifdef([v_rev],
77 121
78AC_CONFIG_FILES([ 122AC_CONFIG_FILES([
79 Makefile 123 Makefile
124 $enchilada_desktop
80]) 125])
81AC_OUTPUT 126AC_OUTPUT
82 127
diff --git a/src/bin/Makefile.mk b/src/bin/Makefile.mk
index d4650a8..2c30d5b 100644
--- a/src/bin/Makefile.mk
+++ b/src/bin/Makefile.mk
@@ -1,3 +1,4 @@
1if BUILD_DEMOS
1bin_PROGRAMS += src/bin/demo 2bin_PROGRAMS += src/bin/demo
2 3
3src_bin_demo_SOURCES =\ 4src_bin_demo_SOURCES =\
@@ -7,9 +8,10 @@ src_bin_demo_CPPFLAGS =\
7-I$(top_builddir) \ 8-I$(top_builddir) \
8-I$(top_srcdir)/src/lib \ 9-I$(top_srcdir)/src/lib \
9@SPICE_CFLAGS@ \ 10@SPICE_CFLAGS@ \
10@EFL_CFLAGS@ 11@DEMO_CFLAGS@
11 12
12src_bin_demo_LDADD =\ 13src_bin_demo_LDADD =\
13@SPICE_LIBS@ \ 14@SPICE_LIBS@ \
14@EFL_LIBS@ \ 15@DEMO_LIBS@ \
15$(top_builddir)/src/lib/libburrito.la 16$(top_builddir)/src/lib/libburrito.la
17endif
diff --git a/src/modules/Makefile.mk b/src/modules/Makefile.mk
new file mode 100644
index 0000000..1d6c9db
--- /dev/null
+++ b/src/modules/Makefile.mk
@@ -0,0 +1,45 @@
if BUILD_ENCHILADA

# The desktop entry is generated from its .in template: ship the template and
# remove the generated file on distclean.
EXTRA_DIST += src/modules/module.desktop.in
DISTCLEANFILES += src/modules/module.desktop

# Not distributed yet:
#src/modules/e-module-enchilada.edc

# Data files installed at the top of the module directory.
enchilada_filesdir = $(MODULE_DIR)/enchilada
enchilada_files_DATA = src/modules/module.desktop

# Not installed yet:
#src/modules/e-module-enchilada.edj

# The loadable module goes into the architecture-specific subdirectory.
pkgdir = $(MODULE_DIR)/enchilada/$(MODULE_ARCH)
pkg_LTLIBRARIES = src/modules/module.la

src_modules_module_la_SOURCES = src/modules/e_mod_main.h \
src/modules/e_mod_main.c \
src/modules/pugixml.cpp \
src/modules/pugixml.hpp \
src/modules/pugiconfig.hpp \
src/modules/virt.c \
src/modules/virt_xml.cpp

src_modules_module_la_CPPFLAGS = -I$(top_srcdir) \
-I$(top_srcdir)/src/lib \
-DPACKAGE_DATA_DIR=\"$(MODULE_DIR)/enchilada\" \
@E_CFLAGS@ \
@SPICE_CFLAGS@

src_modules_module_la_LIBADD = @E_LIBS@ \
@SPICE_LIBS@ \
$(top_builddir)/src/lib/libburrito.la

src_modules_module_la_LDFLAGS = -module -avoid-version

# Theme build rule, disabled until the .edc exists:
#src/modules/e-module-enchilada.edj: src/modules/e-module-enchilada.edc
#	$(edje_cc) $< $@

endif
diff --git a/src/modules/e_mod_main.c b/src/modules/e_mod_main.c
new file mode 100644
index 0000000..a23939b
--- /dev/null
+++ b/src/modules/e_mod_main.c
@@ -0,0 +1,89 @@
1#include "e_mod_main.h"
2
3EINTERN int _e_enchilada_log_dom = -1;
4EINTERN Mod *enchilada_mod = NULL;
5//EINTERN Enchilada_Config *enchilada_config = NULL;
6
7EAPI E_Module_Api e_modapi = {E_MODULE_API_VERSION, "Enchilada"};
8
9EINTERN Eina_Inlist *domains = NULL;
10EINTERN Eina_Hash *domains_hash = NULL;
11
12EAPI void *
13e_modapi_init(E_Module *m)
14{
15 char buf[PATH_MAX];
16
17 snprintf(buf, sizeof(buf), "%s/e-module-enchilada.edj", e_module_dir_get(m));
18 //e_configure_registry_category_add("applications", 20, _("Apps"), NULL,
19 //"preferences-applications");
20 //e_configure_registry_item_add("applications/enchilada", 1, _("Enchilada"), NULL,
21 //buf, e_int_config_enchilada_module);
22
23 enchilada_mod = E_NEW(Mod, 1);
24 enchilada_mod->module = m;
25 m->data = enchilada_mod;
26 //conf_edd = e_enchilada_config_dd_new();
27 //enchilada_config = e_config_domain_load("module.enchilada", conf_edd);
28 //if (enchilada_config)
29 //{
30 //if (!e_util_module_config_check(_("Enchilada"), enchilada_config->config_version, MOD_CONFIG_FILE_VERSION))
31 //E_FREE_FUNC(enchilada_config, free);
32 //}
33//
34 //if (enchilada_config)
35 //{
36 ///* sanity checks */
37 //enchilada_config->mouse_out_delay = E_CLAMP(enchilada_config->mouse_out_delay, 0.0, 5.0);
38 //enchilada_config->popup_size = E_CLAMP(enchilada_config->popup_size, 10.0, 100.0);
39 //enchilada_config->popup_opacity = E_CLAMP(enchilada_config->popup_opacity, 10.0, 100.0);
40 //enchilada_config->allowed_media_fetch_size = E_CLAMP(enchilada_config->allowed_media_fetch_size, 1, 50);
41 //}
42 //else
43 //enchilada_config = e_enchilada_config_new();
44 //enchilada_config->config_version = MOD_CONFIG_FILE_VERSION;
45
46 _e_enchilada_log_dom = eina_log_domain_register("enchilada_mod", EINA_COLOR_ORANGE);
47 eina_log_domain_level_set("enchilada_mod", EINA_LOG_LEVEL_DBG);
48
49 domains_hash = eina_hash_string_djb2_new(NULL);
50
51 if (!virt_init())
52 {
53 e_modapi_shutdown(NULL);
54 return NULL;
55 }
56 //if (!e_enchilada_init())
57 //{
58 //e_modapi_shutdown(NULL);
59 //return NULL;
60 //}
61
62 return m;
63}
64
65EAPI int
66e_modapi_shutdown(E_Module *m EINA_UNUSED)
67{
68 //E_CONFIG_DD_FREE(conf_edd);
69 //eina_log_domain_unregister(_e_enchilada_log_dom);
70 //_e_enchilada_log_dom = -1;
71
72 //e_configure_registry_item_del("applications/enchilada");
73 //e_configure_registry_category_del("applications");
74
75 //E_FREE(enchilada_config);
76 virt_shutdown();
77 E_FREE_FUNC(domains_hash, eina_hash_free);
78 E_FREE(enchilada_mod);
79 return 1;
80}
81
82EAPI int
83e_modapi_save(E_Module *m EINA_UNUSED)
84{
85 //e_config_domain_save("module.enchilada", conf_edd, enchilada_config);
86 return 1;
87}
88
89
diff --git a/src/modules/e_mod_main.h b/src/modules/e_mod_main.h
new file mode 100644
index 0000000..fddc72d
--- /dev/null
+++ b/src/modules/e_mod_main.h
@@ -0,0 +1,50 @@
1#ifndef E_MOD_MAIN_H
2#define E_MOD_MAIN_H
3
4#ifdef HAVE_CONFIG_H
5#include "config.h"
6#endif
7
8#include "Burrito.h"
9#include <e.h>
10
11
12typedef struct Mod
13{
14 E_Module *module;
15 void *conn;
16} Mod;
17
18typedef struct Virt_Domain
19{
20 EINA_INLIST;
21 Eina_Stringshare *name;
22 Eina_Stringshare *uuid;
23 Eina_Stringshare *uri;
24 Eina_Stringshare *addr;
25 unsigned int port;
26 unsigned int tls_port;
27 char *password;
28} Virt_Domain;
29
30extern Eina_Inlist *domains;
31extern Eina_Hash *domains_hash;
32extern Mod *enchilada_mod;
33
34EINTERN Eina_Bool virt_init(void);
35EINTERN void virt_shutdown(void);
36
37#undef DBG
38#undef INF
39#undef WRN
40#undef ERR
41#undef CRIT
42#define DBG(...) EINA_LOG_DOM_DBG(_e_enchilada_log_dom, __VA_ARGS__)
43#define INF(...) EINA_LOG_DOM_INFO(_e_enchilada_log_dom, __VA_ARGS__)
44#define WRN(...) EINA_LOG_DOM_WARN(_e_enchilada_log_dom, __VA_ARGS__)
45#define ERR(...) EINA_LOG_DOM_ERR(_e_enchilada_log_dom, __VA_ARGS__)
46#define CRIT(...) EINA_LOG_DOM_CRIT(_e_enchilada_log_dom, __VA_ARGS__)
47
48EAPI int e_modapi_shutdown(E_Module *m EINA_UNUSED);
49
50#endif
diff --git a/src/modules/module.desktop.in b/src/modules/module.desktop.in
new file mode 100644
index 0000000..6a9c169
--- /dev/null
+++ b/src/modules/module.desktop.in
@@ -0,0 +1,6 @@
1[Desktop Entry]
2Type=Link
3Name=Enchilada
4Icon=e-module-enchilada
5Comment=Enchiladas for your compositor
6X-Enlightenment-ModuleType=system
diff --git a/src/modules/pugiconfig.hpp b/src/modules/pugiconfig.hpp
new file mode 100644
index 0000000..c219671
--- /dev/null
+++ b/src/modules/pugiconfig.hpp
@@ -0,0 +1,69 @@
1/**
2 * pugixml parser - version 1.2
3 * --------------------------------------------------------
4 * Copyright (C) 2006-2012, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5 * Report bugs and download new versions at http://pugixml.org/
6 *
7 * This library is distributed under the MIT License. See notice at the end
8 * of this file.
9 *
10 * This work is based on the pugxml parser, which is:
11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
12 */
13
14#ifndef HEADER_PUGICONFIG_HPP
15#define HEADER_PUGICONFIG_HPP
16
17// Uncomment this to enable wchar_t mode
18// #define PUGIXML_WCHAR_MODE
19
20// Uncomment this to disable XPath
21// #define PUGIXML_NO_XPATH
22
23// Uncomment this to disable STL
24// #define PUGIXML_NO_STL
25
26// Uncomment this to disable exceptions
27// #define PUGIXML_NO_EXCEPTIONS
28
29// Set this to control attributes for public classes/functions, i.e.:
30// #define PUGIXML_API __declspec(dllexport) // to export all public symbols from DLL
31// #define PUGIXML_CLASS __declspec(dllimport) // to import all classes from DLL
32// #define PUGIXML_FUNCTION __fastcall // to set calling conventions to all public functions to fastcall
33// In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead
34
35// Uncomment this to switch to header-only version
36// #define PUGIXML_HEADER_ONLY
37// #include "pugixml.cpp"
38
39// Tune these constants to adjust memory-related behavior
40// #define PUGIXML_MEMORY_PAGE_SIZE 32768
41// #define PUGIXML_MEMORY_OUTPUT_STACK 10240
42// #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096
43
44#endif
45
46/**
47 * Copyright (c) 2006-2012 Arseny Kapoulkine
48 *
49 * Permission is hereby granted, free of charge, to any person
50 * obtaining a copy of this software and associated documentation
51 * files (the "Software"), to deal in the Software without
52 * restriction, including without limitation the rights to use,
53 * copy, modify, merge, publish, distribute, sublicense, and/or sell
54 * copies of the Software, and to permit persons to whom the
55 * Software is furnished to do so, subject to the following
56 * conditions:
57 *
58 * The above copyright notice and this permission notice shall be
59 * included in all copies or substantial portions of the Software.
60 *
61 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
62 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
63 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
64 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
65 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
66 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
67 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
68 * OTHER DEALINGS IN THE SOFTWARE.
69 */
diff --git a/src/modules/pugixml.cpp b/src/modules/pugixml.cpp
new file mode 100644
index 0000000..4035ab1
--- /dev/null
+++ b/src/modules/pugixml.cpp
@@ -0,0 +1,10250 @@
1/**
2 * pugixml parser - version 1.2
3 * --------------------------------------------------------
4 * Copyright (C) 2006-2012, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5 * Report bugs and download new versions at http://pugixml.org/
6 *
7 * This library is distributed under the MIT License. See notice at the end
8 * of this file.
9 *
10 * This work is based on the pugxml parser, which is:
11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
12 */
13
14#ifndef SOURCE_PUGIXML_CPP
15#define SOURCE_PUGIXML_CPP
16
17#include "pugixml.hpp"
18
19#include <stdlib.h>
20#include <stdio.h>
21#include <string.h>
22#include <assert.h>
23#include <wchar.h>
24
25#ifndef PUGIXML_NO_XPATH
26# include <math.h>
27# include <float.h>
28# ifdef PUGIXML_NO_EXCEPTIONS
29# include <setjmp.h>
30# endif
31#endif
32
33#ifndef PUGIXML_NO_STL
34# include <istream>
35# include <ostream>
36# include <string>
37#endif
38
39// For placement new
40#include <new>
41
42#ifdef _MSC_VER
43# pragma warning(push)
44# pragma warning(disable: 4127) // conditional expression is constant
45# pragma warning(disable: 4324) // structure was padded due to __declspec(align())
46# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
47# pragma warning(disable: 4702) // unreachable code
48# pragma warning(disable: 4996) // this function or variable may be unsafe
49# pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
50#endif
51
52#ifdef __INTEL_COMPILER
53# pragma warning(disable: 177) // function was declared but never referenced
54# pragma warning(disable: 279) // controlling expression is constant
55# pragma warning(disable: 1478 1786) // function was declared "deprecated"
56# pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
57#endif
58
59#if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
60# pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
61#endif
62
63#ifdef __BORLANDC__
64# pragma option push
65# pragma warn -8008 // condition is always false
66# pragma warn -8066 // unreachable code
67#endif
68
69#ifdef __SNC__
70// Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
71# pragma diag_suppress=178 // function was declared but never referenced
72# pragma diag_suppress=237 // controlling expression is constant
73#endif
74
75// Inlining controls
76#if defined(_MSC_VER) && _MSC_VER >= 1300
77# define PUGI__NO_INLINE __declspec(noinline)
78#elif defined(__GNUC__)
79# define PUGI__NO_INLINE __attribute__((noinline))
80#else
81# define PUGI__NO_INLINE
82#endif
83
84// Simple static assertion
85#define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
86
87// Digital Mars C++ bug workaround for passing char loaded from memory via stack
88#ifdef __DMC__
89# define PUGI__DMC_VOLATILE volatile
90#else
91# define PUGI__DMC_VOLATILE
92#endif
93
94// Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
95#if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
96using std::memcpy;
97using std::memmove;
98#endif
99
100// In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
101#if defined(_MSC_VER) && !defined(__S3E__)
102# define PUGI__MSVC_CRT_VERSION _MSC_VER
103#endif
104
105#ifdef PUGIXML_HEADER_ONLY
106# define PUGI__NS_BEGIN namespace pugi { namespace impl {
107# define PUGI__NS_END } }
108# define PUGI__FN inline
109# define PUGI__FN_NO_INLINE inline
110#else
111# if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
112# define PUGI__NS_BEGIN namespace pugi { namespace impl {
113# define PUGI__NS_END } }
114# else
115# define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
116# define PUGI__NS_END } } }
117# endif
118# define PUGI__FN
119# define PUGI__FN_NO_INLINE PUGI__NO_INLINE
120#endif
121
122// uintptr_t
123#if !defined(_MSC_VER) || _MSC_VER >= 1600
124# include <stdint.h>
125#else
126# ifndef _UINTPTR_T_DEFINED
127// No native uintptr_t in MSVC6 and in some WinCE versions
128typedef size_t uintptr_t;
129#define _UINTPTR_T_DEFINED
130# endif
131PUGI__NS_BEGIN
132 typedef unsigned __int8 uint8_t;
133 typedef unsigned __int16 uint16_t;
134 typedef unsigned __int32 uint32_t;
135PUGI__NS_END
136#endif
137
138// Memory allocation
139PUGI__NS_BEGIN
140 PUGI__FN void* default_allocate(size_t size)
141 {
142 return malloc(size);
143 }
144
145 PUGI__FN void default_deallocate(void* ptr)
146 {
147 free(ptr);
148 }
149
150 template <typename T>
151 struct xml_memory_management_function_storage
152 {
153 static allocation_function allocate;
154 static deallocation_function deallocate;
155 };
156
157 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
158 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
159
160 typedef xml_memory_management_function_storage<int> xml_memory;
161PUGI__NS_END
162
163// String utilities
164PUGI__NS_BEGIN
165 // Get string length
166 PUGI__FN size_t strlength(const char_t* s)
167 {
168 assert(s);
169
170 #ifdef PUGIXML_WCHAR_MODE
171 return wcslen(s);
172 #else
173 return strlen(s);
174 #endif
175 }
176
177 // Compare two strings
178 PUGI__FN bool strequal(const char_t* src, const char_t* dst)
179 {
180 assert(src && dst);
181
182 #ifdef PUGIXML_WCHAR_MODE
183 return wcscmp(src, dst) == 0;
184 #else
185 return strcmp(src, dst) == 0;
186 #endif
187 }
188
189 // Compare lhs with [rhs_begin, rhs_end)
190 PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
191 {
192 for (size_t i = 0; i < count; ++i)
193 if (lhs[i] != rhs[i])
194 return false;
195
196 return lhs[count] == 0;
197 }
198
#ifdef PUGIXML_WCHAR_MODE
// Copies source into dest one character at a time, converting each char to
// wchar_t, and appends the terminator. The per-character conversion is only
// correct for ASCII input; dest must have room for strlen(source) + 1 elements.
PUGI__FN void widen_ascii(wchar_t* dest, const char* source)
{
	const char* in = source;

	while (*in)
	{
		*dest = *in;
		++dest;
		++in;
	}

	*dest = 0;
}
#endif
207PUGI__NS_END
208
209#if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH)
210// auto_ptr-like buffer holder for exception recovery
211PUGI__NS_BEGIN
// Scope guard around an opaque buffer: unless release() was called, the
// destructor passes the buffer to the deleter supplied at construction.
struct buffer_holder
{
	void* data;
	void (*deleter)(void*);

	buffer_holder(void* data_, void (*deleter_)(void*))
	{
		data = data_;
		deleter = deleter_;
	}

	~buffer_holder()
	{
		if (!data) return;

		deleter(data);
	}

	// Gives up ownership: returns the buffer and disarms the destructor.
	void* release()
	{
		void* released = data;

		data = 0;

		return released;
	}
};
233PUGI__NS_END
234#endif
235
236PUGI__NS_BEGIN
// Capacity of a regular memory page; PUGIXML_MEMORY_PAGE_SIZE overrides the
// built-in value when it is defined.
#ifdef PUGIXML_MEMORY_PAGE_SIZE
static const size_t xml_memory_page_size = PUGIXML_MEMORY_PAGE_SIZE;
#else
static const size_t xml_memory_page_size = 32768;
#endif

// Pages are aligned to 32 bytes, which leaves the five low bits of a page
// address free; node/attribute headers store flags in them.
static const uintptr_t xml_memory_page_alignment = 32;

// Header bit layout: page pointer | name-allocated | value-allocated | type.
static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);
static const uintptr_t xml_memory_page_name_allocated_mask = 16;
static const uintptr_t xml_memory_page_value_allocated_mask = 8;
static const uintptr_t xml_memory_page_type_mask = 7;
250
struct xml_allocator;

// Bookkeeping header at the start of every memory page; the page payload
// begins at the data member.
struct xml_memory_page
{
	// Initializes a page header in place with every field zeroed.
	// Returns null when memory is null.
	static xml_memory_page* construct(void* memory)
	{
		if (!memory) return 0; //$ redundant, left for performance

		xml_memory_page* page = static_cast<xml_memory_page*>(memory);

		page->busy_size = 0;
		page->freed_size = 0;

		page->prev = 0;
		page->next = 0;

		page->memory = 0;
		page->allocator = 0;

		return page;
	}

	// allocator this page belongs to
	xml_allocator* allocator;

	// pointer handed back to the deallocation function for this page
	void* memory;

	// neighbours in the page list
	xml_memory_page* prev;
	xml_memory_page* next;

	// bytes handed out from data / bytes returned so far
	size_t busy_size;
	size_t freed_size;

	// first byte of the payload
	char data[1];
};
283
// Small header stored directly in front of every string that lives inside a
// memory page.
struct xml_memory_string_header
{
	// distance of this header from page->data
	uint16_t page_offset;

	// size of the allocation; 0 means the string occupies the whole page
	uint16_t full_size;
};
289
// Page-based allocator. Small requests are carved sequentially out of the
// current root page; requests that do not fit are served by
// allocate_memory_oob(), which obtains a new page. Every page tracks how many
// bytes were handed out (busy_size) and returned (freed_size), so a page that
// is not the last one can be released as soon as all of it has been freed.
290 struct xml_allocator
291 {
// Starts allocating from the given page, continuing at its busy_size.
292 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
293 {
294 }
295
// Obtains a page with room for data_size payload bytes from
// xml_memory::allocate. Returns null when that allocation fails. The
// unaligned pointer is kept in page->memory for deallocate_page().
296 xml_memory_page* allocate_page(size_t data_size)
297 {
298 size_t size = offsetof(xml_memory_page, data) + data_size;
299
300 // allocate block with some alignment, leaving memory for worst-case padding
301 void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
302 if (!memory) return 0;
303
304 // align upwards to page boundary
305 void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));
306
307 // prepare page structure
308 xml_memory_page* page = xml_memory_page::construct(page_memory);
309
310 page->memory = memory;
311 page->allocator = _root->allocator;
312
313 return page;
314 }
315
// Returns the page's underlying block to xml_memory::deallocate.
316 static void deallocate_page(xml_memory_page* page)
317 {
318 xml_memory::deallocate(page->memory);
319 }
320
// Slow path of allocate_memory(); defined after the struct.
321 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
322
// Hands out size bytes and reports the page they came from in out_page.
// Falls back to allocate_memory_oob() when the root page cannot hold the
// request; that path returns null on failure.
323 void* allocate_memory(size_t size, xml_memory_page*& out_page)
324 {
325 if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);
326
327 void* buf = _root->data + _busy_size;
328
329 _busy_size += size;
330
331 out_page = _root;
332
333 return buf;
334 }
335
// Records size bytes of page as freed. Once everything handed out from a
// page has been freed, the page is either reset (when it is the last page in
// the list) or unlinked and released.
336 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
337 {
// the root page's busy_size is only synchronized lazily from _busy_size
338 if (page == _root) page->busy_size = _busy_size;
339
340 assert(ptr >= page->data && ptr < page->data + page->busy_size);
// presumably keeps ptr referenced when the assert above is compiled out
341 (void)!ptr;
342
343 page->freed_size += size;
344 assert(page->freed_size <= page->busy_size);
345
346 if (page->freed_size == page->busy_size)
347 {
348 if (page->next == 0)
349 {
350 assert(_root == page);
351
352 // top page freed, just reset sizes
353 page->busy_size = page->freed_size = 0;
354 _busy_size = 0;
355 }
356 else
357 {
358 assert(_root != page);
359 assert(page->prev);
360
361 // remove from the list
362 page->prev->next = page->next;
363 page->next->prev = page->prev;
364
365 // deallocate
366 deallocate_page(page);
367 }
368 }
369 }
370
// Allocates storage for length char_t units preceded by an
// xml_memory_string_header and returns a pointer to the character storage
// (just past the header), or null when the allocation fails.
371 char_t* allocate_string(size_t length)
372 {
373 // allocate memory for string and header block
374 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
375
376 // round size up to pointer alignment boundary
377 size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
378
379 xml_memory_page* page;
380 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
381
382 if (!header) return 0;
383
384 // setup header
385 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - page->data;
386
387 assert(page_offset >= 0 && page_offset < (1 << 16));
388 header->page_offset = static_cast<uint16_t>(page_offset);
389
390 // full_size == 0 for large strings that occupy the whole page
391 assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0));
392 header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);
393
394 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
395 // header is guaranteed a pointer-sized alignment, which should be enough for char_t
396 return static_cast<char_t*>(static_cast<void*>(header + 1));
397 }
398
// Releases a string obtained from allocate_string(): the header in front of
// the characters gives the offset back to the owning page and the size.
399 void deallocate_string(char_t* string)
400 {
401 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
402 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
403
404 // get header
405 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
406
407 // deallocate
408 size_t page_offset = offsetof(xml_memory_page, data) + header->page_offset;
409 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
410
411 // if full_size == 0 then this string occupies the whole page
412 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;
413
414 deallocate_memory(header, full_size, page);
415 }
416
// page that sequential allocations are currently served from
417 xml_memory_page* _root;
// bytes already handed out from _root (copied to _root->busy_size lazily)
418 size_t _busy_size;
419 };
420
// Slow path of allocate_memory(): serves a request that does not fit into the
// current root page by allocating a new page.
// - Requests of at most a quarter of the page size get a regular page, which
//   becomes the new root for subsequent sequential allocations.
// - Larger requests get a dedicated page sized for the request; it is linked
//   in front of the root page, so the root keeps serving small requests.
// Returns the start of the new page's data, or null (with out_page set to
// null) when the page cannot be allocated.
421 PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
422 {
423 const size_t large_allocation_threshold = xml_memory_page_size / 4;
424
425 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
426 out_page = page;
427
428 if (!page) return 0;
429
430 if (size <= large_allocation_threshold)
431 {
// publish the final fill level of the page being retired as root
432 _root->busy_size = _busy_size;
433
434 // insert page at the end of linked list
435 page->prev = _root;
436 _root->next = page;
437 _root = page;
438
439 _busy_size = size;
440 }
441 else
442 {
443 // insert page before the end of linked list, so that it is deleted as soon as possible
444 // the last page is not deleted even if it's empty (see deallocate_memory)
// NOTE(review): relies on the root page already having a predecessor - confirm callers guarantee this
445 assert(_root->prev);
446
447 page->prev = _root->prev;
448 page->next = _root;
449
450 _root->prev->next = page;
451 _root->prev = page;
452 }
453
454 // allocate inside page
455 page->busy_size = size;
456
457 return page->data;
458 }
459PUGI__NS_END
460
461namespace pugi
462{
463 /// A 'name=value' XML attribute structure.
464 struct xml_attribute_struct
465 {
466 /// Default ctor
467 xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)
468 {
469 }
470
471 uintptr_t header;
472
473 char_t* name; ///< Pointer to attribute name.
474 char_t* value; ///< Pointer to attribute value.
475
476 xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list)
477 xml_attribute_struct* next_attribute; ///< Next attribute
478 };
479
480 /// An XML document tree node.
481 struct xml_node_struct
482 {
483 /// Default ctor
484 /// \param type - node type
485 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
486 {
487 }
488
489 uintptr_t header;
490
491 xml_node_struct* parent; ///< Pointer to parent
492
493 char_t* name; ///< Pointer to element name.
494 char_t* value; ///< Pointer to any associated string data.
495
496 xml_node_struct* first_child; ///< First child
497
498 xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list)
499 xml_node_struct* next_sibling; ///< Right brother
500
501 xml_attribute_struct* first_attribute; ///< First attribute
502 };
503}
504
505PUGI__NS_BEGIN
506 struct xml_document_struct: public xml_node_struct, public xml_allocator
507 {
508 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0)
509 {
510 }
511
512 const char_t* buffer;
513 };
514
515 inline xml_allocator& get_allocator(const xml_node_struct* node)
516 {
517 assert(node);
518
519 return *reinterpret_cast<xml_memory_page*>(node->header & xml_memory_page_pointer_mask)->allocator;
520 }
521PUGI__NS_END
522
523// Low-level DOM operations
524PUGI__NS_BEGIN
525 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
526 {
527 xml_memory_page* page;
528 void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);
529
530 return new (memory) xml_attribute_struct(page);
531 }
532
533 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
534 {
535 xml_memory_page* page;
536 void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);
537
538 return new (memory) xml_node_struct(page, type);
539 }
540
541 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
542 {
543 uintptr_t header = a->header;
544
545 if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name);
546 if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value);
547
548 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
549 }
550
551 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
552 {
553 uintptr_t header = n->header;
554
555 if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name);
556 if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value);
557
558 for (xml_attribute_struct* attr = n->first_attribute; attr; )
559 {
560 xml_attribute_struct* next = attr->next_attribute;
561
562 destroy_attribute(attr, alloc);
563
564 attr = next;
565 }
566
567 for (xml_node_struct* child = n->first_child; child; )
568 {
569 xml_node_struct* next = child->next_sibling;
570
571 destroy_node(child, alloc);
572
573 child = next;
574 }
575
576 alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
577 }
578
579 PUGI__FN_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
580 {
581 xml_node_struct* child = allocate_node(alloc, type);
582 if (!child) return 0;
583
584 child->parent = node;
585
586 xml_node_struct* first_child = node->first_child;
587
588 if (first_child)
589 {
590 xml_node_struct* last_child = first_child->prev_sibling_c;
591
592 last_child->next_sibling = child;
593 child->prev_sibling_c = last_child;
594 first_child->prev_sibling_c = child;
595 }
596 else
597 {
598 node->first_child = child;
599 child->prev_sibling_c = child;
600 }
601
602 return child;
603 }
604
605 PUGI__FN_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc)
606 {
607 xml_attribute_struct* a = allocate_attribute(alloc);
608 if (!a) return 0;
609
610 xml_attribute_struct* first_attribute = node->first_attribute;
611
612 if (first_attribute)
613 {
614 xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c;
615
616 last_attribute->next_attribute = a;
617 a->prev_attribute_c = last_attribute;
618 first_attribute->prev_attribute_c = a;
619 }
620 else
621 {
622 node->first_attribute = a;
623 a->prev_attribute_c = a;
624 }
625
626 return a;
627 }
628PUGI__NS_END
629
630// Helper classes for code generation
631PUGI__NS_BEGIN
// Compile-time "off" switch: a tag type whose nested `value` constant is 0.
struct opt_false
{
    enum
    {
        value = 0
    };
};
636
// Compile-time "on" switch: a tag type whose nested `value` constant is 1.
struct opt_true
{
    enum
    {
        value = 1
    };
};
641PUGI__NS_END
642
643// Unicode utilities
644PUGI__NS_BEGIN
// Returns `value` with its two bytes exchanged.
inline uint16_t endian_swap(uint16_t value)
{
    const unsigned int low_byte = value & 0xff;
    const unsigned int high_byte = value >> 8;

    return static_cast<uint16_t>((low_byte << 8) | high_byte);
}
649
// Returns `value` with its four bytes in reverse order.
inline uint32_t endian_swap(uint32_t value)
{
    const uint32_t byte0_to_top = (value & 0xff) << 24;
    const uint32_t byte1_up = (value & 0xff00) << 8;
    const uint32_t byte2_down = (value & 0xff0000) >> 8;
    const uint32_t byte3_to_bottom = value >> 24;

    return byte0_to_top | byte1_up | byte2_down | byte3_to_bottom;
}
654
// Decoder traits that only count: the running result is the number of UTF-8
// code units needed to encode the code points seen so far.
struct utf8_counter
{
    typedef size_t value_type;

    // Code points below U+10000 take one, two or three UTF-8 bytes.
    static value_type low(value_type result, uint32_t ch)
    {
        size_t units = 3; // U+0800..U+FFFF

        if (ch < 0x80) units = 1; // U+0000..U+007F
        else if (ch < 0x800) units = 2; // U+0080..U+07FF

        return result + units;
    }

    // Code points U+10000..U+10FFFF always take four UTF-8 bytes.
    static value_type high(value_type result, uint32_t)
    {
        return result + 4;
    }
};
675
// Decoder traits that emit UTF-8: the running result is the output cursor,
// which is advanced past the bytes written for each code point.
struct utf8_writer
{
    typedef uint8_t* value_type;

    // Writes a code point below U+10000 as one, two or three bytes.
    static value_type low(value_type result, uint32_t ch)
    {
        // U+0800..U+FFFF
        if (ch >= 0x800)
        {
            *result++ = static_cast<uint8_t>(0xE0 | (ch >> 12));
            *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
            *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));

            return result;
        }

        // U+0080..U+07FF
        if (ch >= 0x80)
        {
            *result++ = static_cast<uint8_t>(0xC0 | (ch >> 6));
            *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));

            return result;
        }

        // U+0000..U+007F
        *result++ = static_cast<uint8_t>(ch);

        return result;
    }

    // Writes a code point in U+10000..U+10FFFF as four bytes.
    static value_type high(value_type result, uint32_t ch)
    {
        *result++ = static_cast<uint8_t>(0xF0 | (ch >> 18));
        *result++ = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
        *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
        *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));

        return result;
    }

    // Dispatches to low() or high() depending on the code point range.
    static value_type any(value_type result, uint32_t ch)
    {
        if (ch < 0x10000) return low(result, ch);

        return high(result, ch);
    }
};
720
// Decoder traits that only count: the running result is the number of UTF-16
// code units needed for the code points seen so far.
struct utf16_counter
{
    typedef size_t value_type;

    // Code points handled by low() occupy a single UTF-16 unit.
    static value_type low(value_type result, uint32_t)
    {
        const size_t units = 1;

        return result + units;
    }

    // Code points handled by high() occupy two UTF-16 units.
    static value_type high(value_type result, uint32_t)
    {
        const size_t units = 2;

        return result + units;
    }
};
735
// Decoder traits that emit UTF-16: the running result is the output cursor,
// which is advanced past the units written for each code point.
struct utf16_writer
{
    typedef uint16_t* value_type;

    // Stores the code point as a single UTF-16 unit.
    static value_type low(value_type result, uint32_t ch)
    {
        result[0] = static_cast<uint16_t>(ch);

        return result + 1;
    }

    // Stores the code point as a surrogate pair (0xD800-based unit, then 0xDC00-based unit).
    static value_type high(value_type result, uint32_t ch)
    {
        const uint32_t offset = static_cast<uint32_t>(ch - 0x10000);
        const uint32_t upper_bits = offset >> 10;
        const uint32_t lower_bits = offset & 0x3ff;

        result[0] = static_cast<uint16_t>(0xD800 + upper_bits);
        result[1] = static_cast<uint16_t>(0xDC00 + lower_bits);

        return result + 2;
    }

    // Dispatches to low() or high() depending on the code point range.
    static value_type any(value_type result, uint32_t ch)
    {
        if (ch < 0x10000) return low(result, ch);

        return high(result, ch);
    }
};
763
// Decoder traits that only count: the running result is the number of UTF-32
// units, which is one per code point regardless of range.
struct utf32_counter
{
    typedef size_t value_type;

    static value_type low(value_type result, uint32_t)
    {
        const size_t units = 1;

        return result + units;
    }

    static value_type high(value_type result, uint32_t)
    {
        const size_t units = 1;

        return result + units;
    }
};
778
// Decoder traits that emit UTF-32: every code point is stored verbatim as one
// unit and the output cursor advances by one.
struct utf32_writer
{
    typedef uint32_t* value_type;

    static value_type low(value_type result, uint32_t ch)
    {
        result[0] = ch;

        return result + 1;
    }

    static value_type high(value_type result, uint32_t ch)
    {
        result[0] = ch;

        return result + 1;
    }

    static value_type any(value_type result, uint32_t ch)
    {
        result[0] = ch;

        return result + 1;
    }
};
804
// Decoder traits that emit Latin-1: one byte per code point, with '?' standing
// in for anything that does not fit in a single byte.
struct latin1_writer
{
    typedef uint8_t* value_type;

    // Code points up to 255 are stored as-is; larger ones become '?'.
    static value_type low(value_type result, uint32_t ch)
    {
        if (ch > 255)
            *result = static_cast<uint8_t>('?');
        else
            *result = static_cast<uint8_t>(ch);

        return result + 1;
    }

    // Code points routed to high() are never representable, so always write '?'.
    static value_type high(value_type result, uint32_t ch)
    {
        (void)ch;

        *result = '?';

        return result + 1;
    }
};
825
826 template <size_t size> struct wchar_selector;
827
828 template <> struct wchar_selector<2>
829 {
830 typedef uint16_t type;
831 typedef utf16_counter counter;
832 typedef utf16_writer writer;
833 };
834
835 template <> struct wchar_selector<4>
836 {
837 typedef uint32_t type;
838 typedef utf32_counter counter;
839 typedef utf32_writer writer;
840 };
841
842 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
843 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
844
845 template <typename Traits, typename opt_swap = opt_false> struct utf_decoder
846 {
847 static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result)
848 {
849 const uint8_t utf8_byte_mask = 0x3f;
850
851 while (size)
852 {
853 uint8_t lead = *data;
854
855 // 0xxxxxxx -> U+0000..U+007F
856 if (lead < 0x80)
857 {
858 result = Traits::low(result, lead);
859 data += 1;
860 size -= 1;
861
862 // process aligned single-byte (ascii) blocks
863 if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
864 {
865 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
866 while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
867 {
868 result = Traits::low(result, data[0]);
869 result = Traits::low(result, data[1]);
870 result = Traits::low(result, data[2]);
871 result = Traits::low(result, data[3]);
872 data += 4;
873 size -= 4;
874 }
875 }
876 }
877 // 110xxxxx -> U+0080..U+07FF
878 else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
879 {
880 result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
881 data += 2;
882 size -= 2;
883 }
884 // 1110xxxx -> U+0800-U+FFFF
885 else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
886 {
887 result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
888 data += 3;
889 size -= 3;
890 }
891 // 11110xxx -> U+10000..U+10FFFF
892 else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
893 {
894 result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
895 data += 4;
896 size -= 4;
897 }
898 // 10xxxxxx or 11111xxx -> invalid
899 else
900 {
901 data += 1;
902 size -= 1;
903 }
904 }
905
906 return result;
907 }
908
909 static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result)
910 {
911 const uint16_t* end = data + size;
912
913 while (data < end)
914 {
915 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
916
917 // U+0000..U+D7FF
918 if (lead < 0xD800)
919 {
920 result = Traits::low(result, lead);
921 data += 1;
922 }
923 // U+E000..U+FFFF
924 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
925 {
926 result = Traits::low(result, lead);
927 data += 1;
928 }
929 // surrogate pair lead
930 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end)
931 {
932 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
933
934 if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
935 {
936 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
937 data += 2;
938 }
939 else
940 {
941 data += 1;
942 }
943 }
944 else
945 {
946 data += 1;
947 }
948 }
949
950 return result;
951 }
952
953 static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result)
954 {
955 const uint32_t* end = data + size;
956
957 while (data < end)
958 {
959 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
960
961 // U+0000..U+FFFF
962 if (lead < 0x10000)
963 {
964 result = Traits::low(result, lead);
965 data += 1;
966 }
967 // U+10000..U+10FFFF
968 else
969 {
970 result = Traits::high(result, lead);
971 data += 1;
972 }
973 }
974
975 return result;
976 }
977
978 static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type result)
979 {
980 for (size_t i = 0; i < size; ++i)
981 {
982 result = Traits::low(result, data[i]);
983 }
984
985 return result;
986 }
987
988 static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type result)
989 {
990 return decode_utf16_block(data, size, result);
991 }
992
993 static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type result)
994 {
995 return decode_utf32_block(data, size, result);
996 }
997
998 static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type result)
999 {
1000 return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::type*>(data), size, result);
1001 }
1002 };
1003
1004 template <typename T> PUGI__FN void convert_utf_endian_swap(T* result, const T* data, size_t length)
1005 {
1006 for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]);
1007 }
1008
1009#ifdef PUGIXML_WCHAR_MODE
1010 PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
1011 {
1012 for (size_t i = 0; i < length; ++i) result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
1013 }
1014#endif
1015PUGI__NS_END
1016
1017PUGI__NS_BEGIN
// Bit flags stored in chartype_table; each flag names a set of characters.
enum chartype_t
{
    ct_parse_pcdata = 1 << 0,  // \0, &, \r, <
    ct_parse_attr = 1 << 1,    // \0, &, \r, ', "
    ct_parse_attr_ws = 1 << 2, // \0, &, \r, ', ", \n, tab
    ct_space = 1 << 3,         // \r, \n, space, tab
    ct_parse_cdata = 1 << 4,   // \0, ], >, \r
    ct_parse_comment = 1 << 5, // \0, -, >, \r
    ct_symbol = 1 << 6,        // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
    ct_start_symbol = 1 << 7   // Any symbol > 127, a-z, A-Z, _, :
};
1029
// Per-character classification table, indexed by character code (0..255).
// Each entry is a bitwise OR of chartype_t flags; it is queried through
// PUGI__IS_CHARTYPE. For example entry 13 ('\r') is 63, i.e. all six of the
// parse/space flags, and entry 45 ('-') is 96 = ct_parse_comment | ct_symbol.
// Every entry from 128 up is 192 = ct_symbol | ct_start_symbol.
1030 static const unsigned char chartype_table[256] =
1031 {
1032 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
1033 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
1034 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
1035 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
1036 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
1037 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
1038 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
1039 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
1040
1041 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
1042 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1043 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1044 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1045 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1046 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1047 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1048 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
1049 };
1050
// Bit flags stored in chartypex_table; each flag names a set of characters.
enum chartypex_t
{
    ctx_special_pcdata = 1 << 0, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
    ctx_special_attr = 1 << 1,   // Any symbol >= 0 and < 32 (except \t), &, <, >, "
    ctx_start_symbol = 1 << 2,   // Any symbol > 127, a-z, A-Z, _
    ctx_digit = 1 << 3,          // 0-9
    ctx_symbol = 1 << 4          // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
};
1059
// Second per-character classification table, indexed by character code
// (0..255). Each entry is a bitwise OR of chartypex_t flags; it is queried
// through PUGI__IS_CHARTYPEX. For example the digit entries (48..57) are
// 24 = ctx_digit | ctx_symbol, and letter entries are
// 20 = ctx_start_symbol | ctx_symbol. Every entry from 128 up is also 20.
1060 static const unsigned char chartypex_table[256] =
1061 {
1062 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
1063 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
1064 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
1065 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
1066
1067 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
1068 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
1069 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
1070 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
1071
1072 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
1073 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1074 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1075 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1076 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1077 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1078 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1079 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
1080 };
1081
// PUGI__IS_CHARTYPE_IMPL(c, ct, table) yields a non-zero value when the table
// entry for character c has any of the flag bits in ct set.
// - With PUGIXML_WCHAR_MODE defined, characters with code 128 or above all
//   share the entry table[128]; lower codes index the table directly.
// - Otherwise c is converted to unsigned char and used as the index.
// PUGI__IS_CHARTYPE and PUGI__IS_CHARTYPEX bind the lookup to chartype_table
// and chartypex_table respectively.
1082#ifdef PUGIXML_WCHAR_MODE
1083 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
1084#else
1085 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
1086#endif
1087
1088 #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
1089 #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1090
1091 PUGI__FN bool is_little_endian()
1092 {
1093 unsigned int ui = 1;
1094
1095 return *reinterpret_cast<unsigned char*>(&ui) == 1;
1096 }
1097
1098 PUGI__FN xml_encoding get_wchar_encoding()
1099 {
1100 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1101
1102 if (sizeof(wchar_t) == 2)
1103 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1104 else
1105 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1106 }
1107
1108 PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
1109 {
1110 // look for BOM in first few bytes
1111 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1112 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1113 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1114 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1115 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1116
1117 // look for <, <? or <?xm in various encodings
1118 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1119 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1120 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1121 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1122 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
1123
1124 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1125 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1126 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1127
1128 // no known BOM detected, assume utf8
1129 return encoding_utf8;
1130 }
1131
1132 PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
1133 {
1134 // replace wchar encoding with utf implementation
1135 if (encoding == encoding_wchar) return get_wchar_encoding();
1136
1137 // replace utf16 encoding with utf16 with specific endianness
1138 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1139
1140 // replace utf32 encoding with utf32 with specific endianness
1141 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1142
1143 // only do autodetection if no explicit encoding is requested
1144 if (encoding != encoding_auto) return encoding;
1145
1146 // skip encoding autodetection if input buffer is too small
1147 if (size < 4) return encoding_utf8;
1148
1149 // try to guess encoding (based on XML specification, Appendix F.1)
1150 const uint8_t* data = static_cast<const uint8_t*>(contents);
1151
1152 PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1153
1154 return guess_buffer_encoding(d0, d1, d2, d3);
1155 }
1156
1157 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1158 {
1159 if (is_mutable)
1160 {
1161 out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
1162 }
1163 else
1164 {
1165 void* buffer = xml_memory::allocate(size > 0 ? size : 1);
1166 if (!buffer) return false;
1167
1168 memcpy(buffer, contents, size);
1169
1170 out_buffer = static_cast<char_t*>(buffer);
1171 }
1172
1173 out_length = size / sizeof(char_t);
1174
1175 return true;
1176 }
1177
1178#ifdef PUGIXML_WCHAR_MODE
1179 PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
1180 {
1181 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
1182 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
1183 }
1184
1185 PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1186 {
1187 const char_t* data = static_cast<const char_t*>(contents);
1188
1189 if (is_mutable)
1190 {
1191 out_buffer = const_cast<char_t*>(data);
1192 }
1193 else
1194 {
1195 out_buffer = static_cast<char_t*>(xml_memory::allocate(size > 0 ? size : 1));
1196 if (!out_buffer) return false;
1197 }
1198
1199 out_length = size / sizeof(char_t);
1200
1201 convert_wchar_endian_swap(out_buffer, data, out_length);
1202
1203 return true;
1204 }
1205
1206 PUGI__FN bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
1207 {
1208 const uint8_t* data = static_cast<const uint8_t*>(contents);
1209
1210 // first pass: get length in wchar_t units
1211 out_length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
1212
1213 // allocate buffer of suitable length
1214 out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1215 if (!out_buffer) return false;
1216
1217 // second pass: convert utf8 input to wchar_t
1218 wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
1219 wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, out_begin);
1220
1221 assert(out_end == out_begin + out_length);
1222 (void)!out_end;
1223
1224 return true;
1225 }
1226
1227 template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1228 {
1229 const uint16_t* data = static_cast<const uint16_t*>(contents);
1230 size_t length = size / sizeof(uint16_t);
1231
1232 // first pass: get length in wchar_t units
1233 out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(data, length, 0);
1234
1235 // allocate buffer of suitable length
1236 out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1237 if (!out_buffer) return false;
1238
1239 // second pass: convert utf16 input to wchar_t
1240 wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
1241 wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
1242
1243 assert(out_end == out_begin + out_length);
1244 (void)!out_end;
1245
1246 return true;
1247 }
1248
1249 template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1250 {
1251 const uint32_t* data = static_cast<const uint32_t*>(contents);
1252 size_t length = size / sizeof(uint32_t);
1253
1254 // first pass: get length in wchar_t units
1255 out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(data, length, 0);
1256
1257 // allocate buffer of suitable length
1258 out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1259 if (!out_buffer) return false;
1260
1261 // second pass: convert utf32 input to wchar_t
1262 wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
1263 wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
1264
1265 assert(out_end == out_begin + out_length);
1266 (void)!out_end;
1267
1268 return true;
1269 }
1270
1271 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
1272 {
1273 const uint8_t* data = static_cast<const uint8_t*>(contents);
1274
1275 // get length in wchar_t units
1276 out_length = size;
1277
1278 // allocate buffer of suitable length
1279 out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1280 if (!out_buffer) return false;
1281
1282 // convert latin1 input to wchar_t
1283 wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
1284 wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_latin1_block(data, size, out_begin);
1285
1286 assert(out_end == out_begin + out_length);
1287 (void)!out_end;
1288
1289 return true;
1290 }
1291
1292 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
1293 {
1294 // get native encoding
1295 xml_encoding wchar_encoding = get_wchar_encoding();
1296
1297 // fast path: no conversion required
1298 if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
1299
1300 // only endian-swapping is required
1301 if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
1302
1303 // source encoding is utf8
1304 if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size);
1305
1306 // source encoding is utf16
1307 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
1308 {
1309 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1310
1311 return (native_encoding == encoding) ?
1312 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
1313 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
1314 }
1315
1316 // source encoding is utf32
1317 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
1318 {
1319 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1320
1321 return (native_encoding == encoding) ?
1322 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
1323 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
1324 }
1325
1326 // source encoding is latin1
1327 if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size);
1328
1329 assert(!"Invalid encoding");
1330 return false;
1331 }
1332#else
1333 template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1334 {
1335 const uint16_t* data = static_cast<const uint16_t*>(contents);
1336 size_t length = size / sizeof(uint16_t);
1337
1338 // first pass: get length in utf8 units
1339 out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, length, 0);
1340
1341 // allocate buffer of suitable length
1342 out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1343 if (!out_buffer) return false;
1344
1345 // second pass: convert utf16 input to utf8
1346 uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
1347 uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
1348
1349 assert(out_end == out_begin + out_length);
1350 (void)!out_end;
1351
1352 return true;
1353 }
1354
1355 template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1356 {
1357 const uint32_t* data = static_cast<const uint32_t*>(contents);
1358 size_t length = size / sizeof(uint32_t);
1359
1360 // first pass: get length in utf8 units
1361 out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, length, 0);
1362
1363 // allocate buffer of suitable length
1364 out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1365 if (!out_buffer) return false;
1366
1367 // second pass: convert utf32 input to utf8
1368 uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
1369 uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
1370
1371 assert(out_end == out_begin + out_length);
1372 (void)!out_end;
1373
1374 return true;
1375 }
1376
1377 PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
1378 {
1379 for (size_t i = 0; i < size; ++i)
1380 if (data[i] > 127)
1381 return i;
1382
1383 return size;
1384 }
1385
1386 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1387 {
1388 const uint8_t* data = static_cast<const uint8_t*>(contents);
1389
1390 // get size of prefix that does not need utf8 conversion
1391 size_t prefix_length = get_latin1_7bit_prefix_length(data, size);
1392 assert(prefix_length <= size);
1393
1394 const uint8_t* postfix = data + prefix_length;
1395 size_t postfix_length = size - prefix_length;
1396
1397 // if no conversion is needed, just return the original buffer
1398 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
1399
1400 // first pass: get length in utf8 units
1401 out_length = prefix_length + utf_decoder<utf8_counter>::decode_latin1_block(postfix, postfix_length, 0);
1402
1403 // allocate buffer of suitable length
1404 out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1405 if (!out_buffer) return false;
1406
1407 // second pass: convert latin1 input to utf8
1408 memcpy(out_buffer, data, prefix_length);
1409
1410 uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
1411 uint8_t* out_end = utf_decoder<utf8_writer>::decode_latin1_block(postfix, postfix_length, out_begin + prefix_length);
1412
1413 assert(out_end == out_begin + out_length);
1414 (void)!out_end;
1415
1416 return true;
1417 }
1418
1419 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
1420 {
1421 // fast path: no conversion required
1422 if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
1423
1424 // source encoding is utf16
1425 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
1426 {
1427 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1428
1429 return (native_encoding == encoding) ?
1430 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
1431 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
1432 }
1433
1434 // source encoding is utf32
1435 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
1436 {
1437 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1438
1439 return (native_encoding == encoding) ?
1440 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
1441 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
1442 }
1443
1444 // source encoding is latin1
1445 if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
1446
1447 assert(!"Invalid encoding");
1448 return false;
1449 }
1450#endif
1451
1452 PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
1453 {
1454 // get length in utf8 characters
1455 return utf_decoder<utf8_counter>::decode_wchar_block(str, length, 0);
1456 }
1457
1458 PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
1459 {
1460 // convert to utf8
1461 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
1462 uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(str, length, begin);
1463
1464 assert(begin + size == end);
1465 (void)!end;
1466
1467 // zero-terminate
1468 buffer[size] = 0;
1469 }
1470
1471#ifndef PUGIXML_NO_STL
1472 PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
1473 {
1474 // first pass: get length in utf8 characters
1475 size_t size = as_utf8_begin(str, length);
1476
1477 // allocate resulting string
1478 std::string result;
1479 result.resize(size);
1480
1481 // second pass: convert to utf8
1482 if (size > 0) as_utf8_end(&result[0], size, str, length);
1483
1484 return result;
1485 }
1486
1487 PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
1488 {
1489 const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
1490
1491 // first pass: get length in wchar_t units
1492 size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
1493
1494 // allocate resulting string
1495 std::basic_string<wchar_t> result;
1496 result.resize(length);
1497
1498 // second pass: convert to wchar_t
1499 if (length > 0)
1500 {
1501 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
1502 wchar_writer::value_type end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, begin);
1503
1504 assert(begin + length == end);
1505 (void)!end;
1506 }
1507
1508 return result;
1509 }
1510#endif
1511
1512 inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target)
1513 {
1514 assert(target);
1515 size_t target_length = strlength(target);
1516
1517 // always reuse document buffer memory if possible
1518 if (!allocated) return target_length >= length;
1519
1520 // reuse heap memory if waste is not too great
1521 const size_t reuse_threshold = 32;
1522
1523 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
1524 }
1525
1526 PUGI__FN bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)
1527 {
1528 size_t source_length = strlength(source);
1529
1530 if (source_length == 0)
1531 {
1532 // empty string and null pointer are equivalent, so just deallocate old memory
1533 xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
1534
1535 if (header & header_mask) alloc->deallocate_string(dest);
1536
1537 // mark the string as not allocated
1538 dest = 0;
1539 header &= ~header_mask;
1540
1541 return true;
1542 }
1543 else if (dest && strcpy_insitu_allow(source_length, header & header_mask, dest))
1544 {
1545 // we can reuse old buffer, so just copy the new data (including zero terminator)
1546 memcpy(dest, source, (source_length + 1) * sizeof(char_t));
1547
1548 return true;
1549 }
1550 else
1551 {
1552 xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
1553
1554 // allocate new buffer
1555 char_t* buf = alloc->allocate_string(source_length + 1);
1556 if (!buf) return false;
1557
1558 // copy the string (including zero terminator)
1559 memcpy(buf, source, (source_length + 1) * sizeof(char_t));
1560
1561 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
1562 if (header & header_mask) alloc->deallocate_string(dest);
1563
1564 // the string is now allocated, so set the flag
1565 dest = buf;
1566 header |= header_mask;
1567
1568 return true;
1569 }
1570 }
1571
1572 struct gap
1573 {
1574 char_t* end;
1575 size_t size;
1576
1577 gap(): end(0), size(0)
1578 {
1579 }
1580
1581 // Push new gap, move s count bytes further (skipping the gap).
1582 // Collapse previous gap.
1583 void push(char_t*& s, size_t count)
1584 {
1585 if (end) // there was a gap already; collapse it
1586 {
1587 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
1588 assert(s >= end);
1589 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
1590 }
1591
1592 s += count; // end of current gap
1593
1594 // "merge" two gaps
1595 end = s;
1596 size += count;
1597 }
1598
1599 // Collapse all gaps, return past-the-end pointer
1600 char_t* flush(char_t* s)
1601 {
1602 if (end)
1603 {
1604 // Move [old_gap_end, current_pos) to [old_gap_start, ...)
1605 assert(s >= end);
1606 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
1607
1608 return s - size;
1609 }
1610 else return s;
1611 }
1612 };
1613
// Decodes one character reference or predefined entity in place.
// s - start of the reference; the callers in this file invoke the function only when *s == '&'
// g - gap tracker that receives the characters freed by replacing the reference with its decoded form
// Recognized forms: &#NNN; (decimal), &#xHHH; (hex), &amp; &apos; &gt; &lt; &quot;
// On success the decoded text is written starting at s and the return value points just past the ';'.
// If the text is not a complete, recognized reference nothing is written and the position where
// matching stopped is returned, so the caller simply continues scanning from there.
1614 PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
1615 {
// stre scans ahead of s; s itself stays on the '&' until something is decoded
1616 char_t* stre = s + 1;
1617
1618 switch (*stre)
1619 {
1620 case '#': // &#...
1621 {
// code point accumulated from the digits (unsigned arithmetic, so overlong input wraps)
1622 unsigned int ucsc = 0;
1623
1624 if (stre[1] == 'x') // &#x... (hex code)
1625 {
1626 stre += 2;
1627
1628 char_t ch = *stre;
1629
// "&#x;" carries no digits and is left untouched
1630 if (ch == ';') return stre;
1631
1632 for (;;)
1633 {
// the unsigned comparison is true only for '0'..'9'
1634 if (static_cast<unsigned int>(ch - '0') <= 9)
1635 ucsc = 16 * ucsc + (ch - '0');
// (ch | ' ') folds ASCII upper case to lower case, so this accepts a-f and A-F
1636 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
1637 ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
1638 else if (ch == ';')
1639 break;
1640 else // cancel
1641 return stre;
1642
1643 ch = *++stre;
1644 }
1645
// step over the terminating ';'
1646 ++stre;
1647 }
1648 else // &#... (dec code)
1649 {
1650 char_t ch = *++stre;
1651
// "&#;" carries no digits and is left untouched
1652 if (ch == ';') return stre;
1653
1654 for (;;)
1655 {
1656 if (static_cast<unsigned int>(ch - '0') <= 9)
1657 ucsc = 10 * ucsc + (ch - '0');
1658 else if (ch == ';')
1659 break;
1660 else // cancel
1661 return stre;
1662
1663 ch = *++stre;
1664 }
1665
// step over the terminating ';'
1666 ++stre;
1667 }
1668
// encode the code point over the start of the reference; the writer's return value becomes the
// new s (presumably the position right after the encoded data - confirm in the writer classes)
1669 #ifdef PUGIXML_WCHAR_MODE
1670 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
1671 #else
1672 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
1673 #endif
1674
// everything between the written data and the end of the reference becomes a gap
1675 g.push(s, stre - s);
1676 return stre;
1677 }
1678
1679 case 'a': // &a
1680 {
1681 ++stre;
1682
1683 if (*stre == 'm') // &am
1684 {
1685 if (*++stre == 'p' && *++stre == ';') // &amp;
1686 {
1687 *s++ = '&';
1688 ++stre;
1689
1690 g.push(s, stre - s);
1691 return stre;
1692 }
1693 }
1694 else if (*stre == 'p') // &ap
1695 {
1696 if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
1697 {
1698 *s++ = '\'';
1699 ++stre;
1700
1701 g.push(s, stre - s);
1702 return stre;
1703 }
1704 }
// not a known entity: fall out of the switch and return the position reached so far
1705 break;
1706 }
1707
1708 case 'g': // &g
1709 {
1710 if (*++stre == 't' && *++stre == ';') // &gt;
1711 {
1712 *s++ = '>';
1713 ++stre;
1714
1715 g.push(s, stre - s);
1716 return stre;
1717 }
1718 break;
1719 }
1720
1721 case 'l': // &l
1722 {
1723 if (*++stre == 't' && *++stre == ';') // &lt;
1724 {
1725 *s++ = '<';
1726 ++stre;
1727
1728 g.push(s, stre - s);
1729 return stre;
1730 }
1731 break;
1732 }
1733
1734 case 'q': // &q
1735 {
1736 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
1737 {
1738 *s++ = '"';
1739 ++stre;
1740
1741 g.push(s, stre - s);
1742 return stre;
1743 }
1744 break;
1745 }
1746
1747 default:
1748 break;
1749 }
1750
// unrecognized reference: the text is kept as is
1751 return stre;
1752 }
1753
1754 // Utility macro for last character handling
1755 #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
1756
1757 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
1758 {
1759 gap g;
1760
1761 while (true)
1762 {
1763 while (!PUGI__IS_CHARTYPE(*s, ct_parse_comment)) ++s;
1764
1765 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1766 {
1767 *s++ = '\n'; // replace first one with 0x0a
1768
1769 if (*s == '\n') g.push(s, 1);
1770 }
1771 else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here
1772 {
1773 *g.flush(s) = 0;
1774
1775 return s + (s[2] == '>' ? 3 : 2);
1776 }
1777 else if (*s == 0)
1778 {
1779 return 0;
1780 }
1781 else ++s;
1782 }
1783 }
1784
1785 PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
1786 {
1787 gap g;
1788
1789 while (true)
1790 {
1791 while (!PUGI__IS_CHARTYPE(*s, ct_parse_cdata)) ++s;
1792
1793 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1794 {
1795 *s++ = '\n'; // replace first one with 0x0a
1796
1797 if (*s == '\n') g.push(s, 1);
1798 }
1799 else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here
1800 {
1801 *g.flush(s) = 0;
1802
1803 return s + 1;
1804 }
1805 else if (*s == 0)
1806 {
1807 return 0;
1808 }
1809 else ++s;
1810 }
1811 }
1812
1813 typedef char_t* (*strconv_pcdata_t)(char_t*);
1814
1815 template <typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
1816 {
1817 static char_t* parse(char_t* s)
1818 {
1819 gap g;
1820
1821 while (true)
1822 {
1823 while (!PUGI__IS_CHARTYPE(*s, ct_parse_pcdata)) ++s;
1824
1825 if (*s == '<') // PCDATA ends here
1826 {
1827 *g.flush(s) = 0;
1828
1829 return s + 1;
1830 }
1831 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1832 {
1833 *s++ = '\n'; // replace first one with 0x0a
1834
1835 if (*s == '\n') g.push(s, 1);
1836 }
1837 else if (opt_escape::value && *s == '&')
1838 {
1839 s = strconv_escape(s, g);
1840 }
1841 else if (*s == 0)
1842 {
1843 return s;
1844 }
1845 else ++s;
1846 }
1847 }
1848 };
1849
1850 PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
1851 {
1852 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20);
1853
1854 switch ((optmask >> 4) & 3) // get bitmask for flags (eol escapes)
1855 {
1856 case 0: return strconv_pcdata_impl<opt_false, opt_false>::parse;
1857 case 1: return strconv_pcdata_impl<opt_false, opt_true>::parse;
1858 case 2: return strconv_pcdata_impl<opt_true, opt_false>::parse;
1859 case 3: return strconv_pcdata_impl<opt_true, opt_true>::parse;
1860 default: return 0; // should not get here
1861 }
1862 }
1863
1864 typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
1865
1866 template <typename opt_escape> struct strconv_attribute_impl
1867 {
1868 static char_t* parse_wnorm(char_t* s, char_t end_quote)
1869 {
1870 gap g;
1871
1872 // trim leading whitespaces
1873 if (PUGI__IS_CHARTYPE(*s, ct_space))
1874 {
1875 char_t* str = s;
1876
1877 do ++str;
1878 while (PUGI__IS_CHARTYPE(*str, ct_space));
1879
1880 g.push(s, str - s);
1881 }
1882
1883 while (true)
1884 {
1885 while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s;
1886
1887 if (*s == end_quote)
1888 {
1889 char_t* str = g.flush(s);
1890
1891 do *str-- = 0;
1892 while (PUGI__IS_CHARTYPE(*str, ct_space));
1893
1894 return s + 1;
1895 }
1896 else if (PUGI__IS_CHARTYPE(*s, ct_space))
1897 {
1898 *s++ = ' ';
1899
1900 if (PUGI__IS_CHARTYPE(*s, ct_space))
1901 {
1902 char_t* str = s + 1;
1903 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
1904
1905 g.push(s, str - s);
1906 }
1907 }
1908 else if (opt_escape::value && *s == '&')
1909 {
1910 s = strconv_escape(s, g);
1911 }
1912 else if (!*s)
1913 {
1914 return 0;
1915 }
1916 else ++s;
1917 }
1918 }
1919
1920 static char_t* parse_wconv(char_t* s, char_t end_quote)
1921 {
1922 gap g;
1923
1924 while (true)
1925 {
1926 while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s;
1927
1928 if (*s == end_quote)
1929 {
1930 *g.flush(s) = 0;
1931
1932 return s + 1;
1933 }
1934 else if (PUGI__IS_CHARTYPE(*s, ct_space))
1935 {
1936 if (*s == '\r')
1937 {
1938 *s++ = ' ';
1939
1940 if (*s == '\n') g.push(s, 1);
1941 }
1942 else *s++ = ' ';
1943 }
1944 else if (opt_escape::value && *s == '&')
1945 {
1946 s = strconv_escape(s, g);
1947 }
1948 else if (!*s)
1949 {
1950 return 0;
1951 }
1952 else ++s;
1953 }
1954 }
1955
1956 static char_t* parse_eol(char_t* s, char_t end_quote)
1957 {
1958 gap g;
1959
1960 while (true)
1961 {
1962 while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;
1963
1964 if (*s == end_quote)
1965 {
1966 *g.flush(s) = 0;
1967
1968 return s + 1;
1969 }
1970 else if (*s == '\r')
1971 {
1972 *s++ = '\n';
1973
1974 if (*s == '\n') g.push(s, 1);
1975 }
1976 else if (opt_escape::value && *s == '&')
1977 {
1978 s = strconv_escape(s, g);
1979 }
1980 else if (!*s)
1981 {
1982 return 0;
1983 }
1984 else ++s;
1985 }
1986 }
1987
1988 static char_t* parse_simple(char_t* s, char_t end_quote)
1989 {
1990 gap g;
1991
1992 while (true)
1993 {
1994 while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;
1995
1996 if (*s == end_quote)
1997 {
1998 *g.flush(s) = 0;
1999
2000 return s + 1;
2001 }
2002 else if (opt_escape::value && *s == '&')
2003 {
2004 s = strconv_escape(s, g);
2005 }
2006 else if (!*s)
2007 {
2008 return 0;
2009 }
2010 else ++s;
2011 }
2012 }
2013 };
2014
2015 PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
2016 {
2017 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
2018
2019 switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
2020 {
2021 case 0: return strconv_attribute_impl<opt_false>::parse_simple;
2022 case 1: return strconv_attribute_impl<opt_true>::parse_simple;
2023 case 2: return strconv_attribute_impl<opt_false>::parse_eol;
2024 case 3: return strconv_attribute_impl<opt_true>::parse_eol;
2025 case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
2026 case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
2027 case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
2028 case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
2029 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
2030 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
2031 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
2032 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
2033 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
2034 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
2035 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
2036 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
2037 default: return 0; // should not get here
2038 }
2039 }
2040
2041 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2042 {
2043 xml_parse_result result;
2044 result.status = status;
2045 result.offset = offset;
2046
2047 return result;
2048 }
2049
2050 struct xml_parser
2051 {
2052 xml_allocator alloc;
2053 char_t* error_offset;
2054 xml_parse_status error_status;
2055
2056 // Parser utilities.
2057 #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
2058 #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
2059 #define PUGI__PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
2060 #define PUGI__POPNODE() { cursor = cursor->parent; }
2061 #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
2062 #define PUGI__SCANWHILE(X) { while ((X)) ++s; }
2063 #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
2064 #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
2065 #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
2066
// Creates a parser that works on its own copy of the allocator state (alloc_) and starts
// with no error recorded: null error_offset and status_ok.
2067 xml_parser(const xml_allocator& alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
2068 {
2069 }
2070
2071 // DOCTYPE consists of nested sections of the following possible types:
2072 // <!-- ... -->, <? ... ?>, "...", '...'
2073 // <![...]]>
2074 // <!...>
2075 // First group can not contain nested groups
2076 // Second group can contain nested groups of the same type
2077 // Third group can contain all other groups
2078 char_t* parse_doctype_primitive(char_t* s)
2079 {
2080 if (*s == '"' || *s == '\'')
2081 {
2082 // quoted string
2083 char_t ch = *s++;
2084 PUGI__SCANFOR(*s == ch);
2085 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2086
2087 s++;
2088 }
2089 else if (s[0] == '<' && s[1] == '?')
2090 {
2091 // <? ... ?>
2092 s += 2;
2093 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2094 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2095
2096 s += 2;
2097 }
2098 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2099 {
2100 s += 4;
2101 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2102 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2103
2104 s += 4;
2105 }
2106 else PUGI__THROW_ERROR(status_bad_doctype, s);
2107
2108 return s;
2109 }
2110
2111 char_t* parse_doctype_ignore(char_t* s)
2112 {
2113 assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2114 s++;
2115
2116 while (*s)
2117 {
2118 if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2119 {
2120 // nested ignore section
2121 s = parse_doctype_ignore(s);
2122 if (!s) return s;
2123 }
2124 else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2125 {
2126 // ignore section end
2127 s += 3;
2128
2129 return s;
2130 }
2131 else s++;
2132 }
2133
2134 PUGI__THROW_ERROR(status_bad_doctype, s);
2135 }
2136
2137 char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel)
2138 {
2139 assert(s[0] == '<' && s[1] == '!');
2140 s++;
2141
2142 while (*s)
2143 {
2144 if (s[0] == '<' && s[1] == '!' && s[2] != '-')
2145 {
2146 if (s[2] == '[')
2147 {
2148 // ignore
2149 s = parse_doctype_ignore(s);
2150 if (!s) return s;
2151 }
2152 else
2153 {
2154 // some control group
2155 s = parse_doctype_group(s, endch, false);
2156 if (!s) return s;
2157 }
2158 }
2159 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
2160 {
2161 // unknown tag (forbidden), or some primitive group
2162 s = parse_doctype_primitive(s);
2163 if (!s) return s;
2164 }
2165 else if (*s == '>')
2166 {
2167 s++;
2168
2169 return s;
2170 }
2171 else s++;
2172 }
2173
2174 if (!toplevel || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
2175
2176 return s;
2177 }
2178
// Parses a construct that starts with "<!": a comment, a CDATA section or a DOCTYPE.
// s      - points at the '!' on entry
// cursor - node that receives the new child; passed by value, so pushing a node here without
//          popping it does not move the caller's cursor
// optmsk - parse option flags tested through PUGI__OPTSET
// endch  - character that originally stood at the last buffer position (used by ENDSWITH)
// Returns the position after the construct, or null after an error has been recorded through
// PUGI__THROW_ERROR / PUGI__CHECK_ERROR.
2179 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
2180 {
2181 // parse node contents, starting with exclamation mark
2182 ++s;
2183
2184 if (*s == '-') // '<!-...'
2185 {
2186 ++s;
2187
2188 if (*s == '-') // '<!--...'
2189 {
2190 ++s;
2191
2192 if (PUGI__OPTSET(parse_comments))
2193 {
2194 PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
2195 cursor->value = s; // Save the offset.
2196 }
2197
// in-place end-of-line conversion is only needed when the comment text is actually kept
2198 if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
2199 {
2200 s = strconv_comment(s, endch);
2201
2202 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
2203 }
2204 else
2205 {
2206 // Scan for terminating '-->'.
2207 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>'));
2208 PUGI__CHECK_ERROR(status_bad_comment, s);
2209
2210 if (PUGI__OPTSET(parse_comments))
2211 *s = 0; // Zero-terminate this segment at the first terminating '-'.
2212
// only two characters are stepped over when the final '>' was the buffer's last character
2213 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
2214 }
2215 }
2216 else PUGI__THROW_ERROR(status_bad_comment, s);
2217 }
2218 else if (*s == '[')
2219 {
2220 // '<![CDATA[...'
2221 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
2222 {
2223 ++s;
2224
2225 if (PUGI__OPTSET(parse_cdata))
2226 {
2227 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
2228 cursor->value = s; // Save the offset.
2229
2230 if (PUGI__OPTSET(parse_eol))
2231 {
2232 s = strconv_cdata(s, endch);
2233
2234 if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
2235 }
2236 else
2237 {
2238 // Scan for terminating ']]>'.
2239 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
2240 PUGI__CHECK_ERROR(status_bad_cdata, s);
2241
2242 *s++ = 0; // Zero-terminate this segment.
2243 }
2244 }
2245 else // Flagged for discard, but we still have to scan for the terminator.
2246 {
2247 // Scan for terminating ']]>'.
2248 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
2249 PUGI__CHECK_ERROR(status_bad_cdata, s);
2250
2251 ++s;
2252 }
2253
// on every path above s now points at the second ']' of the terminator
2254 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
2255 }
2256 else PUGI__THROW_ERROR(status_bad_cdata, s);
2257 }
2258 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E'))
2259 {
// move back to the '<' that opens the declaration, as parse_doctype_group expects
2260 s -= 2;
2261
// a DOCTYPE is only accepted directly under the document node
2262 if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
2263
// first character after "<!DOCTYPE" (9 characters)
2264 char_t* mark = s + 9;
2265
2266 s = parse_doctype_group(s, endch, true);
2267 if (!s) return s;
2268
2269 if (PUGI__OPTSET(parse_doctype))
2270 {
2271 while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
2272
2273 PUGI__PUSHNODE(node_doctype);
2274
2275 cursor->value = mark;
2276
2277 assert((s[0] == 0 && endch == '>') || s[-1] == '>');
// terminate the value at the closing '>' (or at the buffer end when the '>' was the last character)
2278 s[*s == 0 ? 0 : -1] = 0;
2279
2280 PUGI__POPNODE();
2281 }
2282 }
// the buffer ended right after "<!": use endch to pick the most specific error
2283 else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
2284 else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
2285 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
2286
2287 return s;
2288 }
2289
// Parses a construct that starts with "<?": a processing instruction or the XML declaration.
// s          - points at the '?' on entry
// ref_cursor - in/out current node; it is written back only on the success path at the end.
//              For a declaration that has content it is left on the new node_declaration node,
//              which tells parse() to continue with attribute parsing.
// optmsk     - parse option flags tested through PUGI__OPTSET
// endch      - character that originally stood at the last buffer position (used by ENDSWITH)
// Returns the position to continue parsing from, or null after an error has been recorded
// through PUGI__THROW_ERROR / PUGI__CHECK_ERROR.
2290 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
2291 {
2292 // load into registers
2293 xml_node_struct* cursor = ref_cursor;
2294 char_t ch = 0;
2295
2296 // parse node contents, starting with question mark
2297 ++s;
2298
2299 // read PI target
2300 char_t* target = s;
2301
2302 if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
2303
2304 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
2305 PUGI__CHECK_ERROR(status_bad_pi, s);
2306
2307 // determine node type; stricmp / strcasecmp is not portable
// (c | ' ') folds ASCII upper case to lower case; the target must be exactly three characters long
2308 bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
2309
// create a node only if the matching option (parse_declaration or parse_pi) is enabled
2310 if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
2311 {
2312 if (declaration)
2313 {
2314 // disallow non top-level declarations
2315 if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
2316
2317 PUGI__PUSHNODE(node_declaration);
2318 }
2319 else
2320 {
2321 PUGI__PUSHNODE(node_pi);
2322 }
2323
2324 cursor->name = target;
2325
// terminate the target name; ch receives the character that followed it
2326 PUGI__ENDSEG();
2327
2328 // parse value/attributes
2329 if (ch == '?')
2330 {
2331 // empty node
2332 if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
2333 s += (*s == '>');
2334
2335 PUGI__POPNODE();
2336 }
2337 else if (PUGI__IS_CHARTYPE(ch, ct_space))
2338 {
2339 PUGI__SKIPWS();
2340
2341 // scan for tag end
2342 char_t* value = s;
2343
2344 PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
2345 PUGI__CHECK_ERROR(status_bad_pi, s);
2346
2347 if (declaration)
2348 {
2349 // replace ending ? with / so that 'element' terminates properly
2350 *s = '/';
2351
2352 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
2353 s = value;
2354 }
2355 else
2356 {
2357 // store value and step over >
2358 cursor->value = value;
2359 PUGI__POPNODE();
2360
// overwrites the '?' with the terminator of the value
2361 PUGI__ENDSEG();
2362
2363 s += (*s == '>');
2364 }
2365 }
2366 else PUGI__THROW_ERROR(status_bad_pi, s);
2367 }
2368 else
2369 {
// the node is not wanted: just find the end of the construct and step over it
2370 // scan for tag end
2371 PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
2372 PUGI__CHECK_ERROR(status_bad_pi, s);
2373
2374 s += (s[1] == '>' ? 2 : 1);
2375 }
2376
2377 // store from registers
2378 ref_cursor = cursor;
2379
2380 return s;
2381 }
2382
// Main parsing loop: builds the node tree below xmldoc from the null-terminated buffer at s.
// The buffer is modified in place; node and attribute names and values point into it.
// s      - start of the buffer
// xmldoc - document node; also the initial cursor
// optmsk - parse option flags (tested through PUGI__OPTSET and used to select the converters)
// endch  - character that originally stood at the last buffer position, which the static
//          parse() overload replaces with the null terminator
// Returns the position where parsing stopped, or null after an error has been recorded in
// error_offset / error_status.
2383 char_t* parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch)
2384 {
// pick the value converters once, according to the option flags
2385 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
2386 strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
2387
2388 char_t ch = 0;
2389 xml_node_struct* cursor = xmldoc;
2390 char_t* mark = s;
2391
2392 while (*s != 0)
2393 {
2394 if (*s == '<')
2395 {
2396 ++s;
2397
// also entered by goto from the PCDATA branch below, with s already past the '<'
2398 LOC_TAG:
2399 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
2400 {
2401 PUGI__PUSHNODE(node_element); // Append a new node to the tree.
2402
2403 cursor->name = s;
2404
2405 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
2406 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
2407
2408 if (ch == '>')
2409 {
2410 // end of tag
2411 }
2412 else if (PUGI__IS_CHARTYPE(ch, ct_space))
2413 {
// also entered by goto after parse_question() has created a declaration node
2414 LOC_ATTRIBUTES:
2415 while (true)
2416 {
2417 PUGI__SKIPWS(); // Eat any whitespace.
2418
2419 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
2420 {
2421 xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute.
2422 if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
2423
2424 a->name = s; // Save the offset.
2425
2426 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
2427 PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
2428
2429 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
2430 PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
2431
2432 if (PUGI__IS_CHARTYPE(ch, ct_space))
2433 {
2434 PUGI__SKIPWS(); // Eat any whitespace.
2435 PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
2436
2437 ch = *s;
2438 ++s;
2439 }
2440
2441 if (ch == '=') // '<... #=...'
2442 {
2443 PUGI__SKIPWS(); // Eat any whitespace.
2444
2445 if (*s == '"' || *s == '\'') // '<... #="...'
2446 {
2447 ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
2448 ++s; // Step over the quote.
2449 a->value = s; // Save the offset.
2450
// converts the value in place; returns the position after the closing quote or null
2451 s = strconv_attribute(s, ch);
2452
2453 if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
2454
2455 // After this line the loop continues from the start;
2456 // Whitespaces, / and > are ok, symbols and EOF are wrong,
2457 // everything else will be detected
2458 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
2459 }
2460 else PUGI__THROW_ERROR(status_bad_attribute, s);
2461 }
2462 else PUGI__THROW_ERROR(status_bad_attribute, s);
2463 }
2464 else if (*s == '/')
2465 {
2466 ++s;
2467
2468 if (*s == '>')
2469 {
2470 PUGI__POPNODE();
2471 s++;
2472 break;
2473 }
// the closing '>' was the last character of the buffer
2474 else if (*s == 0 && endch == '>')
2475 {
2476 PUGI__POPNODE();
2477 break;
2478 }
2479 else PUGI__THROW_ERROR(status_bad_start_element, s);
2480 }
2481 else if (*s == '>')
2482 {
2483 ++s;
2484
2485 break;
2486 }
// the closing '>' was the last character of the buffer
2487 else if (*s == 0 && endch == '>')
2488 {
2489 break;
2490 }
2491 else PUGI__THROW_ERROR(status_bad_start_element, s);
2492 }
2493
2494 // !!!
2495 }
2496 else if (ch == '/') // '<#.../'
2497 {
2498 if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
2499
2500 PUGI__POPNODE(); // Pop.
2501
2502 s += (*s == '>');
2503 }
2504 else if (ch == 0)
2505 {
2506 // we stepped over null terminator, backtrack & handle closing tag
2507 --s;
2508
2509 if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
2510 }
2511 else PUGI__THROW_ERROR(status_bad_start_element, s);
2512 }
2513 else if (*s == '/')
2514 {
2515 ++s;
2516
// a closing tag must repeat the name of the node the cursor is on
2517 char_t* name = cursor->name;
2518 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
2519
2520 while (PUGI__IS_CHARTYPE(*s, ct_symbol))
2521 {
2522 if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
2523 }
2524
// the closing tag is shorter than the element name
2525 if (*name)
2526 {
2527 if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
2528 else PUGI__THROW_ERROR(status_end_element_mismatch, s);
2529 }
2530
2531 PUGI__POPNODE(); // Pop.
2532
2533 PUGI__SKIPWS();
2534
2535 if (*s == 0)
2536 {
2537 if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
2538 }
2539 else
2540 {
2541 if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
2542 ++s;
2543 }
2544 }
2545 else if (*s == '?') // '<?...'
2546 {
2547 s = parse_question(s, cursor, optmsk, endch);
2548 if (!s) return s;
2549
2550 assert(cursor);
// NOTE(review): the "+ 1" suggests the header stores the node type biased by one - confirm
// against the node allocation code. A cursor left on a declaration node means its
// attributes still have to be parsed.
2551 if ((cursor->header & xml_memory_page_type_mask) + 1 == node_declaration) goto LOC_ATTRIBUTES;
2552 }
2553 else if (*s == '!') // '<!...'
2554 {
2555 s = parse_exclamation(s, cursor, optmsk, endch);
2556 if (!s) return s;
2557 }
2558 else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
2559 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
2560 }
2561 else
2562 {
2563 mark = s; // Save this offset while searching for a terminator.
2564
2565 PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
2566
2567 if (*s == '<')
2568 {
2569 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
2570 assert(mark != s);
2571
// whitespace-only text is dropped unless one of the parse_ws_pcdata options keeps it
2572 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single))
2573 {
2574 continue;
2575 }
2576 else if (PUGI__OPTSET(parse_ws_pcdata_single))
2577 {
// kept only when a closing tag follows and the current node has no children yet
2578 if (s[1] != '/' || cursor->first_child) continue;
2579 }
2580 }
2581
// rewind so that the skipped whitespace becomes part of the text
2582 s = mark;
2583
// text is stored only inside an element; directly under the document node it is skipped
2584 if (cursor->parent)
2585 {
2586 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
2587 cursor->value = s; // Save the offset.
2588
2589 s = strconv_pcdata(s);
2590
2591 PUGI__POPNODE(); // Pop since this is a standalone.
2592
2593 if (!*s) break;
2594 }
2595 else
2596 {
2597 PUGI__SCANFOR(*s == '<'); // '...<'
2598 if (!*s) break;
2599
2600 ++s;
2601 }
2602
2603 // We're after '<'
2604 goto LOC_TAG;
2605 }
2606 }
2607
2608 // check that last tag is closed
2609 if (cursor != xmldoc) PUGI__THROW_ERROR(status_end_element_mismatch, s);
2610
2611 return s;
2612 }
2613
2614 static xml_parse_result parse(char_t* buffer, size_t length, xml_node_struct* root, unsigned int optmsk)
2615 {
2616 xml_document_struct* xmldoc = static_cast<xml_document_struct*>(root);
2617
2618 // store buffer for offset_debug
2619 xmldoc->buffer = buffer;
2620
2621 // early-out for empty documents
2622 if (length == 0) return make_parse_result(status_ok);
2623
2624 // create parser on stack
2625 xml_parser parser(*xmldoc);
2626
2627 // save last character and make buffer zero-terminated (speeds up parsing)
2628 char_t endch = buffer[length - 1];
2629 buffer[length - 1] = 0;
2630
2631 // perform actual parsing
2632 parser.parse(buffer, xmldoc, optmsk, endch);
2633
2634 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
2635 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
2636
2637 // update allocator state
2638 *static_cast<xml_allocator*>(xmldoc) = parser.alloc;
2639
2640 // since we removed last character, we have to handle the only possible false positive
2641 if (result && endch == '<')
2642 {
2643 // there's no possible well-formed document with < at the end
2644 return make_parse_result(status_unrecognized_tag, length);
2645 }
2646
2647 return result;
2648 }
2649 };
2650
2651 // Output facilities
2652 PUGI__FN xml_encoding get_write_native_encoding()
2653 {
2654 #ifdef PUGIXML_WCHAR_MODE
2655 return get_wchar_encoding();
2656 #else
2657 return encoding_utf8;
2658 #endif
2659 }
2660
2661 PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
2662 {
2663 // replace wchar encoding with utf implementation
2664 if (encoding == encoding_wchar) return get_wchar_encoding();
2665
2666 // replace utf16 encoding with utf16 with specific endianness
2667 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2668
2669 // replace utf32 encoding with utf32 with specific endianness
2670 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2671
2672 // only do autodetection if no explicit encoding is requested
2673 if (encoding != encoding_auto) return encoding;
2674
2675 // assume utf8 encoding
2676 return encoding_utf8;
2677 }
2678
2679#ifdef PUGIXML_WCHAR_MODE
2680 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
2681 {
2682 assert(length > 0);
2683
2684 // discard last character if it's the lead of a surrogate pair
2685 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
2686 }
2687
2688 PUGI__FN size_t convert_buffer(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
2689 {
2690 // only endian-swapping is required
2691 if (need_endian_swap_utf(encoding, get_wchar_encoding()))
2692 {
2693 convert_wchar_endian_swap(r_char, data, length);
2694
2695 return length * sizeof(char_t);
2696 }
2697
2698 // convert to utf8
2699 if (encoding == encoding_utf8)
2700 {
2701 uint8_t* dest = r_u8;
2702 uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(data, length, dest);
2703
2704 return static_cast<size_t>(end - dest);
2705 }
2706
2707 // convert to utf16
2708 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2709 {
2710 uint16_t* dest = r_u16;
2711
2712 // convert to native utf16
2713 uint16_t* end = utf_decoder<utf16_writer>::decode_wchar_block(data, length, dest);
2714
2715 // swap if necessary
2716 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2717
2718 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2719
2720 return static_cast<size_t>(end - dest) * sizeof(uint16_t);
2721 }
2722
2723 // convert to utf32
2724 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2725 {
2726 uint32_t* dest = r_u32;
2727
2728 // convert to native utf32
2729 uint32_t* end = utf_decoder<utf32_writer>::decode_wchar_block(data, length, dest);
2730
2731 // swap if necessary
2732 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2733
2734 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2735
2736 return static_cast<size_t>(end - dest) * sizeof(uint32_t);
2737 }
2738
2739 // convert to latin1
2740 if (encoding == encoding_latin1)
2741 {
2742 uint8_t* dest = r_u8;
2743 uint8_t* end = utf_decoder<latin1_writer>::decode_wchar_block(data, length, dest);
2744
2745 return static_cast<size_t>(end - dest);
2746 }
2747
2748 assert(!"Invalid encoding");
2749 return 0;
2750 }
2751#else
2752 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
2753 {
2754 assert(length > 4);
2755
2756 for (size_t i = 1; i <= 4; ++i)
2757 {
2758 uint8_t ch = static_cast<uint8_t>(data[length - i]);
2759
2760 // either a standalone character or a leading one
2761 if ((ch & 0xc0) != 0x80) return length - i;
2762 }
2763
2764 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
2765 return length;
2766 }
2767
2768 PUGI__FN size_t convert_buffer(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
2769 {
2770 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2771 {
2772 uint16_t* dest = r_u16;
2773
2774 // convert to native utf16
2775 uint16_t* end = utf_decoder<utf16_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
2776
2777 // swap if necessary
2778 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2779
2780 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2781
2782 return static_cast<size_t>(end - dest) * sizeof(uint16_t);
2783 }
2784
2785 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2786 {
2787 uint32_t* dest = r_u32;
2788
2789 // convert to native utf32
2790 uint32_t* end = utf_decoder<utf32_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
2791
2792 // swap if necessary
2793 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2794
2795 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2796
2797 return static_cast<size_t>(end - dest) * sizeof(uint32_t);
2798 }
2799
2800 if (encoding == encoding_latin1)
2801 {
2802 uint8_t* dest = r_u8;
2803 uint8_t* end = utf_decoder<latin1_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
2804
2805 return static_cast<size_t>(end - dest);
2806 }
2807
2808 assert(!"Invalid encoding");
2809 return 0;
2810 }
2811#endif
2812
2813 class xml_buffered_writer
2814 {
2815 xml_buffered_writer(const xml_buffered_writer&);
2816 xml_buffered_writer& operator=(const xml_buffered_writer&);
2817
2818 public:
2819 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
2820 {
2821 PUGI__STATIC_ASSERT(bufcapacity >= 8);
2822 }
2823
2824 ~xml_buffered_writer()
2825 {
2826 flush();
2827 }
2828
2829 void flush()
2830 {
2831 flush(buffer, bufsize);
2832 bufsize = 0;
2833 }
2834
2835 void flush(const char_t* data, size_t size)
2836 {
2837 if (size == 0) return;
2838
2839 // fast path, just write data
2840 if (encoding == get_write_native_encoding())
2841 writer.write(data, size * sizeof(char_t));
2842 else
2843 {
2844 // convert chunk
2845 size_t result = convert_buffer(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
2846 assert(result <= sizeof(scratch));
2847
2848 // write data
2849 writer.write(scratch.data_u8, result);
2850 }
2851 }
2852
2853 void write(const char_t* data, size_t length)
2854 {
2855 if (bufsize + length > bufcapacity)
2856 {
2857 // flush the remaining buffer contents
2858 flush();
2859
2860 // handle large chunks
2861 if (length > bufcapacity)
2862 {
2863 if (encoding == get_write_native_encoding())
2864 {
2865 // fast path, can just write data chunk
2866 writer.write(data, length * sizeof(char_t));
2867 return;
2868 }
2869
2870 // need to convert in suitable chunks
2871 while (length > bufcapacity)
2872 {
2873 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
2874 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
2875 size_t chunk_size = get_valid_length(data, bufcapacity);
2876
2877 // convert chunk and write
2878 flush(data, chunk_size);
2879
2880 // iterate
2881 data += chunk_size;
2882 length -= chunk_size;
2883 }
2884
2885 // small tail is copied below
2886 bufsize = 0;
2887 }
2888 }
2889
2890 memcpy(buffer + bufsize, data, length * sizeof(char_t));
2891 bufsize += length;
2892 }
2893
2894 void write(const char_t* data)
2895 {
2896 write(data, strlength(data));
2897 }
2898
2899 void write(char_t d0)
2900 {
2901 if (bufsize + 1 > bufcapacity) flush();
2902
2903 buffer[bufsize + 0] = d0;
2904 bufsize += 1;
2905 }
2906
2907 void write(char_t d0, char_t d1)
2908 {
2909 if (bufsize + 2 > bufcapacity) flush();
2910
2911 buffer[bufsize + 0] = d0;
2912 buffer[bufsize + 1] = d1;
2913 bufsize += 2;
2914 }
2915
2916 void write(char_t d0, char_t d1, char_t d2)
2917 {
2918 if (bufsize + 3 > bufcapacity) flush();
2919
2920 buffer[bufsize + 0] = d0;
2921 buffer[bufsize + 1] = d1;
2922 buffer[bufsize + 2] = d2;
2923 bufsize += 3;
2924 }
2925
2926 void write(char_t d0, char_t d1, char_t d2, char_t d3)
2927 {
2928 if (bufsize + 4 > bufcapacity) flush();
2929
2930 buffer[bufsize + 0] = d0;
2931 buffer[bufsize + 1] = d1;
2932 buffer[bufsize + 2] = d2;
2933 buffer[bufsize + 3] = d3;
2934 bufsize += 4;
2935 }
2936
2937 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
2938 {
2939 if (bufsize + 5 > bufcapacity) flush();
2940
2941 buffer[bufsize + 0] = d0;
2942 buffer[bufsize + 1] = d1;
2943 buffer[bufsize + 2] = d2;
2944 buffer[bufsize + 3] = d3;
2945 buffer[bufsize + 4] = d4;
2946 bufsize += 5;
2947 }
2948
2949 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
2950 {
2951 if (bufsize + 6 > bufcapacity) flush();
2952
2953 buffer[bufsize + 0] = d0;
2954 buffer[bufsize + 1] = d1;
2955 buffer[bufsize + 2] = d2;
2956 buffer[bufsize + 3] = d3;
2957 buffer[bufsize + 4] = d4;
2958 buffer[bufsize + 5] = d5;
2959 bufsize += 6;
2960 }
2961
2962 // utf8 maximum expansion: x4 (-> utf32)
2963 // utf16 maximum expansion: x2 (-> utf32)
2964 // utf32 maximum expansion: x1
2965 enum
2966 {
2967 bufcapacitybytes =
2968 #ifdef PUGIXML_MEMORY_OUTPUT_STACK
2969 PUGIXML_MEMORY_OUTPUT_STACK
2970 #else
2971 10240
2972 #endif
2973 ,
2974 bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
2975 };
2976
2977 char_t buffer[bufcapacity];
2978
2979 union
2980 {
2981 uint8_t data_u8[4 * bufcapacity];
2982 uint16_t data_u16[2 * bufcapacity];
2983 uint32_t data_u32[bufcapacity];
2984 char_t data_char[bufcapacity];
2985 } scratch;
2986
2987 xml_writer& writer;
2988 size_t bufsize;
2989 xml_encoding encoding;
2990 };
2991
2992 PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
2993 {
2994 while (*s)
2995 {
2996 const char_t* prev = s;
2997
2998 // While *s is a usual symbol
2999 while (!PUGI__IS_CHARTYPEX(*s, type)) ++s;
3000
3001 writer.write(prev, static_cast<size_t>(s - prev));
3002
3003 switch (*s)
3004 {
3005 case 0: break;
3006 case '&':
3007 writer.write('&', 'a', 'm', 'p', ';');
3008 ++s;
3009 break;
3010 case '<':
3011 writer.write('&', 'l', 't', ';');
3012 ++s;
3013 break;
3014 case '>':
3015 writer.write('&', 'g', 't', ';');
3016 ++s;
3017 break;
3018 case '"':
3019 writer.write('&', 'q', 'u', 'o', 't', ';');
3020 ++s;
3021 break;
3022 default: // s is not a usual symbol
3023 {
3024 unsigned int ch = static_cast<unsigned int>(*s++);
3025 assert(ch < 32);
3026
3027 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
3028 }
3029 }
3030 }
3031 }
3032
3033 PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3034 {
3035 if (flags & format_no_escapes)
3036 writer.write(s);
3037 else
3038 text_output_escaped(writer, s, type);
3039 }
3040
3041 PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
3042 {
3043 do
3044 {
3045 writer.write('<', '!', '[', 'C', 'D');
3046 writer.write('A', 'T', 'A', '[');
3047
3048 const char_t* prev = s;
3049
3050 // look for ]]> sequence - we can't output it as is since it terminates CDATA
3051 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3052
3053 // skip ]] if we stopped at ]]>, > will go to the next CDATA section
3054 if (*s) s += 2;
3055
3056 writer.write(prev, static_cast<size_t>(s - prev));
3057
3058 writer.write(']', ']', '>');
3059 }
3060 while (*s);
3061 }
3062
3063 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, const xml_node& node, unsigned int flags)
3064 {
3065 const char_t* default_name = PUGIXML_TEXT(":anonymous");
3066
3067 for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute())
3068 {
3069 writer.write(' ');
3070 writer.write(a.name()[0] ? a.name() : default_name);
3071 writer.write('=', '"');
3072
3073 text_output(writer, a.value(), ctx_special_attr, flags);
3074
3075 writer.write('"');
3076 }
3077 }
3078
3079 PUGI__FN void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth)
3080 {
3081 const char_t* default_name = PUGIXML_TEXT(":anonymous");
3082
3083 if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
3084 for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
3085
3086 switch (node.type())
3087 {
3088 case node_document:
3089 {
3090 for (xml_node n = node.first_child(); n; n = n.next_sibling())
3091 node_output(writer, n, indent, flags, depth);
3092 break;
3093 }
3094
3095 case node_element:
3096 {
3097 const char_t* name = node.name()[0] ? node.name() : default_name;
3098
3099 writer.write('<');
3100 writer.write(name);
3101
3102 node_output_attributes(writer, node, flags);
3103
3104 if (flags & format_raw)
3105 {
3106 if (!node.first_child())
3107 writer.write(' ', '/', '>');
3108 else
3109 {
3110 writer.write('>');
3111
3112 for (xml_node n = node.first_child(); n; n = n.next_sibling())
3113 node_output(writer, n, indent, flags, depth + 1);
3114
3115 writer.write('<', '/');
3116 writer.write(name);
3117 writer.write('>');
3118 }
3119 }
3120 else if (!node.first_child())
3121 writer.write(' ', '/', '>', '\n');
3122 else if (node.first_child() == node.last_child() && (node.first_child().type() == node_pcdata || node.first_child().type() == node_cdata))
3123 {
3124 writer.write('>');
3125
3126 if (node.first_child().type() == node_pcdata)
3127 text_output(writer, node.first_child().value(), ctx_special_pcdata, flags);
3128 else
3129 text_output_cdata(writer, node.first_child().value());
3130
3131 writer.write('<', '/');
3132 writer.write(name);
3133 writer.write('>', '\n');
3134 }
3135 else
3136 {
3137 writer.write('>', '\n');
3138
3139 for (xml_node n = node.first_child(); n; n = n.next_sibling())
3140 node_output(writer, n, indent, flags, depth + 1);
3141
3142 if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
3143 for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
3144
3145 writer.write('<', '/');
3146 writer.write(name);
3147 writer.write('>', '\n');
3148 }
3149
3150 break;
3151 }
3152
3153 case node_pcdata:
3154 text_output(writer, node.value(), ctx_special_pcdata, flags);
3155 if ((flags & format_raw) == 0) writer.write('\n');
3156 break;
3157
3158 case node_cdata:
3159 text_output_cdata(writer, node.value());
3160 if ((flags & format_raw) == 0) writer.write('\n');
3161 break;
3162
3163 case node_comment:
3164 writer.write('<', '!', '-', '-');
3165 writer.write(node.value());
3166 writer.write('-', '-', '>');
3167 if ((flags & format_raw) == 0) writer.write('\n');
3168 break;
3169
3170 case node_pi:
3171 case node_declaration:
3172 writer.write('<', '?');
3173 writer.write(node.name()[0] ? node.name() : default_name);
3174
3175 if (node.type() == node_declaration)
3176 {
3177 node_output_attributes(writer, node, flags);
3178 }
3179 else if (node.value()[0])
3180 {
3181 writer.write(' ');
3182 writer.write(node.value());
3183 }
3184
3185 writer.write('?', '>');
3186 if ((flags & format_raw) == 0) writer.write('\n');
3187 break;
3188
3189 case node_doctype:
3190 writer.write('<', '!', 'D', 'O', 'C');
3191 writer.write('T', 'Y', 'P', 'E');
3192
3193 if (node.value()[0])
3194 {
3195 writer.write(' ');
3196 writer.write(node.value());
3197 }
3198
3199 writer.write('>');
3200 if ((flags & format_raw) == 0) writer.write('\n');
3201 break;
3202
3203 default:
3204 assert(!"Invalid node type");
3205 }
3206 }
3207
3208 inline bool has_declaration(const xml_node& node)
3209 {
3210 for (xml_node child = node.first_child(); child; child = child.next_sibling())
3211 {
3212 xml_node_type type = child.type();
3213
3214 if (type == node_declaration) return true;
3215 if (type == node_element) return false;
3216 }
3217
3218 return false;
3219 }
3220
3221 inline bool allow_insert_child(xml_node_type parent, xml_node_type child)
3222 {
3223 if (parent != node_document && parent != node_element) return false;
3224 if (child == node_document || child == node_null) return false;
3225 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
3226
3227 return true;
3228 }
3229
3230 PUGI__FN void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip)
3231 {
3232 assert(dest.type() == source.type());
3233
3234 switch (source.type())
3235 {
3236 case node_element:
3237 {
3238 dest.set_name(source.name());
3239
3240 for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
3241 dest.append_attribute(a.name()).set_value(a.value());
3242
3243 for (xml_node c = source.first_child(); c; c = c.next_sibling())
3244 {
3245 if (c == skip) continue;
3246
3247 xml_node cc = dest.append_child(c.type());
3248 assert(cc);
3249
3250 recursive_copy_skip(cc, c, skip);
3251 }
3252
3253 break;
3254 }
3255
3256 case node_pcdata:
3257 case node_cdata:
3258 case node_comment:
3259 case node_doctype:
3260 dest.set_value(source.value());
3261 break;
3262
3263 case node_pi:
3264 dest.set_name(source.name());
3265 dest.set_value(source.value());
3266 break;
3267
3268 case node_declaration:
3269 {
3270 dest.set_name(source.name());
3271
3272 for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
3273 dest.append_attribute(a.name()).set_value(a.value());
3274
3275 break;
3276 }
3277
3278 default:
3279 assert(!"Invalid node type");
3280 }
3281 }
3282
3283 inline bool is_text_node(xml_node_struct* node)
3284 {
3285 xml_node_type type = static_cast<xml_node_type>((node->header & impl::xml_memory_page_type_mask) + 1);
3286
3287 return type == node_pcdata || type == node_cdata;
3288 }
3289
3290 // get value with conversion functions
3291 PUGI__FN int get_value_int(const char_t* value, int def)
3292 {
3293 if (!value) return def;
3294
3295 #ifdef PUGIXML_WCHAR_MODE
3296 return static_cast<int>(wcstol(value, 0, 10));
3297 #else
3298 return static_cast<int>(strtol(value, 0, 10));
3299 #endif
3300 }
3301
3302 PUGI__FN unsigned int get_value_uint(const char_t* value, unsigned int def)
3303 {
3304 if (!value) return def;
3305
3306 #ifdef PUGIXML_WCHAR_MODE
3307 return static_cast<unsigned int>(wcstoul(value, 0, 10));
3308 #else
3309 return static_cast<unsigned int>(strtoul(value, 0, 10));
3310 #endif
3311 }
3312
3313 PUGI__FN double get_value_double(const char_t* value, double def)
3314 {
3315 if (!value) return def;
3316
3317 #ifdef PUGIXML_WCHAR_MODE
3318 return wcstod(value, 0);
3319 #else
3320 return strtod(value, 0);
3321 #endif
3322 }
3323
3324 PUGI__FN float get_value_float(const char_t* value, float def)
3325 {
3326 if (!value) return def;
3327
3328 #ifdef PUGIXML_WCHAR_MODE
3329 return static_cast<float>(wcstod(value, 0));
3330 #else
3331 return static_cast<float>(strtod(value, 0));
3332 #endif
3333 }
3334
3335 PUGI__FN bool get_value_bool(const char_t* value, bool def)
3336 {
3337 if (!value) return def;
3338
3339 // only look at first char
3340 char_t first = *value;
3341
3342 // 1*, t* (true), T* (True), y* (yes), Y* (YES)
3343 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
3344 }
3345
3346 // set value with conversion functions
3347 PUGI__FN bool set_value_buffer(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char (&buf)[128])
3348 {
3349 #ifdef PUGIXML_WCHAR_MODE
3350 char_t wbuf[128];
3351 impl::widen_ascii(wbuf, buf);
3352
3353 return strcpy_insitu(dest, header, header_mask, wbuf);
3354 #else
3355 return strcpy_insitu(dest, header, header_mask, buf);
3356 #endif
3357 }
3358
3359 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, int value)
3360 {
3361 char buf[128];
3362 sprintf(buf, "%d", value);
3363
3364 return set_value_buffer(dest, header, header_mask, buf);
3365 }
3366
3367 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned int value)
3368 {
3369 char buf[128];
3370 sprintf(buf, "%u", value);
3371
3372 return set_value_buffer(dest, header, header_mask, buf);
3373 }
3374
3375 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, double value)
3376 {
3377 char buf[128];
3378 sprintf(buf, "%g", value);
3379
3380 return set_value_buffer(dest, header, header_mask, buf);
3381 }
3382
3383 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, bool value)
3384 {
3385 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
3386 }
3387
3388 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
3389 PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
3390 {
3391 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
3392 // there are 64-bit versions of fseek/ftell, let's use them
3393 typedef __int64 length_type;
3394
3395 _fseeki64(file, 0, SEEK_END);
3396 length_type length = _ftelli64(file);
3397 _fseeki64(file, 0, SEEK_SET);
3398 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && !defined(__STRICT_ANSI__)
3399 // there are 64-bit versions of fseek/ftell, let's use them
3400 typedef off64_t length_type;
3401
3402 fseeko64(file, 0, SEEK_END);
3403 length_type length = ftello64(file);
3404 fseeko64(file, 0, SEEK_SET);
3405 #else
3406 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
3407 typedef long length_type;
3408
3409 fseek(file, 0, SEEK_END);
3410 length_type length = ftell(file);
3411 fseek(file, 0, SEEK_SET);
3412 #endif
3413
3414 // check for I/O errors
3415 if (length < 0) return status_io_error;
3416
3417 // check for overflow
3418 size_t result = static_cast<size_t>(length);
3419
3420 if (static_cast<length_type>(result) != length) return status_out_of_memory;
3421
3422 // finalize
3423 out_result = result;
3424
3425 return status_ok;
3426 }
3427
3428 PUGI__FN xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding)
3429 {
3430 if (!file) return make_parse_result(status_file_not_found);
3431
3432 // get file size (can result in I/O errors)
3433 size_t size = 0;
3434 xml_parse_status size_status = get_file_size(file, size);
3435
3436 if (size_status != status_ok)
3437 {
3438 fclose(file);
3439 return make_parse_result(size_status);
3440 }
3441
3442 // allocate buffer for the whole file
3443 char* contents = static_cast<char*>(xml_memory::allocate(size > 0 ? size : 1));
3444
3445 if (!contents)
3446 {
3447 fclose(file);
3448 return make_parse_result(status_out_of_memory);
3449 }
3450
3451 // read file in memory
3452 size_t read_size = fread(contents, 1, size, file);
3453 fclose(file);
3454
3455 if (read_size != size)
3456 {
3457 xml_memory::deallocate(contents);
3458 return make_parse_result(status_io_error);
3459 }
3460
3461 return doc.load_buffer_inplace_own(contents, size, options, encoding);
3462 }
3463
3464#ifndef PUGIXML_NO_STL
3465 template <typename T