summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLeandro Pereira <leandro@profusion.mobi>2012-09-06 20:41:26 +0000
committerLeandro Pereira <leandro@profusion.mobi>2012-09-06 20:41:26 +0000
commitc0a10667c3235bf552fbc43b35884efd7162bb1a (patch)
treec42c50c58fd1701565bfe07a1494d0751c2048da
geneet: Generates eet boilerplate using a simple DSL
SVN revision: 76261
-rw-r--r--AUTHORS4
-rw-r--r--COPYING32
-rw-r--r--README82
-rw-r--r--TODO5
-rwxr-xr-xgeneet.py1115
-rw-r--r--history.geneet18
-rw-r--r--phonebook.geneet32
-rw-r--r--pyparsing.py3707
-rwxr-xr-xsetup.py7
-rw-r--r--test-phonebook.c107
10 files changed, 5109 insertions, 0 deletions
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..d087dbd
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,4 @@
1Bruno Dilly
2Flavio Ceolin
3Eduardo Lima
4Leandro Pereira
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..a7abc65
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,32 @@
1
2Permission is hereby granted, free of charge, to any person obtaining a copy
3of this software and associated documentation files (the "Software"), to
4deal in the Software without restriction, including without limitation the
5rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
6sell copies of the Software, and to permit persons to whom the Software is
7furnished to do so, subject to the following conditions:
8
9The above copyright notice and this permission notice shall be included in
10all copies of the Software and its Copyright notices. In addition publicly
11documented acknowledgment must be given that this software has been used if no
12source code of this software is made available publicly. Making the source
13available publicly means including the source for this software with the
14distribution, or a method to get this software via some reasonable mechanism
15(electronic transfer via a network or media) as well as making an offer to
16supply the source on request. This Copyright notice serves as an offer to
17supply the source on on request as well. Instead of this, supplying
18acknowledgments of use of this software in either Copyright notices, Manuals,
19Publicity and Marketing documents or any documentation provided with any
20product containing this software. This License does not apply to any software
21that links to the libraries provided by this software (statically or
22dynamically), but only to the software provided.
23
24Please see EET's COPYING-PLAIN for a plain-english explanation of this notice
25and its intent.
26
27THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
30THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
31IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README b/README
new file mode 100644
index 0000000..6909899
--- /dev/null
+++ b/README
@@ -0,0 +1,82 @@
1geneet
2Generator for EFL's pickler/unpickler library, EET
3
4
5Introduction
6------------
7
8Given a simple description about a data structure, geneet will
9generate the C source code so it can be serialized/deserialized
10by using the EET library.
11
12Even though it is possible to write code to use EET by hand, it
13can be pretty boring and error-prone. Geneet's structure syntax
14is as simple as it gets and, in the future, it will be possible to
15use it for other languages as well.
16
17Geneet currently supports:
18 - Fundamental types
19 - str
20 - int, uint
21 - float, double
22 - Enumeration
23 - Images
24 - Lists
25 - Hashes
26
27Generated code can:
28 - Initialize/shutdown the EET descriptor structures
29 - Allocate/free a structure
30 - Load/save a structure to a file
31
32
33Geneet Syntax
34-------------
35
36<Structure_Name> {
37 <field_name> : <type> [<type arguments>] [noencode];
38}
39
40- <Structure_Name> and <field_name> can be any valid C identifier;
41- <type> can be one of:
42 - str
43 - int
44 - uint
45 - float
46 - double
47 - list
48 - hash
49 - enum
50- <type arguments> varies depending on the <type> used; it can be either
51 optional, or mandatory, according to the following table:
52
53 Type Optional Argument
54 ---- -------- --------
55 str Yes default "Default value"
56 list No of <Structure_Name>
57 hash No of <Structure_Name> by <field_name>
58 enum No of <NAME1>, <NAME2>, ..., <NAMEN>
59- noencode can be used to create in-memory-only fields; they won't be
60 encoded in the EET file but will be available as structure members
61
62Usage
63-----
64
65Usage is pretty simple; just call geneet with a .geneet file with the
66structure definition you want. Refer to included .geneet files for examples.
67
68Running:
69 $ geneet phonebook.geneet
70
71Will create phonebook.c and phonebook.h. See the included test-phonebook.c
72to see an example of how to use the generated files.
73
74Installing
75----------
76To install the geneet.py script:
77
78 $ python setup.py install
79
80or if a non-standard install directory is wanted:
81
82 $ python setup.py install --prefix=$PREFIX
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..0a859eb
--- /dev/null
+++ b/TODO
@@ -0,0 +1,5 @@
1- Generate code in more than one language
2 - C++?
3 - Python?
4- Nested structures
5- Version checking in structure load/save
diff --git a/geneet.py b/geneet.py
new file mode 100755
index 0000000..31d35b7
--- /dev/null
+++ b/geneet.py
@@ -0,0 +1,1115 @@
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#
4# install pyparsing with: easy_install pyparsing
5#
6# geneet.py
7# Creates all the C boilerplate to use with the EET library.
8#
9# Author: Leandro A. F. Pereira <leandro@profusion.mobi>
10# Copyright (C) 2010 ProFUSION Embedded Systems
11# Licensed under the same terms as EET itself; see COPYING for
12# details.
13#
14
15from pyparsing import *
16import os
17import string
18import sys
19
# Grammar of the .geneet DSL, assembled from pyparsing combinators.
# An identifier is built from letters and underscores only (no digits).
# NOTE: string.letters exists only in Python 2.
20identifier = Word(string.letters + "_")
21digits = Word(string.digits)
# `default "<text>"`: the keyword is suppressed, so only the quoted text is kept.
22default_value = Keyword('default').suppress() + QuotedString(quoteChar = '"', endQuoteChar = '"')
# Comma-separated identifiers; the commas are dropped from the parse results.
23enum = ZeroOrMore(identifier + Literal(',').suppress()) + identifier
# One alternative per supported field type. `+` binds tighter than `|`, so each
# alternative reads "type keyword followed by its arguments"; the `of`, `by`
# and `to` keywords are suppressed and never appear in the results.
# NOTE(review): this module-level name shadows the builtin `type`.
24type = Keyword('str') + Optional(default_value) | \
25 Keyword('int') | \
26 Keyword('uint') | \
27 Keyword('float') | \
28 Keyword('double') | \
29 Keyword('char') | \
30 Keyword('uchar') | \
31 Keyword('enum') + Keyword('of').suppress() + enum | \
32 Keyword('image') + (Keyword('raw') | (Keyword('compress') + digits) | Keyword('lossy') + digits) | \
33 Keyword('list') + Keyword('of').suppress() + identifier | \
34 Keyword('hash') + Keyword('of').suppress() + identifier + Keyword('by').suppress() + identifier | \
35 Keyword('pointer') + Keyword('to').suppress() + identifier
# One field: `<name> : <type> [noencode] ;`. Each field becomes one Group whose
# tokens are [name, type keyword, type arguments..., optional 'noencode'].
36definition = Group(identifier + \
37 Literal(':').suppress() + \
38 type + \
39 Optional(Keyword('noencode')) + \
40 Literal(';').suppress())
# One or more `<Name> { <fields> }` blocks, each optionally followed by `;`.
# Every block is a Group of [name, Group(fields)].
41block = OneOrMore( \
42 Group(identifier + \
43 Literal('{').suppress() + \
44 Group(OneOrMore(definition)) + \
45 Literal('}').suppress()) + \
46 Optional(Keyword(';').suppress()))
# Text matched by pyparsing's cppStyleComment is skipped while parsing blocks.
47block.ignore(cppStyleComment)
48
# Maps the geneet type of a hash key field to the name of the Eina hash
# constructor emitted for that hash (looked up by the allocator and loader
# generators).
49EINA_HASH_FUNCTIONS = {
50 "str" : "eina_hash_stringshared_new",
51 "int" : "eina_hash_int32_new",
52 "enum" : "eina_hash_int32_new",
53 "uint" : "eina_hash_int32_new",
54 "char" : "eina_hash_int32_new",
55 "uchar" : "eina_hash_int32_new",
56 "image" : "eina_hash_pointer_new",
57}
58
# Maps a geneet type keyword to the C type used for struct members,
# constructor arguments and accessor signatures.
59FIELD_CONVERTER = {
60 "str" : "const char *",
61 "int" : "int",
62 "uint" : "unsigned int",
63 "float" : "float",
64 "double" : "double",
65 "list" : "Eina_List *",
66 "hash" : "Eina_Hash *",
67 "char" : "char",
68 "uchar" : "unsigned char",
69 "image" : "Evas_Object *",
70 "enum" : "unsigned int",
71}
72
# Maps a geneet basic type to the EET type constant passed to
# EET_DATA_DESCRIPTOR_ADD_BASIC; enums are stored as EET_T_UINT.
73FIELD_CONVERTER_EET = {
74 "str" : "EET_T_STRING",
75 "int" : "EET_T_INT",
76 "uint" : "EET_T_UINT",
77 "float" : "EET_T_FLOAT",
78 "double" : "EET_T_DOUBLE",
79 "char" : "EET_T_CHAR",
80 "uchar" : "EET_T_UCHAR",
81 "enum" : "EET_T_UINT",
82}
83
# Module-level state shared by the generator functions below.
# __images:   names of structs with an encoded image field
#             (filled by _write_initializers).
# __deps:     (struct name, referenced struct name, field name) tuples
#             (filled by check()).
# __structs:  struct name -> {field name: type keyword}
#             (filled by _write_structs).
# __subtypes: struct name -> {field name: (element type, key field or None)}
#             for hash and list fields (filled by _write_structs).
84__images = set()
85__deps = set()
86__structs = {}
87__subtypes = {}
88
def check(base_name, parsed_block):
    """Validate the parsed blocks and record which blocks reference which.

    Raises NameError when a block name is defined twice, when a list, hash or
    pointer field refers to a block that has not been defined yet, or when a
    block name equals the output base name (compared case-insensitively).
    Every valid reference is stored in the module-level ``__deps`` set as a
    ``(block, referenced block, field name)`` tuple.
    """
    seen = set()
    for block_name, block_fields in parsed_block:
        if block_name in seen:
            raise NameError('Block %s already defined' % block_name)
        seen.add(block_name)

        for entry in block_fields:
            if entry[1] not in ('list', 'hash', 'pointer'):
                continue
            target = entry[2]
            if target not in seen:
                raise NameError('Block %s is not defined' % target)
            __deps.add((block_name, target, entry[0]))

    # The generated files are named after base_name, so no block may reuse it.
    if any(base_name.lower() == known.lower() for known in seen):
        raise NameError('Block %s has the same name as the output file' % base_name)
106
107def _write_headers(header, impl):
108 print >> header, "/* This file has been automatically generated by geneet.py */"
109 print >> header, "/* DO NOT MODIFY */\n"
110 print >> impl, "/* This file has been automatically generated by geneet.py */"
111 print >> impl, "/* DO NOT MODIFY */\n"
112
113def _write_includes(parsed_block, base_name, header, impl):
114 print >> impl, """#include <limits.h>
115#include <stdio.h>
116#include <sys/stat.h>
117#include <sys/types.h>
118#include <unistd.h>
119
120#include "%s"
121""" % header.name
122
123 print >> header, """#ifndef __%s_H__
124#define __%s_H__
125
126#include <Eina.h>
127#include <Eet.h>""" % (
128 base_name.upper(),
129 base_name.upper()
130 )
131
132 if any(field[1] == 'image' for name, fields in parsed_block for field in fields):
133 print >> header, "#include <Evas.h>"
134 print >> header, ""
135
def _write_structs(parsed_block, header, impl):
    """Emit the C struct definitions and the public typedefs.

    For every parsed block this records the field types in ``__structs`` and
    the list/hash element information in ``__subtypes``, writes the
    ``struct _<Name>`` definition to ``impl``, and writes the opaque
    ``typedef struct`` plus one ``typedef enum`` per enum field to ``header``.

    A trailing ``noencode`` token on an enum field is the field flag, not an
    enum value, so it is left out of the generated enum.  The rest of the
    generator already interprets a trailing ``noencode`` that way.
    """
    def emit(out, text):
        # Same output as the Python 2 ``print >> out, text`` statement.
        out.write(text + "\n")

    for name, fields in parsed_block:
        __structs[name] = {}
        __subtypes[name] = {}

        emit(impl, """struct _%s {""" % name)
        for field in fields:
            fname, ftype = field[0], field[1]
            __structs[name][fname] = ftype

            if ftype == 'hash':
                __subtypes[name][fname] = (field[2], field[3])
            elif ftype == 'list':
                __subtypes[name][fname] = (field[2], None)

            if ftype == 'enum':
                emit(impl, " %s_%s %s;" % (name, fname.title(), fname))
            elif ftype == 'pointer':
                # The member is named after the lowercased pointee type.
                emit(impl, " %s *%s;" % (field[2], field[2].lower()))
            else:
                emit(impl, " %s %s;" % (FIELD_CONVERTER[ftype], fname))
                if ftype == 'image':
                    emit(impl, " unsigned int %s__id;" % fname)

        # Only structs that no other struct refers to remember their file name.
        if not any(dep[1] == name for dep in __deps):
            emit(impl, " const char *__eet_filename;")

        emit(impl, """};\n""")

        emit(header, "typedef struct _%s %s;" % (name, name))

        for field in fields:
            if field[1] != 'enum':
                continue
            values = list(field[2:])
            # Drop the field flag; an enum always keeps at least one value.
            if len(values) > 1 and values[-1] == 'noencode':
                values.pop()
            prefix = "%s_%s" % (name.upper(), field[0].upper())
            emit(header, "typedef enum {")
            for index, value in enumerate(values):
                first = " = 0" if index == 0 else ""
                emit(header, " %s_%s%s," % (prefix, value.upper(), first))
            emit(header, " %s_LAST_VALUE" % prefix)
            emit(header, "} %s_%s;" % (name, field[0].title()))
173
174def _write_reprs(parsed_block, header, impl):
175 have_reprs = False
176 for name, fields in parsed_block:
177 for field in fields:
178 if field[1] == 'enum':
179 print >> impl, "static const char *%s_%s_REPR[] = { %s, NULL };" % (
180 name.upper(), field[0].upper(),
181 ", ".join('"%s"' % f for f in field[2:])
182 )
183 have_reprs = True
184 if have_reprs:
185 print >> impl, ""
186
187def _write_entry_names(parsed_block, header, impl):
188 for name, fields in parsed_block:
189 print >> impl, """static const char %s_ENTRY[] = "%s";""" % (name.upper(), name.lower())
190 print >> impl, ""
191
192def _write_descriptors(parsed_block, header, impl):
193 for name, fields in parsed_block:
194 print >> impl, """static Eet_Data_Descriptor *_%s_descriptor = NULL;""" % (name.lower())
195
196def _write_initializers(name, fields, header, impl):
197 print >> impl, """\nstatic inline void
198_%s_init(void)
199{
200 Eet_Data_Descriptor_Class eddc;
201
202 if (_%s_descriptor) return;
203
204 EET_EINA_STREAM_DATA_DESCRIPTOR_CLASS_SET(&eddc, %s);
205 _%s_descriptor = eet_data_descriptor_stream_new(&eddc);
206""" % (name.lower(), name.lower(), name, name.lower())
207
208 for field in fields:
209 if field[-1] == 'noencode':
210 continue
211
212 if field[1] == 'list':
213 print >> impl, """ EET_DATA_DESCRIPTOR_ADD_LIST(_%s_descriptor, %s, "%s", %s, _%s_descriptor);""" % (
214 name.lower(),
215 name,
216 field[0],
217 field[0],
218 field[2].lower()
219 )
220 elif field[1] == 'hash':
221 print >> impl, """ EET_DATA_DESCRIPTOR_ADD_HASH(_%s_descriptor, %s, "%s", %s, _%s_descriptor);""" % (
222 name.lower(),
223 name,
224 field[0],
225 field[0],
226 field[2].lower()
227 )
228 elif field[1] == 'image':
229 __images.add(name)
230 print >> impl, """ EET_DATA_DESCRIPTOR_ADD_BASIC(_%s_descriptor, %s, "%s", %s, EET_T_UINT);""" % (
231 name.lower(),
232 name,
233 field[0] + '__id',
234 field[0] + '__id'
235 )
236 elif field[1] == 'pointer':
237 print >> impl, """ EET_DATA_DESCRIPTOR_ADD_SUB(_%s_descriptor, %s, "%s", %s, _%s_descriptor);""" % (
238 name.lower(),
239 name,
240 field[0],
241 field[0],
242 field[2].lower()
243 )
244 else:
245 type = FIELD_CONVERTER_EET[field[1]]
246 print >> impl, """ EET_DATA_DESCRIPTOR_ADD_BASIC(_%s_descriptor, %s, "%s", %s, %s);""" % (
247 name.lower(),
248 name,
249 field[0],
250 field[0],
251 type
252 )
253 print >> impl, """}\n"""
254
255def _write_shutdowns(name, fields, header, impl):
256 print >> impl, """static inline void
257_%s_shutdown(void)
258{
259 if (!_%s_descriptor) return;
260 eet_data_descriptor_free(_%s_descriptor);
261 _%s_descriptor = NULL;
262}\n""" % (name.lower(), name.lower(), name.lower(), name.lower())
263
def _write_allocators(name, fields, header, impl):
    """Emit the ``<name>_new()`` constructor prototype and definition.

    Every field except hashes becomes a constructor argument.  The generated
    function allocates the struct with calloc, copies the arguments into it
    (strings go through eina_stringshare_add) and creates an empty Eina hash
    for each hash field whose key field is known in ``__structs``; other hash
    fields start as NULL.

    A str field has a default value only when a token other than the trailing
    ``noencode`` flag follows the type keyword.  Checking the token count
    alone would turn the flag itself into the default string.
    NOTE(review): parse results are flat, so ``default "noencode"`` without
    the flag cannot be told apart from the flag and is read as the flag.
    """
    def emit(out, text):
        # Same output as the Python 2 ``print >> out, text`` statement.
        out.write(text + "\n")

    lname = name.lower()

    # Build the C parameter list; hash fields are never constructor arguments.
    arg_fields = []
    for field in fields:
        fname, ftype = field[0], field[1]
        if ftype == 'hash':
            continue
        if ftype == 'image':
            arg_fields.append('Evas_Object * %s' % fname)
        elif ftype == 'enum':
            arg_fields.append('%s_%s %s' % (name, fname.title(), fname))
        elif ftype == 'pointer':
            arg_fields.append('%s *%s' % (field[2], field[2].lower()))
        else:
            arg_fields.append('%s %s' % (FIELD_CONVERTER[ftype], fname))
    args = ', '.join(arg_fields)

    emit(header, """%s *%s_new(%s);""" % (name, lname, args))
    emit(impl, """%s *
%s_new(%s)
{
 %s *%s = calloc(1, sizeof(%s));

 if (!%s)
 {
 fprintf(stderr, "ERROR: could not calloc %s\\n");
 return NULL;
 }
""" % (name, lname, args, name, lname, name, lname, name))

    for field in fields:
        fname, ftype = field[0], field[1]
        if ftype == 'str':
            has_default = len(field) > 3 or (len(field) == 3 and field[2] != 'noencode')
            if has_default:
                emit(impl, """ %s->%s = eina_stringshare_add(%s ? %s : "%s");""" % (
                    lname, fname, fname, fname, field[2]))
            else:
                emit(impl, """ %s->%s = eina_stringshare_add(%s);""" % (
                    lname, fname, fname))
        elif ftype == 'hash':
            hashed_type = field[2]
            hashed_field = field[3]

            if hashed_field in __structs[hashed_type]:
                # The hash constructor depends on the type of the key field.
                emit(impl, """ %s->%s = %s(EINA_FREE_CB(%s_free));""" % (
                    lname,
                    fname,
                    EINA_HASH_FUNCTIONS[__structs[hashed_type][hashed_field]],
                    hashed_type.lower()))
            else:
                emit(impl, """ %s->%s = NULL;""" % (lname, fname))
        elif ftype == 'pointer':
            member = field[2].lower()
            emit(impl, """ %s->%s = %s;""" % (lname, member, member))
        else:
            # Lists, images, enums and basic types are copied as given.
            emit(impl, """ %s->%s = %s;""" % (lname, fname, fname))
            if ftype == 'image':
                emit(impl, """ %s->%s__id = 0;""" % (lname, fname))
    emit(impl, "\n return %s;\n}\n" % lname)
368
369def _write_deallocators(name, fields, header, impl):
# Emits the prototype (to `header`) and the definition (to `impl`) of the C
# destructor `<name>_free()`. When the struct has at least one pointer field
# the destructor takes an extra `Eina_Bool free_contents` argument, which
# decides whether the pointed-to structs are freed as well.
370 has_pointer = any(field[1] == 'pointer' for field in fields)
371 if has_pointer:
372 print >> header, """void %s_free(%s *%s, Eina_Bool free_contents);""" % (name.lower(), name, name.lower())
373 print >> impl, """void
374%s_free(%s *%s, Eina_Bool free_contents)
375{""" % (
376 name.lower(),
377 name,
378 name.lower()
379 )
380 else:
381 print >> header, """void %s_free(%s *%s);""" % (name.lower(), name, name.lower())
382 print >> impl, """void
383%s_free(%s *%s)
384{""" % (
385 name.lower(),
386 name,
387 name.lower()
388 )
389
# One release statement per field that owns a resource.
# NOTE(review): indentation is not recoverable in this view; the 'list',
# 'image' and 'hash' branches presumably continue the same if/elif chain as
# 'str' and 'pointer' -- confirm against the real file.
390 for field in fields:
391 if isinstance(field[1], str):
392 if field[1] == 'str':
393 print >> impl, """ eina_stringshare_del(%s->%s);""" % (
394 name.lower(),
395 field[0]
396 )
397 elif field[1] == 'pointer':
# The generator variable below reuses the name `field`; `__structs[field[2]]`
# is the outermost iterable, so it is evaluated with the loop's `field`
# before the inner name takes effect. The test asks whether the pointee
# struct itself has pointer fields, i.e. whether its _free() takes
# `free_contents` too.
398 if any(field == 'pointer' for field in __structs[field[2]].values()):
399 print >> impl, """ if (free_contents) %s_free(%s->%s, free_contents);""" % (field[2].lower(), name.lower(), field[2].lower())
400 else:
401 print >> impl, """ if (free_contents) %s_free(%s->%s);""" % (field[2].lower(), name.lower(), field[2].lower())
# NOTE(review): the list branch always emits a one-argument <type>_free()
# call, while a struct with pointer fields gets the two-argument signature
# emitted above -- check list element types that contain pointers.
402 elif field[1] == 'list':
403 print >> impl, """ if (%s->%s)
404 {
405 %s *%s_elem;
406 EINA_LIST_FREE(%s->%s, %s_elem)
407 %s_free(%s_elem);
408 }""" % (
409 name.lower(),
410 field[0],
411 field[2],
412 field[0],
413 name.lower(),
414 field[0],
415 field[0],
416 field[2].lower(),
417 field[0]
418 )
419 elif field[1] == 'image':
420 print >> impl, """ if (%s->%s) evas_object_del(%s->%s);""" %(
421 name.lower(),
422 field[0],
423 name.lower(),
424 field[0]
425 )
426 elif field[1] == 'hash':
427 print >> impl, """ if (%s->%s) eina_hash_free(%s->%s);""" %(
428 name.lower(),
429 field[0],
430 name.lower(),
431 field[0]
432 )
433
# Finally release the struct itself and close the C function body.
434 print >> impl, " free(%s);\n}\n" % name.lower()
435
def _write_loaders(name, fields, header, impl):
    """Emit the ``<name>_load()`` prototype and definition for one struct.

    The generated function opens the EET file read-only, reads the struct
    through its descriptor, remembers the file name in ``__eet_filename`` and
    then repairs what EET does not restore: missing hashes are created, also
    inside pointed-to structs, and encoded images are loaded into new Evas
    image objects.  An ``Evas *evas`` parameter is added when the struct has
    an encoded image field.  Fields whose last token is ``noencode`` are
    ignored.

    Pointed-to structs are reached through the member named after the
    lowercased pointee type, which is how the struct definition names that
    member.  The generated code only touches the sub-struct when that pointer
    is non-NULL.
    """
    def emit(out, text):
        # Same output as the Python 2 ``print >> out, text`` statement.
        out.write(text + "\n")

    lname = name.lower()
    encoded = [field for field in fields if field[-1] != 'noencode']

    extra = ''
    if any(field[1] == 'image' for field in encoded):
        extra = 'Evas *evas, '

    emit(header, """%s *%s_load(%sconst char *filename);""" % (name, lname, extra))
    emit(impl, """%s *
%s_load(%sconst char *filename)
{
 %s *%s = NULL;
 Eet_File *ef = eet_open(filename, EET_FILE_MODE_READ);
 if (!ef)
 {
 fprintf(stderr, "ERROR: could not open '%%s' for read\\n", filename);
 return NULL;
 }

 %s = eet_data_read(ef, _%s_descriptor, %s_ENTRY);
 if (!%s) goto end;""" % (
        name, lname, extra,
        name, lname,
        lname, lname, name.upper(),
        lname))

    emit(impl, " %s->__eet_filename = eina_stringshare_add(filename);" % lname)

    for field in encoded:
        fname, ftype = field[0], field[1]
        if ftype == 'pointer':
            # Must match the member name emitted in the struct definition.
            member = field[2].lower()
            for pointee_field, pointee_type in __structs[field[2]].items():
                if pointee_type != 'hash':
                    continue
                hashed_type, hashed_field = __subtypes[field[2]][pointee_field]
                allocator = "%s(EINA_FREE_CB(%s_free))" % (
                    EINA_HASH_FUNCTIONS[__structs[hashed_type][hashed_field]],
                    hashed_type.lower()
                )
                emit(impl, """ if (%(pname)s->%(bname)s && !%(pname)s->%(bname)s->%(fname)s) %(pname)s->%(bname)s->%(fname)s = %(allocator)s;""" % {
                    "bname": member,
                    "fname": pointee_field,
                    "pname": lname,
                    "allocator": allocator
                })
        elif ftype == 'hash':
            hashed_type = field[2]
            hashed_field = field[3]

            if hashed_field in __structs[hashed_type]:
                allocator = "%s(EINA_FREE_CB(%s_free))" % (
                    EINA_HASH_FUNCTIONS[__structs[hashed_type][hashed_field]],
                    hashed_type.lower()
                )
                emit(impl, """\n if (!%(bname)s->%(fname)s) %(bname)s->%(fname)s = %(allocator)s;""" % {
                    "bname": lname,
                    "fname": fname,
                    "allocator": allocator
                })
        elif ftype == 'image':
            # "perc" inserts a literal % so the C code gets a "%d" conversion.
            emit(impl, """\n if (%(bname)s->%(fname)s__id)
 {
 char %(fname)s_buf[256];
 unsigned int %(fname)s_w, %(fname)s_h;
 int %(fname)s_alpha, %(fname)s_compress, %(fname)s_quality, %(fname)s_lossy;
 void *%(fname)s_data;
 sprintf(%(fname)s_buf, "/image/%(perc)sd", %(bname)s->%(fname)s__id);
 %(fname)s_data = eet_data_image_read(ef, %(fname)s_buf, &%(fname)s_w, &%(fname)s_h, &%(fname)s_alpha, &%(fname)s_compress, &%(fname)s_quality, &%(fname)s_lossy);
 if (%(fname)s_data)
 {
 %(bname)s->%(fname)s = evas_object_image_add(evas);
 evas_object_image_size_set(%(bname)s->%(fname)s, %(fname)s_w, %(fname)s_h);
 evas_object_image_alpha_set(%(bname)s->%(fname)s, %(fname)s_alpha);
 evas_object_image_data_set(%(bname)s->%(fname)s, %(fname)s_data);
 }
 }""" % {
                "bname": lname,
                "fname": fname,
                "perc": "%"
            })

    emit(impl, "\nend:")
    emit(impl, " eet_close(ef);")
    emit(impl, " return %s;\n}\n" % lname)
528
def _write_dumpers(name, fields, header, impl):
    """Emit the ``<name>_save()`` prototype and definition for one struct.

    The generated function resolves the target file name (the argument, or
    the name remembered in ``__eet_filename``), opens the file read-write,
    writes the images of the struct and of its list members, and finally
    writes the struct itself through its EET descriptor.

    Only dependencies that belong to ``name`` are considered.  ``__deps``
    holds the references of every struct, and looking up another struct's
    field in ``__structs[name]`` raises KeyError.

    NOTE(review): the emitted C assigns a variable ``i`` that this function
    never declares (only ``ef`` and ``ret`` are declared) -- confirm where
    the declaration is expected to come from.
    """
    def emit(out, text):
        # Same output as the Python 2 ``print >> out, text`` statement.
        out.write(text + "\n")

    lname = name.lower()
    emit(header, """Eina_Bool %s_save(%s *%s, const char *filename);""" % (lname, name, lname))
    emit(impl, """Eina_Bool
%s_save(%s *%s, const char *filename)
{
 Eet_File *ef;
 Eina_Bool ret;

 if (filename) eina_stringshare_replace(&(%s->__eet_filename), filename);
 else if (%s->__eet_filename) filename = %s->__eet_filename;
 else return EINA_FALSE;

 ef = eet_open(filename, EET_FILE_MODE_READ_WRITE);
 if (!ef)
 {
 fprintf(stderr, "ERROR: could not open '%%s' for write\\n", filename);
 return EINA_FALSE;
 }
""" % (lname, name, lname, lname, lname, lname))

    has_images = any(field[-1] != 'noencode' and field[1] == 'image' for field in fields)
    # (referenced struct, field name) pairs owned by this struct whose target
    # struct carries images.
    image_deps = [
        (dep, depee)
        for struct, dep, depee in __deps
        if struct == name and dep in __images
    ]

    if has_images or image_deps:
        emit(impl, " i = 1;")

    if has_images:
        emit(impl, " i = _write_%s_images(%s, ef, i);" % (lname, lname))

    for dep, depee in image_deps:
        ldep = dep.lower()
        if __structs[name][depee] == 'list':
            emit(impl, """ if (%s->%s)
 {
 %s *%s;
 Eina_List *%s_list;
 EINA_LIST_FOREACH(%s->%s, %s_list, %s)
 i = _write_%s_images(%s, ef, i);
 }""" % (
                lname, depee,
                dep, ldep,
                ldep,
                lname, depee, ldep, ldep,
                ldep, ldep
            ))
        # Images inside hash members are not written (left unimplemented
        # upstream).

    emit(impl, """ ret = !!eet_data_write(ef, _%s_descriptor, %s_ENTRY, %s, EINA_TRUE);
 eet_close(ef);

 return ret;
}""" % (lname, name.upper(), lname))
597
598def _write_getters_setters(name, fields, header, impl):
# Emits the public C accessors of one struct: prototypes go to `header`,
# definitions to `impl`. The set of accessors depends on the field type; every
# format template below is filled positionally, so the order of the arguments
# in each tuple must follow the order of the %s placeholders exactly.
599 for field in fields:
# pointer: <struct>_<pointee>_set/get. Both the accessor suffix and the struct
# member are the lowercased pointee type name (field[2]), not the field name.
600 if field[1] == 'pointer':
601 print >> header, """void %s_%s_set(%s *%s, %s *%s);
602%s *%s_%s_get(%s *%s);""" % (
603 name.lower(), field[2].lower(), name, name.lower(), field[2], field[2].lower(),
604 field[2], name.lower(), field[2].lower(), name, name.lower()
605 )
606 print >> impl, """void %s_%s_set(%s *%s, %s *%s)
607{
608 EINA_SAFETY_ON_NULL_RETURN(%s);
609 %s->%s = %s;
610}
611
612%s *%s_%s_get(%s *%s)
613{
614 EINA_SAFETY_ON_NULL_RETURN_VAL(%s, NULL);
615 return %s->%s;
616}
617""" % (
618 name.lower(), field[2].lower(), name, name.lower(), field[2], field[2].lower(),
619 name.lower(),
620 name.lower(), field[2].lower(), field[2].lower(),
621 field[2], name.lower(), field[2].lower(), name, name.lower(),
622 name.lower(),
623 name.lower(), field[2].lower()
624 )
# list: _add, _del, _get(nth), _count, _list_get, _list_clear and _list_set,
# all wrapping the corresponding Eina_List calls on the member.
625 elif field[1] == 'list':
626 print >> header, """void %s_%s_add(%s *%s, %s *%s);
627void %s_%s_del(%s *%s, %s *%s);
628%s *%s_%s_get(const %s *%s, unsigned int nth);
629unsigned int %s_%s_count(const %s *%s);
630Eina_List *%s_%s_list_get(const %s *%s);
631void %s_%s_list_clear(%s *%s);
632void %s_%s_list_set(%s *%s, Eina_List *list);""" % (
633 name.lower(), field[0], name, name.lower(), field[2],
634 field[2].lower(), name.lower(), field[0], name,
635 name.lower(), field[2], field[2].lower(), field[2],
636 name.lower(), field[0], name, name.lower(),
637 name.lower(), field[0], name, name.lower(),
638 name.lower(), field[0], name, name.lower(),
639 name.lower(), field[0], name, name.lower(),
640 name.lower(), field[0], name, name.lower()
641 )
642 print >> impl, """inline void
643%s_%s_add(%s *%s, %s *%s)
644{
645 EINA_SAFETY_ON_NULL_RETURN(%s);
646 %s->%s = eina_list_append(%s->%s, %s);
647}
648""" % (
649 name.lower(), field[0], name, name.lower(),
650 field[2], field[2].lower(), name.lower(),
651 name.lower(), field[0], name.lower(),
652 field[0], field[2].lower()
653 )
654
655 print >> impl, """inline void
656%s_%s_del(%s *%s, %s *%s)
657{
658 EINA_SAFETY_ON_NULL_RETURN(%s);
659 %s->%s = eina_list_remove(%s->%s, %s);
660}
661""" % (
662 name.lower(), field[0], name, name.lower(), field[2],
663 field[2].lower(), name.lower(), name.lower(), field[0],
664 name.lower(), field[0], field[2].lower()
665 )
666
667 print >> impl, """inline %s *
668%s_%s_get(const %s *%s, unsigned int nth)
669{
670 EINA_SAFETY_ON_NULL_RETURN_VAL(%s, NULL);
671 return eina_list_nth(%s->%s, nth);
672}
673""" % (
674 field[2], name.lower(), field[0], name, name.lower(),
675 name.lower(), name.lower(), field[0]
676 )
677
678 print >> impl, """inline unsigned int
679%s_%s_count(const %s *%s)
680{
681 EINA_SAFETY_ON_NULL_RETURN_VAL(%s, 0);
682 return eina_list_count(%s->%s);
683}
684""" % (
685 name.lower(), field[0], name, name.lower(),
686 name.lower(),
687 name.lower(), field[0]
688 )
689
690 print >> impl, """void
691%s_%s_list_clear(%s *%s)
692{
693 EINA_SAFETY_ON_NULL_RETURN(%s);
694 %s *data;
695 EINA_LIST_FREE(%s->%s, data) %s_free(data);
696}
697""" % (
698 name.lower(), field[0], name, name.lower(),
699 name.lower(),
700 field[2],
701 name.lower(), field[0], field[2].lower(),
702 )
703
704 print >> impl, """inline Eina_List *
705%s_%s_list_get(const %s *%s)
706{
707 EINA_SAFETY_ON_NULL_RETURN_VAL(%s, NULL);
708 return %s->%s;
709}
710""" % (
711 name.lower(), field[0], name, name.lower(),
712 name.lower(),
713 name.lower(), field[0]
714 )
715 print >> impl, """inline void
716%s_%s_list_set(%s *%s, Eina_List *list)
717{
718 EINA_SAFETY_ON_NULL_RETURN(%s);
719 %s->%s = list;
720}
721""" % (
722 name.lower(), field[0], name, name.lower(),
723 name.lower(),
724 name.lower(), field[0]
725 )
# hash: _add, _del, _get, _hash_get and _modify. The C type of the key is the
# type of the key field (field[3]) of the stored struct (field[2]), looked up
# in __structs, so that struct must already be registered there.
726 elif field[1] == 'hash':
727 hashed_type = field[2]
728 hashed_field = field[3]
729
730 print >> header, """void %s_%s_add(%s *%s, %s %s, %s *%s);
731void %s_%s_del(%s *%s, %s %s);
732%s *%s_%s_get(const %s *%s, %s key);
733Eina_Hash *%s_%s_hash_get(const %s *%s);
734void %s_%s_modify(%s *%s, %s key, void *value);""" % (
735 name.lower(), field[0], name, name.lower(),
736 FIELD_CONVERTER[__structs[hashed_type][hashed_field]], hashed_field,
737 hashed_type, hashed_type.lower(), name.lower(), field[0], name,
738 name.lower(), FIELD_CONVERTER[__structs[hashed_type][hashed_field]], hashed_field,
739 hashed_type, name.lower(), field[0], name, name.lower(), FIELD_CONVERTER[__structs[hashed_type][hashed_field]],
740 name.lower(), field[0], name, name.lower(),
741 name.lower(), field[0], name, name.lower(), FIELD_CONVERTER[__structs[hashed_type][hashed_field]]
742 )
743 print >> impl, """void
744%s_%s_add(%s *%s, %s %s, %s *%s)
745{
746 EINA_SAFETY_ON_NULL_RETURN(%s);
747 eina_hash_add(%s->%s, %s, %s);
748}
749""" % (
750 name.lower(), field[0], name, name.lower(),
751 FIELD_CONVERTER[__structs[hashed_type][hashed_field]], hashed_field,
752 hashed_type, hashed_type.lower(), name.lower(), name.lower(), field[0],
753 hashed_field, hashed_type.lower()
754 )
755
756 print >> impl, """void
757%s_%s_del(%s *%s, %s %s)
758{
759 EINA_SAFETY_ON_NULL_RETURN(%s);
760 eina_hash_del(%s->%s, %s, NULL);
761}
762""" % (
763 name.lower(), field[0], name, name.lower(),
764 FIELD_CONVERTER[__structs[hashed_type][hashed_field]], hashed_field,
765 name.lower(), name.lower(), field[0], hashed_field
766 )
767
768 print >> impl, """inline %s *
769%s_%s_get(const %s *%s, %s %s)
770{
771 EINA_SAFETY_ON_NULL_RETURN_VAL(%s, NULL);
772 return eina_hash_find(%s->%s, %s);
773}
774""" % (
775 hashed_type, name.lower(), field[0], name, name.lower(),
776 FIELD_CONVERTER[__structs[hashed_type][hashed_field]], hashed_field,
777 name.lower(), name.lower(), field[0], hashed_field
778 )
779
780 print >> impl, """inline Eina_Hash *
781%s_%s_hash_get(const %s *%s)
782{
783 EINA_SAFETY_ON_NULL_RETURN_VAL(%s, NULL);
784 return %s->%s;
785}
786""" % (
787 name.lower(), field[0], name, name.lower(),
788 name.lower(),
789 name.lower(), field[0]
790 )
791
792 print >> impl, """void
793%s_%s_modify(%s *%s, %s key, void *value)
794{
795 EINA_SAFETY_ON_NULL_RETURN(%s);
796 eina_hash_modify(%s->%s, key, value);
797}
798""" % (
799 name.lower(), field[0], name, name.lower(), FIELD_CONVERTER[__structs[hashed_type][hashed_field]],
800 name.lower(),
801 name.lower(), field[0]
802 )
803
# image: _set deletes the current Evas object, resets <field>__id to 0 and
# stores the new object; _get returns the object, first calling
# _load_<name>_images() when none is set yet.
804 elif field[1] == 'image':
805 print >> header, """void %s_%s_set(%s *%s, Evas_Object *%s);
806Evas_Object *%s_%s_get(const %s *%s, Evas *evas, const char *eet_file);""" % (
807 name.lower(), field[0], name, name.lower(), field[0],
808 name.lower(), field[0], name, name.lower()
809 )
810 print >> impl, """void
811%s_%s_set(%s *%s, Evas_Object *%s)
812{
813 EINA_SAFETY_ON_NULL_RETURN(%s);
814 if (%s->%s) evas_object_del(%s->%s);
815 %s->%s__id = 0;
816 %s->%s = %s;
817}
818""" % (
819 name.lower(), field[0], name, name.lower(), field[0],
820 name.lower(),
821 name.lower(), field[0], name.lower(), field[0],
822 name.lower(), field[0],
823 name.lower(), field[0], field[0]
824 )
825 print >> impl, """Evas_Object *
826%s_%s_get(const %s *%s, Evas *evas, const char *eet_file)
827{
828 EINA_SAFETY_ON_NULL_RETURN_VAL(%s, NULL);
829 if (%s->%s) return %s->%s;
830 _load_%s_images(%s, evas, eet_file);
831 return %s->%s;
832}
833""" % (
834 name.lower(), field[0], name, name.lower(),
835 name.lower(),
836 name.lower(), field[0], name.lower(), field[0],
837 name.lower(), name.lower(),
838 name.lower(), field[0]
839 )
# enum: _set (ignores values >= <STRUCT>_<FIELD>_LAST_VALUE), _get, _repr_get
# (name of the stored value) and _str_get (name of an arbitrary value), the
# last two reading the <STRUCT>_<FIELD>_REPR table.
# NOTE(review): the emitted _get and _repr_get bodies contain no NULL check,
# and _repr_get indexes the table without a range check.
840 elif field[1] == 'enum':
841 print >> header, """void %s_%s_set(%s *%s, %s %s);
842%s %s_%s_get(const %s *%s);
843const char * %s_%s_repr_get(const %s *%s);
844const char * %s_%s_str_get(%s %s);""" % (
845 name.lower(), field[0], name, name.lower(), name + '_' + field[0].title(),
846 field[0], name + '_' + field[0].title(), name.lower(), field[0],
847 name, name.lower(),
848 name.lower(), field[0], name, name.lower(),
849 name.lower(), field[0], name + '_' + field[0].title(), field[0]
850 )
851 print >> impl, """inline %s
852%s_%s_get(const %s *%s)
853{
854 return %s->%s;
855}
856""" % (
857 name + '_' + field[0].title(),
858 name.lower(), field[0],
859 name, name.lower(),
860 name.lower(), field[0]
861 )
862 print >> impl, """inline const char *
863%s_%s_repr_get(const %s *%s)
864{
865 return %s_%s_REPR[%s->%s];
866}
867""" % (
868 name.lower(), field[0],
869 name, name.lower(),
870 name.upper(), field[0].upper(),
871 name.lower(), field[0]
872 )
873
874 print >> impl, """inline void
875%s_%s_set(%s *%s, %s %s)
876{
877 EINA_SAFETY_ON_NULL_RETURN(%s);
878 if (%s >= %s_%s_LAST_VALUE) return;
879 %s->%s = %s;
880}
881""" % (
882 name.lower(), field[0], name, name.lower(),
883 name + '_' + field[0].title(), field[0],
884 name.lower(),
885 field[0], name.upper(), field[0].upper(),
886 name.lower(), field[0], field[0]
887 )
888
889 print >> impl, """inline const char *
890%s_%s_str_get(%s %s)
891{
892 if (%s >= %s_%s_LAST_VALUE) return NULL;
893 return %s_%s_REPR[%s];
894}
895""" % (
896 name.lower(), field[0],
897 name + '_' + field[0].title(), field[0],
898 field[0], name.upper(), field[0].upper(),
899 name.upper(), field[0].upper(), field[0]
900 )
# Every other type (str and the basic types): plain _set/_get using the C type
# from FIELD_CONVERTER. Strings are replaced through eina_stringshare_replace;
# other values are assigned directly.
901 else:
902 print >> header, """void %s_%s_set(%s *%s, %s %s);
903%s %s_%s_get(const %s *%s);""" % (
904 name.lower(), field[0], name, name.lower(), FIELD_CONVERTER[field[1]],
905 field[0], FIELD_CONVERTER[field[1]], name.lower(), field[0], name, name.lower(),
906 )
907
908 print >> impl, """inline %s
909%s_%s_get(const %s *%s)
910{
911 return %s->%s;
912}
913""" % (
914 FIELD_CONVERTER[field[1]],
915 name.lower(), field[0],
916 name, name.lower(),
917 name.lower(), field[0]
918 )
919 if field[1] == 'str':
920 print >> impl, """inline void
921%s_%s_set(%s *%s, const char *%s)
922{
923 EINA_SAFETY_ON_NULL_RETURN(%s);
924 eina_stringshare_replace(&(%s->%s), %s);
925}
926""" % (
927 name.lower(), field[0], name, name.lower(), field[0],
928 name.lower(),
929 name.lower(), field[0], field[0]
930 )
931 else:
932 print >> impl, """inline void
933%s_%s_set(%s *%s, %s %s)
934{
935 EINA_SAFETY_ON_NULL_RETURN(%s);
936 %s->%s = %s;
937}
938""" % (
939 name.lower(), field[0], name, name.lower(), FIELD_CONVERTER[field[1]], field[0],
940 name.lower(),
941 name.lower(), field[0], field[0]
942 )
943
# Emit a static C helper, `_load_<name>_images()`, into the implementation
# file.  Nothing is written unless at least one field is declared as 'image'.
#   name   -- struct name as written in the .geneet source
#   fields -- parsed field descriptions; field[0] is the field name and
#             field[1] is its type keyword
#   impl   -- file object of the generated .c file
944def _write_image_loaders(name, fields, impl):
# Structs without image fields do not get a loader at all.
945 if not any(field[1] == 'image' for field in fields):
946 return
947
# Function prologue: the generated C opens the eet file read-only and bails
# out with a message on failure.  '%%s' and '\\n' are escaped so the generated
# C source receives a literal '%s' and '\n'.
948 print >> impl, """static void
949_load_%s_images(%s *%s, Evas *evas, const char *filename)
950{
951 Eet_File *ef = eet_open(filename, EET_FILE_MODE_READ);
952 if (!ef)
953 {
954 fprintf(stderr, "ERROR: could not open '%%s' for read\\n", filename);
955 return;
956 }""" % (
957 name.lower(), name, name.lower()
958 )
959
# One block per image field: when the stored `<field>__id` is non-zero, the
# generated C reads the "/image/<id>" entry and, if data comes back, creates
# an Evas image object from it.  The "perc" key injects a literal '%' so the
# C code keeps its own "%d" conversion.
960 for field in (f for f in fields if f[1] == 'image'):
961 print >> impl, """\n if (%(bname)s->%(fname)s__id)
962 {
963 char %(fname)s_buf[256];
964 unsigned int %(fname)s_w, %(fname)s_h;
965 int %(fname)s_alpha, %(fname)s_compress, %(fname)s_quality, %(fname)s_lossy;
966 void *%(fname)s_data;
967 sprintf(%(fname)s_buf, "/image/%(perc)sd", %(bname)s->%(fname)s__id);
968 %(fname)s_data = eet_data_image_read(ef, %(fname)s_buf, &%(fname)s_w, &%(fname)s_h, &%(fname)s_alpha, &%(fname)s_compress, &%(fname)s_quality, &%(fname)s_lossy);
969 if (%(fname)s_data)
970 {
971 %(bname)s->%(fname)s = evas_object_image_add(evas);
972 evas_object_image_size_set(%(bname)s->%(fname)s, %(fname)s_w, %(fname)s_h);
973 evas_object_image_alpha_set(%(bname)s->%(fname)s, %(fname)s_alpha);
974 evas_object_image_data_set(%(bname)s->%(fname)s, %(fname)s_data);
975 }
976 }""" % {
977 "bname": name.lower(),
978 "fname": field[0],
979 "perc": "%"
980 }
981
# Function epilogue: close the eet file and end the generated function.
982 print >> impl, "\n eet_close(ef);\n}\n"
983
# Emit a static C helper, `_write_<name>_images()`, into the implementation
# file.  The generated function stores every image field into an open
# Eet_File and returns the next unused image id.  Nothing is written unless
# at least one field is declared as 'image'.
#   name   -- struct name as written in the .geneet source
#   fields -- parsed field descriptions; field[0] is the field name, field[1]
#             the type keyword, field[2]/field[3] the image encoding options
#   impl   -- file object of the generated .c file
984def _write_image_savers(name, fields, impl):
985 if not any(field[1] == 'image' for field in fields):
986 return
987
# Function prologue of the generated C code.
988 print >> impl, """static int
989_write_%s_images(%s *%s, Eet_File *ef, int image_id)
990{""" % (
991 name.lower(), name, name.lower()
992 )
993
994 for field in fields:
# Fields whose last element is 'noencode' are never written out.
995 if field[-1] == 'noencode':
996 continue
997
998 if field[1] == 'image':
# Translate the encoding keyword (field[2]) into the three values passed to
# eet_data_image_write(): "raw" disables compression, "lossy" enables lossy
# encoding, anything else is treated as lossless compression.  For the two
# non-raw modes the numeric option in field[3] is used as the quality.
999 if field[2] == "raw":
1000 compress = False
1001 lossy = False
1002 quality = 100
1003 elif field[2] == "lossy":
1004 compress = True
1005 lossy = True
1006 quality = int(field[3])
1007 else:
1008 compress = True
1009 lossy = False
1010 quality = int(field[3])
1011
# Per-field block: when the image object is set, the generated C records the
# current image_id in `<field>__id`, writes the pixel data under
# "/image/<id>" and post-increments image_id.  The Python booleans are
# rendered through %d, i.e. as 0 or 1.  "perc" injects a literal '%'.
1012 print >> impl, """ if (%(bname)s->%(fname)s)
1013 {
1014 char %(fname)s_buf[256];
1015 int %(fname)s_w, %(fname)s_h;
1016 int %(fname)s_alpha;
1017 void *%(fname)s_data;
1018 %(bname)s->%(fname)s__id = image_id;
1019 sprintf(%(fname)s_buf, "/image/%(perc)sd", image_id++);
1020 evas_object_image_size_get(%(bname)s->%(fname)s, &%(fname)s_w, &%(fname)s_h);
1021 %(fname)s_alpha = evas_object_image_alpha_get(%(bname)s->%(fname)s);
1022 %(fname)s_data = evas_object_image_data_get(%(bname)s->%(fname)s, EINA_FALSE);
1023 eet_data_image_write(ef, %(fname)s_buf, %(fname)s_data, %(fname)s_w, %(fname)s_h, %(fname)s_alpha, %(compress)d, %(quality)d, %(lossy)d);
1024 }""" % {
1025 "bname": name.lower(),
1026 "fname": field[0],
1027 "perc": "%",
1028 "lossy": lossy,
1029 "compress": compress,
1030 "quality": quality
1031 }
1032
# Function epilogue: hand the next free image id back to the caller.
1033 print >> impl, """ return image_id;
1034}
1035"""
1036
# Write the per-struct declarations (header) and definitions (impl) for every
# (name, fields) pair in parsed_block, in a fixed order: init/shutdown,
# allocation/deallocation, image helpers, accessors and finally the
# load/dump entry points.
1037def _write_block(parsed_block, header, impl):
1038 print >> header, ""
1039
1040 for name, fields in parsed_block:
# A C comment with the struct name introduces each section of the header.
1041 print >> header, """/* %s */""" % name
1042 _write_initializers(name, fields, header, impl)
1043 _write_shutdowns(name, fields, header, impl)
1044
1045 _write_allocators(name, fields, header, impl)
1046 _write_deallocators(name, fields, header, impl)
1047
1048 print >> header, ""
# The image helpers only write to the implementation file.
1049 _write_image_loaders(name, fields, impl)
1050 _write_image_savers(name, fields, impl)
1051 _write_getters_setters(name, fields, header, impl)
1052
# Loaders and dumpers are emitted only for structs that never appear as the
# second element of an entry in the module-level __deps list.
# NOTE(review): presumably __deps holds (user, dependency) pairs so that only
# top-level structs get file I/O entry points -- confirm where it is built.
1053 if not any(dep[1] == name for dep in __deps):
1054 print >> header, ""
1055 _write_loaders(name, fields, header, impl)
1056 _write_dumpers(name, fields, header, impl)
1057
1058 print >> header, ""
1059
# Emit the two public entry points `<base_name>_init()` and
# `<base_name>_shutdown()`: prototypes go to the header, and the definitions
# written to impl call the per-struct `_<name>_init()` /
# `_<name>_shutdown()` functions for every struct in parsed_block.
1060def _write_global_initializers(parsed_block, base_name, header, impl):
# Prototypes in the generated header.
1061 print >> header, "/* Global initializer / shutdown functions */"
1062 print >> header, "void %s_init(void);" % base_name
1063 print >> header, "void %s_shutdown(void);" % base_name
1064 print >> header, ""
1065
# Definition of <base_name>_init(): one call per struct, in parse order.
1066 print >> impl, """\nvoid
1067%s_init(void)
1068{""" % base_name
1069 for name, fields in parsed_block:
1070 print >> impl, " _%s_init();" % name.lower()
1071 print >> impl, "}\n"
1072
# Definition of <base_name>_shutdown(): same order as the initializers.
1073 print >> impl, """void
1074%s_shutdown(void)
1075{""" % base_name
1076 for name, fields in parsed_block:
1077 print >> impl, " _%s_shutdown();" % name.lower()
1078 print >> impl, "}\n"
1079
def write_output(parsed_block, base_name, dir_name):
    """Generate ``<base_name>.h`` and ``<base_name>.c`` from a parsed file.

    parsed_block -- parse result holding the (name, fields) struct pairs
    base_name    -- file name stem used for both generated files; its
                    upper-cased form closes the header's include guard
    dir_name     -- output directory prefix; it is concatenated as-is, so
                    it must be empty or end with a path separator

    Both files are closed even when one of the writer functions raises, so
    a failed run never leaves open handles behind.
    """
    header = open(dir_name + base_name + '.h', 'w')
    try:
        impl = open(dir_name + base_name + '.c', 'w')
        try:
            _write_headers(header, impl)
            _write_includes(parsed_block, base_name, header, impl)
            _write_structs(parsed_block, header, impl)
            _write_reprs(parsed_block, header, impl)
            _write_entry_names(parsed_block, header, impl)
            _write_descriptors(parsed_block, header, impl)
            _write_block(parsed_block, header, impl)
            _write_global_initializers(parsed_block, base_name, header, impl)

            # Terminate the include guard of the generated header.
            print >> header, """#endif /* __%s_H__ */""" % base_name.upper()
        finally:
            impl.close()
    finally:
        header.close()
1097
1098if __name__ == '__main__':
1099 if len(sys.argv) < 2:
1100 print 'Usage: %s file.geneet' % sys.argv[0]
1101 else:
1102 dir_name = os.path.dirname(sys.argv[1])
1103 if dir_name:
1104 dir_name = dir_name + '/'
1105 base_name = os.path.basename('.'.join(sys.argv[1].split('.')[:-1]))
1106
1107 try:
1108 parsed_block = block.parseFile(sys.argv[1])
1109 except ParseException, e:
1110 print 'Syntax error:', e
1111 sys.exit(1)
1112
1113 check(base_name, parsed_block)
1114 write_output(parsed_block, base_name, dir_name)
1115
diff --git a/history.geneet b/history.geneet
new file mode 100644
index 0000000..768284d
--- /dev/null
+++ b/history.geneet
@@ -0,0 +1,18 @@
1History_Item {
2 title : str default "Untitled";
3 url : str default "about:blank";
4 visit : uint ;
5 favicon: image compress 95; // compression ranges from 0 (no compression) to 9 (max compression)
6}
7Bookmark_Item {
8 title : str;
9 url : str;
10 visit_count : uint;
11 last_visit : uint;
12 favicon: image compress 95;
13}
14History_Bookmark_Item {
15 version : uint;
16 history_entries : list of History_Item;
17 bookmark_entries : list of Bookmark_Item;
18}
diff --git a/phonebook.geneet b/phonebook.geneet
new file mode 100644
index 0000000..e9e6276
--- /dev/null
+++ b/phonebook.geneet
@@ -0,0 +1,32 @@
1Phone_Number {
2 number : str;
3 type : enum of HOME, WORK, MOBILE;
4}
5
6Email_Address {
7 address : str;
8 type : enum of PERSONAL, WORK;
9}
10
11Address {
12 street : str;
13 number : int;
14 zip_code: str;
15 state : enum of AC, AL, AP, AM, BA, CE, DF, GO, ES, MA, MT, MS, MG, PA, PB, PR, PE, PI, RJ, RN, RS, RO, RR, SP, SC, SE, TO;
16 type : enum of PERSONAL, WORK;
17}
18
19Person {
20 first_name : str;
21 last_name : str;
22 phones : list of Phone_Number;
23 emails : list of Email_Address;
24 addresses : list of Address;
25 photo : image compress 95;
26}
27
28Book {
29 version: int;
30 people : list of Person;
31}
32
diff --git a/pyparsing.py b/pyparsing.py
new file mode 100644
index 0000000..06b11d9
--- /dev/null
+++ b/pyparsing.py
@@ -0,0 +1,3707 @@
1# module pyparsing.py
2#
3# Copyright (c) 2003-2009 Paul T. McGuire
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23#
24#from __future__ import generators
25
26__doc__ = \
27"""
28pyparsing module - Classes and methods to define and execute parsing grammars
29
30The pyparsing module is an alternative approach to creating and executing simple grammars,
31vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
32don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33provides a library of classes that you use to construct the grammar directly in Python.
34
35Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
36
37 from pyparsing import Word, alphas
38
39 # define grammar of a greeting
40 greet = Word( alphas ) + "," + Word( alphas ) + "!"
41
42 hello = "Hello, World!"
43 print hello, "->", greet.parseString( hello )
44
45The program outputs the following::
46
47 Hello, World! -> ['Hello', ',', 'World', '!']
48
49The Python representation of the grammar is quite readable, owing to the self-explanatory
50class names, and the use of '+', '|' and '^' operators.
51
52The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
53object with named attributes.
54
55The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
56 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
57 - quoted strings
58 - embedded comments
59"""
60
61__version__ = "1.5.2"
62__versionTime__ = "17 February 2009 19:45"
63__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
64
65import string
66from weakref import ref as wkref
67import copy
68import sys
69import warnings
70import re
71import sre_constants
72#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
73
74__all__ = [
75'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
76'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
77'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
78'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
79'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
80'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
81'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
82'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
83'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
84'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
85'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
86'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
87'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
88'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
89'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
90'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
91'indentedBlock', 'originalTextFor',
92]
93
94
95"""
96Detect if we are running version 3.X and make appropriate changes
97Robert A. Clark
98"""
# True when running on Python 3.x or later.
_PY3K = sys.version_info[0] > 2
if _PY3K:
    # Python 3 has neither sys.maxint nor basestring; provide stand-ins.
    _MAX_INT = sys.maxsize
    basestring = str
else:
    _MAX_INT = sys.maxint
106
if _PY3K:
    # Every str is already Unicode on Python 3: use the builtins directly.
    _ustr = str
    unichr = chr
else:
    def _ustr(obj):
        """Unicode-friendly drop-in replacement for str(obj).

        Unicode objects are returned unchanged.  Anything else goes through
        str(obj); if that raises UnicodeEncodeError the result of
        unicode(obj) is returned instead.
        """
        if isinstance(obj, unicode):
            return obj

        try:
            # When this succeeds _ustr(obj) behaves exactly like str(obj),
            # so existing callers are unaffected.
            text = str(obj)
        except UnicodeEncodeError:
            # Returning a unicode object rather than an encoded byte string
            # avoids having to pick an encoding / error handler here.
            text = unicode(obj)
        return text
137
if _PY3K:
    # A set gives the same fast membership test on Python 3.
    _str2dict = set
else:
    def _str2dict(strg):
        """Return a dict mapping every character of strg to 0."""
        return dict.fromkeys(strg, 0)
143
def _xml_escape(data):
    """Return data with &, >, <, " and ' replaced by XML entities."""
    # The ampersand has to be handled first, otherwise the '&' introduced by
    # the other entities would be escaped a second time.
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for char, entity in replacements:
        data = data.replace(char, entity)
    return data
153
class _Constants(object):
    """Empty attribute container; instances are used as simple namespaces."""
156
# Character sets used when building Word expressions.
if _PY3K:
    alphas = string.ascii_lowercase + string.ascii_uppercase
else:
    alphas = string.lowercase + string.uppercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)
# Every printable character except the whitespace ones.
printables = "".join(c for c in string.printable if c not in string.whitespace)
166
class ParseBaseException(Exception):
    """Base exception class for all parsing runtime exceptions.

    Attributes:
     - loc - character offset in the input at which the error occurred
     - msg - error message
     - pstr - the input string (empty when only a message was given)
     - parserElement - the expression that raised the exception, if known
     - lineno, col / column, line - computed on access from loc and pstr
    """
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        self.loc = loc
        if msg is None:
            # Called with a single string: treat it as the message.
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__(self, aname):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        elif aname in ("col", "column"):
            return col(self.loc, self.pstr)
        elif aname == "line":
            return line(self.loc, self.pstr)
        else:
            raise AttributeError(aname)

    def __str__(self):
        return "%s (at char %d), (line:%d, col:%d)" % \
                (self.msg, self.loc, self.lineno, self.column)

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join([line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()

    def __dir__(self):
        # Must list the names exactly as they can be accessed: the method is
        # spelled markInputline, and "column" is an accepted alias of "col".
        return "loc msg pstr parserElement lineno col column line " \
               "markInputline __str__ __repr__".split()
214
class ParseException(ParseBaseException):
    """Raised when a parse expression does not match the input.

    Supported attributes by name are:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text
    """
223
class ParseFatalException(ParseBaseException):
    """User-throwable exception raised when inconsistent parse content is
       found; stops all parsing immediately."""
228
class ParseSyntaxException(ParseFatalException):
    """Same as ParseFatalException, but raised internally when an ErrorStop
       signals that an unbacktrackable syntax error was found and parsing
       has to stop immediately."""
    def __init__(self, pe):
        # Take over every detail of the exception that triggered the stop.
        text, location = pe.pstr, pe.loc
        message, element = pe.msg, pe.parserElement
        super(ParseSyntaxException, self).__init__(
            text, location, message, element)
236
237#~ class ReparseException(ParseBaseException):
238 #~ """Experimental class - parse actions can raise this exception to cause
239 #~ pyparsing to reparse the input string:
240 #~ - with a modified input string, and/or
241 #~ - with a modified start location
242 #~ Set the values of the ReparseException in the constructor, and raise the
243 #~ exception in a parse action to cause pyparsing to use the new string/location.
244 #~ Setting the values as None causes no change to be made.
245 #~ """
246 #~ def __init_( self, newstring, restartLoc ):
247 #~ self.newParseText = newstring
248 #~ self.reparseLoc = restartLoc
249
class RecursiveGrammarException(Exception):
    """Raised by validate() when the grammar could be improperly recursive."""
    def __init__(self, parseElementList):
        # Chain of parser elements that leads back to itself.
        self.parseElementTrace = parseElementList

    def __str__(self):
        trace = self.parseElementTrace
        return "RecursiveGrammarException: %s" % trace
257
class _ParseResultsWithOffset(object):
    """Pair of (value, offset), readable by index like a 2-tuple."""
    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        pair = self.tup
        return pair[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self, i):
        """Replace the offset, keeping the stored value."""
        value = self.tup[0]
        self.tup = (value, i)
267
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)

       Internally the tokens live in a plain list, and named results in a
       dict mapping each name to a list of (value, position) pairs.
       """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )

    def __new__(cls, toklist, name=None, asList=True, modal=True):
        # Wrapping an existing ParseResults returns that very object;
        # __doinit tells __init__ whether the instance is brand new.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, toklist, name=None, asList=True, modal=True):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not modal:
                # Non-modal names accumulate every match instead of only
                # reporting the last one.
                self.__accumNames[name] = 0
            if isinstance(name, int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if toklist not in (None, '', []):
                if isinstance(toklist, basestring):
                    toklist = [toklist]
                if asList:
                    if isinstance(toklist, ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(), 0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError, TypeError, IndexError):
                        self[name] = toklist

    def __getitem__(self, i):
        """Return a token for an int/slice index, or a named result for any
        other key (the last match, or all matches for accumulating names)."""
        if isinstance(i, (int, slice)):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([v[0] for v in self.__tokdict[i]])

    def __setitem__(self, k, v):
        if isinstance(v, _ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k, list()) + [v]
            sub = v[0]
        elif isinstance(k, int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k, list()) + [_ParseResultsWithOffset(v, 0)]
            sub = v
        if isinstance(sub, ParseResults):
            # Weak reference, so a child never keeps its parent alive.
            sub.__parent = wkref(self)

    def __delitem__(self, i):
        if isinstance(i, (int, slice)):
            mylen = len(self.__toklist)
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__(self, k):
        return k in self.__tokdict

    def __len__(self): return len(self.__toklist)
    def __bool__(self): return len(self.__toklist) > 0
    __nonzero__ = __bool__
    def __iter__(self): return iter(self.__toklist)
    def __reversed__(self): return iter(reversed(self.__toklist))

    def keys(self):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop(self, index=-1):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given defaultValue or None if no
           defaultValue is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert(self, index, insStr):
        """Inserts insStr into the token list at index and shifts the
           recorded positions of named results located after it."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items(self):
        """Returns all named result keys and values as a list of tuples."""
        return [(k, self[k]) for k in self.__tokdict]

    def values(self):
        """Returns all named result values."""
        return [v[-1][0] for v in self.__tokdict.values()]

    def __getattr__(self, name):
        # Only reached when normal lookup fails.  Unknown result names
        # evaluate to "" rather than raising AttributeError.
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([v[0] for v in self.__tokdict[name]])
            else:
                return ""
        return None

    def __add__(self, other):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__(self, other):
        if other.__tokdict:
            # Named results of other move behind our current tokens.
            offset = len(self.__toklist)
            addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
                                for (k, vlist) in otheritems for v in vlist]
            for k, v in otherdictitems:
                self[k] = v
                if isinstance(v[0], ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update(other.__accumNames)
        return self

    def __repr__(self):
        return "(%s, %s)" % (repr(self.__toklist), repr(self.__tokdict))

    def __str__(self):
        out = "["
        sep = ""
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                out += sep + _ustr(i)
            else:
                out += sep + repr(i)
            sep = ", "
        out += "]"
        return out

    def _asStringList(self, sep=''):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance(item, ParseResults):
                out += item._asStringList()
            else:
                out.append(_ustr(item))
        return out

    def asList(self):
        """Returns the parse results as a nested list of matching tokens.
           Nested ParseResults become nested lists; the tokens themselves
           are returned unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res, ParseResults):
                out.append(res.asList())
            else:
                out.append(res)
        return out

    def asDict(self):
        """Returns the named parse results as dictionary."""
        return dict(self.items())

    def copy(self):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults(self.__toklist)
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update(self.__accumNames)
        ret.__name = self.__name
        return ret

    def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # Map token position -> results name.
        namedItems = dict([(v[1], k) for (k, vlist) in self.__tokdict.items()
                                     for v in vlist])
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [nl, indent, "<", selfTag, ">"]

        worklist = self.__toklist
        for i, res in enumerate(worklist):
            if isinstance(res, ParseResults):
                if i in namedItems:
                    out += [res.asXML(namedItems[i],
                                      namedItemsOnly and doctag is None,
                                      nextLevelIndent,
                                      formatted)]
                else:
                    out += [res.asXML(None,
                                      namedItemsOnly and doctag is None,
                                      nextLevelIndent,
                                      formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [nl, nextLevelIndent, "<", resTag, ">",
                        xmlBodyText,
                        "</", resTag, ">"]

        out += [nl, indent, "</", selfTag, ">"]
        return "".join(out)

    def __lookup(self, sub):
        # Find the results name under which sub (by identity) is stored.
        for k, vlist in self.__tokdict.items():
            for v, loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # dict views cannot be indexed on Python 3; the dict is known
               # to hold exactly one entry here, so take it via an iterator.
               next(iter(self.__tokdict.values()))[0][1] in (0, -1)):
            return next(iter(self.__tokdict))
        else:
            return None

    def dump(self, indent='', depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append(indent + _ustr(self.asList()))
        for k, v in sorted(self.items()):
            if out:
                out.append('\n')
            out.append("%s%s- %s: " % (indent, (' '*depth), k))
            if isinstance(v, ParseResults):
                if v.keys():
                    out.append(v.dump(indent, depth+1))
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        # The weak parent reference cannot be pickled; store the parent
        # object itself (or None when it is gone).
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self, state):
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        # dir() of a super() proxy lists the proxy's own attributes, and a
        # Python 3 keys view cannot be added to a list; list the class
        # members explicitly and append the result names.
        return dir(type(self)) + list(self.keys())
618
def col(loc, strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # A location sitting on a newline character is reported as column 1.
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # Otherwise: distance from the previous newline (rfind yields -1 on the
    # first line, which makes the first column 1).
    return loc - strg.rfind("\n", 0, loc)
630
def lineno(loc, strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
642
def line(loc, strg):
    """Returns the line of text containing loc within a string, counting newlines as line separators.

    The returned text does not include the terminating newline.
    """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() signals "not found" with -1; index 0 is a valid hit (the string
    # starts with a newline), so the test has to be >= 0.
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
652
def _defaultStartDebugAction(instring, loc, expr):
    """Default debug hook: report that expr is about to be tried at loc."""
    position = "(%d,%d)" % (lineno(loc, instring), col(loc, instring))
    message = "Match " + _ustr(expr) + " at loc " + _ustr(loc) + position
    print (message)
655
def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
    """Default debug hook: report the tokens produced by a successful match."""
    matched_tokens = str(toks.asList())
    message = "Matched " + _ustr(expr) + " -> " + matched_tokens
    print (message)
658
def _defaultExceptionDebugAction(instring, loc, expr, exc):
    """Default debug hook: report the exception raised by a failed match."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
661
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    return None
665
666class ParserElement(object):
667 """Abstract base level parser element class."""
668 DEFAULT_WHITE_CHARS = " \n\t\r"
669
670 def setDefaultWhitespaceChars( chars ):
671 """Overrides the default whitespace chars
672 """
673 ParserElement.DEFAULT_WHITE_CHARS = chars
674 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
675
def __init__( self, savelist=False ):
    """Initialise every per-element setting to its default.

    savelist is stored as saveAsList and later forwarded to ParseResults
    as its asList argument.
    """
    # --- callbacks and debugging ---
    self.parseAction = []
    self.failAction = None
    self.callDuringTry = False
    self.debug = False
    self.debugActions = ( None, None, None ) # custom (start, success, exception) debug actions

    # --- naming and results ---
    #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.errmsg = ""
    self.resultsName = None
    self.saveAsList = savelist
    self.modalResults = True # results names are modal (report only last) or cumulative (list all)

    # --- whitespace / ignorable handling ---
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []
    self.callPreparse = True # used to avoid redundant calls to preParse

    # --- grammar analysis / optimisation flags ---
    self.mayReturnEmpty = False # used when checking for left-recursion
    self.mayIndexError = True # lets subclasses that don't advance the parse index skip IndexError handling
    self.streamlined = False
    self.re = None
698
def copy( self ):
    """Return a shallow copy of this element with its own parse-action and
    ignore-expression lists, so the copy can be customised (for example with
    different parse actions) without affecting the original.

    If the element still follows the class default whitespace set, the copy
    picks up the current ParserElement.DEFAULT_WHITE_CHARS.
    """
    duplicate = copy.copy( self )
    # give the duplicate independent lists; the contained items are shared
    duplicate.ignoreExprs = self.ignoreExprs[:]
    duplicate.parseAction = self.parseAction[:]
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
708
def setName( self, name ):
    """Give this expression a name for use in debugging and error messages.

    Also rewrites errmsg to "Expected <name>" and, when the element carries
    an 'exception' attribute, updates that exception's msg.  Returns self.
    """
    self.name = name
    expected = "Expected " + self.name
    self.errmsg = expected
    if hasattr(self, "exception"):
        self.exception.msg = self.errmsg
    return self
716
def setResultsName( self, name, listAllMatches=False ):
    """Return a *copy* of this element whose matched tokens are stored
    under the given results name.

    A copy is returned (the original is left untouched) so that one basic
    element, such as an integer, can be reused in several places under
    different names.  With listAllMatches=True the name is marked as
    non-modal (modalResults is False).
    """
    named = self.copy()
    named.modalResults = not listAllMatches
    named.resultsName = name
    return named
728
def setBreak(self,breakFlag = True):
    """Enable or disable a pdb breakpoint just before this element is parsed.

    With breakFlag true, the element's _parse is wrapped in a function that
    calls pdb.set_trace() and then delegates to the previous _parse.  With
    breakFlag false, a previously installed wrapper (recognised by its
    _originalParseMethod attribute) is removed.  Returns self.
    """
    if not breakFlag:
        if hasattr(self._parse,"_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped( instring, loc, doActions, callPreParse )
    # remember what was wrapped so the breakpoint can be removed later
    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self
746
def _normalizeParseActionArgs( f ):
    """Internal helper that adapts a parse action taking fewer than three
    arguments so that every parse action can be invoked as f(s,l,t).

    The positional-argument count is read from the callable's code object
    (trying the function itself, then its __call__), reduced by one when a
    bound-instance attribute is present.  Callables that accept *args, or
    that already take exactly three arguments, are returned unwrapped.
    """
    STAR_ARGS = 4 # co_flags bit tested to detect a *args parameter

    try:
        restore = None
        if isinstance(f,type):
            # a class was passed: inspect its __init__, put the class back afterwards
            restore = f
            f = f.__init__
        if _PY3K:
            # NOTE(review): 'code' (not '__code__') is what the original reads here;
            # if the attribute is missing the AttributeError handler below takes over.
            codeObj = f.code
        else:
            codeObj = f.func_code
        if codeObj.co_flags & STAR_ARGS:
            return f
        numargs = codeObj.co_argcount
        if _PY3K:
            isBound = hasattr(f,"__self__")
        else:
            isBound = hasattr(f,"im_self")
        if isBound:
            numargs -= 1
        if restore:
            f = restore
    except AttributeError:
        try:
            if _PY3K:
                call_im_func_code = f.__code__
            else:
                call_im_func_code = f.__call__.im_func.func_code

            # not a function, must be a callable object, get info from the
            # im_func binding of its bound __call__ method
            if call_im_func_code.co_flags & STAR_ARGS:
                return f
            numargs = call_im_func_code.co_argcount
            if _PY3K:
                if hasattr(f.__call__,"__self__"):
                    # NOTE(review): subtracting 0 leaves the count unchanged; kept as-is
                    numargs -= 0
            else:
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1
        except AttributeError:
            if _PY3K:
                call_func_code = f.__call__.__code__
            else:
                call_func_code = f.__call__.func_code
            # not a bound method, get info directly from __call__ method
            if call_func_code.co_flags & STAR_ARGS:
                return f
            numargs = call_func_code.co_argcount
            if _PY3K:
                callIsBound = hasattr(f.__call__,"__self__")
            else:
                callIsBound = hasattr(f.__call__,"im_self")
            if callIsBound:
                numargs -= 1

    #~ print ("adding function %s with %d args" % (f.func_name,numargs))
    if numargs == 3:
        return f

    if numargs > 3:
        # NOTE(review): this wrapper is immediately replaced by the final 'else'
        # branch of the chain below (numargs > 3 is neither 2 nor 1) - confirm intent.
        def tmp(s,l,t):
            return f(f.__call__.__self__, s,l,t)
    if numargs == 2:
        def tmp(s,l,t):
            return f(l,t)
    elif numargs == 1:
        def tmp(s,l,t):
            return f(t)
    else: #~ numargs == 0:
        def tmp(s,l,t):
            return f()

    # make the wrapper look like the wrapped callable where possible
    for attrName in ("__name__", "__doc__"):
        try:
            setattr(tmp, attrName, getattr(f, attrName))
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
    try:
        tmp.__dict__.update(f.__dict__)
    except (AttributeError,TypeError):
        # no need for special handling if attribute doesnt exist
        pass
    return tmp
_normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
840
def setParseAction( self, *fns, **kwargs ):
    """Replace this element's parse actions with the given callables.

    Each fn may take 0 to 3 arguments and is called as fn(s,loc,toks),
    fn(loc,toks), fn(toks) or fn(), where:
     - s    = the original string being parsed (see note below)
     - loc  = the location of the matching substring
     - toks = the matched tokens, packaged as a ParseResults object
    A parse action that returns a value replaces the tokens with that
    value; returning nothing keeps the tokens.

    The optional keyword callDuringTry is stored on the element.

    Note: by default tabs in the input are expanded before parsing.  See
    parseString for how to keep string, location, line and column
    consistent when the input contains TAB characters.
    """
    self.parseAction = [ self._normalizeParseActionArgs(fn) for fn in fns ]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
861
def addParseAction( self, *fns, **kwargs ):
    """Append parse actions to the ones already attached to this element.
    Arguments are as for setParseAction; callDuringTry stays set once it
    has been requested by any call."""
    self.parseAction += [ self._normalizeParseActionArgs(fn) for fn in fns ]
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
867
def setFailAction( self, fn ):
    """Register the action to run when parsing fails at this expression.

    The fail action is called as fn(s,loc,expr,err) where:
     - s    = string being parsed
     - loc  = location where the match was attempted and failed
     - expr = the parse expression that failed
     - err  = the exception thrown
    Its return value is ignored.  It may raise ParseFatalException to stop
    parsing immediately.  Returns self.
    """
    self.failAction = fn
    return self
880
def _skipIgnorables( self, instring, loc ):
    """Advance loc past every run of ignorable expressions starting there.

    Each ignore expression is applied repeatedly until it fails; the whole
    list is retried until a full pass matches nothing.  Returns the new loc.
    """
    progressed = True
    while progressed:
        progressed = False
        for ignorable in self.ignoreExprs:
            try:
                # keep consuming this ignorable; the loop ends via ParseException
                while True:
                    loc, _ = ignorable._parse( instring, loc )
                    progressed = True
            except ParseException:
                continue
    return loc
893
def preParse( self, instring, loc ):
    """Return the location at which matching should actually start:
    loc advanced past ignorable expressions (if any are registered) and
    then past leading whitespace (if skipWhitespace is set)."""
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
905
def parseImpl( self, instring, loc, doActions=True ):
    """Base matching step: consume nothing and produce no tokens.
    Subclasses override this to do the real matching."""
    noTokens = []
    return loc, noTokens
908
def postParse( self, instring, loc, tokenlist ):
    """Hook applied to the tokens returned by parseImpl; the base
    implementation hands the token list back unchanged."""
    return tokenlist
911
#~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this element at loc without consulting the packrat cache.

    Sequence: optional preParse, parseImpl, postParse, wrapping of the
    tokens in a ParseResults, then the parse actions (only when doActions
    or callDuringTry is set).  Returns (new loc, ParseResults).

    An IndexError escaping parseImpl is reported as a ParseException
    located at the end of the input.  When debugging or a fail action is
    active, the debug/fail callbacks are invoked around the attempt.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        # instrumented path: callbacks must observe the start and any failure
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # plain path: no callbacks to notify
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        if self.mayIndexError or loc >= len(instring):
            # only pay for the IndexError guard when it can actually trigger
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        # a parse action returned replacement tokens
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    # a parse action returned replacement tokens
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
984
def tryParse( self, instring, loc ):
    """Attempt a match at loc with parse actions disabled and return only
    the end location.  A ParseFatalException is downgraded to an ordinary
    ParseException at loc."""
    try:
        endloc = self._parse( instring, loc, doActions=False )[0]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    return endloc
990
# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing variant of _parseNoCache.

    Results are stored in ParserElement._exprArgCache keyed on
    (element, string, loc, callPreParse, doActions).  A cached exception
    is re-raised; a cached success is returned as stored.  On a miss the
    outcome of _parseNoCache is recorded (with a copy of the tokens for a
    success, or the exception itself for a failure) before being passed on.
    """
    lookup = (self,instring,loc,callPreParse,doActions)
    if lookup in ParserElement._exprArgCache:
        value = ParserElement._exprArgCache[ lookup ]
        if isinstance(value,Exception):
            raise value
        return value
    else:
        try:
            value = self._parseNoCache( instring, loc, doActions, callPreParse )
            ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
            return value
        except ParseBaseException as pe:
            ParserElement._exprArgCache[ lookup ] = pe
            raise
1008
# default parse entry point; enablePackrat() swaps in the memoizing variant
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
_exprArgCache = {}
def resetCache():
    """Discard every memoized parse result held in ParserElement._exprArgCache."""
    cache = ParserElement._exprArgCache
    cache.clear()
resetCache = staticmethod(resetCache)
1016
_packratEnabled = False
def enablePackrat():
    """Turn on "packrat" parsing, i.e. memoization of parse attempts.

       Repeated attempts at the same string location (common in complex
       grammars) then return a cached value instead of re-running the
       parsing/validating code.  Both successful results and parsing
       exceptions are memoized.

       Because cached results skip re-execution, programs whose parse
       actions have side-effects may break; packrat parsing is therefore
       off when pyparsing is first imported and must be switched on
       explicitly with ParserElement.enablePackrat().  If your program
       uses psyco to "compile as you go", call enablePackrat before
       psyco.full(), otherwise Python will crash.  For best results call
       enablePackrat() immediately after importing pyparsing.

       Calling it more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
enablePackrat = staticmethod(enablePackrat)
1038
def parseString( self, instring, parseAll=False ):
    """Execute the parse expression with the given string.
       This is the main interface to the client code, once the complete
       expression has been built.

       If you want the grammar to require that the entire input string be
       successfully parsed, then set parseAll to True (equivalent to ending
       the grammar with StringEnd()).

       Returns the matched tokens; parse failures propagate as
       ParseBaseException subclasses, re-raised from this frame.

       Note: parseString implicitly calls expandtabs() on the input string
       (unless keepTabs is set), in order to report proper column numbers
       in parse actions.  If the input string contains tabs and the grammar
       uses parse actions that use the loc argument to index into the string
       being parsed, you can ensure you have a consistent view of the input
       string by:
        - calling parseWithTabs on your grammar before calling parseString
        - defining your parse action using the full (s,loc,toks) signature,
          and referencing the input string using the parse action's s argument
        - explicitly expanding the tabs in your input string before calling
          parseString
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for e in self.ignoreExprs:
        e.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse( instring, 0 )
        if parseAll:
            # skip trailing whitespace/ignorables, then insist on end of input
            loc = self.preParse( instring, loc )
            StringEnd()._parse( instring, loc )
    except ParseBaseException as exc:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    else:
        return tokens
1079
def scanString( self, instring, maxMatches=_MAX_INT ):
    """Scan the input string for expression matches.  This is a generator:
       each match yields a (tokens, start location, end location) tuple.
       The optional maxMatches argument stops scanning after that many
       matches have been found.

       Start and end locations are relative to the string actually parsed,
       which has its tabs expanded unless keepTabs is set.  See parseString
       for more information on parsing strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods once; they are called on every scan step
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here; retry one character further on
                loc = preloc+1
            else:
                matches += 1
                yield tokens, preloc, nextLoc
                loc = nextLoc
    except ParseBaseException as pe:
        raise pe
1114
def transformString( self, instring ):
    """Extension to scanString, to modify matching text with modified tokens that may
       be returned from a parse action.  To use transformString, define a grammar and
       attach a parse action to it that modifies the returned token list.
       Invoking transformString() on a target string will then scan for matches,
       and replace the matched text patterns according to the logic in the parse
       action.  transformString() returns the resulting transformed string.

       Side effect: keepTabs is set to True on this element and stays set."""
    out = []
    lastE = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for t,s,e in self.scanString( instring ):
            # unmatched text between the previous match and this one is kept verbatim
            out.append( instring[lastE:s] )
            if t:
                if isinstance(t,ParseResults):
                    out += t.asList()
                elif isinstance(t,list):
                    out += t
                else:
                    out.append(t)
            lastE = e
        out.append(instring[lastE:])
        return "".join(map(_ustr,out))
    except ParseBaseException as pe:
        raise pe
1142
def searchString( self, instring, maxMatches=_MAX_INT ):
    """Another extension to scanString, simplifying the access to the tokens found
       to match the given parse expression.  Returns a ParseResults holding the
       tokens of each match (locations are dropped).  May be called with optional
       maxMatches argument, to clip searching after 'n' matches are found.
    """
    try:
        return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
    except ParseBaseException as pe:
        raise pe
1152
def __add__(self, other ):
    """Implement self + other as And([self, other]).
    A string operand is promoted to a Literal; any other non-ParserElement
    operand triggers a SyntaxWarning and yields None."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1162
def __radd__(self, other ):
    """Implement other + self when the left operand is not a ParserElement.
    A string operand is promoted to a Literal; any other non-ParserElement
    operand triggers a SyntaxWarning and yields None."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1172
def __sub__(self, other):
    """Implement self - other as an And with an error stop between the
    operands: And([self, And._ErrorStop(), other]).
    A string operand is promoted to a Literal; any other non-ParserElement
    operand triggers a SyntaxWarning and yields None."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return And( [ self, And._ErrorStop(), other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1182
def __rsub__(self, other ):
    """Implement other - self when the left operand is not a ParserElement.
    A string operand is promoted to a Literal; any other non-ParserElement
    operand triggers a SyntaxWarning and yields None."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1192
def __mul__(self,other):
    """Implement self * other, i.e. repetition of this expression.

    other may be:
     - an int n: exactly n repetitions
     - a tuple (n, None) or (n,): at least n repetitions
     - a tuple (None, m): between 0 and m repetitions
     - a tuple (n, m): between n and m repetitions

    Raises TypeError for any other operand and ValueError for a negative
    count, for m < n, or for a request of zero repetitions.
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad to exactly two entries so (n,) behaves like (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements counts only the optional repetitions
            optElements -= minElements
        else:
            # format the message; previously the types were passed as extra exception args
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects" % (type(other[0]),type(other[1])))
    else:
        # format the message; previously the type was passed as an extra exception arg
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: each further repetition is only tried after the previous one matched
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
1241
def __rmul__(self, other):
    """Implement other * self by delegating to __mul__ (repetition is
    treated the same regardless of operand order)."""
    product = self.__mul__(other)
    return product
1244
def __or__(self, other ):
    """Implement self | other as MatchFirst([self, other]).
    A string operand is promoted to a Literal; any other non-ParserElement
    operand triggers a SyntaxWarning and yields None."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1254
def __ror__(self, other ):
    """Implement other | self when the left operand is not a ParserElement.
    A string operand is promoted to a Literal; any other non-ParserElement
    operand triggers a SyntaxWarning and yields None."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1264
def __xor__(self, other ):
    """Implement self ^ other as Or([self, other]).
    A string operand is promoted to a Literal; any other non-ParserElement
    operand triggers a SyntaxWarning and yields None."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1274
def __rxor__(self, other ):
    """Implement other ^ self when the left operand is not a ParserElement.
    A string operand is promoted to a Literal; any other non-ParserElement
    operand triggers a SyntaxWarning and yields None."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1284
def __and__(self, other ):
    """Implement self & other as Each([self, other]).
    A string operand is promoted to a Literal; any other non-ParserElement
    operand triggers a SyntaxWarning and yields None."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1294
def __rand__(self, other ):
    """Implement other & self when the left operand is not a ParserElement.
    A string operand is promoted to a Literal; any other non-ParserElement
    operand triggers a SyntaxWarning and yields None."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1304
def __invert__( self ):
    """Implement ~self by wrapping this element in NotAny."""
    negated = NotAny( self )
    return negated
1308
def __call__(self, name):
    """Shorthand for setResultsName(name) with the default listAllMatches, so::
         userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
       can be abbreviated to::
         userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    named = self.setResultsName(name)
    return named
1316
def suppress( self ):
    """Return this element wrapped in Suppress, so that what it matches is
       left out of the returned output (handy for punctuation).
    """
    silenced = Suppress( self )
    return silenced
1322
def leaveWhitespace( self ):
    """Stop skipping leading whitespace before this element's pattern is
       matched.  Mostly used internally by pyparsing, but sometimes needed
       in whitespace-sensitive grammars.  Returns self.
    """
    setattr( self, "skipWhitespace", False )
    return self
1330
def setWhitespaceChars( self, chars ):
    """Use chars as this element's whitespace set instead of the class
       default.  Whitespace skipping is switched on, and the element stops
       tracking the class default when it is copied.  Returns self.
    """
    self.whiteChars = chars
    self.copyDefaultWhiteChars = False
    self.skipWhitespace = True
    return self
1338
def parseWithTabs( self ):
    """Keep TAB characters in the input instead of expanding them to spaces
       before parsing.  Must be called before parseString when the grammar
       contains elements that match TAB characters.  Returns self."""
    setattr( self, "keepTabs", True )
    return self
1345
def ignore( self, other ):
    """Register an expression (e.g. a comment pattern) to be skipped while
       matching.  May be called repeatedly to register several ignorable
       patterns.  An expression that is not already a Suppress is wrapped in
       one; a Suppress is only added if it is not registered yet.
       Returns self.
    """
    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append( other )
    return self
1357
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Turn debugging on with the given start/success/exception callbacks.
       Any callback passed as a false value is replaced by the module's
       default (printing) action.  Returns self."""
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1365
def setDebug( self, flag=True ):
    """Switch debugging messages for this element on or off.
       A true flag installs the default debug actions; a false flag only
       clears the debug switch.  Returns self."""
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
1374
def __str__( self ):
    """Return the element's name as its string form."""
    label = self.name
    return label
1377
def __repr__( self ):
    """Return the same text as the string conversion done by _ustr."""
    text = _ustr(self)
    return text
1380
def streamline( self ):
    """Mark this element as streamlined and drop its cached string
    representation.  Returns self."""
    self.strRepr = None
    self.streamlined = True
    return self
1385
def checkRecursion( self, parseElementList ):
    """Recursion-check hook; the base implementation does nothing."""
    return None
1388
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure and for infinitely
    recursive definitions.  The base implementation runs checkRecursion
    with a fresh empty list; validateTrace is not used here."""
    seen = []
    self.checkRecursion( seen )
1392
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
       If the argument has no read() method it is treated as a filename:
       the file is opened in binary mode, read completely and closed (also
       when reading fails) before parsing.  parseAll is passed through to
       parseString, whose result is returned.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # release the handle even if read() raises
            f.close()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1409
def getException(self):
    """Build a ParseException for this element carrying its errmsg, with
    an empty string and location 0 as placeholders."""
    placeholder = ParseException("",0,self.errmsg,self)
    return placeholder
1412
1413 def __getattr__(self,aname):
1414 if aname == "myException":
1415 self.myException = ret = self.getException();
1416 return ret;
1417 else:
1418 raise AttributeError("no such attribute " + aname)
1419
def __eq__(self,other):
    """Compare this element with another object.

    Against a ParserElement: equal when it is the same object or both have
    equal attribute dictionaries.  Against a string: equal when this
    element parses the whole string successfully.  Anything else falls
    back to comparing the super() proxy with other.
    """
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement,self)==other
1431
def __ne__(self,other):
    """Inequality, defined as the negation of ==."""
    same = (self == other)
    return not same
1434
def __hash__(self):
    """Hash by object identity (the hash of id(self))."""
    identity = id(self)
    return hash(identity)
1437
def __req__(self,other):
    """Reflected-equality helper; evaluates self == other."""
    same = (self == other)
    return same
1440
def __rne__(self,other):
    """Reflected-inequality helper; evaluates not (self == other)."""
    same = (self == other)
    return not same
1443
1444
class Token(ParserElement):
    """Abstract ParserElement subclass, the base for atomic matching patterns."""

    def __init__( self ):
        # tokens never request list-style results
        super(Token,self).__init__( savelist=False )

    def setName(self, name):
        """Name the token and refresh errmsg to "Expected <name>".
        Returns whatever the base class setName returns."""
        renamed = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        return renamed
1456
1457
class Empty(Token):
    """A token that matches without consuming anything; it always succeeds."""

    def __init__( self ):
        super(Empty,self).__init__()
        self.name = "Empty"
        self.mayIndexError = False
        self.mayReturnEmpty = True
1465
1466
class NoMatch(Token):
    """A token that can never match; every parse attempt raises."""

    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: update the element's own exception object with the
        current string and location, then raise it."""
        failure = self.myException
        failure.pstr = instring
        failure.loc = loc
        raise failure
1482
1483
class Literal(Token):
    """Token that matches one specific string exactly."""

    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty literal is turned into an Empty token after warning the caller
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, matched string) when the literal occurs at
        loc; otherwise raise the element's exception positioned at loc."""
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc+self.matchLen, self.match
        #~ raise ParseException( instring, loc, self.errmsg )
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
_L = Literal
1516
class Keyword(Token):
    """Token that matches a specified string only as a whole keyword, i.e.
       not directly preceded or followed by a keyword character.  Compare
       with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Two optional constructor arguments are accepted besides the keyword string:
       identChars is a string of the characters considered valid identifier
       characters (default: all alphanumerics plus "_" and "$"); caseless
       enables case-insensitive matching (default False).
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # NOTE(review): only a warning is issued; firstMatchChar stays unset for ""
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False
        self.caseless = caseless
        if caseless:
            # compare in upper case on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, keyword) when the keyword occurs at loc and
        is not adjacent to an identifier character on either side; otherwise
        raise the element's exception positioned at loc."""
        endLoc = loc+self.matchLen
        if self.caseless:
            matched = ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                        (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                        (loc == 0 or instring[loc-1].upper() not in self.identChars) )
        else:
            matched = ( instring[loc] == self.firstMatchChar and
                        (self.matchLen==1 or instring.startswith(self.match,loc)) and
                        (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                        (loc == 0 or instring[loc-1] not in self.identChars) )
        if matched:
            return endLoc, self.match
        #~ raise ParseException( instring, loc, self.errmsg )
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def copy(self):
        """Copy the keyword; the copy's identChars is reset to the class default."""
        duplicate = super(Keyword,self).copy()
        # NOTE(review): this assigns the plain default string, whereas __init__
        # stores _str2dict(identChars) - custom identChars are not carried over; confirm intent
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    def setDefaultKeywordChars( chars ):
        """Replace the class-wide default set of keyword characters.
        """
        setattr( Keyword, "DEFAULT_KEYWORD_CHARS", chars )
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1577
class CaselessLiteral(Literal):
    """Token that matches a given string without regard to letter case.
       The token returned is always the string supplied at construction,
       never the spelling found in the input text.
    """
    def __init__( self, matchString ):
        # the upper-cased form is handed to Literal; the caller's spelling
        # is kept separately for the returned token and for messages
        super(CaselessLiteral,self).__init__( matchString.upper() )
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Compare the upper-cased input slice at loc against self.match."""
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return end, self.returnString
1599
class CaselessKeyword(Keyword):
    """Keyword that is always constructed with caseless=True.
       Its parseImpl checks only the character following the match
       against identChars.
    """
    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, self.match) or raise self.myException."""
        end = loc + self.matchLen
        found = instring[loc:end].upper() == self.caselessmatch
        # a following character exists only when end is inside the string
        if found and end < len(instring):
            found = instring[end].upper() not in self.identChars
        if found:
            return end, self.match

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
1613
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
        """Store the character sets and length limits.

        When no length limits are given and neither character set contains
        a space, an equivalent regular expression is compiled and used by
        parseImpl.  Raises ValueError if min < 1.
        """
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # The literal-prefix form is only valid when there is exactly
                # one possible initial character; testing bodyCharsOrig here
                # would turn a multi-character initChars into a required
                # literal sequence.
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # fall back to the character-by-character matcher
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at loc and return (end location, matched text).

        Uses the compiled regular expression when one is available;
        otherwise scans characters by hand.  Failure raises the shared
        self.myException with loc and pstr updated.
        """
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that continues past it is rejected
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base-class string if available, else a "W:(...)" summary."""
        try:
            return super(Word,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1740
1741
class Regex(Token):
    """Token that matches whatever a given regular expression matches.
       The pattern is written in the syntax of the Python re module.
    """
    def __init__( self, pattern, flags=0):
        """Compile pattern with flags via re.compile(); both are passed through unchanged.

        An empty pattern produces a SyntaxWarning.  A pattern that fails to
        compile produces a SyntaxWarning and the compile error is re-raised.
        """
        super(Regex,self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            compiled = re.compile(self.pattern, self.flags)
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise
        self.re = compiled
        self.reString = self.pattern

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match at loc; return (end location, ParseResults).

        Named groups of the match are also stored in the results under
        their group names.
        """
        found = self.re.match(instring,loc)
        if not found:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        tokens = ParseResults(found.group())
        named = found.groupdict()
        for key in named:
            tokens[key] = named[key]
        return found.end(), tokens

    def __str__( self ):
        """Return the base-class string if available, else "Re:(<pattern repr>)"."""
        try:
            return super(Regex,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1797
1798
class QuotedString(Token):
    """Token that matches text enclosed between quoting delimiters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Parameters:
            - quoteChar - one or more characters that open the quoted string
            - escChar - character that escapes the following character, typically backslash (default=None)
            - escQuote - sequence standing for an embedded quote (such as SQL's "" for an embedded ") (default=None)
            - multiline - whether the quoted text may span lines (default=False)
            - unquoteResults - whether delimiters and escapes are removed from the result (default=True)
            - endQuoteChar - one or more characters that close the quoted string (default=None => same as quoteChar)

           An empty (after stripping) quoteChar or endQuoteChar produces a
           SyntaxWarning followed by SyntaxError.
        """
        super(QuotedString,self).__init__()

        # surrounding whitespace in the delimiters is discarded
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # opening delimiter followed by a character class that excludes the
        # first end-quote character, the escape character and, unless
        # multiline, line breaks
        openQuote = re.escape(self.quoteChar)
        endFirst = _escapeRegexRangeChars(self.endQuoteChar[0])
        escInClass = ''
        if escChar is not None:
            escInClass = _escapeRegexRangeChars(escChar) or ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % ( openQuote, endFirst, escInClass )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % ( openQuote, endFirst, escInClass )

        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of the end quote that are not followed
            # by the next end-quote character, longest prefix first
            partials = []
            for i in range(len(self.endQuoteChar)-1,0,-1):
                partials.append( "(?:%s[^%s])" % (re.escape(self.endQuoteChar[:i]),
                                                  _escapeRegexRangeChars(self.endQuoteChar[i])) )
            self.pattern += '|' + '|'.join(partials)
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            compiled = re.compile(self.pattern, self.flags)
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise
        self.re = compiled
        self.reString = self.pattern

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at loc; return (end location, text).

        With unquoteResults set, the delimiters are removed and escape
        sequences are replaced in the returned text.
        """
        found = None
        if instring[loc] == self.firstQuoteChar:
            found = self.re.match(instring,loc)
        if not found:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = found.end()
        text = found.group()

        if self.unquoteResults:
            # drop the opening and closing delimiters
            text = text[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(text,basestring):
                # escChar + X  ->  X
                if self.escChar:
                    text = re.sub(self.escCharReplacePattern,r"\g<1>",text)

                # escQuote  ->  end quote
                if self.escQuote:
                    text = text.replace(self.escQuote, self.endQuoteChar)

        return loc, text

    def __str__( self ):
        """Return the base-class string if available, else a description of the delimiters."""
        try:
            return super(QuotedString,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
1913
1914
class CharsNotIn(Token):
    """Token that matches a run of characters, none of which is in a given set.
       Constructed from a string of the disallowed characters plus optional
       minimum, maximum, and/or exact lengths.  min defaults to 1 (values
       below 1 are rejected); max and exact default to 0, which means no
       maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = _MAX_INT
        if max > 0:
            self.maxLen = max

        # an exact length overrides both limits
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def _fail( self, instring, loc ):
        # raise the shared exception instance, positioned at loc
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters not in notChars; return (end location, text)."""
        if instring[loc] in self.notChars:
            self._fail( instring, loc )

        start = loc
        excluded = self.notChars
        limit = min( start+self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            self._fail( instring, loc )

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base-class string if available, else a "!W:(...)" summary."""
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
1985
class White(Token):
    """Matching class for whitespace that is significant to a grammar
       (pyparsing grammars normally ignore whitespace).  Constructed with a
       string of the whitespace characters to match; default is " \\t\\r\\n".
       Also accepts optional min, max, and exact arguments, as defined for
       the Word class."""
    # display names used to build self.name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # characters being matched must not be skipped as leading whitespace
        skippable = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars( "".join(skippable) )
        self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite]))
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = _MAX_INT
        if max > 0:
            self.maxLen = max

        # an exact length overrides both limits
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from matchWhite; return (end location, text)."""
        if instring[loc] not in self.matchWhite:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        start = loc
        limit = min( start + self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]
2042
2043
class _PositionToken(Token):
    """Base class for the position-testing tokens defined below it.
       Names the element after its class and marks it as able to match
       empty text without raising IndexError."""
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.name = self.__class__.__name__
        self.mayIndexError = False
        self.mayReturnEmpty = True
2050
class GoToColumn(_PositionToken):
    """Token that advances to a given column of the input text; useful when scraping tabular reports."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorables and whitespace until the target column is reached."""
        if col(loc,instring) == self.col:
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the text between loc and the target column.

        Raises ParseException when loc is already past that column.
        """
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]
2073
class LineStart(_PositionToken):
    """Matches when the current position is at the start of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # newline is removed from the skipped whitespace characters
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Return loc+1 if the base preParse lands on a newline, else loc unchanged."""
        skipped = super(LineStart,self).preParse(instring,loc)
        if instring[skipped] != "\n":
            return loc
        return loc + 1

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list at position 0, at the position
        preParse reaches from 0, or directly after a newline."""
        atLineStart = ( loc == 0 or
                        loc == self.preParse( instring, 0 ) or
                        instring[loc-1] == "\n" )
        if atLineStart:
            return loc, []

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2098
class LineEnd(_PositionToken):
    """Matches when the current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newline is removed from the skipped whitespace characters
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc+1, "\\n") at a newline, (loc+1, []) at end of string; otherwise fail."""
        instrlen = len(instring)
        if loc < instrlen and instring[loc] == "\n":
            return loc+1, "\n"
        if loc == instrlen:
            return loc+1, []

        # not at a newline, or already beyond the end of the string
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2124
class StringStart(_PositionToken):
    """Matches when the current position is at the start of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed at position 0, or at the position preParse reaches from 0
        (everything before loc was whitespace/ignorables)."""
        if loc != 0 and loc != self.preParse( instring, 0 ):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2142
class StringEnd(_PositionToken):
    """Matches when the current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Fail while input remains; return (loc+1, []) exactly at the end
        and (loc, []) beyond it."""
        instrlen = len(instring)
        if loc < instrlen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        if loc == instrlen:
            return loc+1, []
        # loc > instrlen
        return loc, []
2166
class WordStart(_PositionToken):
    """Matches when the current position is at the start of a Word and the
       preceding character is not in the given set of wordChars
       (default=printables).  To emulate the \\b behavior of regular
       expressions, use WordStart(alphanums).  WordStart also matches at
       the beginning of the string being parsed, or at the beginning of a
       line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Return (loc, []) at a word start, else raise self.myException."""
        if loc != 0:
            precededByWord = instring[loc-1] in self.wordChars
            if precededByWord or instring[loc] not in self.wordChars:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
2188
class WordEnd(_PositionToken):
    """Matches when the current position is at the end of a Word and the
       following character is not in the given set of wordChars
       (default=printables).  To emulate the \\b behavior of regular
       expressions, use WordEnd(alphanums).  WordEnd also matches at the
       end of the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Return (loc, []) at a word end, else raise self.myException."""
        instrlen = len(instring)
        # positions at or past the end of the string always match
        if instrlen>0 and loc<instrlen:
            followedByWord = instring[loc] in self.wordChars
            if followedByWord or instring[loc-1] not in self.wordChars:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
2213
2214
class ParseExpression(ParserElement):
    """Abstract ParserElement subclass that holds a list of sub-expressions (self.exprs)."""
    def __init__( self, exprs, savelist = False ):
        """Normalize exprs to a list: a list is kept as-is, a string becomes
        a single Literal, any other iterable is copied into a list, and a
        non-iterable is wrapped in a one-element list."""
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, list ):
            self.exprs = exprs
        elif isinstance( exprs, basestring ):
            self.exprs = [ Literal( exprs ) ]
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        return self.exprs[i]

    def append( self, other ):
        """Add other to the contained expressions and drop the cached string form."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           copies of all contained expressions."""
        self.skipWhitespace = False
        copies = [ e.copy() for e in self.exprs ]
        self.exprs = copies
        for e in copies:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable here and on every contained expression.

        A Suppress that is already in self.ignoreExprs is not added again.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self

        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the base-class string if available, else "ClassName:(exprs)"."""
        try:
            return super(ParseExpression,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline all contained expressions, then flatten a two-element
        nesting of the same class, e.g. And( And( And( a,b), c), d) becomes
        And( a,b,c,d ).  A nested expression is only absorbed when it has no
        parse action, no results name and debugging is off."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        def absorbable( candidate ):
            return ( isinstance( candidate, self.__class__ ) and
                     not candidate.parseAction and
                     candidate.resultsName is None and
                     not candidate.debug )

        if len(self.exprs) == 2:
            first = self.exprs[0]
            if absorbable( first ):
                self.exprs = first.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= first.mayReturnEmpty
                self.mayIndexError  |= first.mayIndexError

            # looked up after the possible merge above
            last = self.exprs[-1]
            if absorbable( last ):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= last.mayReturnEmpty
                self.mayIndexError  |= last.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, then run checkRecursion on this one."""
        # validateTrace is only read; a new list is built for the children
        trace = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(trace)
        self.checkRecursion( [] )
2310
2311class And(ParseExpression):
2312 """Requires all given ParseExpressions to be found in the given order.
2313 Expressions may be separated by whitespace.
2314 May be constructed using the '+' operator.
2315 """
2316
2317 class _ErrorStop(Empty):
2318 def __init__(self, *args, **kwargs):
2319 super(Empty,self).__init__(*args, **kwargs)
2320 self.leaveWhitespace()
2321
2322 def __init__( self, exprs, savelist = True ):
2323 super(And,self).__init__(exprs, savelist)
2324 self.mayReturnEmpty = True
2325 for e in self.exprs:
2326 if not e.mayReturnEmpty:
2327 self.mayReturnEmpty = False
2328 break
2329 self.setWhitespaceChars( exprs[0].whiteChars )
2330 self.skipWhitespace = exprs[0].skipWhitespace
2331 self.callPreparse = True
2332
2333 def parseImpl( self, instring, loc, doActions=True ):
2334 # pass False as last arg to _parse for first element, since we already
2335 # pre-parsed the string as part of our And pre-parsing
2336 loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
2337 errorStop = False
2338 for e in self.exprs[1:]:
2339 if isinstance(e, And._ErrorStop):
2340 errorStop = True
2341 continue
2342 if errorStop:
2343 try:
2344 loc, exprtokens = e._parse( instring, loc, doActions )
2345 except ParseSyntaxException:
2346 raise
2347 except ParseBaseException, pe:
2348 raise ParseSyntaxException(pe)
2349 except IndexError, ie:
2350 raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
2351 else:
2352 loc, exprtokens = e._parse( instring, loc, doActions )
2353 if exprtokens or exprtokens.keys():
2354 resultlist += exprtokens
2355 return loc, resultlist
2356
2357 def __iadd__(self, other ):
2358 if isinstance( other, basestring ):
2359 other = Literal( other )
2360 return self.append( other ) #And( [ self, other ] )
2361
2362 def checkRecursion( self, parseElementList ):
2363 subRecCheckList = parseElementList[:] + [ self ]
2364 for e in self.exprs:
2365 e.checkRecursion( subRecCheckList )
2366 if not e.mayReturnEmpty:
2367 break
2368
2369 def __str__( self ):
2370 if hasattr(self,"name"):
2371 return self.name
2372
2373 if self.strRepr is None:
2374 self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2375
2376 return self.strRepr
2377
2378
2379class Or(ParseExpression):
2380 """Requires that at least one ParseExpression is found.
2381 If two expressions match, the expression that matches the longest string will be used.
2382 May be constructed using the '^' operator.
2383 """
2384 def __init__( self, exprs, savelist = False ):
2385 super(Or,self).__init__(exprs, savelist)
2386 self.mayReturnEmpty = False
2387 for e in self.exprs:
2388 if e.mayReturnEmpty:
2389 self.mayReturnEmpty = True
2390 break
2391
2392 def parseImpl( self, instring, loc, doActions=True ):
2393 maxExcLoc = -1
2394 maxMatchLoc = -1
2395 maxException = None
2396 for e in self.exprs:
2397 try:
2398 loc2 = e.tryParse( instring, loc )
2399 except ParseException, err:
2400 if err.loc > maxExcLoc:
2401 maxException = err
2402 maxExcLoc = err.loc
2403 except IndexError:
2404 if len(instring) > maxExcLoc:
2405 maxException = ParseException(instring,len(instring),e.errmsg,self)
2406 maxExcLoc = len(instring)
2407 else:
2408 if loc2 > maxMatchLoc:
2409 maxMatchLoc = loc2
2410 maxMatchExp = e
2411
2412 if maxMatchLoc < 0:
2413 if maxException is not None:
2414 raise maxException
2415 else:
2416 raise ParseException(instring, loc, "no defined alternatives to match", self)
2417
2418 return maxMatchExp._parse( instring, loc, doActions )
2419
2420 def __ixor__(self, other ):
2421 if isinstance( other, basestring ):
2422 other = Literal( other )
2423 return self.append( other ) #Or( [ self, other ] )
2424
2425 def __str__( self ):
2426 if hasattr(self,"name"):
2427 return self.name
2428
2429 if self.strRepr is None:
2430 self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2431
2432 return self.strRepr
2433
2434 def checkRecursion( self, parseElementList ):
2435 subRecCheckList = parseElementList[:] + [ self ]
2436 for e in self.exprs:
2437 e.checkRecursion( subRecCheckList )
2438
2439
2440class MatchFirst(ParseExpression):
2441 """Requires that at least one ParseExpression is found.
2442 If two expressions match, the first one listed is the one that will match.
2443 May be constructed using the '|' operator.
2444 """
2445 def __init__( self, exprs, savelist = False ):
2446 super(MatchFirst,self).__init__(exprs, savelist)
2447 if exprs:
2448 self.mayReturnEmpty = False
2449 for e in self.exprs:
2450 if e.mayReturnEmpty:
2451 self.mayReturnEmpty = True
2452 break
2453 else:
2454 self.mayReturnEmpty = True
2455
2456 def parseImpl( self, instring, loc, doActions=True ):
2457 maxExcLoc = -1
2458 maxException = None
2459 for e in self.exprs:
2460 try:
2461 ret = e._parse( instring, loc, doActions )
2462 return ret
2463 except ParseException, err:
2464 if err.loc > maxExcLoc:
2465 maxException = err
2466 maxExcLoc = err.loc
2467 except IndexError:
2468 if len(instring) > maxExcLoc:
2469 maxException = ParseException(instring,len(instring),e.errmsg,self)
2470 maxExcLoc = len(instring)
2471
2472 # only got here if no expression matched, raise exception for match that made it the furthest
2473 else:
2474 if maxException is not None:
2475 raise maxException
2476 else:
2477 raise ParseException(instring, loc, "no defined alternatives to match", self)
2478
2479 def __ior__(self, other ):
2480 if isinstance( other, basestring ):
2481 other = Literal( other )
2482 return self.append( other ) #MatchFirst( [ self, other ] )
2483
2484 def __str__( self ):
2485 if hasattr(self,"name"):
2486 return self.name
2487
2488 if self.strRepr is None:
2489 self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2490
2491 return self.strRepr
2492
2493 def checkRecursion( self, parseElementList ):
2494 subRecCheckList = parseElementList[:] + [ self ]
2495 for e in self.exprs:
2496 e.checkRecursion( subRecCheckList )
2497
2498
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        # may return empty only if every contained expression may
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Find an order in which the expressions match, then parse in that order.

        The order is discovered with tryParse; the real parse then runs
        each matched expression with _parse and merges the results.
        Raises ParseException if a required expression was never matched.
        """
        if self.initExprGroups:
            # classify the contained expressions once, on first use
            contained = self.exprs
            self.optionals = [ e.expr for e in contained if isinstance(e,Optional) ]
            self.multioptionals = [ e.expr for e in contained if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in contained if isinstance(e,OneOrMore) ]
            self.required = [ e for e in contained if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False

        probeLoc = loc
        pendingReqd = self.required[:]
        pendingOpt = self.optionals[:]
        matchOrder = []

        # keep sweeping over the candidates until a sweep matches nothing
        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            misses = 0
            for e in candidates:
                try:
                    probeLoc = e.tryParse( instring, probeLoc )
                except ParseException:
                    misses += 1
                else:
                    matchOrder.append(e)
                    if e in pendingReqd:
                        pendingReqd.remove(e)
                    elif e in pendingOpt:
                        pendingOpt.remove(e)
            if misses == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join( [ _ustr(e) for e in pendingReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [ e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOpt ]

        parsed = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            parsed.append(results)

        merged = ParseResults([])
        for r in parsed:
            # for names already present, combine the old and new values
            repeated = {}
            for k in r.keys():
                if k in merged.keys():
                    combined = ParseResults(merged[k])
                    combined += ParseResults(r[k])
                    repeated[k] = combined
            merged += ParseResults(r)
            for k,v in repeated.items():
                merged[k] = v
        return loc, merged

    def __str__( self ):
        """Return the name if one is set, else "{a & b & c}"."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " & ".join( parts ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run checkRecursion on every contained expression."""
        seen = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( seen )
2583
2584
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.

    Wraps a single expression (``self.expr``, which may be None) and
    mirrors that expression's parsing flags onto itself.
    """
    def __init__(self, expr, savelist=False):
        """Wrap *expr*; a plain string is converted to a Literal first."""
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # Inherit the wrapped expression's parsing characteristics.
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression; raise ParseException if there is none."""
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            raise ParseException("", loc, self.errmsg, self)

    def leaveWhitespace(self):
        """Disable whitespace skipping on self and on a private copy of the wrapped expression.

        Returns self.
        """
        self.skipWhitespace = False
        # Check for None *before* copying: calling copy() on a missing
        # expression raised AttributeError and made the guard useless.
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register *other* as ignorable here and on the wrapped expression.

        A Suppress instance that is already registered is not added again.
        Returns self.
        """
        if isinstance(other, Suppress):
            if other not in self.ignoreExprs:
                super(ParseElementEnhance, self).ignore(other)
                if self.expr is not None:
                    self.expr.ignore(self.ignoreExprs[-1])
        else:
            super(ParseElementEnhance, self).ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        """Streamline self, then the wrapped expression.  Returns self."""
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise RecursiveGrammarException if self already appears in parseElementList.

        Otherwise continue the check in the wrapped expression with self
        appended to a copy of the list.
        """
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=[]):
        """Validate the wrapped expression, then run the recursion check on self."""
        # The shared default list is only copied here, never mutated.
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """Prefer the base-class string; fall back to "ClassName:(expr)"."""
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            # Base __str__ failed (presumably no name set - confirm against
            # ParserElement); build our own.  Only Exception is caught so
            # KeyboardInterrupt / SystemExit are not swallowed.
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
2655
2656
class FollowedBy(ParseElementEnhance):
    """Positive lookahead for the given parse expression.

    FollowedBy only verifies that the expression matches at the current
    position; it does *not* advance the parsing position and always
    returns a null token list.
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # tryParse raises if the expression does not match; its result
        # (the end location) is deliberately discarded.
        self.expr.tryParse(instring, loc)
        return loc, []
2669
2670
class NotAny(ParseElementEnhance):
    """Negative lookahead for the given parse expression.

    NotAny only verifies that the expression does *not* match at the
    current position; it does *not* advance the parsing position, does
    *not* skip leading whitespace, and always returns a null token list.
    May be constructed using the '~' operator.
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # Set the flag directly rather than calling leaveWhitespace(), so
        # the setting is not propagated into the wrapped expression.
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            # The unwanted expression is absent: success, nothing consumed.
            return loc, []

        # The unwanted expression matched: fail at the current location.
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__(self):
        """Return the explicit name if set, else a cached "~{expr}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"
        return self.strRepr
2706
2707
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__(self, expr):
        super(ZeroOrMore, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the wrapped expression as often as possible, possibly never."""
        tokens = []
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
            skipIgnorables = bool(self.ignoreExprs)
            while True:
                # Step over ignorable text between repetitions, if any is configured.
                nextLoc = self._skipIgnorables(instring, loc) if skipIgnorables else loc
                loc, more = self.expr._parse(instring, nextLoc, doActions)
                if more or more.keys():
                    tokens += more
        except (ParseException, IndexError):
            # The first failed repetition ends the loop; keep what we have.
            pass

        return loc, tokens

    def __str__(self):
        """Return the explicit name if set, else a cached "[expr]..." form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."
        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Same as the inherited method, but the returned element always saves as a list."""
        named = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2745
2746
2747class OneOrMore(ParseElementEnhance):
2748 """Repetition of one or more of the given expression."""
2749 def parseImpl( self, instring, loc, doActions=True ):
2750 # must be at least one
2751 loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
2752 try:
2753 hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
2754 while 1:
2755 if hasIgnoreExprs:
2756 preloc = self._skipIgnorables( instring, loc )
2757 else:
2758 preloc = loc
2759 loc, tmptokens = self.expr._parse( instring, preloc, doActions )
2760 if tmptokens or tmptokens.keys():
2761 tokens += tmptokens
2762 except (ParseException,IndexError):
2763 pass
2764
2765 return loc, tokens
2766
2767 def __str__( self ):
2768 if hasattr(self,"name"):
2769 return self.name
2770
2771 if self.strRepr is None:
2772 self.strRepr = "{" + _ustr(self.expr) + "}..."
2773
2774 return self.strRepr
2775
2776 def setResultsName( self, name, listAllMatches=False ):
2777 ret = super(OneOrMore,self).setResultsName(name,listAllMatches)
2778 ret.saveAsList = True
2779 return ret
2780
class _NullToken(object):
    """Placeholder object that is always false and renders as an empty string."""
    def __bool__(self):
        return False
    # Python 2 looks up __nonzero__ for truth testing; reuse the same function.
    __nonzero__ = __bool__

    def __str__(self):
        return ""


# Module-wide sentinel instance of the placeholder above.
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.

    A default return value can also be specified, to be used when the
    optional expression is not found.
    """
    def __init__(self, exprs, default=_optionalNotMatched):
        super(Optional, self).__init__(exprs, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Return the wrapped expression's tokens, or the default (if any) when it fails."""
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            if self.defaultValue is _optionalNotMatched:
                # No default configured: contribute nothing.
                tokens = []
            elif self.expr.resultsName:
                # Expose the default under the wrapped expression's results name too.
                tokens = ParseResults([self.defaultValue])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        """Return the explicit name if set, else a cached "[expr]" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"
        return self.strRepr
2821
2822
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.

    If include is set to true, the matched expression is also parsed (the
    skipped text and matched expression are returned as a 2-element list).
    The ignore argument is used to define grammars (typically quoted
    strings and comments) that might contain false matches.  If failOn is
    given and matches before the target expression does, the skip fails.
    """
    def __init__(self, other, include=False, ignore=None, failOn=None):
        """Configure the skip.

        other   -- target expression to skip to
        include -- also consume and return the target when true
        ignore  -- expression to step over while scanning (may be None)
        failOn  -- expression (or string, converted to a Literal) that
                   aborts the skip when it matches first (may be None)
        """
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Advance one character at a time until the target expression matches.

        Returns the new location and a one-element list holding either the
        skipped text or, with include set and a non-empty match, a
        ParseResults of skipped text plus match.  Raises ParseException
        when failOn matches first or the end of input is reached.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        failParse = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        # failOn matched: flag it so the handler below
                        # re-raises instead of advancing.
                        failParse = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    failParse = False
                if self.ignoreExpr is not None:
                    # Step over every consecutive occurrence of the ignore
                    # expression.  (A leftover debug print used to write to
                    # stdout here on each hit; it has been removed.)
                    while 1:
                        try:
                            loc = self.ignoreExpr.tryParse(instring, loc)
                        except ParseBaseException:
                            break
                # Probe for the target without running parse actions.
                expr._parse(instring, loc, doActions=False, callPreParse=False)
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    loc, mat = expr._parse(instring, loc, doActions, callPreParse=False)
                    if mat:
                        skipRes = ParseResults(skipText)
                        skipRes += mat
                        return loc, [skipRes]
                return loc, [skipText]
            except (ParseException, IndexError):
                if failParse:
                    raise
                else:
                    loc += 1
        # Ran off the end of the input without finding the target.
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2888
2889class Forward(ParseElementEnhance):
2890 """Forward declaration of an expression to be defined later -
2891 used for recursive grammars, such as algebraic infix notation.
2892 When the expression is known, it is assigned to the Forward variable using the '<<' operator.
2893
2894 Note: take care when assigning to Forward not to overlook precedence of operators.
2895 Specifically, '|' has a lower precedence than '<<', so that::
2896 fwdExpr << a | b | c
2897 will actually be evaluated as::
2898 (fwdExpr << a) | b | c
2899 thereby leaving b and c out as parseable alternatives. It is recommended that you
2900 explicitly group the values inserted into the Forward::
2901 fwdExpr << (a | b | c)
2902 """
def __init__(self, other=None):
    """Create the placeholder, optionally already wrapping *other*."""
    super(Forward, self).__init__(other, savelist=False)
2905
def __lshift__(self, other):
    """Implement ``self << other``: install *other* as the wrapped expression.

    A plain string is converted to a Literal.  The parsing flags of the
    new expression are copied onto self and its ignore expressions are
    appended to self's.  Returns None.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    self.expr = other
    self.mayReturnEmpty = other.mayReturnEmpty
    self.strRepr = None  # drop any cached string form
    self.mayIndexError = other.mayIndexError
    self.setWhitespaceChars(other.whiteChars)
    self.skipWhitespace = other.skipWhitespace
    self.saveAsList = other.saveAsList
    self.ignoreExprs.extend(other.ignoreExprs)
    return None
2919
def leaveWhitespace(self):
    """Turn off whitespace skipping on this element only and return self.

    The wrapped expression is not touched.
    """
    self.skipWhitespace = False
    return self
2923
def streamline(self):
    """Streamline the wrapped expression once; later calls are no-ops.

    The streamlined flag is set before descending into the wrapped
    expression.  Returns self.
    """
    if self.streamlined:
        return self

    self.streamlined = True
    if self.expr is not None:
        self.expr.streamline()
    return self
2930
def validate(self, validateTrace=[]):
    """Validate the wrapped expression, then run the recursion check on self.

    The wrapped expression is skipped when self is already part of
    validateTrace; the recursion check runs in every case.
    """
    # The shared default list is only copied below, never mutated.
    alreadyVisited = self in validateTrace
    if not alreadyVisited:
        extendedTrace = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(extendedTrace)
    self.checkRecursion([])
2937
def __str__(self):
    """Return the explicit name if set, else "ClassName: <wrapped expression>".

    While the wrapped expression is rendered, the instance's class is
    temporarily switched to _ForwardNoRecurse and restored afterwards,
    even if rendering raises.
    """
    if hasattr(self, "name"):
        return self.name

    self._revertClass = self.__class__
    self.__class__ = _ForwardNoRecurse
    try:
        inner = self.expr
        if inner is None:
            rendered = "None"
        else:
            rendered = _ustr(inner)
    finally:
        self.__class__ = self._revertClass
    return self.__class__.__name__ + ": " + rendered
2952
2953 def copy(self):