Pyolian: implemented doc tokenizer

This commit is contained in:
Davide Andreoli 2018-01-02 13:57:22 +01:00
parent 9bedda14b3
commit 3388077bc3
3 changed files with 68 additions and 4 deletions

View File

@@ -24,13 +24,13 @@ a way that this folder will be available on PYTHON_PATH, fe:
"""
from enum import IntEnum
from ctypes import cast, byref, c_char_p, c_void_p
from ctypes import cast, byref, c_char_p, c_void_p, c_int
import ctypes
try:
from .eolian_lib import lib
except ImportError:
from eolian_lib import lib
### Eolian Enums ############################################################
@@ -1235,11 +1235,16 @@ class Declaration(EolianBaseObject):
return Variable(c_var) if c_var else None
class _Eolian_Doc_Token_Struct(ctypes.Structure):
_fields_ = [("type", c_int),
("text", c_char_p),
("text_end", c_char_p)]
class Documentation(EolianBaseObject):
# def __repr__(self):
# return "<eolian.Documentation '{0.name}'>".format(self)
# this is too much for py, just use string.split('\n\n')
# this is too much for py, just use string.split('\n\n') instead
# def string_split(self, string):
# c_list = lib.eolian_documentation_string_split
@@ -1255,6 +1260,64 @@ class Documentation(EolianBaseObject):
def since(self):
    """The '@since' tag of this documentation, decoded via _str_to_py."""
    # NOTE(review): the @property decorator appears to sit just above this
    # hunk boundary — confirm against the full file.
    return _str_to_py(lib.eolian_documentation_since_get(self._obj))
@property
def summary_tokens(self):
    """List of paragraphs for the summary; each paragraph is a list of tokens."""
    text = self.summary
    return self._tokenize(text)
@property
def description_tokens(self):
    """List of paragraphs for the description; each paragraph is a list of tokens."""
    text = self.description
    return self._tokenize(text)
@classmethod
def _tokenize(cls, full_text):
    """Split full_text into paragraphs and tokenize each one.

    Returns a list of paragraphs, where each paragraph is a list of
    Documentation_Token instances. Empty or None input yields [].
    """
    paragraphs = []
    if not full_text:
        return paragraphs
    tok = _Eolian_Doc_Token_Struct()  # one token struct, reused for every call
    for paragraph in full_text.split('\n\n'):
        tokens = []
        c_paragraph = _str_to_bytes(paragraph)  # keep c_paragraph alive !
        lib.eolian_doc_token_init(byref(tok))
        # tokenize() fills `tok` and returns a pointer to the next chunk to
        # parse (NULL/None at the end). Its restype is declared c_void_p so
        # the raw pointer can be passed back unchanged as char* below.
        next_chunk = lib.eolian_documentation_tokenize(c_paragraph, byref(tok))
        while next_chunk:
            typ = lib.eolian_doc_token_type_get(byref(tok))
            txt = lib.eolian_doc_token_text_get(byref(tok))
            # ref = # TODO ... needs the extra '*unit' parameter :(
            tokens.append(Documentation_Token(typ, txt))
            # free the C string only after Documentation_Token copied it
            lib.free(c_void_p(txt))
            next_chunk = lib.eolian_documentation_tokenize(c_char_p(next_chunk), byref(tok))
        paragraphs.append(tokens)
    return paragraphs
class Documentation_Token(object):
    """A single token produced by Documentation._tokenize.

    Wraps one raw C token: the type is mapped onto the
    Eolian_Doc_Token_Type enum and the text is converted via _str_to_py.
    """
    def __init__(self, c_token_type, c_text):
        self._type = Eolian_Doc_Token_Type(c_token_type)
        self._text = _str_to_py(c_text)
        self._ref = None  # TODO

    def __repr__(self):
        shown = self.text
        if len(shown) >= 40:
            shown = shown[:40] + '...'
        return "<eolian.Doc_Token ({}), text='{}', len={}>".format(
            self.type.name, shown, len(self.text))

    @property
    def type(self):
        """The token type, as an Eolian_Doc_Token_Type member."""
        return self._type

    @property
    def text(self):
        """The token text, as returned by _str_to_py."""
        return self._text

    @property
    def ref(self):
        """Not implemented yet — always None."""
        return self._ref
### internal string encode/decode ###########################################

View File

@@ -737,7 +737,7 @@ lib.eolian_documentation_since_get.restype = c_char_p
# EAPI const char *eolian_documentation_tokenize(const char *doc, Eolian_Doc_Token *ret);
lib.eolian_documentation_tokenize.argtypes = [c_char_p, c_void_p]
lib.eolian_documentation_tokenize.restype = c_char_p
# The assignment below overrides the one above (this is the diff's old/new
# pair): keep the returned pointer as a raw void* so it can be passed back
# to tokenize() unchanged as char*, instead of being eagerly decoded.
lib.eolian_documentation_tokenize.restype = c_void_p
# EAPI void eolian_doc_token_init(Eolian_Doc_Token *tok);
lib.eolian_doc_token_init.argtypes = [c_void_p,]

View File

@@ -127,6 +127,7 @@ class Template(pyratemp.Template):
'Variable': eolian.Variable,
'Declaration': eolian.Declaration,
'Documentation': eolian.Documentation,
'Documentation_Token': eolian.Documentation_Token,
# Eolian Enums
'Eolian_Function_Type': eolian.Eolian_Function_Type,
'Eolian_Parameter_Dir': eolian.Eolian_Parameter_Dir,