From 202b21f4238a0c8faae7a999cfd0b9049779bc13 Mon Sep 17 00:00:00 2001 From: Cedric BAIL Date: Thu, 21 Jun 2018 22:40:46 -0700 Subject: [PATCH] searchpanel: complete rewrite to have low overhead when processing search. This search code is more efficient: it searches for the keyword chunk by chunk and counts the number of lines backward. It will reduce the amount of random access on disk and access all data sequentially, page after page. A possible next step in optimization would be to allow for the search of multiple keywords at the same time. Another, most likely bigger, reward would be to have a cache mechanism leveraging the versioning system and only reprocessing files when they change. This would lead to the biggest improvement. --- src/bin/edi_searchpanel.c | 477 ++++++++++++++++++++++++++++++++------ 1 file changed, 409 insertions(+), 68 deletions(-) diff --git a/src/bin/edi_searchpanel.c b/src/bin/edi_searchpanel.c index c780962..4ba11c8 100644 --- a/src/bin/edi_searchpanel.c +++ b/src/bin/edi_searchpanel.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "edi_file.h" @@ -71,61 +72,375 @@ _edi_searchpanel_line_clicked_cb(void *data EINA_UNUSED, const Efl_Event *event) free(numstr); } -char * -_edi_searchpanel_line_render(Elm_Code_Line *line, const char *path) +static char * +edi_searchpanel_line_render(const Eina_File_Line *line, const char *path, size_t *length) { - unsigned int len, trim = 0; - const char *text; - static char buf[1024]; - static char str[1016]; + Eina_Strbuf *buf = eina_strbuf_new(); + char *r; + const char *text = line->start; + const char *end = line->end; - text = elm_code_line_text_get(line, &len); - if (!text) - return NULL; - - while (trim < len) + while (text < end) { - if (text[trim] != ' ' && text[trim] != '\t') + if (*text != ' ' && *text != '\t') break; - trim++; + text++; } - text += trim; - len -= trim; - if (len > sizeof(str) - 1) - len = sizeof(str) - 1; - snprintf(str, len + 1, "%s", text); - snprintf(buf, 
sizeof(buf), "%s:%d ->\t%s", ecore_file_file_get(path), line->number, str); + eina_strbuf_append_printf(buf, "%s:%d ->\t", ecore_file_file_get(path), line->index); + eina_strbuf_append_length(buf, text, end - text - 1); - return strdup(buf); + *length = eina_strbuf_length_get(buf); + r = eina_strbuf_string_steal(buf); + eina_strbuf_free(buf); + + return r; +} + +typedef struct _Eina_Iterator_Search Eina_Iterator_Search; + +struct _Eina_Iterator_Search +{ + Eina_Iterator iterator; + + Eina_File *fp; + const char *map; + const char *end; + + Eina_Stringshare *term; + + Eina_File_Line current; + + int boundary; +}; + +// Return the starting of the last line found and update the count +static inline const char * +edi_count_line(const char *start, unsigned int length, const char *line, unsigned int *count) +{ + const char *cr; + const char *lf; + const char *end; + + if (!length) return line; + + lf = memchr(start, '\r', length); + cr = memchr(start, '\n', length); + + if (!cr && !lf) return start; + + end = lf + 1; + (*count)++; + + // \r\n + if (lf && cr == lf + 1) + { + end = cr; + } + // \n + else if (cr) + { + end = cr; + } + // \r + else if (lf) + { + end = lf; + } + + length = length - (end - start); + if (length == 0) return start; + + return edi_count_line(end + 1, length - 1, end, count); +} + +static inline const char * +edi_end_of_line(const char *start, int boundary, const char *end) +{ + const char *cr; + const char *lf; + unsigned long long chunk; + + while (start < end) + { + chunk = start + boundary < end ? 
boundary : end - start; + lf = memchr(start, '\r', chunk); + cr = memchr(start, '\n', chunk); + + // \r\n + if (lf && cr == lf + 1) + return cr + 1; + // \n + if (cr) + return cr + 1; + // \r + if (lf) + return lf + 1; + + start += chunk; + boundary = 4096; + } + + return end; +} + +static inline const char * +edi_search_term(const char *start, const char *end, int boundary, + Eina_Stringshare *term, Eina_File_Line *line) +{ + char end_of_block = 0; + + while (start < end) + { + const char *lookup; + const char *count; + unsigned long long chunk, cchunk; + const char *search = start; + + cchunk = chunk = start + boundary < end ? boundary : end - start; + do + { + lookup = memchr(search, *term, cchunk); + + // Did we found the right word or not ? + if (lookup && !memcmp(lookup, term, eina_stringshare_strlen(term))) + break ; + + if (!lookup) + break ; + + // We didn't, start looking from where we are at + cchunk -= lookup + 1 - search; + search = lookup + 1; + } + while (cchunk > 0); + + // If not found, we want to count starting from the end all the + // line in this chunk. + count = lookup ? lookup : start + chunk; + + line->start = edi_count_line(start, count - start, line->start, &line->index); + + // Here we post adjust the counter as we may have double counted a line + // if \r\n is exactly at the boundary of a chunk. This also only happen + // when we haven't found what we are looking for yet. + if (end_of_block == '\r' && *start == '\n') + line->index--; + + if (lookup) return lookup; + + end_of_block = *(start + chunk - 1); + start += chunk; + boundary = 4096; + } + + return end; +} + +static Eina_Bool +edi_search_file_iterator_next(Eina_Iterator_Search *it, void **data) +{ + const char *lookup; + int line_boundary; + + if (it->end == it->current.end) return EINA_FALSE; + + // We are starting counting at the end of the line, so we will forget + // to account for the line where we found the term we were looking for, + // manually adjust for it. 
+ // This also work to adjust for the first line as we start at zero + it->current.index++; + + // Account for first iteration when end == NULL + lookup = edi_search_term(it->current.end ? it->current.end : it->current.start, + it->end, it->boundary, it->term, &it->current); + + if (lookup == it->end) return EINA_FALSE; + + line_boundary = (uintptr_t) lookup & 0x3FF; + if (!line_boundary) line_boundary = 4096; + + it->current.end = edi_end_of_line(lookup, line_boundary, it->end); + // We need to adjust the end position of the line for '\r\n', + // in case it is on a cluster boundary. + if (*it->current.end == '\r') + { + if (it->current.end + 1 < it->end && + *(it->current.end + 1) == '\n') + it->current.end += 1; + } + + it->current.length = it->current.end - it->current.start; + + it->boundary = (uintptr_t) it->current.end & 0x3FF; + if (!it->boundary) it->boundary = 4096; + + *data = &it->current; + return EINA_TRUE; +} + +static Eina_File * +edi_search_file_iterator_container(Eina_Iterator_Search *it) +{ + return it->fp; +} + +static void +edi_search_file_iterator_free(Eina_Iterator_Search *it) +{ + eina_file_map_free(it->fp, (void*) it->map); + eina_file_close(it->fp); + + EINA_MAGIC_SET(&it->iterator, 0); + free(it); +} + +static Eina_Iterator * +edi_search_file(Eina_File *file, const char *term) +{ + Eina_Iterator_Search *it; + size_t length; + + if (!file || !term || strlen(term) == 0) return NULL; + + length = eina_file_size_get(file); + + if (!length) return NULL; + + it = calloc(1, sizeof (Eina_Iterator_Search)); + if (!it) return NULL; + + EINA_MAGIC_SET(&it->iterator, EINA_MAGIC_ITERATOR); + + it->map = eina_file_map_all(file, EINA_FILE_SEQUENTIAL); + if (!it->map) + { + free(it); + return NULL; + } + + it->fp = eina_file_dup(it->fp); + it->current.start = it->map; + it->current.end = NULL; + it->current.index = 0; + it->end = it->map + length; + it->term = eina_stringshare_add(term); + it->boundary = 4096; + + it->iterator.version = 
EINA_ITERATOR_VERSION; + it->iterator.next = FUNC_ITERATOR_NEXT(edi_search_file_iterator_next); + it->iterator.get_container = FUNC_ITERATOR_GET_CONTAINER(edi_search_file_iterator_container); + it->iterator.free = FUNC_ITERATOR_FREE(edi_search_file_iterator_free); + + return &it->iterator; +} + +typedef struct { + char *text; + size_t length; +} Async_Item; + +typedef struct { + Elm_Code *logger; + Eina_File *f; + + Eina_Inarray texts; +} Async_Log; + + +static Eina_Spinlock logs_lock; +static unsigned int logs_count = 0; +static Eina_Trash *logs = NULL; + +static void +main_loop_line_append_async(void *data) +{ + Async_Log *log = data; + Async_Item *item; + + while ((item = eina_inarray_pop(&log->texts))) + { + elm_code_file_line_append(log->logger->file, item->text, item->length, + strdup(eina_file_filename_get(log->f))); + free(item->text); + item->text = NULL; + } + + eina_file_close(log->f); + log->f = NULL; + log->logger = NULL; + // We are keeping the texts array as it won't be touched by Eina_Trash + + eina_spinlock_take(&logs_lock); + if (logs_count < 8) + { + logs_count++; + eina_trash_push(&logs, log); + log = NULL; + } + eina_spinlock_release(&logs_lock); + + if (log) + { + eina_inarray_flush(&log->texts); + free(log); + } } void _edi_searchpanel_search_project_file(const char *path, const char *search_term, Elm_Code *logger) { - Elm_Code *code; - Elm_Code_File *code_file; - Eina_List *item; - Elm_Code_Line *line; - char *text; + Eina_Iterator *it; + Eina_File_Line *l; + Async_Log *log; + Eina_File *f; - code = elm_code_create(); - code_file = elm_code_file_open(code, path); + f = eina_file_open(path, EINA_FALSE); + if (!f) return ; - EINA_LIST_FOREACH(code->file->lines, item, line) + // If the file looks big, check if it is a text file first. 
+ if (eina_file_size_get(f) > 1 * 1024 * 1024 && + strncmp(efreet_mime_type_get(path), "text/", 5)) { - int found = elm_code_line_text_strpos(line, search_term, 0); - if (found != ELM_CODE_TEXT_NOT_FOUND) - { - text = _edi_searchpanel_line_render(line, path); - ecore_thread_main_loop_begin(); - elm_code_file_line_append(logger->file, text, strlen(text), strdup(path)); - ecore_thread_main_loop_end(); - free(text); - } + eina_file_close(f); + return ; } - elm_code_file_close(code_file); + eina_spinlock_take(&logs_lock); + log = eina_trash_pop(&logs); + if (log) logs_count--; + eina_spinlock_release(&logs_lock); + + if (!log) + { + log = calloc(1, sizeof (Async_Log)); + eina_inarray_step_set(&log->texts, sizeof (log->texts), + sizeof (Async_Item), 4); + } + + log->f = eina_file_dup(f); + log->logger = logger; + + it = edi_search_file(f, search_term); + EINA_ITERATOR_FOREACH(it, l) + { + Async_Item *item = eina_inarray_grow(&log->texts, 1); + + item->text = edi_searchpanel_line_render(l, path, &item->length); + } + eina_iterator_free(it); + + if (eina_inarray_count(&log->texts) == 0) + { + eina_inarray_flush(&log->texts); + eina_file_close(log->f); + free(log); + log = NULL; + } + + if (log) ecore_main_loop_thread_safe_call_async(main_loop_line_append_async, log); + + eina_file_close(f); } Eina_Bool @@ -157,28 +472,53 @@ _file_ignore(const char *filename) void _edi_searchpanel_search_project(const char *directory, const char *search_term, Elm_Code *logger) { - Eina_List *files, *item; - char *file; - char *path; + Eina_List *dirs; + char *dir; - files = ecore_file_ls(directory); + dirs = eina_list_append(NULL, strdup(directory)); - EINA_LIST_FOREACH(files, item, file) + EINA_LIST_FREE(dirs, dir) { - if (_file_ignore(file)) continue; + Eina_File_Direct_Info *info; + Eina_Iterator *it; - path = edi_path_append(directory, file); - if (!edi_file_path_hidden(path)) + it = eina_file_stat_ls(dir); + EINA_ITERATOR_FOREACH(it, info) { - if (ecore_file_is_dir(path)) - 
_edi_searchpanel_search_project(path, search_term, logger); - else - _edi_searchpanel_search_project_file(path, search_term, logger); - } + if (_file_ignore(info->path + info->name_start)) + continue ; - free (path); - if (ecore_thread_check(_search_thread)) return; + if (edi_file_path_hidden(info->path)) + continue ; + + switch (info->type) + { + case EINA_FILE_REG: + { + _edi_searchpanel_search_project_file(info->path, search_term, logger); + break; + } + case EINA_FILE_DIR: + { + dirs = eina_list_append(dirs, strdup(info->path)); + break; + } + default: + // Ignore all other type + break; + } + + if (ecore_thread_check(_search_thread)) break; + } + eina_iterator_free(it); + + if (ecore_thread_check(_search_thread)) break; + free(dir); } + + // Cleanup in case of interuption + EINA_LIST_FREE(dirs, dir) + free(dir); } static void @@ -188,23 +528,12 @@ _search_end_cb(void *data EINA_UNUSED, Ecore_Thread *thread EINA_UNUSED) _searching = EINA_FALSE; } -static void -_search_cancel_cb(void *data EINA_UNUSED, Ecore_Thread *thread EINA_UNUSED) -{ - while ((ecore_thread_wait(_search_thread, 0.1)) != EINA_TRUE); - _searching = EINA_FALSE; -} - static void _search_begin_cb(void *data, Ecore_Thread *thread EINA_UNUSED) { const char *path = data; - _searching = EINA_TRUE; - _edi_searchpanel_search_project(path, _search_text, _elm_code); - - if (ecore_thread_check(_search_thread)) return; } void @@ -214,7 +543,11 @@ edi_searchpanel_find(const char *text) if (!text || strlen(text) == 0) return; - if (_searching) _search_cancel_cb(NULL, _search_thread); + if (_searching) + { + ecore_thread_cancel(_search_thread); + while ((ecore_thread_wait(_search_thread, 0.1)) != EINA_TRUE); + } if (_search_text) free(_search_text); _search_text = strdup(text); @@ -223,9 +556,10 @@ edi_searchpanel_find(const char *text) elm_code_file_clear(_elm_code->file); + _searching = EINA_TRUE; _search_thread = ecore_thread_feedback_run(_search_begin_cb, NULL, - _search_end_cb, _search_cancel_cb, - path, 
EINA_FALSE); + _search_end_cb, _search_end_cb, + path, EINA_FALSE); } void @@ -253,6 +587,7 @@ edi_searchpanel_add(Evas_Object *parent) _elm_code = code; _info_widget = widget; + eina_spinlock_new(&logs_lock); elm_object_content_set(frame, widget); elm_box_pack_end(parent, frame); @@ -302,8 +637,14 @@ edi_taskspanel_find(void) path = edi_project_get(); + if (_searching) + { + ecore_thread_cancel(_search_thread); + while ((ecore_thread_wait(_search_thread, 0.1)) != EINA_TRUE); + } + _search_thread = ecore_thread_feedback_run(_tasks_begin_cb, NULL, - _search_end_cb, _search_cancel_cb, + _search_end_cb, _search_end_cb, path, EINA_FALSE); }