searchpanel: complete rewrite to have low overhead when processing search.

This search code is more efficient: it searches for the keyword chunk by chunk
and counts the lines retroactively. It reduces the amount of random access
on disk and accesses all data sequentially, page after page.

A possible next step in optimization would be to allow searching for
multiple keywords at the same time. Another, most likely bigger, reward
would be a cache mechanism leveraging the versioning system
and only reprocessing files when they change. This would lead to the
biggest improvement.
This commit is contained in:
Cedric BAIL 2018-06-21 22:40:46 -07:00 committed by Andrew Williams
parent 354b675444
commit 202b21f423
1 changed files with 409 additions and 68 deletions

View File

@ -5,6 +5,7 @@
#include <Eo.h>
#include <Eina.h>
#include <Elementary.h>
#include <Efreet_Mime.h>
#include <string.h>
#include "edi_file.h"
@ -71,61 +72,375 @@ _edi_searchpanel_line_clicked_cb(void *data EINA_UNUSED, const Efl_Event *event)
free(numstr);
}
// Render one search hit as "<file name>:<line number> ->\t<line text>", with
// leading spaces and tabs removed from the line text.
//
// NOTE(review): this span interleaves two implementations (two signatures
// back to back, duplicated declarations of `buf` and `text`). It looks like
// the Elm_Code_Line based version and the Eina_File_Line based version that
// replaces it; it does not compile as shown.
char *
_edi_searchpanel_line_render(Elm_Code_Line *line, const char *path)
static char *
edi_searchpanel_line_render(const Eina_File_Line *line, const char *path, size_t *length)
{
unsigned int len, trim = 0;
const char *text;
static char buf[1024];
static char str[1016];
Eina_Strbuf *buf = eina_strbuf_new();
char *r;
const char *text = line->start;
const char *end = line->end;
text = elm_code_line_text_get(line, &len);
if (!text)
return NULL;
// Skip leading indentation (spaces and tabs only)
while (trim < len)
while (text < end)
{
if (text[trim] != ' ' && text[trim] != '\t')
if (*text != ' ' && *text != '\t')
break;
trim++;
text++;
}
text += trim;
len -= trim;
// Elm_Code_Line variant: clamp the text to the static buffer size
if (len > sizeof(str) - 1)
len = sizeof(str) - 1;
snprintf(str, len + 1, "%s", text);
snprintf(buf, sizeof(buf), "%s:%d ->\t%s", ecore_file_file_get(path), line->number, str);
// Eina_File_Line variant: prefix with file name and line index
eina_strbuf_append_printf(buf, "%s:%d ->\t", ecore_file_file_get(path), line->index);
// NOTE(review): the "- 1" presumably drops the line terminator; confirm what
// happens for a last line without terminator (last character lost?) and for
// "\r\n" endings ('\r' kept?).
eina_strbuf_append_length(buf, text, end - text - 1);
return strdup(buf);
// Report the rendered length and take the string out of the strbuf before
// freeing the strbuf; the returned pointer is later released with free() in
// main_loop_line_append_async().
*length = eina_strbuf_length_get(buf);
r = eina_strbuf_string_steal(buf);
eina_strbuf_free(buf);
return r;
}
// State of one term search over a mapped file, handed out as an Eina_Iterator
// by edi_search_file().
typedef struct _Eina_Iterator_Search Eina_Iterator_Search;
struct _Eina_Iterator_Search
{
// Base iterator; edi_search_file() returns the address of this member
Eina_Iterator iterator;
// File handle released in edi_search_file_iterator_free()
Eina_File *fp;
// Start of the mapping obtained with eina_file_map_all()
const char *map;
// One past the last mapped byte (map + file size)
const char *end;
// Term being searched, stored as a stringshare
Eina_Stringshare *term;
// Line description handed to the caller on every successful next()
Eina_File_Line current;
// Size of the first chunk scanned by the next search; later chunks use 4096
int boundary;
};
// Count the line terminators in [start, start + length) and return the start
// of the last line reached.
//
// A terminator is "\n", "\r\n" or a lone "\r". When both kinds are present
// the one that comes first in the data is handled first, so no line is
// skipped.
//
// @param start  first byte of the span to scan
// @param length number of bytes to scan
// @param line   value returned unchanged when @p length is zero
// @param count  incremented once per terminator found
// @return @p line when @p length is zero, @p start when the span holds no
//         terminator, otherwise the first byte after the last terminator
//         (which may be start + length)
static inline const char *
edi_count_line(const char *start, unsigned int length, const char *line, unsigned int *count)
{
   const char *cursor = start;
   const char *stop = start + length;

   if (!length) return line;

   // A span without any terminator reports its own start
   line = start;

   while (cursor < stop)
     {
        size_t remaining = (size_t) (stop - cursor);
        const char *cr = memchr(cursor, '\r', remaining);
        const char *lf = memchr(cursor, '\n', remaining);
        const char *eol;

        if (!cr && !lf) break;

        if (cr && (!lf || cr < lf))
          {
             // "\r\n" is a single terminator, a lone '\r' is one of its own
             eol = (lf == cr + 1) ? lf : cr;
          }
        else
          {
             eol = lf;
          }

        (*count)++;

        // The next line starts right after the terminator, never on it
        cursor = eol + 1;
        line = cursor;
     }

   return line;
}
// Find where the line containing @p start ends.
//
// The data is scanned chunk by chunk: the first chunk is @p boundary bytes
// long, every following one 4096 bytes.
//
// @param start    position inside the line
// @param boundary size of the first chunk to scan; non-positive values fall
//                 back to 4096
// @param end      one past the last readable byte
// @return pointer to the first byte after the line terminator ("\n", "\r\n"
//         or a lone "\r"), or @p end when no terminator is found
static inline const char *
edi_end_of_line(const char *start, int boundary, const char *end)
{
   if (boundary <= 0) boundary = 4096;

   while (start < end)
     {
        size_t left = (size_t) (end - start);
        size_t chunk = left > (size_t) boundary ? (size_t) boundary : left;
        const char *cr = memchr(start, '\r', chunk);
        const char *lf = memchr(start, '\n', chunk);

        // Whichever terminator comes first ends the line
        if (cr && (!lf || cr < lf))
          {
             // "\r\n" is one terminator, even when the '\n' is the first
             // byte of the next chunk
             if (cr + 1 < end && cr[1] == '\n')
               return cr + 2;
             return cr + 1;
          }
        if (lf)
          return lf + 1;

        start += chunk;
        boundary = 4096;
     }

   return end;
}
// Look for @p term in [start, end) and keep @p line up to date while scanning.
//
// The data is scanned chunk by chunk (first chunk @p boundary bytes, then
// 4096). For every chunk that does not hold the term, all its line
// terminators are added to line->index; for the chunk holding the term only
// the ones located before the match are.
//
// @param start    where to start searching; callers pass the start of a line
// @param end      one past the last readable byte
// @param boundary size of the first chunk; non-positive values fall back to
//                 4096
// @param term     term to look for
// @param line     line->index is increased by the number of lines passed and
//                 line->start is set to the start of the line reached
// @return pointer to the first occurrence of @p term, or @p end if there is
//         none
static inline const char *
edi_search_term(const char *start, const char *end, int boundary,
                Eina_Stringshare *term, Eina_File_Line *line)
{
   const size_t term_length = (size_t) eina_stringshare_strlen(term);
   // Start of the line currently being scanned; only moves when a line
   // terminator has actually been passed, so a chunk that begins in the
   // middle of a line does not become the line start.
   const char *line_start = start;
   char end_of_block = 0;

   if (boundary <= 0) boundary = 4096;

   while (start < end)
     {
        size_t left = (size_t) (end - start);
        size_t chunk = left > (size_t) boundary ? (size_t) boundary : left;
        size_t remaining = chunk;
        const char *search = start;
        const char *lookup = NULL;
        const char *count;
        const char *last;
        unsigned int lines_before;

        while (remaining > 0)
          {
             const char *candidate = memchr(search, *term, remaining);

             if (!candidate) break;
             // The whole term has to fit before the end of the data; later
             // candidates would have even less room.
             if ((size_t) (end - candidate) < term_length) break;
             // Did we find the right word or not?
             if (!memcmp(candidate, term, term_length))
               {
                  lookup = candidate;
                  break;
               }
             // We didn't, keep looking right after this candidate
             remaining -= (size_t) (candidate + 1 - search);
             search = candidate + 1;
          }

        // If not found, count all the lines of this chunk, otherwise only
        // the ones before the match.
        count = lookup ? lookup : start + chunk;
        lines_before = line->index;
        last = edi_count_line(start, count - start, line_start, &line->index);
        if (line->index != lines_before) line_start = last;
        line->start = line_start;

        // Here we post adjust the counter as we may have double counted a
        // line if \r\n is exactly at the boundary of a chunk.
        if (end_of_block == '\r' && *start == '\n')
          line->index--;

        if (lookup) return lookup;

        end_of_block = *(start + chunk - 1);
        start += chunk;
        boundary = 4096;
     }

   return end;
}
// Eina_Iterator "next" callback: advance to the next line containing the term.
//
// @param it   search iterator state
// @param data set to &it->current (start, end, length and index of the line
//             holding the match) on success
// @return EINA_TRUE when a matching line was found, EINA_FALSE once the end
//         of the mapped data has been reached
static Eina_Bool
edi_search_file_iterator_next(Eina_Iterator_Search *it, void **data)
{
   const char *lookup;
   int line_boundary;

   if (it->end == it->current.end) return EINA_FALSE;

   // We are starting counting at the end of the line, so we will forget
   // to account for the line where we found the term we were looking for,
   // manually adjust for it.
   // This also works to adjust for the first line as we start at zero.
   it->current.index++;

   // Account for first iteration when end == NULL
   lookup = edi_search_term(it->current.end ? it->current.end : it->current.start,
                            it->end, it->boundary, it->term, &it->current);
   if (lookup == it->end)
     {
        // Nothing left: remember it so further calls return immediately
        it->current.end = it->end;
        return EINA_FALSE;
     }

   // NOTE(review): the mask is 0x3FF while the fallback chunk size is 4096;
   // confirm which page/cluster size is intended. This only influences how
   // the scan is chunked.
   line_boundary = (uintptr_t) lookup & 0x3FF;
   if (!line_boundary) line_boundary = 4096;
   it->current.end = edi_end_of_line(lookup, line_boundary, it->end);

   // We need to adjust the end position of the line for '\r\n' in case the
   // scan stopped between the two bytes. current.end is always past lookup,
   // so looking one byte back is safe; it is only dereferenced when it is
   // still inside the mapped data.
   if (it->current.end < it->end &&
       *(it->current.end - 1) == '\r' &&
       *it->current.end == '\n')
     it->current.end += 1;

   it->current.length = it->current.end - it->current.start;

   it->boundary = (uintptr_t) it->current.end & 0x3FF;
   if (!it->boundary) it->boundary = 4096;

   *data = &it->current;
   return EINA_TRUE;
}
// Eina_Iterator "get_container" callback: the container of a search iterator
// is the file handle it holds.
static Eina_File *
edi_search_file_iterator_container(Eina_Iterator_Search *it)
{
   Eina_File *container = it->fp;

   return container;
}
// Eina_Iterator "free" callback: release everything owned by the iterator
// (the mapping, the file handle, the stringshared term and the iterator
// itself).
static void
edi_search_file_iterator_free(Eina_Iterator_Search *it)
{
   eina_file_map_free(it->fp, (void*) it->map);
   eina_file_close(it->fp);
   // The term was added with eina_stringshare_add() in edi_search_file()
   eina_stringshare_del(it->term);
   EINA_MAGIC_SET(&it->iterator, 0);
   free(it);
}
// Create an iterator over every line of @p file that contains @p term.
//
// The iterator keeps its own handle on the file and maps it entirely; both
// are released when the iterator is freed. Each successful iteration yields
// an Eina_File_Line describing a matching line.
//
// @param file opened file to search; the caller keeps its own handle
// @param term text to look for
// @return a new iterator, or NULL when @p file is NULL or empty, @p term is
//         NULL or empty, or on allocation/mapping failure
static Eina_Iterator *
edi_search_file(Eina_File *file, const char *term)
{
   Eina_Iterator_Search *it;
   size_t length;

   if (!file || !term || term[0] == '\0') return NULL;

   length = eina_file_size_get(file);
   if (!length) return NULL;

   it = calloc(1, sizeof (*it));
   if (!it) return NULL;

   // Hold our own handle on the file so the mapping and the handle are
   // released together in edi_search_file_iterator_free().
   it->fp = eina_file_dup(file);
   if (!it->fp)
     {
        free(it);
        return NULL;
     }

   it->map = eina_file_map_all(it->fp, EINA_FILE_SEQUENTIAL);
   if (!it->map)
     {
        eina_file_close(it->fp);
        free(it);
        return NULL;
     }

   EINA_MAGIC_SET(&it->iterator, EINA_MAGIC_ITERATOR);

   it->current.start = it->map;
   it->current.end = NULL;
   it->current.index = 0;
   it->end = it->map + length;
   it->term = eina_stringshare_add(term);
   it->boundary = 4096;

   it->iterator.version = EINA_ITERATOR_VERSION;
   it->iterator.next = FUNC_ITERATOR_NEXT(edi_search_file_iterator_next);
   it->iterator.get_container = FUNC_ITERATOR_GET_CONTAINER(edi_search_file_iterator_container);
   it->iterator.free = FUNC_ITERATOR_FREE(edi_search_file_iterator_free);

   return &it->iterator;
}
// One rendered search hit waiting to be appended to the result view.
typedef struct {
// Rendered line; released with free() once it has been appended
char *text;
// Length of text as reported by edi_searchpanel_line_render()
size_t length;
} Async_Item;
// All the hits found in one file, passed to main_loop_line_append_async().
typedef struct {
// Elm_Code whose file receives the rendered lines
Elm_Code *logger;
// Handle on the searched file; used for its file name and closed once the
// hits have been appended
Eina_File *f;
// Inline array of Async_Item
Eina_Inarray texts;
} Async_Log;
// Protects logs and logs_count
static Eina_Spinlock logs_lock;
// Number of Async_Log currently stored in logs (kept below 8 by
// main_loop_line_append_async())
static unsigned int logs_count = 0;
// Recycled Async_Log structures available for reuse
static Eina_Trash *logs = NULL;
static void
main_loop_line_append_async(void *data)
{
Async_Log *log = data;
Async_Item *item;
while ((item = eina_inarray_pop(&log->texts)))
{
elm_code_file_line_append(log->logger->file, item->text, item->length,
strdup(eina_file_filename_get(log->f)));
free(item->text);
item->text = NULL;
}
eina_file_close(log->f);
log->f = NULL;
log->logger = NULL;
// We are keeping the texts array as it won't be touched by Eina_Trash
eina_spinlock_take(&logs_lock);
if (logs_count < 8)
{
logs_count++;
eina_trash_push(&logs, log);
log = NULL;
}
eina_spinlock_release(&logs_lock);
if (log)
{
eina_inarray_flush(&log->texts);
free(log);
}
}
// Search one file for search_term and queue every matching line for display
// in logger.
//
// NOTE(review): this span interleaves two implementations (Elm_Code based
// and Eina_File based); it does not compile as shown.
void
_edi_searchpanel_search_project_file(const char *path, const char *search_term, Elm_Code *logger)
{
Elm_Code *code;
Elm_Code_File *code_file;
Eina_List *item;
Elm_Code_Line *line;
char *text;
Eina_Iterator *it;
Eina_File_Line *l;
Async_Log *log;
Eina_File *f;
code = elm_code_create();
code_file = elm_code_file_open(code, path);
f = eina_file_open(path, EINA_FALSE);
if (!f) return ;
EINA_LIST_FOREACH(code->file->lines, item, line)
// If the file looks big, check if it is a text file first.
// Files above 1 MiB whose mime type does not start with "text/" are skipped.
if (eina_file_size_get(f) > 1 * 1024 * 1024 &&
strncmp(efreet_mime_type_get(path), "text/", 5))
{
int found = elm_code_line_text_strpos(line, search_term, 0);
if (found != ELM_CODE_TEXT_NOT_FOUND)
{
text = _edi_searchpanel_line_render(line, path);
ecore_thread_main_loop_begin();
elm_code_file_line_append(logger->file, text, strlen(text), strdup(path));
ecore_thread_main_loop_end();
free(text);
}
eina_file_close(f);
return ;
}
elm_code_file_close(code_file);
// Reuse a recycled Async_Log when one is available
eina_spinlock_take(&logs_lock);
log = eina_trash_pop(&logs);
if (log) logs_count--;
eina_spinlock_release(&logs_lock);
if (!log)
{
// NOTE(review): the calloc() result is used without a NULL check
log = calloc(1, sizeof (Async_Log));
eina_inarray_step_set(&log->texts, sizeof (log->texts),
sizeof (Async_Item), 4);
}
log->f = eina_file_dup(f);
log->logger = logger;
// NOTE(review): edi_search_file() returns NULL for an empty file; confirm
// the iterator macros below are fine with a NULL iterator.
it = edi_search_file(f, search_term);
EINA_ITERATOR_FOREACH(it, l)
{
Async_Item *item = eina_inarray_grow(&log->texts, 1);
item->text = edi_searchpanel_line_render(l, path, &item->length);
}
eina_iterator_free(it);
// No hit in this file: drop the log instead of scheduling it
if (eina_inarray_count(&log->texts) == 0)
{
eina_inarray_flush(&log->texts);
eina_file_close(log->f);
free(log);
log = NULL;
}
// Hand the hits over to main_loop_line_append_async(), which takes
// ownership of log
if (log) ecore_main_loop_thread_safe_call_async(main_loop_line_append_async, log);
eina_file_close(f);
}
Eina_Bool
@ -157,28 +472,53 @@ _file_ignore(const char *filename)
// Search every non-ignored, non-hidden regular file below directory for
// search_term.
//
// NOTE(review): this span interleaves two implementations (a recursive one
// based on ecore_file_ls() and an iterative one based on a list of pending
// directories); it does not compile as shown.
void
_edi_searchpanel_search_project(const char *directory, const char *search_term, Elm_Code *logger)
{
Eina_List *files, *item;
char *file;
char *path;
Eina_List *dirs;
char *dir;
files = ecore_file_ls(directory);
// Pending directories; sub-directories found while scanning are appended
dirs = eina_list_append(NULL, strdup(directory));
EINA_LIST_FOREACH(files, item, file)
EINA_LIST_FREE(dirs, dir)
{
if (_file_ignore(file)) continue;
Eina_File_Direct_Info *info;
Eina_Iterator *it;
path = edi_path_append(directory, file);
if (!edi_file_path_hidden(path))
it = eina_file_stat_ls(dir);
EINA_ITERATOR_FOREACH(it, info)
{
if (ecore_file_is_dir(path))
_edi_searchpanel_search_project(path, search_term, logger);
else
_edi_searchpanel_search_project_file(path, search_term, logger);
}
// info->path + info->name_start is the entry name without its directory
if (_file_ignore(info->path + info->name_start))
continue ;
free (path);
if (ecore_thread_check(_search_thread)) return;
if (edi_file_path_hidden(info->path))
continue ;
switch (info->type)
{
case EINA_FILE_REG:
{
_edi_searchpanel_search_project_file(info->path, search_term, logger);
break;
}
case EINA_FILE_DIR:
{
dirs = eina_list_append(dirs, strdup(info->path));
break;
}
default:
// Ignore all other type
break;
}
// Stop scanning this directory when ecore_thread_check() reports true
if (ecore_thread_check(_search_thread)) break;
}
eina_iterator_free(it);
if (ecore_thread_check(_search_thread)) break;
free(dir);
}
// Cleanup in case of interruption
EINA_LIST_FREE(dirs, dir)
free(dir);
}
static void
@ -188,23 +528,12 @@ _search_end_cb(void *data EINA_UNUSED, Ecore_Thread *thread EINA_UNUSED)
_searching = EINA_FALSE;
}
// Wait, in 0.1 steps, until ecore_thread_wait() reports EINA_TRUE for
// _search_thread, then clear the _searching flag.
static void
_search_cancel_cb(void *data EINA_UNUSED, Ecore_Thread *thread EINA_UNUSED)
{
while ((ecore_thread_wait(_search_thread, 0.1)) != EINA_TRUE);
_searching = EINA_FALSE;
}
// Thread callback given to ecore_thread_feedback_run(): search the project
// directory received through data for _search_text and log hits to _elm_code.
static void
_search_begin_cb(void *data, Ecore_Thread *thread EINA_UNUSED)
{
const char *path = data;
_searching = EINA_TRUE;
_edi_searchpanel_search_project(path, _search_text, _elm_code);
if (ecore_thread_check(_search_thread)) return;
}
void
@ -214,7 +543,11 @@ edi_searchpanel_find(const char *text)
if (!text || strlen(text) == 0) return;
if (_searching) _search_cancel_cb(NULL, _search_thread);
if (_searching)
{
ecore_thread_cancel(_search_thread);
while ((ecore_thread_wait(_search_thread, 0.1)) != EINA_TRUE);
}
if (_search_text) free(_search_text);
_search_text = strdup(text);
@ -223,9 +556,10 @@ edi_searchpanel_find(const char *text)
elm_code_file_clear(_elm_code->file);
_searching = EINA_TRUE;
_search_thread = ecore_thread_feedback_run(_search_begin_cb, NULL,
_search_end_cb, _search_cancel_cb,
path, EINA_FALSE);
_search_end_cb, _search_end_cb,
path, EINA_FALSE);
}
void
@ -253,6 +587,7 @@ edi_searchpanel_add(Evas_Object *parent)
_elm_code = code;
_info_widget = widget;
eina_spinlock_new(&logs_lock);
elm_object_content_set(frame, widget);
elm_box_pack_end(parent, frame);
@ -302,8 +637,14 @@ edi_taskspanel_find(void)
path = edi_project_get();
if (_searching)
{
ecore_thread_cancel(_search_thread);
while ((ecore_thread_wait(_search_thread, 0.1)) != EINA_TRUE);
}
_search_thread = ecore_thread_feedback_run(_tasks_begin_cb, NULL,
_search_end_cb, _search_cancel_cb,
_search_end_cb, _search_end_cb,
path, EINA_FALSE);
}