Just spent way too long working on eina_str_split()/eina_str_split_full(). Never again.

This function has been severely broken since 1.0: it did not honor the limit parameter (max_tokens), and it was almost comically slow. I have (hopefully) fixed both of these issues. I also fixed the bug where this function was less than 200 lines long.

I'll backport this once it gets tested/reviewed/reverted


SVN revision: 71871
This commit is contained in:
Mike Blumenkrantz 2012-06-08 23:28:11 +00:00
parent 1d1cd02899
commit 8dbcdefab9
3 changed files with 160 additions and 33 deletions

View File

@ -24,3 +24,4 @@ Guillaume Friloux <guillaume.friloux@asp64.com>
Jonas M. Gastal <jgastal@profusion.mobi> Jonas M. Gastal <jgastal@profusion.mobi>
Raphael Kubo da Costa <rakuco@freebsd.org> Raphael Kubo da Costa <rakuco@freebsd.org>
Jérôme Pinot <ngc891@gmail.com> Jérôme Pinot <ngc891@gmail.com>
Mike Blumenkrantz <michael.blumenkrantz@gmail.com>

View File

@ -295,4 +295,4 @@
2012-06-08 Mike Blumenkrantz 2012-06-08 Mike Blumenkrantz
* Prevent negative max_token count for eina_str_split functions * Fixed eina_str_split_full() to behave properly and be much faster

View File

@ -35,7 +35,7 @@
#include "eina_private.h" #include "eina_private.h"
#include "eina_str.h" #include "eina_str.h"
#include "eina_safety_checks.h"
/*============================================================================* /*============================================================================*
* Local * * Local *
*============================================================================*/ *============================================================================*/
@ -71,12 +71,38 @@ eina_str_split_full_helper(const char *str,
int max_tokens, int max_tokens,
unsigned int *elements) unsigned int *elements)
{ {
char *s, **str_array; char *s, *pos, **str_array;
const char *src; const char *src;
size_t len, dlen; size_t len, dlen;
unsigned int tokens; unsigned int tokens = 0, x;
const char *idx[256] = {NULL};
EINA_SAFETY_ON_TRUE_RETURN_VAL(max_tokens < 0, NULL); if (max_tokens < 0) max_tokens = 0;
if (max_tokens == 1)
{
str_array = malloc(sizeof(char *) * 2);
if (!str_array)
{
if (elements)
*elements = 0;
return NULL;
}
s = strdup(str);
if (!s)
{
free(str_array);
if (elements)
*elements = 0;
return NULL;
}
if (elements)
*elements = 1;
str_array[0] = s;
return str_array;
}
dlen = strlen(delim); dlen = strlen(delim);
if (dlen == 0) if (dlen == 0)
{ {
@ -86,7 +112,6 @@ eina_str_split_full_helper(const char *str,
return NULL; return NULL;
} }
tokens = 0;
src = str; src = str;
/* count tokens and check strlen(str) */ /* count tokens and check strlen(str) */
while (*src != '\0') while (*src != '\0')
@ -101,15 +126,18 @@ eina_str_split_full_helper(const char *str,
if (EINA_UNLIKELY(d == d_end)) if (EINA_UNLIKELY(d == d_end))
{ {
src = tmp; src = tmp;
if (tokens < (sizeof(idx) / sizeof(idx[0])))
{
idx[tokens] = tmp;
//printf("token %d='%s'\n", tokens + 1, idx[tokens]);
}
tokens++; tokens++;
if (tokens && (tokens == (unsigned int)max_tokens)) break;
} }
else else
src++; src++;
} }
len = src - str; len = src - str + strlen(src);
if ((max_tokens > 0) && (tokens > (unsigned int)max_tokens))
tokens = max_tokens;
str_array = malloc(sizeof(char *) * (tokens + 2)); str_array = malloc(sizeof(char *) * (tokens + 2));
if (!str_array) if (!str_array)
@ -120,6 +148,24 @@ eina_str_split_full_helper(const char *str,
return NULL; return NULL;
} }
if (!tokens)
{
s = strdup(str);
if (!s)
{
free(str_array);
if (elements)
*elements = 0;
return NULL;
}
str_array[0] = s;
str_array[1] = NULL;
if (elements)
*elements = 2;
return str_array;
}
s = malloc(len + 1); s = malloc(len + 1);
if (!s) if (!s)
{ {
@ -130,38 +176,118 @@ eina_str_split_full_helper(const char *str,
return NULL; return NULL;
} }
/* copy tokens and string */
tokens = 0;
str_array[0] = s; str_array[0] = s;
src = str;
while (*src != '\0') if (len == tokens * dlen)
{ {
const char *d = delim, *d_end = d + dlen; /* someone's having a laugh somewhere */
const char *tmp = src; memset(s, 0, len + 1);
for (; (d < d_end) && (*tmp != '\0'); d++, tmp++) for (x = 1; x < tokens + 1; x++)
str_array[x] = s + x;
str_array[x] = NULL;
if (elements)
*elements = x + 1;
return str_array;
}
/* copy tokens and string */
if (idx[0] - str - dlen > len)
{
/* FIXME: don't think this can happen but putting this here just in case */
abort();
}
pos = s;
for (x = 0; x < MIN(tokens, (sizeof(idx) / sizeof(idx[0]))); x++)
{
if (x + 1 < (sizeof(idx) / sizeof(idx[0])))
{ {
if (EINA_LIKELY(*d != *tmp)) /* first one is special */
break; if (!x)
} {
if (EINA_UNLIKELY(d == d_end)) eina_strlcpy(pos, str, idx[x] - str - dlen + 1);
{ str_array[x] = pos;
src = tmp; //printf("str_array[%d] = '%s'\n", x, str_array[x]);
*s = '\0'; pos += idx[x] - str - dlen + 1;
s += dlen; if ((tokens == 1) && (idx[0]))
tokens++; {
str_array[tokens] = s; eina_strlcpy(pos, idx[x], len + 1 - (pos - s));
x++, tokens++;
str_array[x] = pos;
}
}
/* more tokens */
else if (idx[x + 1])
{
eina_strlcpy(pos, idx[x - 1], idx[x] - idx[x - 1] - dlen + 1);
str_array[x] = pos;
//printf("str_array[%d] = '%s'\n", x, str_array[x]);
pos += idx[x] - idx[x - 1] - dlen + 1;
}
/* last token */
else
{
if (max_tokens && ((unsigned int)max_tokens < tokens + 1))
eina_strlcpy(pos, idx[x - 1], len + 1 - (pos - s));
else
{
//printf("diff: %d\n", len + 1 - (pos - s));
eina_strlcpy(pos, idx[x - 1], idx[x] - idx[x - 1] - dlen + 1);
str_array[x] = pos;
//printf("str_array[%d] = '%s'\n", x, str_array[x]);
pos += idx[x] - idx[x - 1] - dlen + 1;
x++, tokens++;
eina_strlcpy(pos, idx[x - 1], len + 1 - (pos - s));
}
str_array[x] = pos;
//printf("str_array[%d] = '%s'\n", x, str_array[x]);
}
} }
/* no more tokens saved after this one */
else else
{ {
*s = *src; eina_strlcpy(pos, idx[x - 1], idx[x] - idx[x - 1] - dlen + 1);
s++; str_array[x] = pos;
src++; //printf("str_array[%d] = '%s'\n", x, str_array[x]);
pos += idx[x] - idx[x - 1] - dlen + 1;
src = idx[x];
x++, tokens++;
str_array[x] = s = pos;
break;
} }
} }
*s = '\0'; if ((x != tokens) && ((!max_tokens) || (x < tokens)))
str_array[tokens + 1] = NULL; {
while (*src != '\0')
{
const char *d = delim, *d_end = d + dlen;
const char *tmp = src;
for (; (d < d_end) && (*tmp != '\0'); d++, tmp++)
{
if (EINA_LIKELY(*d != *tmp))
break;
}
if (((!max_tokens) || (((tokens == (unsigned int)max_tokens) || x < tokens - 2))) && (EINA_UNLIKELY(d == d_end)))
{
src = tmp;
*s = '\0';
s++, x++;
//printf("str_array[%d] = '%s'\n", x, str_array[x - 1]);
str_array[x] = s;
}
else
{
*s = *src;
s++, src++;
}
}
*s = 0;
}
str_array[tokens] = NULL;
if (elements) if (elements)
*elements = (tokens + 1); {
*elements = tokens;
if ((!max_tokens) || (tokens == (unsigned int)max_tokens))
(*elements)++;
}
return str_array; return str_array;
} }