eterm/libmej/strings.c

/*
 * Copyright (C) 1997-2000, Michael Jennings
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies of the Software, its documentation and marketing & publicity
 * materials, and acknowledgment shall be given in the documentation, materials
 * and software packages that this Software was used.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/* Revision identifier string.  "$Id$" is a keyword placeholder; presumably the
   version-control system (CVS, going by the variable name) expands it on
   checkout -- confirm against the build setup. */
static const char cvs_ident[] = "$Id$";

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <libmej.h>

#ifndef HAVE_MEMMEM
/* Find the first occurrence of the byte string needle (needlelen bytes) in
   the memory region haystack (haystacklen bytes).  Returns a pointer to the
   start of the match inside haystack, or NULL if there is no match.  An empty
   needle matches at the start of haystack. */
void *
memmem(void *haystack, register size_t haystacklen, void *needle, register size_t needlelen)
{
  register char *hs = (char *) haystack;
  register char *n = (char *) needle;
  register size_t i;
  register size_t last;

  /* A needle longer than the haystack can never match.  Testing this first
     also keeps the unsigned subtraction below from wrapping around. */
  if (needlelen > haystacklen) {
    return (NULL);
  }
  if (needlelen == 0) {
    return (haystack);
  }

  /* last is the final offset at which the needle still fits, so it has to
     be examined too (hence <=, not <). */
  last = haystacklen - needlelen;
  for (i = 0; i <= last; i++) {
    if (!memcmp(hs + i, n, needlelen)) {
      return (hs + i);
    }
  }
  return (NULL);
}
#endif

#ifndef HAVE_USLEEP
/* Suspend the caller for about usec microseconds by calling select() with no
   descriptor sets and only a timeout.  The return value of select() is
   ignored. */
void
usleep(unsigned long usec)
{
  struct timeval delay;

  /* tv_usec is only meaningful below one million, so whole seconds are
     carried into tv_sec; otherwise a delay of a second or more hands
     select() an out-of-range timeout. */
  delay.tv_sec = usec / 1000000UL;
  delay.tv_usec = usec % 1000000UL;
  select(0, NULL, NULL, NULL, &delay);
}

#endif

/***** Not needed ******
#ifndef HAVE_NANOSLEEP
__inline__ void
nanosleep(unsigned long nsec) {
    usleep(nsec / 1000);
}
#endif
************************/

/* Return the leftmost cnt characters of str as a newly allocated string.
   The result always occupies cnt + 1 bytes; if str is shorter than cnt the
   remainder is zero-filled by strncpy().  Returns NULL if str is NULL or the
   allocation fails.  The caller owns the returned buffer. */
char *
left_str(const char *str, unsigned long cnt)
{
  char *tmpstr;

  if (str == NULL) {
    return ((char *) NULL);
  }
  tmpstr = (char *) MALLOC(cnt + 1);
  if (tmpstr == NULL) {
    return ((char *) NULL);
  }
  strncpy(tmpstr, str, cnt);
  tmpstr[cnt] = 0;		/* strncpy() does not terminate when str has >= cnt chars */
  return (tmpstr);
}

/* Return up to cnt characters from str, starting at position index (from 0),
   as a newly allocated string of cnt + 1 bytes.  An index at or beyond the end
   of str yields an empty string.  Returns NULL if str is NULL or the
   allocation fails.  The caller owns the returned buffer. */
char *
mid_str(const char *str, unsigned long index, unsigned long cnt)
{
  char *tmpstr;
  size_t len;

  if (str == NULL) {
    return ((char *) NULL);
  }

  /* Clamp the start to the terminator so the copy never begins outside str. */
  len = strlen(str);
  if (index > len) {
    index = len;
  }

  tmpstr = (char *) MALLOC(cnt + 1);
  if (tmpstr == NULL) {
    return ((char *) NULL);
  }
  strncpy(tmpstr, str + index, cnt);
  tmpstr[cnt] = 0;
  return (tmpstr);
}

/* Return the rightmost cnt characters of str as a newly allocated string.
   If cnt exceeds the length of str, the whole string is returned.  Returns
   NULL if str is NULL or the allocation fails.  The caller owns the returned
   buffer. */
char *
right_str(const char *str, unsigned long cnt)
{
  char *tmpstr;
  size_t len;

  if (str == NULL) {
    return ((char *) NULL);
  }

  /* Without this clamp the start pointer would land before str and the copy
     would overrun the cnt + 1 byte buffer. */
  len = strlen(str);
  if (cnt > len) {
    cnt = len;
  }

  tmpstr = (char *) MALLOC(cnt + 1);
  if (tmpstr == NULL) {
    return ((char *) NULL);
  }
  strcpy(tmpstr, str + (len - cnt));
  return (tmpstr);
}

/* Returns TRUE if str matches regular expression pattern, FALSE otherwise */
#if defined(HAVE_REGEX_H) || defined(IRIX)
/* Compile pattern as a POSIX extended regular expression and test str against
   it.  Returns 1 if str matches and 0 otherwise.  0 is also returned, after a
   message on stderr, when the pattern cannot be compiled or the match attempt
   reports an error, and when either argument is NULL. */
unsigned char
regexp_match(register const char *str, register const char *pattern)
{
  regex_t rexp;			/* kept on the stack: nothing to allocate or leak */
  register int result;
  char errbuf[256];

  if (str == NULL || pattern == NULL) {
    return (0);
  }

  if ((result = regcomp(&rexp, pattern, REG_EXTENDED)) != 0) {
    regerror(result, &rexp, errbuf, sizeof(errbuf));
    fprintf(stderr, "Unable to compile regexp %s -- %s.\n", pattern, errbuf);
    /* Nothing to release: regcomp() failed, so rexp holds no compiled state. */
    return (0);
  }

  result = regexec(&rexp, str, (size_t) 0, (regmatch_t *) NULL, 0);
  if ((result != 0) && (result != REG_NOMATCH)) {
    regerror(result, &rexp, errbuf, sizeof(errbuf));
    fprintf(stderr, "Error testing input string %s -- %s.\n", str, errbuf);
  }

  /* Release the storage regcomp() attached to rexp on every path that
     compiled successfully. */
  regfree(&rexp);
  return ((unsigned char) (result == 0));
}
#endif

/* IS_DELIM(c) tests whether c ends the current word.  It relies on a local
   variable named delim in the calling function: when delim is non-zero the
   word is quoted and only that quote character ends it; otherwise any
   whitespace does.  The cast keeps isspace() away from negative char values. */
#define IS_DELIM(c)  (delim ? ((c) == delim) : (isspace((unsigned char) (c))))

/* Return a newly allocated copy of the index-th word in str.  Words are
   separated by whitespace; text enclosed in "..." or '...' counts as 1 word
   and is returned without the quotes.  A backslash directly before a quote
   character is dropped so that the quote is copied literally.

   Words are counted from 1; index 0 yields an empty string.  Returns NULL if
   str is NULL, if memory cannot be allocated, or if str runs out before index
   words have been read.  Trailing whitespace after the last word allows one
   further, empty word to be read.  The caller owns the returned buffer. */
char *
get_word(unsigned long index, const char *str)
{
  char *tmpstr, *shrunk;
  char delim = 0;
  register unsigned long i, j, k;

  if (str == NULL) {
    return ((char *) NULL);
  }

  /* No word can be longer than the whole string, so this is always enough. */
  k = strlen(str) + 1;
  if ((tmpstr = (char *) MALLOC(k)) == NULL) {
    fprintf(stderr, "get_word(%lu, %s):  Unable to allocate memory -- %s.\n",
            index, str, strerror(errno));
    return ((char *) NULL);
  }
  *tmpstr = 0;

  /* Each pass reads one word into tmpstr, overwriting the previous one, so
     tmpstr holds the requested word once index passes have completed. */
  for (i = 0, j = 0; j < index && str[i]; j++) {
    for (; isspace((unsigned char) str[i]); i++);

    /* An opening quote selects the matching closing quote as the delimiter. */
    switch (str[i]) {
      case '\"':
        delim = '\"';
        i++;
        break;
      case '\'':
        delim = '\'';
        i++;
        break;
      default:
        delim = 0;
    }

    for (k = 0; str[i] && !IS_DELIM(str[i]);) {
      if (str[i] == '\\') {
        if (str[i + 1] == '\'' || str[i + 1] == '\"') {
          i++;			/* skip the backslash, keep the quote */
        }
      }
      tmpstr[k++] = str[i++];
    }

    /* Step over the closing quote, if that is what ended the word. */
    switch (str[i]) {
      case '\"':
      case '\'':
        i++;
        break;
    }
    tmpstr[k] = 0;
  }

  if (j != index) {
    FREE(tmpstr);
    D_STRINGS(("get_word(%lu, %s) returning NULL.\n", index, str));
    return ((char *) NULL);
  } else {
    /* Trim the buffer to fit.  If that fails the original block is still
       valid, so keep it rather than losing the word. */
    shrunk = (char *) REALLOC(tmpstr, strlen(tmpstr) + 1);
    if (shrunk != NULL) {
      tmpstr = shrunk;
    }
    D_STRINGS(("get_word(%lu, %s) returning \"%s\".\n", index, str, tmpstr));
    return (tmpstr);
  }
}

/* Return a pointer into str at the start of the index-th whitespace-separated
   word (counted from 1; index 0 behaves like 1).  Nothing is copied and the
   result runs to the end of str.  If the word begins with a " or ' that one
   character is skipped.  Returns NULL if str is NULL or if nothing is left at
   that position.

   NOTE(review): unlike get_word() and num_words(), the scan below splits on
   whitespace only, so a quoted phrase containing spaces is counted as several
   words here -- confirm whether callers rely on that. */
char *
get_pword(unsigned long index, const char *str)
{
  register const char *tmpstr = str;
  register unsigned long j;

  if (!str)
    return ((char *) NULL);

  /* Skip leading whitespace; isspace() is false for the terminator. */
  for (; isspace((unsigned char) *tmpstr); tmpstr++);

  /* Step over index - 1 words together with the whitespace after each. */
  for (j = 1; j < index && *tmpstr; j++) {
    for (; *tmpstr && !isspace((unsigned char) *tmpstr); tmpstr++);
    for (; isspace((unsigned char) *tmpstr); tmpstr++);
  }

  if (*tmpstr == '\"' || *tmpstr == '\'') {
    tmpstr++;
  }
  if (*tmpstr == '\0') {
    D_STRINGS(("get_pword(%lu, %s) returning NULL.\n", index, str));
    return ((char *) NULL);
  } else {
    D_STRINGS(("get_pword(%lu, %s) returning \"%s\"\n", index, str, tmpstr));
    return (char *) tmpstr;
  }
}

/* Return the number of words in str, for use with get_word() and get_pword().
   Words are separated by whitespace; text enclosed in "..." or '...' counts
   as 1 word.  Returns 0 for a NULL or empty string. */
unsigned long
num_words(const char *str)
{
  register unsigned long cnt = 0;
  char delim = 0;		/* read by IS_DELIM(); 0 means "split on whitespace" */
  register unsigned long i;

  if (str == NULL) {
    return (0);
  }

  /* Skip leading whitespace (delim is still 0 here). */
  for (i = 0; str[i] && IS_DELIM((unsigned char) str[i]); i++);

  /* One pass per word. */
  for (; str[i]; cnt++) {
    /* An opening quote makes the matching quote the only delimiter. */
    switch (str[i]) {
      case '\"':
        delim = '\"';
        i++;
        break;
      case '\'':
        delim = '\'';
        i++;
        break;
      default:
        delim = 0;
    }
    for (; str[i] && !IS_DELIM((unsigned char) str[i]); i++);

    /* Step over the closing quote, then over the gap before the next word. */
    switch (str[i]) {
      case '\"':
      case '\'':
        i++;
        break;
    }
    for (; str[i] && isspace((unsigned char) str[i]); i++);
  }

  D_STRINGS(("num_words() returning %lu\n", cnt));
  return (cnt);
}

/* Remove leading and trailing whitespace from str in place and return str.
   A string made up entirely of whitespace becomes empty.  Returns NULL if
   str is NULL. */
char *
strip_whitespace(register char *str)
{
  register size_t len, start, end;

  if (str == NULL) {
    return ((char *) NULL);
  }

  /* Walk back from the end, but never past the first character: an
     all-whitespace string must stop at index 0 rather than run off the
     front of the buffer. */
  len = strlen(str);
  for (end = len; end > 0 && isspace((unsigned char) str[end - 1]); end--);
  if (end < len) {
    str[end] = 0;
  }

  /* isspace() is false for the terminator, so this stops inside the string. */
  for (start = 0; isspace((unsigned char) str[start]); start++);
  if (start > 0) {
    /* Regions overlap, hence memmove(); + 1 carries the terminator along. */
    memmove(str, str + start, end - start + 1);
  }
  return (str);
}

/* Convert every character of str to lower case in place, using tolower(),
   and return str. */
char *
downcase_str(char *str)
{
  register char *tmp;

  for (tmp = str; *tmp; tmp++) {
    /* tolower() takes an unsigned char value; a plain char may be negative. */
    *tmp = (char) tolower((unsigned char) *tmp);
  }
  D_STRINGS(("downcase_str() returning %s\n", str));
  return (str);
}

/* Convert every character of str to upper case in place, using toupper(),
   and return str. */
char *
upcase_str(char *str)
{
  register char *tmp;

  for (tmp = str; *tmp; tmp++) {
    /* toupper() takes an unsigned char value; a plain char may be negative. */
    *tmp = (char) toupper((unsigned char) *tmp);
  }
  D_STRINGS(("upcase_str() returning %s\n", str));
  return (str);
}

#ifndef HAVE_STRCASESTR
/* Case-insensitive substring search.  Returns a pointer to the first position
   in haystack at which needle matches when case is ignored, or NULL if there
   is none or haystack is NULL.  Only positions holding a character are
   tried, so an empty haystack never matches, not even an empty needle. */
char *
strcasestr(char *haystack, register const char *needle)
{
  const size_t needle_len = strlen(needle);
  char *cursor = haystack;

  if (cursor == NULL) {
    return (NULL);
  }
  while (*cursor != '\0') {
    if (strncasecmp(cursor, needle, needle_len) == 0) {
      return (cursor);
    }
    cursor++;
  }
  return (NULL);
}
#endif

#ifndef HAVE_STRCASECHR
/* Case-insensitive character search.  Returns a pointer to the first
   character of haystack that equals needle when both are folded with
   tolower(), or NULL if there is none or haystack is NULL.  The terminator
   itself is never matched. */
char *
strcasechr(char *haystack, register char needle)
{
  register char *t;
  /* Fold the needle once.  tolower() takes an unsigned char value; a plain
     char may be negative. */
  const int folded = tolower((unsigned char) needle);

  for (t = haystack; t && *t; t++) {
    if (tolower((unsigned char) *t) == folded) {
      return (t);
    }
  }
  return (NULL);
}
#endif

#ifndef HAVE_STRCASEPBRK
/* Case-insensitive counterpart of strpbrk().  Returns a pointer to the first
   character of haystack for which strcasechr() finds a match in needle, or
   NULL if there is none or haystack is NULL. */
char *
strcasepbrk(char *haystack, register char *needle)
{
  char *cursor = haystack;

  if (cursor == NULL) {
    return (NULL);
  }
  while (*cursor != '\0') {
    if (strcasechr(needle, *cursor) != NULL) {
      return (cursor);
    }
    cursor++;
  }
  return (NULL);
}
#endif

#ifndef HAVE_STRREV
/* Reverse the characters of str in place and return str.  Strings of length
   0 or 1 are returned unchanged. */
char *
strrev(register char *str)
{
  /* size_t rather than int, so very long strings are not truncated. */
  const size_t len = strlen(str);
  register char *lo, *hi;
  char tmp;

  if (len > 1) {
    /* Swap the outermost pair and move inwards until the ends meet. */
    for (lo = str, hi = str + len - 1; lo < hi; lo++, hi--) {
      tmp = *lo;
      *lo = *hi;
      *hi = tmp;
    }
  }
  return (str);
}
#endif

#if !(HAVE_STRSEP)
/* Split the next token off *str.  The token ends at the first character that
   appears in sep; that character is overwritten with a terminator and *str is
   moved just past it.  If no separator is left, the rest of the string is
   returned as the final token and *str is left pointing at the terminator.
   Returns NULL once *str points at an empty string, or if str, *str or sep is
   NULL.  The string is modified in place. */
char *
strsep(char **str, register char *sep)
{

  register char *s;
  char *sptr;

  /* Nothing to tokenize; also keeps the debug output below from printing a
     NULL string. */
  if (str == NULL || *str == NULL || sep == NULL) {
    return ((char *) NULL);
  }

  D_STRINGS(("strsep(%s, %s) called.\n", *str, sep));
  s = sptr = *str;
  for (; *s && !strchr(sep, *s); s++);
  if (!*s) {
    if (s != sptr) {
      *str = s;
      D_STRINGS(("Reached end of string with token \"%s\" in buffer\n", sptr));
      return (sptr);
    } else {
      D_STRINGS(("Reached end of string\n"));
      return ((char *) NULL);
    }
  }
  *s = 0;
  *str = s + 1;
  D_STRINGS(("Got token \"%s\", *str == \"%s\"\n", sptr, *str));
  return (sptr);
}
#endif

/* Squeeze every zero byte out of the first len bytes of buff, moving the
   remaining bytes towards the front, append a single terminator, and shrink
   the allocation to the resulting size.  Returns whatever REALLOC() returns,
   so the caller must use the returned pointer in place of buff.

   NOTE(review): when none of the len bytes is zero the terminator is written
   at buff[len], so the buffer needs room for len + 1 bytes -- confirm against
   callers. */
char *
garbage_collect(char *buff, size_t len)
{

  register char *dst = buff;
  register const char *src = buff;
  const char *const end = buff + len;

  D_STRINGS(("Garbage collecting on %lu bytes at %10.8p\n", len, buff));
  while (src < end) {
    if (*src != '\0') {
      *dst++ = *src;
    }
    src++;
  }
  *dst++ = '\0';
  D_STRINGS(("Garbage collecting gives: \n%s\n", buff));
  /* dst - buff is the number of bytes kept, terminator included. */
  return ((char *) REALLOC(buff, sizeof(char) * (size_t) (dst - buff)));
}

/* Clean up the first len bytes of a text buffer in place: '#' comment lines,
   mid-line " #" comments, runs of whitespace at the start of a line, and
   backslash-newline continuations are overwritten with zero bytes, and every
   remaining line is made to end in '\n'.  The zero bytes are then squeezed
   out by garbage_collect(), whose return value is returned here, so the
   caller must use the returned pointer in place of buff.

   NOTE(review): strpbrk() and strstr() below scan until they meet a zero
   byte, so buff is expected to be terminated at or after len -- confirm
   against callers. */
char *
file_garbage_collect(char *buff, size_t len)
{

  register char *tbuff = buff, *pbuff = buff;
  char *tmp1, *tmp2;
  register unsigned long j;

  D_STRINGS(("File garbage collecting on %lu bytes at %10.8p\n", len, buff));
  for (j = 0; j < len;) {
    switch (pbuff[j]) {
      case '#':
        /* The bound is tested before the byte is read, so pbuff[len] is
           never touched.  strchr() also matches the terminator of "\r\n",
           which means a zero byte counts as a line end here. */
        for (; j < len && !strchr("\r\n", pbuff[j]); j++)
          pbuff[j] = '\0';	/* First null out the line up to the CR and/or LF */
        for (; j < len && strchr("\r\n", pbuff[j]); j++)
          pbuff[j] = '\0';	/* Then null out the CR and/or LF */
        break;
      case '\r':
      case '\n':
      case '\f':
      case ' ':
      case '\t':
      case '\v':
        for (; j < len && isspace((unsigned char) pbuff[j]); j++)
          pbuff[j] = '\0';	/* Null out the whitespace */
        break;
      default:
        /* Find the end of this line and the occurrence of the
           next mid-line comment. */
        tmp1 = strpbrk(pbuff + j, "\r\n");
        tmp2 = strstr(pbuff + j, " #");

        /* Neither was found: the text runs to the terminator without a
           line end or comment, so there is nothing left to rewrite.  Stop
           here instead of doing arithmetic on a NULL pointer. */
        if (!tmp1 && !tmp2) {
          j = len;
          break;
        }

        /* If either is null, take the non-null one.  Otherwise,
           take the lesser of the two. */
        if (!tmp1 || !tmp2) {
          tbuff = ((tmp1) ? (tmp1) : (tmp2));
        } else {
          tbuff = ((tmp1 < tmp2) ? (tmp1) : (tmp2));
        }

        /* Now let j catch up so that pbuff+j = tbuff; i.e., let
           pbuff[j] refer to the same character that tbuff does */
        j = (unsigned long) (tbuff - pbuff);

        /* Finally, change whatever is at pbuff[j] to a newline.
           This will accomplish several things at once:
           o It will change a \r to a \n if that's what's there
           o If it's a \n, it'll stay the same.  No biggie.
           o If it's a space, it will end the line there and the
           next line will begin with a comment, which is handled
           above. */
        if (j < len)
          pbuff[j++] = '\n';

    }
  }

  /* Change all occurrences of a backslash followed by a newline to nulls
     and null out all whitespace up to the next non-whitespace character.
     This handles support for breaking a string across multiple lines.
     The j + 1 < len test keeps the look-ahead inside the region. */
  for (j = 0; j < len; j++) {
    if (j + 1 < len && pbuff[j] == '\\' && pbuff[j + 1] == '\n') {
      pbuff[j++] = '\0';
      for (; j < len && isspace((unsigned char) pbuff[j]); j++)
        pbuff[j] = '\0';	/* Null out the whitespace */
    }
  }

  /* And the final step, garbage collect the buffer to condense all
     those nulls we just put in. */
  return (garbage_collect(buff, len));
}

/* Collapse every run of whitespace in s into a single space, in place, and
   drop a space left at the very end.  A leading run is reduced to one space,
   not removed.  The buffer is then resized with REALLOC() to fit and the
   result of that call is returned, so the caller must use the returned
   pointer in place of s. */
char *
condense_whitespace(char *s)
{

  register unsigned char gotspc = 0;	/* set while inside a whitespace run */
  register char *pbuff = s, *pbuff2 = s;	/* write and read positions */

  D_STRINGS(("condense_whitespace(%s) called.\n", s));
  for (; *pbuff2; pbuff2++) {
    if (isspace((unsigned char) *pbuff2)) {
      /* Only the first character of a run produces output. */
      if (!gotspc) {
        *pbuff = ' ';
        gotspc = 1;
        pbuff++;
      }
    } else {
      *pbuff = *pbuff2;
      gotspc = 0;
      pbuff++;
    }
  }
  /* Only look at the previous character if one was written: for an empty
     string pbuff still equals s and s[-1] is outside the buffer. */
  if ((pbuff > s) && (isspace((unsigned char) *(pbuff - 1))))
    pbuff--;
  *pbuff = 0;
  D_STRINGS(("condense_whitespace() returning \"%s\".\n", s));
  return (REALLOC(s, strlen(s) + 1));
}

/* Replace every control character (as reported by iscntrl()) among the first
   len bytes of str with '.', in place, and return str.  Exactly len bytes are
   examined; a zero byte does not stop the scan and is itself replaced. */
char *
safe_str(register char *str, unsigned short len)
{
  register unsigned short i;

  for (i = 0; i < len; i++) {
    /* iscntrl() takes an unsigned char value; a plain char may be negative. */
    if (iscntrl((unsigned char) str[i])) {
      str[i] = '.';
    }
  }

  return (str);
}

/* Write a hex dump of the count bytes at buff to stderr, eight bytes per
   row.  Each row shows the buffer address, the total size, the offset of the
   row, the bytes in hexadecimal, and the same bytes as text with control
   characters replaced by '.' via safe_str(). */
void
hex_dump(void *buff, register size_t count)
{

  const unsigned char *bytes = (const unsigned char *) buff;
  unsigned char row[9];		/* eight data bytes plus a terminator */
  unsigned long offset, n, col;

  fprintf(stderr, " Address |  Size  | Offset  | 00 01 02 03 04 05 06 07 |  ASCII  \n");
  fprintf(stderr, "---------+--------+---------+-------------------------+---------\n");
  for (offset = 0; offset < count; offset += 8) {
    /* The last row may hold fewer than eight bytes. */
    n = count - offset;
    if (n > 8) {
      n = 8;
    }

    /* Work on a zero-filled copy so the text column is always terminated
       and safe_str() never modifies the caller's data. */
    memset(row, 0, sizeof(row));
    memcpy(row, bytes + offset, n);

    fprintf(stderr, " %8p | %06lu | %07x | ", buff, (unsigned long) count, (unsigned int) offset);
    for (col = 0; col < 8; col++) {
      if (col < n) {
        fprintf(stderr, "%02x ", row[col]);
      } else {
        fprintf(stderr, "   ");	/* pad a short row to full width */
      }
    }
    fprintf(stderr, "| %-8s\n", safe_str((char *) row, n));
  }
}