eterm/libmej/strings.c

550 lines
13 KiB
C
Raw Normal View History

/*
* Copyright (C) 1997-2000, Michael Jennings
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies of the Software, its documentation and marketing & publicity
* materials, and acknowledgment shall be given in the documentation, materials
* and software packages that this Software was used.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/* Version-control identification string.  "$Id$" is a keyword placeholder
   (presumably expanded by CVS on checkout -- confirm against the repository
   configuration). */
static const char cvs_ident[] = "$Id$";
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <libmej.h>
#ifndef HAVE_MEMMEM
/*
 * Fallback memmem(): find the first occurrence of the byte string needle
 * (needlelen bytes) inside the memory region haystack (haystacklen bytes).
 *
 * Returns a pointer to the first match inside haystack, or NULL when there
 * is no match or the needle is longer than the haystack.  An empty needle
 * matches at the very start of haystack.
 */
void *
memmem(void *haystack, register size_t haystacklen, void *needle, register size_t needlelen)
{
    register char *hs = (char *) haystack;
    register char *n = (char *) needle;
    register size_t i, last;

    if (needlelen == 0) {
        return (haystack);
    }
    /* Guard the subtraction below: size_t would wrap to a huge value. */
    if (needlelen > haystacklen) {
        return (NULL);
    }
    /* "last" is the final offset at which the needle still fits, so the
       comparison must be inclusive. */
    last = haystacklen - needlelen;
    for (i = 0; i <= last; i++) {
        if (!memcmp(hs + i, n, needlelen)) {
            return (hs + i);
        }
    }
    return (NULL);
}
#endif
#ifndef HAVE_USLEEP
/*
 * Fallback usleep(): suspend the caller for roughly usec microseconds.
 *
 * select() is called with no descriptor sets, so it acts purely as a timer.
 * The delay is split into whole seconds and leftover microseconds so that
 * tv_usec always stays below one million; a larger tv_usec is an invalid
 * timeout on some systems.  The return value of select() is deliberately
 * ignored (best effort: an interrupted call simply ends the sleep early).
 */
void
usleep(unsigned long usec)
{
    struct timeval delay;

    delay.tv_sec = usec / 1000000UL;
    delay.tv_usec = usec % 1000000UL;
    (void) select(0, NULL, NULL, NULL, &delay);
}
#endif
/***** Not needed ******
#ifndef HAVE_NANOSLEEP
Thu Feb 10 15:10:01 PST 2000 Michael Jennings <mej@eterm.org> This is the first public availability of the work thus far on Eterm 0.9.1. There's quite a bit of new stuff here. * Added scrollbar thumb support. * Completely redid the terminfo/termcap stuff. The terminfo file is now compiled (by tic) and installed by default (unless you specify --without-terminfo). The config files still say xterm, though, because some programs (like SLang and GNU mc) use the silly algorithm of "Is $TERM set to xterm?" to detect mouse reporting support in a terminal. =P But if you don't ever use xterm, you can use Eterm's termcap and just name it "xterm" instead. Thanks to Marius Gedminas <mgedmin@takas.lt> for his patch that started this whole revamp. * Added the kEsetroot script for KDE users from Dax Games <dgames@isoc.net>. * You can now configure the Home and End emulation via --with-home= and --with-end= options to configure. The --with-terminfo option is also new, and --enable-xim is now the default. * Added a new image state, disabled, for when Eterm loses focus. This is supported by all widgets (well, all those that could possibly be on screen when Eterm lost focus), even the background image. So you could actually have all your images darken on focus out and restore to normal on focus in. * Widget colors formerly dealt with as colors (menu text color, scrollbar color, etc.) are now handled by the imageclasses. Each image state can have a foreground and background color defined. The current exception is the background image; I hope to add that later. The foreground is the text color and the background is the object color (for solid color mode). So menu text color is set by the menu imageclass. And again, for unfocused colors, use the disabled state of the imageclass. * Proportionally-spaced fonts are now handled much better. They are still forced into evenly-spaced columns (it's a terminal for crying out loud!) 
but at least you don't end up with Eterm's wider than your screen. :-) * Home on refresh is gone, as is home on echo. It's now much simpler. There are two options: home on output, and home on input, the former being a combination of echo and refresh. Also, keypresses that don't necessarily have corresonding output can trigger a home on input, like Ctrl-End or whatever...ones that don't have special meaning. Credit to Darren Stuart Embry <dse@louisville.edu> for pointing out this issue and the one with "m-" in font names. * I finally got around to re-merging the new parser stuff from my work on the Not Game. Closed up some old potential behavior quirks with theme parsing. * Added a new escape sequence to fork-and-exec a program. Also added a scrollback search capability to highlight all occurances of a string in your scrollback buffer. Use the new "Etsearch" utility to access it. "Etsearch string" to search for a string, then "Etsearch" by itself to reset the highlighting. * And of course, the biggie. Eterm now supports a completely- customizeable buttonbar. Not a menubar, a buttonbar. It can have an arbitrary number of buttons, and each button can perform an action, just like a menuitem. So a button could bring up a menu (like a menubar) or launch a program (like a launchbar) or perform an operation (like a toolbar). Each button can have an icon, text, or both. And you can have buttons left- or right-justified in the buttonbar. You will eventually be able to have an arbitrary number of buttonbars, but I'm still working on that. As with any change this big, things could very easily be broken. So beware. :-) I have tested this myself, and everything seems to work, but I can't test every possibility. Let me know if you find anything that's broken, and enjoy! SVN revision: 2048
2000-02-10 16:25:07 -08:00
__inline__ void
nanosleep(unsigned long nsec) {
usleep(nsec / 1000);
}
#endif
************************/
/*
 * Return a newly allocated string holding the leftmost cnt characters of str.
 *
 * The result buffer is always cnt + 1 bytes; when str is shorter than cnt the
 * remainder is zero-filled by strncpy().  The caller owns the result.
 * Returns NULL when str is NULL or the allocation fails.
 */
char *
left_str(const char *str, unsigned long cnt)
{
    char *tmpstr;

    if (str == NULL) {
        return ((char *) NULL);
    }
    tmpstr = (char *) MALLOC(cnt + 1);
    if (tmpstr == NULL) {
        return ((char *) NULL);
    }
    strncpy(tmpstr, str, cnt);
    tmpstr[cnt] = 0;            /* strncpy() does not terminate on truncation */
    return (tmpstr);
}
/*
 * Return a newly allocated string holding up to cnt characters of str,
 * starting at position index (counted from 0).
 *
 * A start position beyond the end of str is clamped to the end, giving an
 * empty string rather than a read past the terminator.  The result buffer is
 * always cnt + 1 bytes and owned by the caller.
 * Returns NULL when str is NULL or the allocation fails.
 */
char *
mid_str(const char *str, unsigned long index, unsigned long cnt)
{
    char *tmpstr;
    size_t len;

    if (str == NULL) {
        return ((char *) NULL);
    }
    len = strlen(str);
    if (index > len) {
        index = len;            /* never step past the terminating NUL */
    }
    tmpstr = (char *) MALLOC(cnt + 1);
    if (tmpstr == NULL) {
        return ((char *) NULL);
    }
    strncpy(tmpstr, str + index, cnt);
    tmpstr[cnt] = 0;
    return (tmpstr);
}
/*
 * Return a newly allocated string holding the rightmost cnt characters of str.
 *
 * When cnt exceeds the length of str the whole string is returned; the
 * request is clamped so the source pointer never moves before the start of
 * str.  The caller owns the result.
 * Returns NULL when str is NULL or the allocation fails.
 */
char *
right_str(const char *str, unsigned long cnt)
{
    char *tmpstr;
    size_t len;

    if (str == NULL) {
        return ((char *) NULL);
    }
    len = strlen(str);
    if (cnt > len) {
        cnt = len;
    }
    tmpstr = (char *) MALLOC(cnt + 1);
    if (tmpstr == NULL) {
        return ((char *) NULL);
    }
    /* Exactly cnt characters plus the terminator remain from this point. */
    strcpy(tmpstr, str + (len - cnt));
    return (tmpstr);
}
/* Returns TRUE if str matches regular expression pattern, FALSE otherwise */
#if defined(HAVE_REGEX_H) || defined(IRIX)
/*
 * Test str against the POSIX extended regular expression in pattern.
 *
 * Returns 1 when str matches and 0 when it does not.  0 is also returned,
 * after a diagnostic on stderr, when the pattern fails to compile or the
 * match itself reports an error, and when either argument is NULL.
 *
 * The compiled expression lives on the stack and is released with regfree()
 * on every path after a successful regcomp(), so nothing is leaked per call.
 */
unsigned char
regexp_match(register const char *str, register const char *pattern)
{
    regex_t rexp;
    register int result;
    char errbuf[256];

    if (str == NULL || pattern == NULL) {
        return (0);
    }
    if ((result = regcomp(&rexp, pattern, REG_EXTENDED)) != 0) {
        regerror(result, &rexp, errbuf, sizeof(errbuf));
        fprintf(stderr, "Unable to compile regexp %s -- %s.\n", pattern, errbuf);
        return (0);
    }
    /* No sub-match information is requested, only a yes/no answer. */
    result = regexec(&rexp, str, (size_t) 0, (regmatch_t *) NULL, 0);
    if ((result != 0) && (result != REG_NOMATCH)) {
        regerror(result, &rexp, errbuf, sizeof(errbuf));
        fprintf(stderr, "Error testing input string %s -- %s.\n", str, errbuf);
        regfree(&rexp);
        return (0);
    }
    regfree(&rexp);
    return ((unsigned char) (result == 0));
}
#endif
/*
 * IS_DELIM(c): true when c ends the word currently being scanned.  It relies
 * on a local variable named "delim" in the calling function: when delim is
 * non-zero (a quote character) only that character ends the word, otherwise
 * any whitespace does.  The cast keeps the isspace() argument in the range
 * the <ctype.h> functions accept.
 */
#define IS_DELIM(c) (delim ? ((c) == delim) : (isspace((unsigned char) (c))))

/*
 * Return a newly allocated copy of the index-th word in str (words are
 * counted from 1).  A run enclosed in "..." or '...' counts as one word and
 * is returned without its quotes.  A backslash immediately before a quote
 * character is dropped and the quote is copied literally.
 *
 * Returns NULL when str is NULL, when memory cannot be allocated, or when
 * str runs out before index words have been read.  An index of 0 yields an
 * empty string.  The caller owns the returned buffer.
 */
char *
get_word(unsigned long index, const char *str)
{
    char *tmpstr, *shrunk;
    char delim = 0;
    register unsigned long i, j, k;

    if (str == NULL) {
        return ((char *) NULL);
    }

    /* No word can be longer than the whole input. */
    k = strlen(str) + 1;
    if ((tmpstr = (char *) MALLOC(k)) == NULL) {
        fprintf(stderr, "get_word(%lu, %s): Unable to allocate memory -- %s.\n",
                index, str, strerror(errno));
        return ((char *) NULL);
    }
    *tmpstr = 0;

    /* Extract words one after another into tmpstr; the last one extracted
       is the one that gets returned. */
    for (i = 0, j = 0; j < index && str[i]; j++) {
        for (; isspace((unsigned char) str[i]); i++);

        /* An opening quote selects the matching quote as the delimiter. */
        switch (str[i]) {
            case '\"':
                delim = '\"';
                i++;
                break;
            case '\'':
                delim = '\'';
                i++;
                break;
            default:
                delim = 0;
        }

        for (k = 0; str[i] && !IS_DELIM(str[i]);) {
            if (str[i] == '\\') {
                if (str[i + 1] == '\'' || str[i + 1] == '\"') {
                    i++;        /* skip the backslash, keep the quote */
                }
            }
            tmpstr[k++] = str[i++];
        }

        /* Step over the closing quote, if that is what stopped the scan. */
        switch (str[i]) {
            case '\"':
            case '\'':
                i++;
                break;
            default:
                break;
        }
        tmpstr[k] = 0;
    }

    if (j != index) {
        FREE(tmpstr);
        D_STRINGS(("get_word(%lu, %s) returning NULL.\n", index, str));
        return ((char *) NULL);
    }

    /* Trim the buffer to fit.  If the shrink fails, keep the original
       (still valid) buffer instead of losing it. */
    shrunk = (char *) REALLOC(tmpstr, strlen(tmpstr) + 1);
    if (shrunk != NULL) {
        tmpstr = shrunk;
    }
    D_STRINGS(("get_word(%lu, %s) returning \"%s\".\n", index, str, tmpstr));
    return (tmpstr);
}
/*
 * Return a pointer into str at the start of the index-th whitespace-separated
 * word (words are counted from 1; an index of 0 behaves like 1).  Nothing is
 * copied: the result aliases str and runs to the end of the string.
 *
 * Words are separated purely by whitespace here; quotes do not group words.
 * The only quote handling is that a single leading " or ' on the selected
 * word is skipped.
 *
 * Returns NULL when str is NULL or when nothing is left at the requested
 * position.
 */
char *
get_pword(unsigned long index, const char *str)
{
    register const char *tmpstr = str;
    register unsigned long j;

    if (!str) {
        return ((char *) NULL);
    }

    /* Skip leading whitespace, then hop over index - 1 complete words. */
    for (; *tmpstr && isspace((unsigned char) *tmpstr); tmpstr++);
    for (j = 1; j < index && *tmpstr; j++) {
        for (; *tmpstr && !isspace((unsigned char) *tmpstr); tmpstr++);
        for (; *tmpstr && isspace((unsigned char) *tmpstr); tmpstr++);
    }

    if (*tmpstr == '\"' || *tmpstr == '\'') {
        tmpstr++;
    }
    if (*tmpstr == '\0') {
        D_STRINGS(("get_pword(%lu, %s) returning NULL.\n", index, str));
        return ((char *) NULL);
    } else {
        D_STRINGS(("get_pword(%lu, %s) returning \"%s\"\n", index, str, tmpstr));
        return (char *) tmpstr;
    }
}
/*
 * Return the number of words in str.  Words are separated by whitespace and
 * a run enclosed in "..." or '...' counts as one word.  A NULL str contains
 * no words.
 */
unsigned long
num_words(const char *str)
{
    register unsigned long cnt = 0;
    char delim = 0;             /* read by IS_DELIM() */
    register unsigned long i;

    if (str == NULL) {
        return (0);
    }

    /* delim is 0 here, so this skips leading whitespace. */
    for (i = 0; str[i] && IS_DELIM(str[i]); i++);

    for (; str[i]; cnt++) {
        /* An opening quote selects the matching quote as the delimiter. */
        switch (str[i]) {
            case '\"':
                delim = '\"';
                i++;
                break;
            case '\'':
                delim = '\'';
                i++;
                break;
            default:
                delim = 0;
        }
        for (; str[i] && !IS_DELIM(str[i]); i++);

        /* Step over the closing quote, if any. */
        switch (str[i]) {
            case '\"':
            case '\'':
                i++;
                break;
            default:
                break;
        }
        for (; str[i] && isspace((unsigned char) str[i]); i++);
    }

    D_STRINGS(("num_words() returning %lu\n", cnt));
    return (cnt);
}
/*
 * Remove leading and trailing whitespace from str in place and return str.
 *
 * An empty string is returned untouched (nothing is written to it).  A
 * string made up entirely of whitespace becomes empty; the backwards scan is
 * bounded so it can never step before the start of the buffer.  A NULL str
 * yields NULL.
 */
char *
strip_whitespace(register char *str)
{
    register size_t len, lead;

    if (str == NULL) {
        return ((char *) NULL);
    }
    len = strlen(str);
    if (len == 0) {
        return (str);
    }

    /* Drop trailing whitespace. */
    while (len > 0 && isspace((unsigned char) str[len - 1])) {
        len--;
    }
    str[len] = 0;

    /* Count leading whitespace, then slide the rest (and its NUL) down. */
    for (lead = 0; lead < len && isspace((unsigned char) str[lead]); lead++);
    memmove(str, str + lead, len - lead + 1);

    return (str);
}
/*
 * Convert str to lower case in place and return it.  A NULL str yields NULL.
 *
 * Each character goes through unsigned char before tolower(), because the
 * <ctype.h> functions are undefined for negative arguments other than EOF.
 */
char *
downcase_str(char *str)
{
    register char *tmp;

    if (str == NULL) {
        return ((char *) NULL);
    }
    for (tmp = str; *tmp; tmp++) {
        *tmp = (char) tolower((unsigned char) *tmp);
    }
    D_STRINGS(("downcase_str() returning %s\n", str));
    return (str);
}
/*
 * Convert str to upper case in place and return it.  A NULL str yields NULL.
 *
 * Each character goes through unsigned char before toupper(), because the
 * <ctype.h> functions are undefined for negative arguments other than EOF.
 */
char *
upcase_str(char *str)
{
    register char *tmp;

    if (str == NULL) {
        return ((char *) NULL);
    }
    for (tmp = str; *tmp; tmp++) {
        *tmp = (char) toupper((unsigned char) *tmp);
    }
    D_STRINGS(("upcase_str() returning %s\n", str));
    return (str);
}
#ifndef HAVE_STRCASESTR
/*
 * Fallback strcasestr(): find the first occurrence of needle in haystack,
 * ignoring case.
 *
 * Returns a pointer to the start of the match inside haystack, or NULL when
 * there is none.  An empty needle matches at the start of haystack, also
 * when haystack itself is empty.  NULL for either argument yields NULL.
 */
char *
strcasestr(char *haystack, register const char *needle)
{
    register char *t;
    register size_t len;

    if (haystack == NULL || needle == NULL) {
        return (NULL);
    }
    len = strlen(needle);
    if (len == 0) {
        return (haystack);
    }
    for (t = haystack; *t; t++) {
        /* strncasecmp() stops at the haystack's NUL, so a needle longer
           than the remaining text can never compare equal. */
        if (!strncasecmp(t, needle, len)) {
            return (t);
        }
    }
    return (NULL);
}
#endif
#ifndef HAVE_STRCASECHR
/*
 * Case-insensitive strchr(): return a pointer to the first character of
 * haystack that equals needle when case is ignored, or NULL when there is
 * none or haystack is NULL.  The terminating NUL is never matched.
 *
 * Characters go through unsigned char before tolower(), because the
 * <ctype.h> functions are undefined for negative arguments other than EOF.
 */
char *
strcasechr(char *haystack, register char needle)
{
    register char *t;
    register int wanted = tolower((unsigned char) needle);  /* loop-invariant */

    for (t = haystack; t && *t; t++) {
        if (tolower((unsigned char) *t) == wanted) {
            return (t);
        }
    }
    return (NULL);
}
#endif
#ifndef HAVE_STRCASEPBRK
/*
 * Case-insensitive strpbrk(): return a pointer to the first character of
 * haystack that also occurs (ignoring case) in needle, or NULL when no such
 * character exists or haystack is NULL.
 */
char *
strcasepbrk(char *haystack, register char *needle)
{
    register char *cursor = haystack;

    while (cursor && *cursor) {
        /* strcasechr() does the per-character, case-blind lookup in needle. */
        if (strcasechr(needle, *cursor) != NULL) {
            return (cursor);
        }
        cursor++;
    }
    return (NULL);
}
#endif
#ifndef HAVE_STRREV
/*
 * Fallback strrev(): reverse str in place and return it.  A NULL str yields
 * NULL; strings shorter than two characters are returned unchanged.
 *
 * Indices are size_t so that very long strings are not truncated, and the
 * exchange is written out explicitly with a temporary.
 */
char *
strrev(register char *str)
{
    register size_t lo, hi;
    register char tmp;

    if (str == NULL) {
        return ((char *) NULL);
    }
    hi = strlen(str);
    if (hi < 2) {
        return (str);
    }
    /* Walk inward from both ends, swapping as we go. */
    for (lo = 0, hi--; lo < hi; lo++, hi--) {
        tmp = str[lo];
        str[lo] = str[hi];
        str[hi] = tmp;
    }
    return (str);
}
#endif
#if !(HAVE_STRSEP)
/*
 * Fallback strsep(): split the string at *str on the first character that
 * occurs in sep.
 *
 * When a separator is found it is overwritten with NUL, *str is advanced to
 * the character after it, and the start of the token is returned (the token
 * may be empty).  When no separator is left, a non-empty remainder is
 * returned as the final token and *str is left pointing at the terminating
 * NUL; the following call then returns NULL.  An empty remainder returns
 * NULL straight away.
 *
 * NULL is also returned, without touching anything, when str, *str or sep
 * is NULL.
 */
char *
strsep(char **str, register char *sep)
{
    register char *s;
    char *sptr;

    if (str == NULL || *str == NULL || sep == NULL) {
        return ((char *) NULL);
    }
    s = *str;
    D_STRINGS(("strsep(%s, %s) called.\n", *str, sep));
    sptr = s;

    /* Advance to the first separator or to the end of the string. */
    for (; *s && !strchr(sep, *s); s++);

    if (!*s) {
        if (s != sptr) {
            *str = s;
            D_STRINGS(("Reached end of string with token \"%s\" in buffer\n", sptr));
            return (sptr);
        } else {
            D_STRINGS(("Reached end of string\n"));
            return ((char *) NULL);
        }
    }

    *s = 0;
    *str = s + 1;
    D_STRINGS(("Got token \"%s\", *str == \"%s\"\n", sptr, *str));
    return (sptr);
}
#endif
/*
 * Squeeze every NUL byte out of the first len bytes of buff, append a single
 * terminating NUL, and hand the buffer to REALLOC() sized to the compacted
 * length (terminator included).  The REALLOC() result is returned.
 *
 * NOTE(review): when the first len bytes contain no NUL at all, the
 * terminator is written at buff[len]; callers presumably provide at least
 * len + 1 bytes -- confirm.
 */
char *
garbage_collect(char *buff, size_t len)
{
    register char *src, *dst;
    register char *stop = buff + len;

    D_STRINGS(("Garbage collecting on %lu bytes at %10.8p\n", len, buff));

    /* Copy the non-NUL bytes down over the gaps. */
    for (src = buff, dst = buff; src < stop; src++) {
        if (*src != '\0') {
            *dst++ = *src;
        }
    }
    *dst++ = '\0';

    D_STRINGS(("Garbage collecting gives: \n%s\n", buff));
    return ((char *) REALLOC(buff, sizeof(char) * (unsigned long) (dst - buff)));
}
/*
 * Clean up the text of a file held in buff (len bytes) and compact it.
 *
 * Pass 1 walks the buffer and
 *   - replaces whole-line comments (starting with '#') and their line ending
 *     with NULs,
 *   - replaces whitespace found at the start of a line/token with NULs,
 *   - for anything else, jumps to the end of the line or to the first " #"
 *     (mid-line comment), whichever comes first, and puts a '\n' there.
 * Pass 2 turns each backslash-newline pair, plus the whitespace after it,
 * into NULs so that a string may be continued across lines.
 * Finally garbage_collect() squeezes the NULs out; its result is returned.
 *
 * NOTE(review): strpbrk()/strstr() scan up to the next NUL, so buff is
 * assumed to be NUL-terminated at or after buff[len] -- confirm with callers.
 */
char *
file_garbage_collect(char *buff, size_t len)
{
    register char *tbuff = buff, *pbuff = buff;
    char *tmp1, *tmp2;
    register unsigned long j;

    D_STRINGS(("File garbage collecting on %lu bytes at %10.8p\n", len, buff));

    for (j = 0; j < len;) {
        switch (pbuff[j]) {
            case '#':
                /* The bound is tested before each byte is examined. */
                for (; j < len && !strchr("\r\n", pbuff[j]); j++)
                    pbuff[j] = '\0';    /* First null out the line up to the CR and/or LF */
                for (; j < len && strchr("\r\n", pbuff[j]); j++)
                    pbuff[j] = '\0';    /* Then null out the CR and/or LF */
                break;
            case '\r':
            case '\n':
            case '\f':
            case ' ':
            case '\t':
            case '\v':
                for (; j < len && isspace((unsigned char) pbuff[j]); j++)
                    pbuff[j] = '\0';    /* Null out the whitespace */
                break;
            default:
                /* Find the end of this line and the occurrence of the
                   next mid-line comment. */
                tmp1 = strpbrk(pbuff + j, "\r\n");
                tmp2 = strstr(pbuff + j, " #");

                /* If either is null, take the non-null one.  Otherwise,
                   take the lesser of the two. */
                if (!tmp1 || !tmp2) {
                    tbuff = ((tmp1) ? (tmp1) : (tmp2));
                } else {
                    tbuff = ((tmp1 < tmp2) ? (tmp1) : (tmp2));
                }

                /* Neither was found: the rest of the buffer is one final,
                   unterminated line.  Stop scanning instead of doing
                   arithmetic on a null pointer. */
                if (tbuff == NULL) {
                    j = len;
                    break;
                }

                /* Let j catch up so that pbuff[j] refers to the same
                   character that tbuff does. */
                j = (unsigned long) (tbuff - pbuff);

                /* Finally, change whatever is at pbuff[j] to a newline.
                   This will accomplish several things at once:
                   o It will change a \r to a \n if that's what's there
                   o If it's a \n, it'll stay the same.  No biggie.
                   o If it's a space, it will end the line there and the
                   next line will begin with a comment, which is handled
                   above. */
                if (j < len)
                    pbuff[j++] = '\n';
        }
    }

    /* Change all occurrences of a backslash followed by a newline to nulls
       and null out all whitespace up to the next non-whitespace character.
       This handles support for breaking a string across multiple lines.
       j only advances explicitly, so the character that follows a
       continuation is itself examined (it may start another one). */
    for (j = 0; j < len;) {
        if (pbuff[j] == '\\' && (j + 1) < len && pbuff[j + 1] == '\n') {
            pbuff[j++] = '\0';
            for (; j < len && isspace((unsigned char) pbuff[j]); j++)
                pbuff[j] = '\0';        /* Null out the whitespace */
        } else {
            j++;
        }
    }

    /* And the final step, garbage collect the buffer to condense all
       those nulls we just put in. */
    return (garbage_collect(buff, len));
}
/*
 * Collapse every run of whitespace in s to a single space, in place, and
 * drop a trailing space if one remains.  Leading whitespace is reduced to
 * one space, not removed.  The buffer is then handed to REALLOC() to be
 * trimmed to fit and the REALLOC() result is returned, so s must be a
 * heap buffer suitable for REALLOC().  A NULL s yields NULL.
 */
char *
condense_whitespace(char *s)
{
    register unsigned char gotspc = 0;
    register char *pbuff = s, *pbuff2 = s;

    if (s == NULL) {
        return ((char *) NULL);
    }
    D_STRINGS(("condense_whitespace(%s) called.\n", s));

    /* pbuff2 reads, pbuff writes; pbuff never overtakes pbuff2. */
    for (; *pbuff2; pbuff2++) {
        if (isspace((unsigned char) *pbuff2)) {
            if (!gotspc) {
                *pbuff = ' ';
                gotspc = 1;
                pbuff++;
            }
        } else {
            *pbuff = *pbuff2;
            gotspc = 0;
            pbuff++;
        }
    }

    /* Only look at the previous byte when at least one was written;
       for an empty result there is nothing before s to inspect. */
    if ((pbuff > s) && (isspace((unsigned char) *(pbuff - 1))))
        pbuff--;
    *pbuff = 0;

    D_STRINGS(("condense_whitespace() returning \"%s\".\n", s));
    return (REALLOC(s, strlen(s) + 1));
}
/*
 * Make the first len bytes of str printable by replacing every control
 * character (as classified by iscntrl(), NUL included) with '.'.  The change
 * is made in place; str is returned.  A NULL str yields NULL.
 *
 * Bytes go through unsigned char before iscntrl(), because the <ctype.h>
 * functions are undefined for negative arguments other than EOF.
 */
char *
safe_str(register char *str, unsigned short len)
{
    register unsigned short i;

    if (str == NULL) {
        return ((char *) NULL);
    }
    for (i = 0; i < len; i++) {
        if (iscntrl((unsigned char) str[i])) {
            str[i] = '.';
        }
    }
    return (str);
}
/*
 * Write a hex/ASCII dump of count bytes starting at buff to stderr, eight
 * bytes per row.  Every row repeats the base address and total size and
 * shows the row's offset, the bytes in hex, and the same bytes as text with
 * control characters replaced by '.' (via safe_str()).
 */
void
hex_dump(void *buff, register size_t count)
{
    register unsigned long j, k, l;
    register unsigned char *ptr;
    unsigned char buffr[9];     /* one row of data plus a terminating NUL */

    fprintf(stderr, " Address | Size | Offset | 00 01 02 03 04 05 06 07 | ASCII \n");
    fprintf(stderr, "---------+--------+---------+-------------------------+---------\n");
    for (ptr = (unsigned char *) buff, j = 0; j < count; j += 8) {
        fprintf(stderr, " %8p | %06lu | %07x | ", buff, (unsigned long) count, (unsigned int) j);

        /* The last row may hold fewer than eight bytes. */
        l = ((count - j < 8) ? (count - j) : (8));
        memset(buffr, 0, sizeof(buffr));
        memcpy(buffr, ptr + j, l);

        for (k = 0; k < l; k++) {
            fprintf(stderr, "%02x ", buffr[k]);
        }
        /* Each byte above takes three columns ("xx "), so every missing
           byte is padded with three spaces to keep the text column aligned. */
        for (; k < 8; k++) {
            fprintf(stderr, "   ");
        }
        fprintf(stderr, "| %-8s\n", safe_str((char *) buffr, (unsigned short) l));
    }
}