ossp-pkg/str/str_search.c
/*
** OSSP str - String Handling
** Copyright (c) 1999-2005 Ralf S. Engelschall <rse@engelschall.com>
** Copyright (c) 1999-2005 The OSSP Project <http://www.ossp.org/>
**
** This file is part of OSSP str, a string handling and manipulation
** library which can be found at http://www.ossp.org/pkg/lib/str/.
**
** Permission to use, copy, modify, and distribute this software for
** any purpose with or without fee is hereby granted, provided that
** the above copyright notice and this permission notice appear in all
** copies.
**
** THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
** WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
** MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
** IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
** CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
** LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
** USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
** OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
** OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
** SUCH DAMAGE.
**
** str_search.c: searching functions
*/
#include "str_p.h"
/*
* str_span -- span over a set of character in a string.
* This is a spanning function inspired by POSIX strchr(3) and
* strrchr(3). But it is more flexible, because it provides
* left-to-right and right-to-left spanning combined with either
* performing positive or negative charsets. Additionally it allows one
* to restrict the number of maximum spanned characters.
*/
char *str_span(const char *s, str_size_t m, const char *charset, int mode)
{
register const char *sp;
register const char *cp;
register char sc, cc;
register int n;
char *rv;
if (s == NULL || charset == NULL)
return NULL;
n = m;
if (n == 0)
str_ilen(n, s);
rv = NULL;
if (!(mode & STR_RIGHT) && !(mode & STR_COMPLEMENT)) {
/* span from left to right while chars are in charset */
sp = s;
l1:
sc = *sp++; n--;
if (sc != NUL && n >= 0) {
for (cp = charset; (cc = *cp++) != NUL; )
if (sc == cc)
goto l1;
}
rv = (char *)(sp - 1);
}
else if ((mode & STR_RIGHT) && !(mode & STR_COMPLEMENT)) {
/* span from right to left while chars are in charset */
sp = s;
while (*sp != NUL && n > 0)
sp++, n--;
if (sp > s)
sp--;
l2:
sc = *sp--;
if (sp >= s) {
for (cp = charset; (cc = *cp++) != NUL; )
if (sc == cc)
goto l2;
}
rv = (char *)(sp + 1);
}
else if (!(mode & STR_RIGHT) && (mode & STR_COMPLEMENT)) {
/* span from left to right while chars are NOT in charset */
sp = s;
l3a:
sc = *sp++; n--;
if (sc != NUL && n >= 0) {
for (cp = charset; (cc = *cp++) != NUL; )
if (sc == cc)
goto l3;
goto l3a;
}
l3:
rv = (char *)(sp - 1);
}
else if ((mode & STR_RIGHT) && (mode & STR_COMPLEMENT)) {
/* span from right to left while chars are NOT in charset */
sp = s;
while (*sp != NUL && n > 0)
sp++, n--;
if (sp > s)
sp--;
l4a:
sc = *sp--;
if (sp >= s) {
for (cp = charset; (cc = *cp++) != NUL; )
if (sc == cc)
goto l4;
goto l4a;
}
l4:
rv = (char *)(sp + 1);
}
return rv;
}
/*
* str_locate - locate a string in another one.
* This is an optimized version of SUNDAY's Quick Search algorithm
* which it turn is a simplification of the Boyer-Moore text searching
* algorithm. It is very fast in practice for short patterns and long
* alphabets, because the bad-character shift used in the Boyer-Moore
* algorithm is not very efficient for small alphabets. But when the
* alphabet is large compared with the length of the pattern, as it is
* often the case with the ASCII table and ordinary searches made under
* a text editor or similar applications, it becomes very useful. Using
* it alone produces a very efficient algorithm in practice. It has a
* quadratic worst-case time complexity but a good practical behaviour.
* The implementation below was speed-optimized, too.
*/
char *str_locate(const char *ay, str_size_t n, const char *ax)
{
int qs_bc[256 /* 2^8 */];
register const unsigned char *x = (const unsigned char *)ax;
register const unsigned char *y = (const unsigned char *)ay;
register int m;
register int i;
register const unsigned char *tx;
register const unsigned char *ty;
int tm;
/*
* Consistency checks
*/
if (y == NULL || x == NULL)
return NULL;
if (*x == NUL)
return (char *)y;
/*
* Preprocessing
*/
/* determine length of strings */
str_ilen(m, x);
if (n == 0)
str_ilen(n, y);
/* fill whole bad char skip array */
for (i = 0; i < (sizeof(qs_bc)/sizeof(int)); i++)
qs_bc[(int)i] = m+1;
/* override positions related to pattern */
for (i = 0; i < m; i++)
qs_bc[(int)*(x+i)] = m-i;
/*
* Searching
*/
/* search inside a len-terminated string */
while (n >= m) {
/* compare first char (speed-up) */
if (*y == *x) {
/* compare remaining chars */
ty = y+1;
tx = x+1;
tm = m-1;
do {
if (tm-- == 0)
return (char *)y; /* found pattern */
} while (*ty++ == *tx++);
}
i = qs_bc[(int)*(y+m)]; /* perform shift */
y += i;
n -= i;
}
return NULL;
}