OSSP CVS Repository

ossp - ossp-pkg/str/str_search.c
Not logged in
[Honeypot]  [Browse]  [Directory]  [Home]  [Login
[Reports]  [Search]  [Ticket]  [Timeline
  [Raw

ossp-pkg/str/str_search.c
/*
**  OSSP str - String Handling
**  Copyright (c) 1999-2005 Ralf S. Engelschall <rse@engelschall.com>
**  Copyright (c) 1999-2005 The OSSP Project <http://www.ossp.org/>
**
**  This file is part of OSSP str, a string handling and manipulation
**  library which can be found at http://www.ossp.org/pkg/lib/str/.
**
**  Permission to use, copy, modify, and distribute this software for
**  any purpose with or without fee is hereby granted, provided that
**  the above copyright notice and this permission notice appear in all
**  copies.
**
**  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
**  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
**  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
**  IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
**  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
**  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
**  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
**  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
**  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
**  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
**  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
**  SUCH DAMAGE.
**
**  str_search.c: searching functions
*/

#include "str_p.h"

/*
 * str_span -- span over a set of character in a string.
 * This is a spanning function inspired by POSIX strchr(3) and
 * strrchr(3). But it is more flexible, because it provides
 * left-to-right and right-to-left spanning combined with either
 * performing positive or negative charsets. Additionally it allows one
 * to restrict the number of maximum spanned characters.
 */
char *str_span(const char *s, str_size_t m, const char *charset, int mode)
{
    register const char *sp;
    register const char *cp;
    register char sc, cc;
    register int n;
    char *rv;

    if (s == NULL || charset == NULL)
        return NULL;
    n = m;
    if (n == 0)
        str_ilen(n, s);
    rv = NULL;
    if (!(mode & STR_RIGHT) && !(mode & STR_COMPLEMENT)) {
        /* span from left to right while chars are in charset */
        sp = s;
        l1:
        sc = *sp++; n--;
        if (sc != NUL && n >= 0) {
            for (cp = charset; (cc = *cp++) != NUL; )
                if (sc == cc)
                    goto l1;
        }
        rv = (char *)(sp - 1);
    }
    else if ((mode & STR_RIGHT) && !(mode & STR_COMPLEMENT)) {
        /* span from right to left while chars are in charset */
        sp = s;
        while (*sp != NUL && n > 0)
            sp++, n--;
        if (sp > s)
            sp--;
        l2:
        sc = *sp--;
        if (sp >= s) {
            for (cp = charset; (cc = *cp++) != NUL; )
                if (sc == cc)
                    goto l2;
        }
        rv = (char *)(sp + 1);
    }
    else if (!(mode & STR_RIGHT) && (mode & STR_COMPLEMENT)) {
        /* span from left to right while chars are NOT in charset */
        sp = s;
        l3a:
        sc = *sp++; n--;
        if (sc != NUL && n >= 0) {
            for (cp = charset; (cc = *cp++) != NUL; )
                if (sc == cc)
                    goto l3;
            goto l3a;
        }
        l3:
        rv = (char *)(sp - 1);
    }
    else if ((mode & STR_RIGHT) && (mode & STR_COMPLEMENT)) {
        /* span from right to left while chars are NOT in charset */
        sp = s;
        while (*sp != NUL && n > 0)
            sp++, n--;
        if (sp > s)
            sp--;
        l4a:
        sc = *sp--;
        if (sp >= s) {
            for (cp = charset; (cc = *cp++) != NUL; )
                if (sc == cc)
                    goto l4;
            goto l4a;
        }
        l4:
        rv = (char *)(sp + 1);
    }
    return rv;
}

/*
 * str_locate - locate a string in another one.
 * This is an optimized version of SUNDAY's Quick Search algorithm
 * which it turn is a simplification of the Boyer-Moore text searching
 * algorithm. It is very fast in practice for short patterns and long
 * alphabets, because the bad-character shift used in the Boyer-Moore
 * algorithm is not very efficient for small alphabets. But when the
 * alphabet is large compared with the length of the pattern, as it is
 * often the case with the ASCII table and ordinary searches made under
 * a text editor or similar applications, it becomes very useful. Using
 * it alone produces a very efficient algorithm in practice. It has a
 * quadratic worst-case time complexity but a good practical behaviour.
 * The implementation below was speed-optimized, too.
 */
char *str_locate(const char *ay, str_size_t n, const char *ax)
{
    int qs_bc[256 /* 2^8 */];
    register const unsigned char *x = (const unsigned char *)ax;
    register const unsigned char *y = (const unsigned char *)ay;
    register int m;
    register int i;
    register const unsigned char *tx;
    register const unsigned char *ty;
    int tm;

    /*
     * Consistency checks
     */
    if (y == NULL || x == NULL)
        return NULL;
    if (*x == NUL)
        return (char *)y;

    /*
     * Preprocessing
     */

    /* determine length of strings */
    str_ilen(m, x);
    if (n == 0)
        str_ilen(n, y);

    /* fill whole bad char skip array */
    for (i = 0; i < (sizeof(qs_bc)/sizeof(int)); i++)
        qs_bc[(int)i] = m+1;

    /* override positions related to pattern */
    for (i = 0; i < m; i++)
        qs_bc[(int)*(x+i)] = m-i;

    /*
     * Searching
     */

    /* search inside a len-terminated string */
    while (n >= m) {
        /* compare first char (speed-up) */
        if (*y == *x) {
            /* compare remaining chars */
            ty = y+1;
            tx = x+1;
            tm = m-1;
            do {
                if (tm-- == 0)
                    return (char *)y; /* found pattern */
            } while (*ty++ == *tx++);
        }
        i = qs_bc[(int)*(y+m)];  /* perform shift */
        y += i;
        n -= i;
    }

    return NULL;
}


CVSTrac 2.0.1