/* ** OSSP str - String Handling ** Copyright (c) 1999-2005 Ralf S. Engelschall ** Copyright (c) 1999-2005 The OSSP Project ** ** This file is part of OSSP str, a string handling and manipulation ** library which can be found at http://www.ossp.org/pkg/lib/str/. ** ** Permission to use, copy, modify, and distribute this software for ** any purpose with or without fee is hereby granted, provided that ** the above copyright notice and this permission notice appear in all ** copies. ** ** THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED ** WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ** MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. ** IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR ** CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ** LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF ** USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, ** OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT ** OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ** SUCH DAMAGE. ** ** str_search.c: searching functions */ #include "str_p.h" /* * str_span -- span over a set of character in a string. * This is a spanning function inspired by POSIX strchr(3) and * strrchr(3). But it is more flexible, because it provides * left-to-right and right-to-left spanning combined with either * performing positive or negative charsets. Additionally it allows one * to restrict the number of maximum spanned characters. */ char *str_span(const char *s, str_size_t m, const char *charset, int mode) { register const char *sp; register const char *cp; register char sc, cc; register int n; char *rv; if (s == NULL || charset == NULL) return NULL; n = m; if (n == 0) str_ilen(n, s); rv = NULL; if (!(mode & STR_RIGHT) && !(mode & STR_COMPLEMENT)) { /* span from left to right while chars are in charset */ sp = s; l1: sc = *sp++; n--; if (sc != NUL && n >= 0) { for (cp = charset; (cc = *cp++) != NUL; ) if (sc == cc) goto l1; } rv = (char *)(sp - 1); } else if ((mode & STR_RIGHT) && !(mode & STR_COMPLEMENT)) { /* span from right to left while chars are in charset */ sp = s; while (*sp != NUL && n > 0) sp++, n--; if (sp > s) sp--; l2: sc = *sp--; if (sp >= s) { for (cp = charset; (cc = *cp++) != NUL; ) if (sc == cc) goto l2; } rv = (char *)(sp + 1); } else if (!(mode & STR_RIGHT) && (mode & STR_COMPLEMENT)) { /* span from left to right while chars are NOT in charset */ sp = s; l3a: sc = *sp++; n--; if (sc != NUL && n >= 0) { for (cp = charset; (cc = *cp++) != NUL; ) if (sc == cc) goto l3; goto l3a; } l3: rv = (char *)(sp - 1); } else if ((mode & STR_RIGHT) && (mode & STR_COMPLEMENT)) { /* span from right to left while chars are NOT in charset */ sp = s; while (*sp != NUL && n > 0) sp++, n--; if (sp > s) sp--; l4a: sc = *sp--; if (sp >= s) { for (cp = charset; (cc = *cp++) != NUL; ) if (sc == cc) goto l4; goto l4a; } l4: rv = (char *)(sp + 1); } return rv; } /* * str_locate - locate a string in another one. * This is an optimized version of SUNDAY's Quick Search algorithm * which it turn is a simplification of the Boyer-Moore text searching * algorithm. It is very fast in practice for short patterns and long * alphabets, because the bad-character shift used in the Boyer-Moore * algorithm is not very efficient for small alphabets. But when the * alphabet is large compared with the length of the pattern, as it is * often the case with the ASCII table and ordinary searches made under * a text editor or similar applications, it becomes very useful. Using * it alone produces a very efficient algorithm in practice. It has a * quadratic worst-case time complexity but a good practical behaviour. * The implementation below was speed-optimized, too. */ char *str_locate(const char *ay, str_size_t n, const char *ax) { int qs_bc[256 /* 2^8 */]; register const unsigned char *x = (const unsigned char *)ax; register const unsigned char *y = (const unsigned char *)ay; register int m; register int i; register const unsigned char *tx; register const unsigned char *ty; int tm; /* * Consistency checks */ if (y == NULL || x == NULL) return NULL; if (*x == NUL) return (char *)y; /* * Preprocessing */ /* determine length of strings */ str_ilen(m, x); if (n == 0) str_ilen(n, y); /* fill whole bad char skip array */ for (i = 0; i < (sizeof(qs_bc)/sizeof(int)); i++) qs_bc[(int)i] = m+1; /* override positions related to pattern */ for (i = 0; i < m; i++) qs_bc[(int)*(x+i)] = m-i; /* * Searching */ /* search inside a len-terminated string */ while (n >= m) { /* compare first char (speed-up) */ if (*y == *x) { /* compare remaining chars */ ty = y+1; tx = x+1; tm = m-1; do { if (tm-- == 0) return (char *)y; /* found pattern */ } while (*ty++ == *tx++); } i = qs_bc[(int)*(y+m)]; /* perform shift */ y += i; n -= i; } return NULL; }