OSSP CVS Repository

ossp - ossp-pkg/str/str_parse.c
Not logged in
[Honeypot]  [Browse]  [Directory]  [Home]  [Login]
[Reports]  [Search]  [Ticket]  [Timeline]
  [Raw]

ossp-pkg/str/str_parse.c
/*
**  OSSP str - String Handling
**  Copyright (c) 1999-2005 Ralf S. Engelschall <rse@engelschall.com>
**  Copyright (c) 1999-2005 The OSSP Project <http://www.ossp.org/>
**
**  This file is part of OSSP str, a string handling and manipulation
**  library which can be found at http://www.ossp.org/pkg/lib/str/.
**
**  Permission to use, copy, modify, and distribute this software for
**  any purpose with or without fee is hereby granted, provided that
**  the above copyright notice and this permission notice appear in all
**  copies.
**
**  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
**  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
**  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
**  IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
**  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
**  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
**  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
**  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
**  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
**  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
**  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
**  SUCH DAMAGE.
**
**  str_parse.c: parsing functions
*/

#include "str_p.h"

/*
 * Compile a regular expression from a character range into PCRE's
 * internal format.
 *
 *   ptr           start of the pattern text; ptr[len] must be readable
 *                 because it is inspected to detect an already
 *                 NUL-terminated pattern
 *   len           number of pattern bytes at ptr (negative is rejected)
 *   opt           option bits handed through to pcre_compile()
 *   p_pcre        receives the compiled pattern; NULL on failure
 *   p_pcre_extra  if not NULL, the pattern is additionally studied and
 *                 the result of pcre_study() is stored here
 *
 * Returns TRUE on success and FALSE on failure.
 */
static int
pattern_compile(
    const char *ptr,
    int len,
    int opt,
    pcre **p_pcre,
    pcre_extra **p_pcre_extra)
{
    const char *err_str;
    char buf[128];
    int err_pos;
    char *cp;

    /* make the failure state well-defined for the caller */
    *p_pcre = NULL;
    if (p_pcre_extra != NULL)
        *p_pcre_extra = NULL;

    /* a negative length can neither be indexed nor copied */
    if (len < 0)
        return FALSE;

    if (ptr[len] == NUL) {
        /* plain string, so we can speed up processing... */
        *p_pcre = pcre_compile(ptr, opt, &err_str, &err_pos, NULL);
    }
    else if ((size_t)len < sizeof(buf)) {
        /* ...else we need a temporary NUL-terminated copy: either in a
           local buffer to avoid malloc/free ping-pong... */
        memcpy(buf, ptr, (size_t)len);
        buf[len] = NUL;
        *p_pcre = pcre_compile(buf, opt, &err_str, &err_pos, NULL);
    }
    else {
        /* ...or in an actually allocated memory chunk :-( */
        if ((cp = malloc((size_t)len + 1)) == NULL)
            return FALSE;
        memcpy(cp, ptr, (size_t)len);
        cp[len] = NUL;
        *p_pcre = pcre_compile(cp, opt, &err_str, &err_pos, NULL);
        free(cp);
    }
    if (*p_pcre == NULL)
        return FALSE;

    /* optionally study pattern */
    if (p_pcre_extra != NULL) {
        err_str = NULL;
        *p_pcre_extra = pcre_study(*p_pcre, 0, &err_str);
        if (err_str != NULL) {
            /* release the compiled pattern itself -- not the caller's
               pointer variable, which is what p_pcre points to */
            free(*p_pcre);
            *p_pcre = NULL;
            return FALSE;
        }
    }
    return TRUE;
}

/* the hash table entry in the pattern cache; entries whose keys hash
   to the same slot are chained through `next' */
struct hash_entry {
    struct hash_entry *next;    /* following entry in the same slot, or NULL */
    char *key;                  /* heap copy of the key bytes (stored without a NUL) */
    int keylen;                 /* number of bytes in key */
    pcre *p_pcre;               /* compiled pattern handed to pattern_cache() */
    pcre_extra *p_pcre_extra;   /* study data handed to pattern_cache() */
};

/* size of the cache hash table; is prime */
#define HASH_SIZE 101

/* the pattern cache hash table: one chain of hash_entry per slot */
static struct hash_entry *pattern_hash[HASH_SIZE];

/* initialization flag for hash table; str_parse_va() sets it to TRUE
   once it has run hash_init() on its first call */
static int hash_initialized = FALSE;

/*
 * Initialize the cache hash table by marking every slot as empty.
 */
static void
hash_init(void)
{
    struct hash_entry **slot;

    for (slot = pattern_hash; slot < pattern_hash + HASH_SIZE; slot++)
        *slot = NULL;
}

/*
 * Destroy the cache hash table: every slot is emptied and each chained
 * entry is released together with its key copy. The compiled pattern
 * pointers stored inside the entries are not touched by this function.
 */
static void
hash_destroy(void)
{
    int slot;
    struct hash_entry *chain, *following;

    for (slot = 0; slot < HASH_SIZE; slot++) {
        /* detach the chain from the table first, then walk it */
        chain = pattern_hash[slot];
        pattern_hash[slot] = NULL;
        for (; chain != NULL; chain = following) {
            /* remember the successor before the node goes away */
            following = chain->next;
            free(chain->key); /* free(NULL) is a no-op */
            free(chain);
        }
    }
}

/*
 * The hashing function: a popular `times 33' hash, reduced modulo the
 * table size after every step.
 *
 *   key     the bytes to hash; does not have to be NUL-terminated
 *   keylen  number of bytes at key; values <= 0 hash like an empty key
 *
 * Exactly keylen bytes are hashed, which matches how keys are stored
 * (as keylen bytes without a terminator) and compared (via memcmp over
 * keylen bytes). Returns a slot index in the range 0 .. HASH_SIZE-1.
 */
static unsigned int
hash_func(
    const char *key,
    int keylen)
{
    unsigned int h;
    int i;

    h = 0xDEAD;
    for (i = 0; i < keylen; i++) {
        /* go through unsigned char so that bytes >= 0x80 hash the same
           regardless of whether plain char is signed */
        h = ((((h<<5)+h)+(unsigned char)key[i]) % HASH_SIZE);
    }
    /* the seed itself is larger than the table, so reduce once more to
       keep the result a valid slot index even for an empty key */
    return (h % HASH_SIZE);
}

/*
 * Cache a pattern: store a private copy of the key together with the
 * given compiled pattern and study data at the tail of the key's hash
 * chain. If memory for the entry or the key copy cannot be obtained,
 * nothing is stored and the function returns silently.
 */
static void
pattern_cache(
    const char *key,
    int keylen,
    pcre *p_pcre,
    pcre_extra *p_pcre_extra)
{
    struct hash_entry *entry;
    struct hash_entry **link;

    entry = (struct hash_entry *)malloc(sizeof(struct hash_entry));
    if (entry == NULL)
        return;
    entry->key = malloc(keylen);
    if (entry->key == NULL) {
        free(entry);
        return;
    }
    memcpy(entry->key, key, keylen);
    entry->keylen       = keylen;
    entry->p_pcre       = p_pcre;
    entry->p_pcre_extra = p_pcre_extra;
    entry->next         = NULL;

    /* advance to the terminating link of the slot's chain and hook
       the new entry in there */
    link = &pattern_hash[hash_func(key, keylen)];
    while (*link != NULL)
        link = &(*link)->next;
    *link = entry;
}

/*
 * Lookup a pattern in the cache.
 *
 *   key           the key bytes to search for
 *   keylen        number of bytes at key
 *   p_pcre        receives the cached compiled pattern, or NULL if the
 *                 key is not in the cache
 *   p_pcre_extra  receives the cached study data, or NULL if the key is
 *                 not in the cache
 *
 * An entry matches when its key has the same length and the same bytes.
 */
static void
pattern_lookup(
    const char *key,
    int keylen,
    pcre **p_pcre,
    pcre_extra **p_pcre_extra)
{
    struct hash_entry *he;

    *p_pcre = NULL;
    *p_pcre_extra = NULL;

    for (he = pattern_hash[hash_func(key, keylen)]; he != NULL; he = he->next) {
        /* memcmp() yields 0 for identical bytes, so a hit is "== 0";
           testing for non-zero would select the first *different* key */
        if (he->keylen == keylen && memcmp(he->key, key, keylen) == 0) {
            *p_pcre = he->p_pcre;
            *p_pcre_extra = he->p_pcre_extra;
            return;
        }
    }
    return;
}

/*
 * Flush callback used for the sizing pass of a substitution: instead
 * of emitting anything, it adds the buffer size kept in data[1].i to
 * the running total in data[2].i and rewinds the write position to the
 * buffer start kept in data[0].p. Always returns 0.
 */
static int
str_parse_flush_nop(
    str_vformat_t *sf)
{
    sf->data[2].i += sf->data[1].i;
    sf->curpos = (char *)sf->data[0].p;
    return 0;
}

/*
 * Flush callback used when formatting into the final, exactly sized
 * output string: there is nothing to flush to, so it unconditionally
 * returns -1 (presumably telling the formatter to stop -- confirm
 * against str_vformat()).
 */
static int
str_parse_flush_str(
    str_vformat_t *sf)
{
    (void)sf; /* intentionally unused */
    return -1;
}

/*
 * Format callback implementing the `%{N}R' directive, which expands to
 * the N-th captured substring (N being a single decimal digit).
 *
 * The callback reads its context from the formatter state:
 *   sf->data[3].p  the subject string the offsets refer to
 *   sf->data[4].p  the vector of capture offsets (pairs of start/end)
 *   sf->data[5].i  the number of capture groups
 *
 * On success the start of the captured text inside the subject string
 * is returned and its length is stored in *ipStrLen. NULL is returned
 * for any other directive, for a malformed or out-of-range group
 * number, when no offset vector is available, or when the group's
 * offsets are -1. The remaining parameters are not used.
 */
static char *
str_parse_format(
    str_vformat_t *sf,
    char *cpPrefix,
    char *cpPad,
    int *ipStrLen,
    char *cpBuf,
    int nBufLen,
    char *cpExtinfo,
    int cFmt,
    va_list ap)
{
    char *string;
    int *cap_vec;
    int cap_num;
    int n;

    /* only `R' with exactly one digit as extension info is handled */
    if (cFmt != 'R')
        return NULL;
    if (cpExtinfo == NULL || !str_isdigit(cpExtinfo[0]) || cpExtinfo[1] != NUL)
        return NULL;

    n       = cpExtinfo[0] - '0';
    string  = (char *)sf->data[3].p;
    cap_vec = (int *)sf->data[4].p;
    cap_num = sf->data[5].i;

    /* without an offset vector there is nothing to index into; this
       happens when the caller did not allocate one */
    if (cap_vec == NULL || n > cap_num)
        return NULL;
    if (cap_vec[(n*2)] == -1 || cap_vec[(n*2)+1] == -1)
        return NULL;

    *ipStrLen = (cap_vec[(n*2)+1] - cap_vec[(n*2)]);
    return (char *)(string+cap_vec[(n*2)]);
}

/*
 * The API parsing function: variadic front-end which bundles its
 * trailing arguments into a va_list and delegates all work to
 * str_parse_va(), whose result is returned unchanged.
 */
int str_parse(const char *string, const char *pattern, ...)
{
    va_list args;
    int result;

    va_start(args, pattern);
    result = str_parse_va(string, pattern, args);
    va_end(args);
    return result;
}
int str_parse_va(const char *string, const char *pattern, va_list ap)
{
    pcre *p_pcre = NULL;
    pcre_extra *p_pcre_extra = NULL;
    const char *match_ptr;
    int match_len;
    int match_opt;
    int match_once;
    int match_1resbuf;
    const char *subst_ptr;
    int subst_len;
    int *cap_vec;
    int cap_num;
    int cap_len;
    char *cp;
    char *cp2;
    char **cpp;
    char cb[2];
    int n;
    int i;
    int k;
    int l;
    int ismop;
    int issop;
    char buf[128];
    char buf2[128];
    char *buf_ptr;
    str_vformat_t sf;
    va_list ap_temp;

    /*
     * Caching support
     */
    /* hash table initialization */
    if (!hash_initialized) {
        hash_init();
        atexit(hash_destroy);
        hash_initialized = TRUE;
    }
    /* hash table destruction */
    if (string == NULL && pattern == NULL) {
        hash_destroy();
        return 0;
    }

    /*
     * Check input parameters
     */
    if (string == NULL || pattern == NULL)
        return -1;

    /*
     * Parse pattern
     */
    match_ptr     = NULL;
    match_len     = 0;
    match_opt     = 0;
    match_once    = FALSE;
    match_1resbuf = FALSE;
    subst_ptr     = NULL;
    subst_len     = 0;
    ismop         = FALSE;
    issop         = FALSE;
    cp            = NULL; /* compiler happyness only */
    cp2           = NULL; /* compiler happyness only */
    /* determine type of pattern and remember important positions */
    if (*pattern == 'm' && str_len(pattern) >= 3)
        if ((cp = str_span(pattern, 0, "imsxob", STR_RIGHT)) > pattern+1)
            if (*(pattern+1) == *cp)
                ismop = TRUE;
    if (!ismop)
        if (*pattern == 's' && str_len(pattern) >= 4)
            if ((cp = str_span(pattern, 0, "imsxo", STR_RIGHT)) > pattern+1)
                if ((cb[0] = *cp, cb[1] = NUL,
                     cp2 = str_span(pattern, cp-pattern, cb, STR_RIGHT|STR_COMPLEMENT)) > pattern+1)
                    if (*(pattern+1) == *cp && *(pattern+1) == *cp2)
                        issop = TRUE;
    /* finish parsing */
    if (ismop) {
        /* pattern is a match operation */
        match_ptr = pattern + 2;
        match_len = cp - match_ptr;
        cp++;
        for (i = 0; cp[i] != NUL; i++) {
            switch (cp[i]) {
                case 'i': match_opt |= PCRE_CASELESS;  break;
                case 'm': match_opt |= PCRE_MULTILINE; break;
                case 's': match_opt |= PCRE_DOTALL;    break;
                case 'x': match_opt |= PCRE_EXTENDED;  break;
                case 'o': match_once = TRUE;           break;
                case 'b': match_1resbuf = TRUE;        break;
                default:
                     return -1;
            }
        }
    }
    else if (issop) {
        /* pattern is a substitute operation */
        match_ptr = pattern + 2;
        match_len = cp2 - match_ptr;
        subst_ptr = cp2 + 1;
        subst_len = cp - subst_ptr;
        cp++;
        for (i = 0; cp[i] != NUL; i++) {
            switch (cp[i]) {
                case 'i': match_opt |= PCRE_CASELESS;  break;
                case 'm': match_opt |= PCRE_MULTILINE; break;
                case 's': match_opt |= PCRE_DOTALL;    break;
                case 'x': match_opt |= PCRE_EXTENDED;  break;
                case 'o': match_once = TRUE;           break;
                default:
                     return -1;
            }
        }
    }
    else {
        /* fallback: treat pattern as a match operation */
        match_ptr = pattern;
        match_len = str_len(pattern);
        ismop = TRUE;
    }

    /*
     * Compile pattern into internal PCRE structure
     */
    if (match_once) {
        /* optimized processing: up to factor 15(!) for complex regular expressions */
        pattern_lookup(match_ptr, match_len, &p_pcre, &p_pcre_extra);
        if (p_pcre == NULL) {
            if (!pattern_compile(match_ptr, match_len, match_opt, &p_pcre, &p_pcre_extra))
                return -1;
            pattern_cache(match_ptr, match_len, p_pcre, p_pcre_extra);
        }
    }
    else {
        /* unoptimized processing */
        p_pcre_extra = NULL;
        if (!pattern_compile(match_ptr, match_len, match_opt, &p_pcre, NULL))
            return -1;
    }

    /*
     * Allocate storage for offset table of captured substrings
     */
    cap_vec = NULL;
    cap_len = 0;
    cap_num = pcre_info(p_pcre, NULL, NULL);
    if (cap_num > 0) {
        cap_len = (cap_num+1)*3;
        if ((cap_vec = (int *)malloc(cap_len*sizeof(int))) == NULL) {
            if (p_pcre != NULL)
                free(p_pcre);
            if (p_pcre_extra != NULL)
                free(p_pcre_extra);
            return -1;
        }
    }

    /*
     * Perform the matching operation
     */
    n = pcre_exec(p_pcre, p_pcre_extra, string, str_len(string), 0, 0, cap_vec, cap_len);
    if (n < 0) {
        if (cap_vec != NULL)
            free(cap_vec);
        if (p_pcre != NULL)
            free(p_pcre);
        if (p_pcre_extra != NULL)
            free(p_pcre_extra);
        if (n == PCRE_ERROR_NOMATCH)
            return 0;
        return -1;
    }

    /*
     * Create either matching or substitution result
     */
    if (ismop && cap_num > 0) {
        /*
         * extract captured substrings into caller provided pointer variables
         */
        if (match_1resbuf) {
            /* use a single result buffer */
            l = 0;
            for (i = 1; i <= cap_num && i <= (n-1); i++) {
                if (cap_vec[(i*2)] != -1 && cap_vec[(i*2)+1] != -1) {
                    k = (cap_vec[(i*2)+1] - cap_vec[(i*2)]);
                    if (k > 0)
                        l += k+1;
                }
            }
            cpp = va_arg(ap, char **);
            if (cpp == NULL)
                cpp = &cp;
            if ((*cpp = malloc(l)) != NULL) {
                cp = *cpp;
                for (i = 1; i <= cap_num; i++) {
                    cpp = va_arg(ap, char **);
                    if (cpp != NULL) {
                        if (i <= (n-1)) {
                            if (cap_vec[(i*2)] != -1 && cap_vec[(i*2)+1] != -1) {
                                k = (cap_vec[(i*2)+1] - cap_vec[(i*2)]);
                                if (k > 0) {
                                    memcpy(cp, (char *)(string+cap_vec[(i*2)]), k);
                                    cp += k;
                                    *cp++ = NUL;
                                    continue;
                                }
                            }
                        }
                        *cpp = cp;
                        *cp++ = NUL;
                    }
                }
            }
        }
        else {
            /* use multiple result buffers */
            for (i = 1; i <= cap_num; i++) {
                cpp = va_arg(ap, char **);
                if (cpp != NULL) {
                    if (i <= (n-1)) {
                        if (cap_vec[(i*2)] != -1 && cap_vec[(i*2)+1] != -1) {
                            k = (cap_vec[(i*2)+1] - cap_vec[(i*2)]);
                            if (k > 0) {
                                if ((*cpp = malloc(k+1)) != NULL) {
                                    memcpy(*cpp, (char *)(string+cap_vec[(i*2)]), k);
                                    (*cpp)[k] = NUL;
                                    continue;
                                }
                            }
                        }
                    }
                    *cpp = strdup("");
                }
            }
        }
    }
    else if (issop) {
        /*
         * create a substitutional string with optional expansions
         */

        /* determine required buffer len */
        l = 0;
        for (cp = (char *)subst_ptr; cp < (subst_ptr+subst_len); cp++, l++) {
            if (*cp == '$') {
                if (!(cp > subst_ptr && *(cp-1) == '\\')) {
                    if (cp < (subst_ptr+subst_len-1) && str_isdigit(*(cp+1))) {
                        cp += 1;
                        l  += 4;
                    }
                }
            }
        }
        l++; /* NUL char */

        /* allocate temp buffer */
        if (l <= sizeof(buf))
            buf_ptr = buf;
        else
            buf_ptr = (char *)malloc(l);

        /* copy subst string into temp buffer and replace $N with %{N}R */
        for (cp = (char *)subst_ptr, cp2 = buf_ptr; cp < (subst_ptr+subst_len); ) {
            if (*cp == '$') {
                if (!(cp > subst_ptr && *(cp-1) == '\\')) {
                    if (cp < (subst_ptr+subst_len-1) && str_isdigit(*(cp+1))) {
                        *cp2++ = '%';
                        *cp2++ = '{';
                        *cp2++ = *(cp+1);
                        *cp2++ = '}';
                        *cp2++ = 'R';
                        cp += 2;
                        continue;
                    }
                }
            }
            *cp2++ = *cp++;
        }
        *cp2 = NUL;

        /* remove output argument from varargs */
        cpp = va_arg(ap, char **);

        /* calculate output buffer requirement */
        sf.curpos    = buf2;
        sf.endpos    = buf2 + sizeof(buf2) - 1;
        sf.flush     = str_parse_flush_nop;
        sf.format    = str_parse_format;
        sf.data[0].p = buf2;
        sf.data[1].i = sizeof(buf2);
        sf.data[2].i = 0;
        sf.data[3].p = (char *)string;
        sf.data[4].p = cap_vec;
        sf.data[5].i = cap_num;
        va_copy(ap_temp, ap);
        l = str_vformat(&sf, buf_ptr, ap_temp);

        /* allocate output buffer */
        if ((*cpp = (char *)malloc(l+1)) == NULL) {
            if (cap_vec != NULL)
                free(cap_vec);
            if (p_pcre != NULL)
                free(p_pcre);
            if (p_pcre_extra != NULL)
                free(p_pcre_extra);
            return -1; /* XXX */
        }

        /* finally expand the substitutions string into output buffer */
        sf.curpos    = *cpp;
        sf.endpos    = *cpp + l;
        sf.flush     = str_parse_flush_str;
        sf.format    = str_parse_format;
        sf.data[3].p = (char *)string;
        sf.data[4].p = cap_vec;
        sf.data[5].i = cap_num;
        str_vformat(&sf, buf_ptr, ap);
        *((*cpp)+l) = NUL;

        /* free temp buffer */
        if (buf_ptr != buf)
            free(buf_ptr);
    }

    /* cleanup */
    if (cap_vec != NULL)
        free(cap_vec);
    if (p_pcre != NULL)
        free(p_pcre);
    if (p_pcre_extra != NULL)
        free(p_pcre_extra);
    /* return success */
    return 1;
}


CVSTrac 2.0.1