/*
**  OSSP str - String Handling
**  Copyright (c) 1999-2005 Ralf S. Engelschall
**  Copyright (c) 1999-2005 The OSSP Project
**
**  This file is part of OSSP str, a string handling and manipulation
**  library which can be found at http://www.ossp.org/pkg/lib/str/.
**
**  Permission to use, copy, modify, and distribute this software for
**  any purpose with or without fee is hereby granted, provided that
**  the above copyright notice and this permission notice appear in all
**  copies.
**
**  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
**  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
**  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
**  IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
**  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
**  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
**  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
**  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
**  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
**  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
**  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
**  SUCH DAMAGE.
**
**  str_parse.c: parsing functions
*/

#include "str_p.h"

/*
 * Compile a regular expression pattern from string into internal format.
 *
 * ptr/len       the pattern text; it need not be NUL-terminated at ptr[len],
 *               but ptr[len] itself must be readable
 * opt           PCRE option bits passed to pcre_compile()
 * p_pcre        receives the compiled pattern (NULL on failure)
 * p_pcre_extra  if not NULL, the pattern is also studied and the study
 *               result (possibly NULL) is stored here
 *
 * Returns TRUE on success and FALSE on failure. On failure nothing is
 * left allocated and *p_pcre is NULL.
 */
static int
pattern_compile(
    const char *ptr,
    int len,
    int opt,
    pcre **p_pcre,
    pcre_extra **p_pcre_extra)
{
    const char *err_str;
    char buf[128];
    int err_pos;
    char *cp;

    *p_pcre = NULL;
    if (ptr[len] == NUL) {
        /* plain string, so we can speed up processing... */
        *p_pcre = pcre_compile(ptr, opt, &err_str, &err_pos, NULL);
    }
    else if ((size_t)len < sizeof(buf)) {
        /* ...else we need a temporary NUL-terminated copy: either in a
           local buffer to avoid malloc/free ping-pong... */
        memcpy(buf, ptr, (size_t)len);
        buf[len] = NUL;
        *p_pcre = pcre_compile(buf, opt, &err_str, &err_pos, NULL);
    }
    else {
        /* ...or in an actually allocated memory chunk :-( */
        if ((cp = malloc((size_t)len + 1)) == NULL)
            return FALSE;
        memcpy(cp, ptr, (size_t)len);
        cp[len] = NUL;
        *p_pcre = pcre_compile(cp, opt, &err_str, &err_pos, NULL);
        free(cp);
    }
    if (*p_pcre == NULL)
        return FALSE;

    /* optionally study pattern */
    if (p_pcre_extra != NULL) {
        *p_pcre_extra = pcre_study(*p_pcre, 0, &err_str);
        if (err_str != NULL) {
            /* release the compiled pattern itself, not the caller's
               pointer variable */
            free(*p_pcre);
            *p_pcre = NULL;
            *p_pcre_extra = NULL;
            return FALSE;
        }
    }
    return TRUE;
}

/* the hash table entry in the pattern cache */
struct hash_entry {
    struct hash_entry *next;         /* next entry in the same bucket */
    char              *key;          /* copy of the pattern text */
    int                keylen;       /* length of key in bytes */
    int                opt;          /* PCRE options the pattern was compiled with */
    pcre              *p_pcre;       /* compiled pattern (owned by the cache) */
    pcre_extra        *p_pcre_extra; /* study data (owned by the cache), may be NULL */
};

/* size of the cache hash table; is prime */
#define HASH_SIZE 101

/* the pattern cache hash table */
static struct hash_entry *pattern_hash[HASH_SIZE];

/* initialization flag for hash table */
static int hash_initialized = FALSE;

/* initialize cache hash table: mark every bucket as empty */
static void
hash_init(void)
{
    int i;

    for (i = 0; i < HASH_SIZE; i++)
        pattern_hash[i] = NULL;
    return;
}

/*
 * Destroy cache hash table: release every entry together with its key
 * and the compiled pattern objects the cache owns, leaving all buckets
 * empty so the table can be reused afterwards.
 */
static void
hash_destroy(void)
{
    int i;
    struct hash_entry *he, *ohe;

    for (i = 0; i < HASH_SIZE; i++) {
        he = pattern_hash[i];
        pattern_hash[i] = NULL;
        while (he != NULL) {
            ohe = he;
            he  = he->next;
            free(ohe->key);
            free(ohe->p_pcre);
            free(ohe->p_pcre_extra);
            free(ohe);
        }
    }
    return;
}

/*
 * The hashing function: a popular `times 33' hash over exactly keylen
 * bytes of key, reduced modulo HASH_SIZE. The key is not required to be
 * NUL-terminated at keylen, so the length (and not a terminator) bounds
 * the loop.
 */
static unsigned int
hash_func(
    const char *key,
    int keylen)
{
    unsigned int h;
    int i;

    h = 0xDEAD;
    for (i = 0; i < keylen; i++)
        h = ((((h<<5)+h)+(unsigned char)key[i]) % HASH_SIZE);
    return (h % HASH_SIZE);
}

/*
 * Cache a pattern under the key (key/keylen, opt).
 *
 * Returns TRUE if the entry was stored, in which case the cache owns
 * p_pcre and p_pcre_extra from now on, or FALSE if memory ran out, in
 * which case ownership stays with the caller.
 */
static int
pattern_cache(
    const char *key,
    int keylen,
    int opt,
    pcre *p_pcre,
    pcre_extra *p_pcre_extra)
{
    unsigned int h;
    struct hash_entry *he, *che;

    if ((he = malloc(sizeof(*he))) == NULL)
        return FALSE;
    /* one extra byte so that an empty key never requests zero bytes */
    if ((he->key = malloc((size_t)keylen + 1)) == NULL) {
        free(he);
        return FALSE;
    }
    memcpy(he->key, key, (size_t)keylen);
    he->key[keylen]  = NUL;
    he->next         = NULL;
    he->keylen       = keylen;
    he->opt          = opt;
    he->p_pcre       = p_pcre;
    he->p_pcre_extra = p_pcre_extra;

    /* append to the end of the bucket chain */
    h = hash_func(key, keylen);
    if (pattern_hash[h] == NULL)
        pattern_hash[h] = he;
    else {
        che = pattern_hash[h];
        while (che->next != NULL)
            che = che->next;
        che->next = he;
    }
    return TRUE;
}

/*
 * Lookup a pattern by (key/keylen, opt).
 *
 * On a hit *p_pcre and *p_pcre_extra receive the cached objects (which
 * remain owned by the cache); on a miss both are set to NULL.
 */
static void
pattern_lookup(
    const char *key,
    int keylen,
    int opt,
    pcre **p_pcre,
    pcre_extra **p_pcre_extra)
{
    struct hash_entry *he;

    *p_pcre = NULL;
    *p_pcre_extra = NULL;
    for (he = pattern_hash[hash_func(key, keylen)]; he != NULL; he = he->next) {
        /* memcmp() yields 0 for equal keys */
        if (   he->keylen == keylen
            && he->opt == opt
            && memcmp(he->key, key, (size_t)keylen) == 0) {
            *p_pcre = he->p_pcre;
            *p_pcre_extra = he->p_pcre_extra;
            return;
        }
    }
    return;
}

/*
 * Flush callback used for the length-calculation pass: adds the buffer
 * size stored in data[1].i to the running total in data[2].i and rewinds
 * the write position to the start of the buffer stored in data[0].p.
 * Always returns 0.
 */
static int
str_parse_flush_nop(
    str_vformat_t *sf)
{
    sf->data[2].i += sf->data[1].i;
    sf->curpos = (char *)sf->data[0].p;
    return 0;
}

/*
 * Flush callback used for the final output pass: always returns -1.
 * NOTE(review): presumably this tells str_vformat() that the output
 * buffer cannot be extended -- confirm against str_vformat().
 */
static int
str_parse_flush_str(
    str_vformat_t *sf)
{
    (void)sf;
    return -1;
}

/*
 * Format callback implementing the `%{N}R' expansion: for cFmt 'R' and a
 * single-digit cpExtinfo "N" it returns a pointer to captured substring N
 * inside the subject string (data[3].p) and stores its length in
 * *ipStrLen, using the offset vector in data[4].p and the capture count
 * in data[5].i. Returns NULL for everything else, including unset or
 * out-of-range captures and a missing offset vector.
 */
static char *
str_parse_format(
    str_vformat_t *sf,
    char *cpPrefix,
    char *cpPad,
    int *ipStrLen,
    char *cpBuf,
    int nBufLen,
    char *cpExtinfo,
    int cFmt,
    va_list ap)
{
    char *subject;
    int *cap_vec;
    int cap_num;
    int n;

    (void)cpPrefix;
    (void)cpPad;
    (void)cpBuf;
    (void)nBufLen;
    (void)ap;

    if (cFmt != 'R')
        return NULL;
    if (cpExtinfo == NULL || !str_isdigit(cpExtinfo[0]) || cpExtinfo[1] != NUL)
        return NULL;

    n       = cpExtinfo[0] - '0';
    subject = (char *)sf->data[3].p;
    cap_vec = (int *)sf->data[4].p;
    cap_num = sf->data[5].i;

    /* no offset vector exists when the pattern has no capture groups */
    if (cap_vec == NULL || n > cap_num)
        return NULL;
    if (cap_vec[(n*2)] == -1 || cap_vec[(n*2)+1] == -1)
        return NULL;

    *ipStrLen = (cap_vec[(n*2)+1] - cap_vec[(n*2)]);
    return (subject + cap_vec[(n*2)]);
}

/*
 * Length of captured substring i according to the offset vector cap_vec
 * filled in by a pcre_exec() call that returned n. Yields 0 when the
 * group is beyond the ones reported by the match, is unset, or is empty.
 */
static int
capture_length(
    const int *cap_vec,
    int n,
    int i)
{
    int k;

    if (i > (n-1))
        return 0;
    if (cap_vec[(i*2)] == -1 || cap_vec[(i*2)+1] == -1)
        return 0;
    k = (cap_vec[(i*2)+1] - cap_vec[(i*2)]);
    return (k > 0 ? k : 0);
}

/* the API parsing function: variable argument front-end to str_parse_va() */
int str_parse(const char *string, const char *pattern, ...)
{
    va_list ap;
    int rv;

    va_start(ap, pattern);
    rv = str_parse_va(string, pattern, ap);
    va_end(ap);
    return rv;
}

/*
 * Match string against pattern and deliver the results through the
 * pointers found in ap.
 *
 * pattern is one of
 *   m<d>regex<d>[imsxob]        match operation
 *   s<d>regex<d>subst<d>[imsxo] substitute operation
 *   regex                       plain match operation (fallback)
 * where <d> is an arbitrary delimiter character. Flags i, m, s and x
 * select the corresponding PCRE options, o keeps the compiled pattern in
 * an internal cache, and b (match only) places all captures into one
 * single result buffer.
 *
 * Arguments consumed from ap:
 *   match, multiple buffers: one `char **' per capture group; each
 *     non-NULL one receives an individually allocated copy of the
 *     captured text (or of "").
 *   match, single buffer (b): first a `char **' receiving the one
 *     allocated buffer, then one `char **' per capture group, each
 *     receiving a pointer into that buffer. If the first pointer is NULL
 *     the buffer cannot be released by the caller.
 *   substitute: one `char **' receiving the allocated result string in
 *     which every unescaped $N (N a digit) was replaced by capture N;
 *     any further arguments are handed on to str_vformat().
 *
 * Calling with string == NULL and pattern == NULL destroys the pattern
 * cache and returns 0.
 *
 * Returns 1 on a successful match, 0 if the string did not match, and
 * -1 on errors (bad arguments, unknown flag, pattern compilation
 * failure, out of memory, matching error).
 */
int str_parse_va(const char *string, const char *pattern, va_list ap)
{
    pcre *p_pcre = NULL;
    pcre_extra *p_pcre_extra = NULL;
    int cached = FALSE;      /* TRUE if the pattern cache owns p_pcre */
    const char *match_ptr;
    int match_len;
    int match_opt;
    int match_once;
    int match_1resbuf;
    const char *subst_ptr;
    const char *subst_end;
    const char *scp;
    int subst_len;
    int *cap_vec = NULL;
    int cap_num;
    int cap_len;
    char *cp;
    char *cp2;
    char **cpp;
    char cb[2];
    int n;
    int i;
    int k;
    int l;
    int ismop;
    int issop;
    int rv;
    char buf[128];
    char buf2[128];
    char *buf_ptr;
    char *buf_heap = NULL;   /* heap instance of buf_ptr, if any */
    str_vformat_t sf;
    va_list ap_temp;

    /*
     * Caching support
     */

    /* hash table initialization */
    if (!hash_initialized) {
        hash_init();
        atexit(hash_destroy);
        hash_initialized = TRUE;
    }

    /* hash table destruction */
    if (string == NULL && pattern == NULL) {
        hash_destroy();
        return 0;
    }

    /*
     * Check input parameters
     */
    if (string == NULL || pattern == NULL)
        return -1;

    /*
     * Parse pattern
     */
    match_ptr     = NULL;
    match_len     = 0;
    match_opt     = 0;
    match_once    = FALSE;
    match_1resbuf = FALSE;
    subst_ptr     = NULL;
    subst_len     = 0;
    ismop         = FALSE;
    issop         = FALSE;
    cp            = NULL; /* compiler happyness only */
    cp2           = NULL; /* compiler happyness only */

    /* determine type of pattern and remember important positions */
    if (*pattern == 'm' && str_len(pattern) >= 3)
        if ((cp = str_span(pattern, 0, "imsxob", STR_RIGHT)) > pattern+1)
            if (*(pattern+1) == *cp)
                ismop = TRUE;
    if (!ismop)
        if (*pattern == 's' && str_len(pattern) >= 4)
            if ((cp = str_span(pattern, 0, "imsxo", STR_RIGHT)) > pattern+1)
                if ((cb[0] = *cp, cb[1] = NUL,
                     cp2 = str_span(pattern, cp-pattern, cb, STR_RIGHT|STR_COMPLEMENT)) > pattern+1)
                    if (*(pattern+1) == *cp && *(pattern+1) == *cp2)
                        issop = TRUE;

    /* finish parsing */
    if (ismop) {
        /* pattern is a match operation */
        match_ptr = pattern + 2;
        match_len = cp - match_ptr;
        cp++;
        for (i = 0; cp[i] != NUL; i++) {
            switch (cp[i]) {
                case 'i': match_opt |= PCRE_CASELESS;  break;
                case 'm': match_opt |= PCRE_MULTILINE; break;
                case 's': match_opt |= PCRE_DOTALL;    break;
                case 'x': match_opt |= PCRE_EXTENDED;  break;
                case 'o': match_once = TRUE;           break;
                case 'b': match_1resbuf = TRUE;        break;
                default:
                    return -1;
            }
        }
    }
    else if (issop) {
        /* pattern is a substitute operation */
        match_ptr = pattern + 2;
        match_len = cp2 - match_ptr;
        subst_ptr = cp2 + 1;
        subst_len = cp - subst_ptr;
        cp++;
        for (i = 0; cp[i] != NUL; i++) {
            switch (cp[i]) {
                case 'i': match_opt |= PCRE_CASELESS;  break;
                case 'm': match_opt |= PCRE_MULTILINE; break;
                case 's': match_opt |= PCRE_DOTALL;    break;
                case 'x': match_opt |= PCRE_EXTENDED;  break;
                case 'o': match_once = TRUE;           break;
                default:
                    return -1;
            }
        }
    }
    else {
        /* fallback: treat pattern as a match operation */
        match_ptr = pattern;
        match_len = str_len(pattern);
        ismop = TRUE;
    }

    /*
     * Compile pattern into internal PCRE structure
     */
    if (match_once) {
        /* optimized processing: up to factor 15(!) for complex regular expressions */
        pattern_lookup(match_ptr, match_len, match_opt, &p_pcre, &p_pcre_extra);
        if (p_pcre != NULL)
            cached = TRUE;
        else {
            if (!pattern_compile(match_ptr, match_len, match_opt, &p_pcre, &p_pcre_extra))
                return -1;
            /* if caching fails the pattern stays ours and is released below */
            cached = pattern_cache(match_ptr, match_len, match_opt, p_pcre, p_pcre_extra);
        }
    }
    else {
        /* unoptimized processing */
        p_pcre_extra = NULL;
        if (!pattern_compile(match_ptr, match_len, match_opt, &p_pcre, NULL))
            return -1;
    }

    /*
     * Allocate storage for offset table of captured substrings
     */
    cap_len = 0;
    cap_num = pcre_info(p_pcre, NULL, NULL);
    if (cap_num > 0) {
        cap_len = (cap_num+1)*3;
        if ((cap_vec = malloc((size_t)cap_len * sizeof(int))) == NULL) {
            rv = -1;
            goto cleanup;
        }
        /* make every slot read as "unset" even if the matcher leaves
           trailing slots untouched */
        for (i = 0; i < cap_len; i++)
            cap_vec[i] = -1;
    }

    /*
     * Perform the matching operation
     */
    n = pcre_exec(p_pcre, p_pcre_extra, string, str_len(string), 0, 0, cap_vec, cap_len);
    if (n < 0) {
        rv = (n == PCRE_ERROR_NOMATCH ? 0 : -1);
        goto cleanup;
    }

    /*
     * Create either matching or substitution result
     */
    rv = 1;
    if (ismop && cap_num > 0) {
        /*
         * extract captured substrings into caller provided pointer variables
         */
        if (match_1resbuf) {
            /* use a single result buffer: every capture slot needs its
               text (if any) plus one terminating NUL */
            l = 0;
            for (i = 1; i <= cap_num; i++)
                l += capture_length(cap_vec, n, i) + 1;
            cpp = va_arg(ap, char **);
            if (cpp == NULL)
                cpp = &cp;
            if ((*cpp = malloc((size_t)l)) == NULL) {
                rv = -1;
                goto cleanup;
            }
            cp = *cpp;
            for (i = 1; i <= cap_num; i++) {
                cpp = va_arg(ap, char **);
                if (cpp == NULL)
                    continue;
                k = capture_length(cap_vec, n, i);
                *cpp = cp;
                if (k > 0) {
                    memcpy(cp, string+cap_vec[(i*2)], (size_t)k);
                    cp += k;
                }
                *cp++ = NUL;
            }
        }
        else {
            /* use multiple result buffers */
            for (i = 1; i <= cap_num; i++) {
                cpp = va_arg(ap, char **);
                if (cpp == NULL)
                    continue;
                k = capture_length(cap_vec, n, i);
                if (k > 0 && (*cpp = malloc((size_t)k + 1)) != NULL) {
                    memcpy(*cpp, string+cap_vec[(i*2)], (size_t)k);
                    (*cpp)[k] = NUL;
                }
                else
                    *cpp = strdup("");
            }
        }
    }
    else if (issop) {
        /*
         * create a substitutional string with optional expansions
         */
        subst_end = subst_ptr + subst_len;

        /* determine required buffer len: each unescaped $N (2 chars)
           becomes %{N}R (5 chars) */
        l = 0;
        for (scp = subst_ptr; scp < subst_end; scp++, l++) {
            if (   *scp == '$'
                && !(scp > subst_ptr && *(scp-1) == '\\')
                && scp < (subst_end-1) && str_isdigit(*(scp+1))) {
                scp += 1;
                l += 4;
            }
        }
        l++; /* NUL char */

        /* allocate temp buffer */
        if ((size_t)l <= sizeof(buf))
            buf_ptr = buf;
        else {
            if ((buf_heap = malloc((size_t)l)) == NULL) {
                rv = -1;
                goto cleanup;
            }
            buf_ptr = buf_heap;
        }

        /* copy subst string into temp buffer and replace $N with %{N}R */
        for (scp = subst_ptr, cp2 = buf_ptr; scp < subst_end; ) {
            if (   *scp == '$'
                && !(scp > subst_ptr && *(scp-1) == '\\')
                && scp < (subst_end-1) && str_isdigit(*(scp+1))) {
                *cp2++ = '%';
                *cp2++ = '{';
                *cp2++ = *(scp+1);
                *cp2++ = '}';
                *cp2++ = 'R';
                scp += 2;
                continue;
            }
            *cp2++ = *scp++;
        }
        *cp2 = NUL;

        /* remove output argument from varargs */
        cpp = va_arg(ap, char **);
        if (cpp == NULL) {
            /* nowhere to store the result */
            rv = -1;
            goto cleanup;
        }

        /* calculate output buffer requirement */
        sf.curpos    = buf2;
        sf.endpos    = buf2 + sizeof(buf2) - 1;
        sf.flush     = str_parse_flush_nop;
        sf.format    = str_parse_format;
        sf.data[0].p = buf2;
        sf.data[1].i = sizeof(buf2);
        sf.data[2].i = 0;
        sf.data[3].p = (char *)string;
        sf.data[4].p = cap_vec;
        sf.data[5].i = cap_num;
        va_copy(ap_temp, ap);
        l = str_vformat(&sf, buf_ptr, ap_temp);
        va_end(ap_temp);
        if (l < 0) {
            /* a negative length must never be used as buffer size/index */
            rv = -1;
            goto cleanup;
        }

        /* allocate output buffer */
        if ((*cpp = malloc((size_t)l + 1)) == NULL) {
            rv = -1; /* XXX */
            goto cleanup;
        }

        /* finally expand the substitutions string into output buffer */
        sf.curpos    = *cpp;
        sf.endpos    = *cpp + l;
        sf.flush     = str_parse_flush_str;
        sf.format    = str_parse_format;
        sf.data[3].p = (char *)string;
        sf.data[4].p = cap_vec;
        sf.data[5].i = cap_num;
        str_vformat(&sf, buf_ptr, ap);
        *((*cpp)+l) = NUL;
    }

    /* cleanup (shared by the success and all late error paths) */
cleanup:
    free(buf_heap);
    free(cap_vec);
    if (!cached) {
        /* cached patterns stay alive until hash_destroy() */
        free(p_pcre);
        free(p_pcre_extra);
    }
    return rv;
}