Index: ossp-pkg/rc/rc_pcre.c RCS File: /v/ossp/cvs/ossp-pkg/rc/rc_pcre.c,v co -q -kk -p'1.1' '/v/ossp/cvs/ossp-pkg/rc/rc_pcre.c,v' | diff -u /dev/null - -L'ossp-pkg/rc/rc_pcre.c' 2>/dev/null --- ossp-pkg/rc/rc_pcre.c +++ - 2024-05-02 15:46:11.916264582 +0200 @@ -0,0 +1,4288 @@ +/* + * Perl Compatible Regular Expression (PCRE) Library + * Copyright (c) 1997-2001 Philip Hazel + * Copyright (c) 1997-2001 University of Cambridge + * + * DO NOT EDIT THIS FILE, IT WAS AUTOMATICALLY GENERATED! + * + * This is an automatically generated, extremely stripped down + * version of the PCRE 3.9 library from the Philip Hazel. + * This version is still distributed under the same original PCRE + * Open Source license, but Philip Hazel is no longer responsible + * for this version. + */ + +/* + This is a library of functions to support regular expressions whose syntax + and semantics are as close as possible to those of the Perl 5 language. See + the file Tech.Notes for some information on the internals. + + Written by: Philip Hazel + + Copyright (c) 1997-2001 University of Cambridge + + ----------------------------------------------------------------------------- + Permission is granted to anyone to use this software for any purpose on any + computer system, and to redistribute it freely, subject to the following + restrictions: + + 1. This software is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. + + 2. The origin of this software must not be misrepresented, either by explicit + claim or by omission. + + 3. Altered versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 4. If PCRE is embedded in any software that is released under the GNU General + Purpose Licence (GPL), then the terms of that licence shall supersede any + condition above with which it is incompatible. 
+ ----------------------------------------------------------------------------- */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include "rc_pcre.h" + +#ifndef offsetof +#define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field)) +#endif + +#define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL) + +#define PCRE_FIRSTSET 0x40000000 +#define PCRE_REQCHSET 0x20000000 +#define PCRE_STARTLINE 0x10000000 +#define PCRE_INGROUP 0x08000000 +#define PCRE_ICHANGED 0x04000000 + +#define PCRE_STUDY_MAPPED 0x01 + +#define PUBLIC_OPTIONS \ + (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ + PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8) + +#define PUBLIC_EXEC_OPTIONS \ + (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY) + +#define PUBLIC_STUDY_OPTIONS 0 + +#define MAGIC_NUMBER 0x50435245UL + +typedef int BOOL; + +#ifndef FALSE +#define FALSE 0 +#endif +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef ESC_E +#define ESC_E 27 +#endif + +#ifndef ESC_F +#define ESC_F '\f' +#endif + +#ifndef ESC_N +#define ESC_N '\n' +#endif + +#ifndef ESC_R +#define ESC_R '\r' +#endif + +#ifndef ESC_T +#define ESC_T '\t' +#endif + +enum { ESC_A = 1, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w, + ESC_Z, ESC_z, ESC_REF +}; + +enum { + OP_END, + + OP_SOD, + OP_NOT_WORD_BOUNDARY, + OP_WORD_BOUNDARY, + OP_NOT_DIGIT, + OP_DIGIT, + OP_NOT_WHITESPACE, + OP_WHITESPACE, + OP_NOT_WORDCHAR, + OP_WORDCHAR, + OP_EODN, + OP_EOD, + + OP_OPT, + OP_CIRC, + OP_DOLL, + OP_ANY, + OP_CHARS, + OP_NOT, + + OP_STAR, + OP_MINSTAR, + OP_PLUS, + OP_MINPLUS, + OP_QUERY, + OP_MINQUERY, + OP_UPTO, + OP_MINUPTO, + OP_EXACT, + + OP_NOTSTAR, + OP_NOTMINSTAR, + OP_NOTPLUS, + OP_NOTMINPLUS, + OP_NOTQUERY, + OP_NOTMINQUERY, + OP_NOTUPTO, + OP_NOTMINUPTO, + OP_NOTEXACT, + + OP_TYPESTAR, + OP_TYPEMINSTAR, + OP_TYPEPLUS, + OP_TYPEMINPLUS, + OP_TYPEQUERY, + OP_TYPEMINQUERY, + OP_TYPEUPTO, + OP_TYPEMINUPTO, + 
OP_TYPEEXACT, + + OP_CRSTAR, + OP_CRMINSTAR, + OP_CRPLUS, + OP_CRMINPLUS, + OP_CRQUERY, + OP_CRMINQUERY, + OP_CRRANGE, + OP_CRMINRANGE, + + OP_CLASS, + OP_REF, + OP_RECURSE, + + OP_ALT, + OP_KET, + OP_KETRMAX, + OP_KETRMIN, + + OP_ASSERT, + OP_ASSERT_NOT, + OP_ASSERTBACK, + OP_ASSERTBACK_NOT, + OP_REVERSE, + + OP_ONCE, + OP_COND, + OP_CREF, + + OP_BRAZERO, + OP_BRAMINZERO, + + OP_BRANUMBER, + + OP_BRA +}; + +#define EXTRACT_BASIC_MAX 150 + +#define ERR1 "\\ at end of pattern" +#define ERR2 "\\c at end of pattern" +#define ERR3 "unrecognized character follows \\" +#define ERR4 "numbers out of order in {} quantifier" +#define ERR5 "number too big in {} quantifier" +#define ERR6 "missing terminating ] for character class" +#define ERR7 "invalid escape sequence in character class" +#define ERR8 "range out of order in character class" +#define ERR9 "nothing to repeat" +#define ERR10 "operand of unlimited repeat could match the empty string" +#define ERR11 "internal error: unexpected repeat" +#define ERR12 "unrecognized character after (?" 
/*
 * In-memory header of a compiled pattern.  The public "pcre *" handle is
 * cast to this type by pcre_info() and pcre_fullinfo().
 */
typedef struct pcre_st {
    /* Compared against MAGIC_NUMBER to validate a handle before use. */
    unsigned long int magic_number;
    /* Value reported for PCRE_INFO_SIZE. */
    size_t size;
    /* NOTE(review): presumably the character tables the pattern was compiled
       with; no use is visible in this part of the file -- confirm. */
    const unsigned char *tables;
    /* Option bits.  Callers only ever see the PUBLIC_OPTIONS subset; the
       internal PCRE_FIRSTSET / PCRE_REQCHSET / PCRE_STARTLINE flags are also
       tested on this field. */
    unsigned long int options;
    /* Value reported for PCRE_INFO_CAPTURECOUNT and returned by pcre_info(). */
    unsigned short int top_bracket;
    /* Value reported for PCRE_INFO_BACKREFMAX. */
    unsigned short int top_backref;
    /* Reported as the first character only when PCRE_FIRSTSET is set. */
    uschar first_char;
    /* Reported as the last literal only when PCRE_REQCHSET is set. */
    uschar req_char;
    /* NOTE(review): looks like the start of the compiled byte code, with the
       real allocation extending past this one element -- confirm against the
       allocation site. */
    uschar code[1];
} pcre_st;
uschar *end_subject; + const uschar *start_match; + const uschar *end_match_ptr; + int end_offset_top; +} match_data; + +#define ctype_space 0x01 +#define ctype_letter 0x02 +#define ctype_digit 0x04 +#define ctype_xdigit 0x08 +#define ctype_word 0x10 +#define ctype_meta 0x80 + +#define cbit_space 0 +#define cbit_xdigit 32 +#define cbit_digit 64 +#define cbit_upper 96 +#define cbit_lower 128 +#define cbit_word 160 +#define cbit_graph 192 +#define cbit_print 224 +#define cbit_punct 256 +#define cbit_cntrl 288 +#define cbit_length 320 + +#define lcc_offset 0 +#define fcc_offset 256 +#define cbits_offset 512 +#define ctypes_offset (cbits_offset + cbit_length) +#define tables_length (ctypes_offset + 256) + +#ifndef RC_PCRE_TAB + +#ifdef __cplusplus +#define class pcre_class +#endif + +#define BRASTACK_SIZE 200 + +#ifdef RC_PCRE_SUPPORT_UTF8 +#define MAXLIT 250 +#else +#define MAXLIT 255 +#endif + +static const char rep_min[] = { 0, 0, 1, 1, 0, 0 }; +static const char rep_max[] = { 0, 0, 0, 0, 1, 1 }; + +static const short int escapes[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, ':', ';', '<', '=', '>', '?', + '@', -ESC_A, -ESC_B, 0, -ESC_D, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, -ESC_S, 0, 0, 0, -ESC_W, + 0, 0, -ESC_Z, '[', '\\', ']', '^', '_', + '`', 7, -ESC_b, 0, -ESC_d, ESC_E, ESC_F, 0, + 0, 0, 0, 0, 0, 0, ESC_N, 0, + 0, 0, ESC_R, -ESC_s, ESC_T, 0, 0, -ESC_w, + 0, 0, -ESC_z +}; + +static const char *posix_names[] = { + "alpha", "lower", "upper", + "alnum", "ascii", "cntrl", "digit", "graph", + "print", "punct", "space", "word", "xdigit" +}; + +static const uschar posix_name_lengths[] = { + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 +}; + +static const int posix_class_maps[] = { + cbit_lower, cbit_upper, -1, + cbit_lower, -1, -1, + cbit_upper, -1, -1, + cbit_digit, cbit_lower, cbit_upper, + cbit_print, cbit_cntrl, -1, + cbit_cntrl, -1, -1, + cbit_digit, -1, -1, + cbit_graph, -1, -1, + cbit_print, -1, -1, + cbit_punct, -1, -1, + cbit_space, -1, -1, + cbit_word, -1, -1, 
+ cbit_xdigit, -1, -1 +}; + +static BOOL +compile_regex(int, int, int *, uschar **, const uschar **, const char **, + BOOL, int, int *, int *, compile_data *); + +typedef struct eptrblock { + struct eptrblock *prev; + const uschar *saved_eptr; +} eptrblock; + +#define match_condassert 0x01 +#define match_isgroup 0x02 + +void *(*pcre_malloc) (size_t) = malloc; +void (*pcre_free) (void *) = free; + +#ifndef RC_PCRE_SUPPORT_UTF8 +#define GETCHARINC(c, eptr) c = *eptr++; +#define GETCHARLEN(c, eptr, len) c = *eptr; +#define BACKCHAR(eptr) + +#else + +#define GETCHARINC(c, eptr) \ + c = *eptr++; \ + if (md->utf8 && (c & 0xc0) == 0xc0) \ + { \ + int a = utf8_table4[c & 0x3f]; \ + int s = 6*a; \ + c = (c & utf8_table3[a]) << s; \ + while (a-- > 0) \ + { \ + s -= 6; \ + c |= (*eptr++ & 0x3f) << s; \ + } \ + } + +#define GETCHARLEN(c, eptr, len) \ + c = *eptr; \ + len = 1; \ + if (md->utf8 && (c & 0xc0) == 0xc0) \ + { \ + int i; \ + int a = utf8_table4[c & 0x3f]; \ + int s = 6*a; \ + c = (c & utf8_table3[a]) << s; \ + for (i = 1; i <= a; i++) \ + { \ + s -= 6; \ + c |= (eptr[i] & 0x3f) << s; \ + } \ + len += a; \ + } + +#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--; + +#endif + +#include "rc_pcre.tab" + +#ifdef RC_PCRE_SUPPORT_UTF8 + +static int utf8_table1[] = + { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff }; + +static int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; +static int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01 }; + +static uschar utf8_table4[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 +}; + +static int ord2utf8(int cvalue, uschar * buffer) +{ + register int i, j; + for (i = 0; i < sizeof (utf8_table1) / sizeof (int); i++) + if (cvalue <= utf8_table1[i]) + break; + buffer += i; + for (j = i; j > 0; j--) { + *buffer-- = 0x80 | (cvalue & 0x3f); + cvalue >>= 6; + } + 
*buffer = utf8_table2[i] | cvalue; + return i + 1; +} +#endif + +#define STRING(a) # a +#define XSTRING(s) STRING(s) + +const char *pcre_version(void) +{ + return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE); +} + +int pcre_info(const pcre * external_re, int *optptr, int *first_char) +{ + const pcre_st *re = (const pcre_st *)external_re; + if (re == NULL) + return PCRE_ERROR_NULL; + if (re->magic_number != MAGIC_NUMBER) + return PCRE_ERROR_BADMAGIC; + if (optptr != NULL) + *optptr = (int)(re->options & PUBLIC_OPTIONS); + if (first_char != NULL) + *first_char = ((re->options & PCRE_FIRSTSET) != 0) ? re->first_char : + ((re->options & PCRE_STARTLINE) != 0) ? -1 : -2; + return re->top_bracket; +} + +int +pcre_fullinfo(const pcre * external_re, const pcre_extra * study_data, + int what, void *where) +{ + const pcre_st *re = (const pcre_st *)external_re; + const pcre_extra_st *study = (const pcre_extra_st *)study_data; + + if (re == NULL || where == NULL) + return PCRE_ERROR_NULL; + if (re->magic_number != MAGIC_NUMBER) + return PCRE_ERROR_BADMAGIC; + + switch (what) { + case PCRE_INFO_OPTIONS: + *((unsigned long int *)where) = re->options & PUBLIC_OPTIONS; + break; + + case PCRE_INFO_SIZE: + *((size_t *) where) = re->size; + break; + + case PCRE_INFO_CAPTURECOUNT: + *((int *)where) = re->top_bracket; + break; + + case PCRE_INFO_BACKREFMAX: + *((int *)where) = re->top_backref; + break; + + case PCRE_INFO_FIRSTCHAR: + *((int *)where) = + ((re->options & PCRE_FIRSTSET) != 0) ? re->first_char : + ((re->options & PCRE_STARTLINE) != 0) ? -1 : -2; + break; + + case PCRE_INFO_FIRSTTABLE: + *((const uschar **)where) = + (study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0) ? + study->start_bits : NULL; + break; + + case PCRE_INFO_LASTLITERAL: + *((int *)where) = + ((re->options & PCRE_REQCHSET) != 0) ? 
re->req_char : -1; + break; + + default: + return PCRE_ERROR_BADOPTION; + } + + return 0; +} + +static int +check_escape(const uschar ** ptrptr, const char **errorptr, int bracount, + int options, BOOL isclass, compile_data * cd) +{ + const uschar *ptr = *ptrptr; + int c, i; + + c = *(++ptr); + if (c == 0) + *errorptr = ERR1; + + else if (c < '0' || c > 'z') { + } + + else if ((i = escapes[c - '0']) != 0) + c = i; + + else { + const uschar *oldptr; + switch (c) { + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + + if (!isclass) { + oldptr = ptr; + c -= '0'; + while ((cd->ctypes[ptr[1]] & ctype_digit) != 0) + c = c * 10 + *(++ptr) - '0'; + if (c < 10 || c <= bracount) { + c = -(ESC_REF + c); + break; + } + ptr = oldptr; + } + + if ((c = *ptr) >= '8') { + ptr--; + c = 0; + break; + } + + case '0': + c -= '0'; + while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 && + ptr[1] != '8' && ptr[1] != '9') + c = c * 8 + *(++ptr) - '0'; + c &= 255; + break; + + case 'x': +#ifdef RC_PCRE_SUPPORT_UTF8 + if (ptr[1] == '{' && (options & PCRE_UTF8) != 0) { + const uschar *pt = ptr + 2; + register int count = 0; + c = 0; + while ((cd->ctypes[*pt] & ctype_xdigit) != 0) { + count++; + c = c * 16 + cd->lcc[*pt] - + (((cd->ctypes[*pt] & ctype_digit) != + 0) ? '0' : 'W'); + pt++; + } + if (*pt == '}') { + if (c < 0 || count > 8) + *errorptr = ERR34; + ptr = pt; + break; + } + } +#endif + + c = 0; + while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0) { + ptr++; + c = c * 16 + cd->lcc[*ptr] - + (((cd->ctypes[*ptr] & ctype_digit) != 0) ? 
'0' : 'W'); + } + break; + + case 'c': + c = *(++ptr); + if (c == 0) { + *errorptr = ERR2; + return 0; + } + + if (c >= 'a' && c <= 'z') + c = cd->fcc[c]; + c ^= 0x40; + break; + + default: + if ((options & PCRE_EXTRA) != 0) + switch (c) { + default: + *errorptr = ERR3; + break; + } + break; + } + } + + *ptrptr = ptr; + return c; +} + +static BOOL is_counted_repeat(const uschar * p, compile_data * cd) +{ + if ((cd->ctypes[*p++] & ctype_digit) == 0) + return FALSE; + while ((cd->ctypes[*p] & ctype_digit) != 0) + p++; + if (*p == '}') + return TRUE; + + if (*p++ != ',') + return FALSE; + if (*p == '}') + return TRUE; + + if ((cd->ctypes[*p++] & ctype_digit) == 0) + return FALSE; + while ((cd->ctypes[*p] & ctype_digit) != 0) + p++; + return (*p == '}'); +} + +static const uschar *read_repeat_counts(const uschar * p, int *minp, + int *maxp, const char **errorptr, + compile_data * cd) +{ + int min = 0; + int max = -1; + + while ((cd->ctypes[*p] & ctype_digit) != 0) + min = min * 10 + *p++ - '0'; + + if (*p == '}') + max = min; + else { + if (*(++p) != '}') { + max = 0; + while ((cd->ctypes[*p] & ctype_digit) != 0) + max = max * 10 + *p++ - '0'; + if (max < min) { + *errorptr = ERR4; + return p; + } + } + } + + if (min > 65535 || max > 65535) + *errorptr = ERR5; + else { + *minp = min; + *maxp = max; + } + return p; +} + +static int find_fixedlength(uschar * code, int options) +{ + int length = -1; + + register int branchlength = 0; + register uschar *cc = code + 3; + + for (;;) { + int d; + register int op = *cc; + if (op >= OP_BRA) + op = OP_BRA; + + switch (op) { + case OP_BRA: + case OP_ONCE: + case OP_COND: + d = find_fixedlength(cc, options); + if (d < 0) + return -1; + branchlength += d; + do + cc += (cc[1] << 8) + cc[2]; + while (*cc == OP_ALT); + cc += 3; + break; + + case OP_ALT: + case OP_KET: + case OP_KETRMAX: + case OP_KETRMIN: + case OP_END: + if (length < 0) + length = branchlength; + else if (length != branchlength) + return -1; + if (*cc != OP_ALT) + 
return length; + cc += 3; + branchlength = 0; + break; + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + do + cc += (cc[1] << 8) + cc[2]; + while (*cc == OP_ALT); + cc += 3; + break; + + case OP_REVERSE: + case OP_BRANUMBER: + case OP_CREF: + cc++; + case OP_OPT: + cc++; + case OP_SOD: + case OP_EOD: + case OP_EODN: + case OP_CIRC: + case OP_DOLL: + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + cc++; + break; + + case OP_CHARS: + branchlength += *(++cc); +#ifdef RC_PCRE_SUPPORT_UTF8 + for (d = 1; d <= *cc; d++) + if ((cc[d] & 0xc0) == 0x80) + branchlength--; +#endif + cc += *cc + 1; + break; + + case OP_EXACT: + case OP_TYPEEXACT: + branchlength += (cc[1] << 8) + cc[2]; + cc += 4; + break; + + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + case OP_ANY: + branchlength++; + cc++; + break; + + case OP_CLASS: + cc += 33; + + switch (*cc) { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRQUERY: + case OP_CRMINQUERY: + return -1; + + case OP_CRRANGE: + case OP_CRMINRANGE: + if ((cc[1] << 8) + cc[2] != (cc[3] << 8) + cc[4]) + return -1; + branchlength += (cc[1] << 8) + cc[2]; + cc += 5; + break; + + default: + branchlength++; + } + break; + + default: + return -1; + } + } +} + +static BOOL +check_posix_syntax(const uschar * ptr, const uschar ** endptr, + compile_data * cd) +{ + int terminator; + terminator = *(++ptr); + if (*(++ptr) == '^') + ptr++; + while ((cd->ctypes[*ptr] & ctype_letter) != 0) + ptr++; + if (*ptr == terminator && ptr[1] == ']') { + *endptr = ptr; + return TRUE; + } + return FALSE; +} + +static int check_posix_name(const uschar * ptr, int len) +{ + register int yield = 0; + while (posix_name_lengths[yield] != 0) { + if (len == posix_name_lengths[yield] && + strncmp((const char *)ptr, posix_names[yield], len) == 0) + return yield; + yield++; + } + return -1; +} + +static BOOL +compile_branch(int options, int *brackets, 
uschar ** codeptr, + const uschar ** ptrptr, const char **errorptr, int *optchanged, + int *reqchar, int *countlits, compile_data * cd) +{ + int repeat_type, op_type; + int repeat_min, repeat_max; + int bravalue, length; + int greedy_default, greedy_non_default; + int prevreqchar; + int condcount = 0; + int subcountlits = 0; + register int c; + register uschar *code = *codeptr; + uschar *tempcode; + const uschar *ptr = *ptrptr; + const uschar *tempptr; + uschar *previous = NULL; + uschar class[32]; + + greedy_default = ((options & PCRE_UNGREEDY) != 0); + greedy_non_default = greedy_default ^ 1; + + *reqchar = prevreqchar = -1; + *countlits = 0; + + for (;; ptr++) { + BOOL negate_class; + int class_charcount; + int class_lastchar; + int newoptions; + int skipbytes; + int subreqchar; + + c = *ptr; + if ((options & PCRE_EXTENDED) != 0) { + if ((cd->ctypes[c] & ctype_space) != 0) + continue; + if (c == '#') { + while ((c = *(++ptr)) != 0 && c != '\n') ; + continue; + } + } + + switch (c) { + case 0: + case '|': + case ')': + *codeptr = code; + *ptrptr = ptr; + return TRUE; + + case '^': + previous = NULL; + *code++ = OP_CIRC; + break; + + case '$': + previous = NULL; + *code++ = OP_DOLL; + break; + + case '.': + previous = code; + *code++ = OP_ANY; + break; + + case '[': + previous = code; + *code++ = OP_CLASS; + + if ((c = *(++ptr)) == '^') { + negate_class = TRUE; + c = *(++ptr); + } + else + negate_class = FALSE; + + class_charcount = 0; + class_lastchar = -1; + + memset(class, 0, 32 * sizeof (uschar)); + + do { + if (c == 0) { + *errorptr = ERR6; + goto FAILED; + } + + if (c == '[' && + (ptr[1] == ':' || ptr[1] == '.' 
|| ptr[1] == '=') && + check_posix_syntax(ptr, &tempptr, cd)) { + BOOL local_negate = FALSE; + int posix_class, i; + register const uschar *cbits = cd->cbits; + + if (ptr[1] != ':') { + *errorptr = ERR31; + goto FAILED; + } + + ptr += 2; + if (*ptr == '^') { + local_negate = TRUE; + ptr++; + } + + posix_class = check_posix_name(ptr, tempptr - ptr); + if (posix_class < 0) { + *errorptr = ERR30; + goto FAILED; + } + + if ((options & PCRE_CASELESS) != 0 + && posix_class <= 2) + posix_class = 0; + + posix_class *= 3; + for (i = 0; i < 3; i++) { + int taboffset = posix_class_maps[posix_class + i]; + if (taboffset < 0) + break; + if (local_negate) + for (c = 0; c < 32; c++) + class[c] |= ~cbits[c + taboffset]; + else + for (c = 0; c < 32; c++) + class[c] |= cbits[c + taboffset]; + } + + ptr = tempptr + 1; + class_charcount = 10; + continue; + } + + if (c == '\\') { + c = check_escape(&ptr, errorptr, *brackets, options, + TRUE, cd); + if (-c == ESC_b) + c = '\b'; + else if (c < 0) { + register const uschar *cbits = cd->cbits; + class_charcount = 10; + switch (-c) { + case ESC_d: + for (c = 0; c < 32; c++) + class[c] |= cbits[c + cbit_digit]; + continue; + + case ESC_D: + for (c = 0; c < 32; c++) + class[c] |= ~cbits[c + cbit_digit]; + continue; + + case ESC_w: + for (c = 0; c < 32; c++) + class[c] |= cbits[c + cbit_word]; + continue; + + case ESC_W: + for (c = 0; c < 32; c++) + class[c] |= ~cbits[c + cbit_word]; + continue; + + case ESC_s: + for (c = 0; c < 32; c++) + class[c] |= cbits[c + cbit_space]; + continue; + + case ESC_S: + for (c = 0; c < 32; c++) + class[c] |= ~cbits[c + cbit_space]; + continue; + + default: + *errorptr = ERR7; + goto FAILED; + } + } + +#ifdef RC_PCRE_SUPPORT_UTF8 + if (c > 255) { + *errorptr = ERR33; + goto FAILED; + } +#endif + } + + if (ptr[1] == '-' && ptr[2] != ']') { + int d; + ptr += 2; + d = *ptr; + + if (d == 0) { + *errorptr = ERR6; + goto FAILED; + } + + if (d == '\\') { + const uschar *oldptr = ptr; + d = check_escape(&ptr, errorptr, 
*brackets, + options, TRUE, cd); + +#ifdef RC_PCRE_SUPPORT_UTF8 + if (d > 255) { + *errorptr = ERR33; + goto FAILED; + } +#endif + if (d < 0) { + if (d == -ESC_b) + d = '\b'; + else { + ptr = oldptr - 2; + goto SINGLE_CHARACTER; + } + } + } + + if (d < c) { + *errorptr = ERR8; + goto FAILED; + } + + for (; c <= d; c++) { + class[c / 8] |= (1 << (c & 7)); + if ((options & PCRE_CASELESS) != 0) { + int uc = cd->fcc[c]; + class[uc / 8] |= (1 << (uc & 7)); + } + class_charcount++; + class_lastchar = c; + } + continue; + } + + SINGLE_CHARACTER: + + class[c / 8] |= (1 << (c & 7)); + if ((options & PCRE_CASELESS) != 0) { + c = cd->fcc[c]; + class[c / 8] |= (1 << (c & 7)); + } + class_charcount++; + class_lastchar = c; + } + + while ((c = *(++ptr)) != ']'); + + if (class_charcount == 1 && class_lastchar >= 0) { + if (negate_class) { + code[-1] = OP_NOT; + } + else { + code[-1] = OP_CHARS; + *code++ = 1; + } + *code++ = class_lastchar; + } + + else { + if (negate_class) + for (c = 0; c < 32; c++) + code[c] = ~class[c]; + else + memcpy(code, class, 32); + code += 32; + } + break; + + case '{': + if (!is_counted_repeat(ptr + 1, cd)) + goto NORMAL_CHAR; + ptr = + read_repeat_counts(ptr + 1, &repeat_min, &repeat_max, + errorptr, cd); + if (*errorptr != NULL) + goto FAILED; + goto REPEAT; + + case '*': + repeat_min = 0; + repeat_max = -1; + goto REPEAT; + + case '+': + repeat_min = 1; + repeat_max = -1; + goto REPEAT; + + case '?': + repeat_min = 0; + repeat_max = 1; + + REPEAT: + if (previous == NULL) { + *errorptr = ERR9; + goto FAILED; + } + + if (ptr[1] == '?') { + repeat_type = greedy_non_default; + ptr++; + } + else + repeat_type = greedy_default; + + if (*previous == OP_CHARS) { + int len = previous[1]; + + if (repeat_min == 0) + *reqchar = prevreqchar; + *countlits += repeat_min - 1; + + if (len == 1) { + c = previous[2]; + code = previous; + } + else { + c = previous[len + 1]; + previous[1]--; + code--; + } + op_type = 0; + goto OUTPUT_SINGLE_REPEAT; + } + + else if 
((int)*previous == OP_NOT) { + op_type = OP_NOTSTAR - OP_STAR; + c = previous[1]; + code = previous; + goto OUTPUT_SINGLE_REPEAT; + } + + else if ((int)*previous < OP_EODN || *previous == OP_ANY) { + op_type = OP_TYPESTAR - OP_STAR; + c = *previous; + code = previous; + + OUTPUT_SINGLE_REPEAT: + + if (repeat_max == 0) + goto END_REPEAT; + + repeat_type += op_type; + + if (repeat_min == 0) { + if (repeat_max == -1) + *code++ = OP_STAR + repeat_type; + else if (repeat_max == 1) + *code++ = OP_QUERY + repeat_type; + else { + *code++ = OP_UPTO + repeat_type; + *code++ = repeat_max >> 8; + *code++ = (repeat_max & 255); + } + } + + else if (repeat_min == 1 && repeat_max == -1) + *code++ = OP_PLUS + repeat_type; + + else { + if (repeat_min != 1) { + *code++ = OP_EXACT + op_type; + *code++ = repeat_min >> 8; + *code++ = (repeat_min & 255); + } + + else if (*previous == OP_CHARS) { + if (code == previous) + code += 2; + else + previous[1]++; + } + + else if (*previous == OP_NOT) + code++; + + if (repeat_max < 0) { + *code++ = c; + *code++ = OP_STAR + repeat_type; + } + + else if (repeat_max != repeat_min) { + *code++ = c; + repeat_max -= repeat_min; + *code++ = OP_UPTO + repeat_type; + *code++ = repeat_max >> 8; + *code++ = (repeat_max & 255); + } + } + + *code++ = c; + } + + else if (*previous == OP_CLASS || *previous == OP_REF) { + if (repeat_max == 0) { + code = previous; + goto END_REPEAT; + } + if (repeat_min == 0 && repeat_max == -1) + *code++ = OP_CRSTAR + repeat_type; + else if (repeat_min == 1 && repeat_max == -1) + *code++ = OP_CRPLUS + repeat_type; + else if (repeat_min == 0 && repeat_max == 1) + *code++ = OP_CRQUERY + repeat_type; + else { + *code++ = OP_CRRANGE + repeat_type; + *code++ = repeat_min >> 8; + *code++ = repeat_min & 255; + if (repeat_max == -1) + repeat_max = 0; + *code++ = repeat_max >> 8; + *code++ = repeat_max & 255; + } + } + + else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE + || (int)*previous == OP_COND) { + register int i; + 
int ketoffset = 0; + int len = code - previous; + uschar *bralink = NULL; + + if (repeat_max == -1) { + register uschar *ket = previous; + do + ket += (ket[1] << 8) + ket[2]; + while (*ket != OP_KET); + ketoffset = code - ket; + } + + if (repeat_min == 0) { + if (subcountlits > 0) { + *reqchar = prevreqchar; + *countlits -= subcountlits; + } + + if (repeat_max == 0) { + code = previous; + goto END_REPEAT; + } + + if (repeat_max <= 1) { + memmove(previous + 1, previous, len); + code++; + *previous++ = OP_BRAZERO + repeat_type; + } + + else { + int offset; + memmove(previous + 4, previous, len); + code += 4; + *previous++ = OP_BRAZERO + repeat_type; + *previous++ = OP_BRA; + + offset = + (bralink == NULL) ? 0 : previous - bralink; + bralink = previous; + *previous++ = offset >> 8; + *previous++ = offset & 255; + } + + repeat_max--; + } + + else { + for (i = 1; i < repeat_min; i++) { + memcpy(code, previous, len); + code += len; + } + if (repeat_max > 0) + repeat_max -= repeat_min; + } + + if (repeat_max >= 0) { + for (i = repeat_max - 1; i >= 0; i--) { + *code++ = OP_BRAZERO + repeat_type; + + if (i != 0) { + int offset; + *code++ = OP_BRA; + offset = + (bralink == NULL) ? 0 : code - bralink; + bralink = code; + *code++ = offset >> 8; + *code++ = offset & 255; + } + + memcpy(code, previous, len); + code += len; + } + + while (bralink != NULL) { + int oldlinkoffset; + int offset = code - bralink + 1; + uschar *bra = code - offset; + oldlinkoffset = (bra[1] << 8) + bra[2]; + bralink = + (oldlinkoffset == + 0) ? 
NULL : bralink - oldlinkoffset; + *code++ = OP_KET; + *code++ = bra[1] = offset >> 8; + *code++ = bra[2] = (offset & 255); + } + } + + else + code[-ketoffset] = OP_KETRMAX + repeat_type; + } + + else { + *errorptr = ERR11; + goto FAILED; + } + + END_REPEAT: + previous = NULL; + break; + + case '(': + newoptions = options; + skipbytes = 0; + + if (*(++ptr) == '?') { + int set, unset; + int *optset; + + switch (*(++ptr)) { + case '#': + ptr++; + while (*ptr != ')') + ptr++; + continue; + + case ':': + bravalue = OP_BRA; + ptr++; + break; + + case '(': + bravalue = OP_COND; + if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0) { + int condref = *ptr - '0'; + while (*(++ptr) != ')') + condref = condref * 10 + *ptr - '0'; + if (condref == 0) { + *errorptr = ERR35; + goto FAILED; + } + ptr++; + code[3] = OP_CREF; + code[4] = condref >> 8; + code[5] = condref & 255; + skipbytes = 3; + } + else + ptr--; + break; + + case '=': + bravalue = OP_ASSERT; + ptr++; + break; + + case '!': + bravalue = OP_ASSERT_NOT; + ptr++; + break; + + case '<': + switch (*(++ptr)) { + case '=': + bravalue = OP_ASSERTBACK; + ptr++; + break; + + case '!': + bravalue = OP_ASSERTBACK_NOT; + ptr++; + break; + + default: + *errorptr = ERR24; + goto FAILED; + } + break; + + case '>': + bravalue = OP_ONCE; + ptr++; + break; + + case 'R': + *code++ = OP_RECURSE; + ptr++; + continue; + + default: + set = unset = 0; + optset = &set; + + while (*ptr != ')' && *ptr != ':') { + switch (*ptr++) { + case '-': + optset = &unset; + break; + + case 'i': + *optset |= PCRE_CASELESS; + break; + case 'm': + *optset |= PCRE_MULTILINE; + break; + case 's': + *optset |= PCRE_DOTALL; + break; + case 'x': + *optset |= PCRE_EXTENDED; + break; + case 'U': + *optset |= PCRE_UNGREEDY; + break; + case 'X': + *optset |= PCRE_EXTRA; + break; + + default: + *errorptr = ERR12; + goto FAILED; + } + } + + newoptions = (options | set) & (~unset); + + if (*ptr == ')') { + if ((options & PCRE_INGROUP) != 0 && + (options & PCRE_IMS) != + 
(newoptions & PCRE_IMS)) { + *code++ = OP_OPT; + *code++ = *optchanged = + newoptions & PCRE_IMS; + } + options = newoptions; + previous = NULL; + continue; + } + + bravalue = OP_BRA; + ptr++; + } + } + + else { + if (++(*brackets) > EXTRACT_BASIC_MAX) { + bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1; + code[3] = OP_BRANUMBER; + code[4] = *brackets >> 8; + code[5] = *brackets & 255; + skipbytes = 3; + } + else + bravalue = OP_BRA + *brackets; + } + + previous = (bravalue >= OP_ONCE) ? code : NULL; + *code = bravalue; + tempcode = code; + + if (!compile_regex(options | PCRE_INGROUP, + ((options & PCRE_IMS) != + (newoptions & PCRE_IMS)) ? newoptions & + PCRE_IMS : -1, brackets, &tempcode, &ptr, + errorptr, (bravalue == OP_ASSERTBACK + || bravalue == + OP_ASSERTBACK_NOT), skipbytes, + &subreqchar, &subcountlits, cd)) + goto FAILED; + + else if (bravalue == OP_COND) { + uschar *tc = code; + condcount = 0; + + do { + condcount++; + tc += (tc[1] << 8) | tc[2]; + } + while (*tc != OP_KET); + + if (condcount > 2) { + *errorptr = ERR27; + goto FAILED; + } + } + + if (subreqchar > 0 && + (bravalue >= OP_BRA || bravalue == OP_ONCE + || bravalue == OP_ASSERT || (bravalue == OP_COND + && condcount == 2))) { + prevreqchar = *reqchar; + *reqchar = subreqchar; + if (bravalue != OP_ASSERT) + *countlits += subcountlits; + } + + code = tempcode; + + if (*ptr != ')') { + *errorptr = ERR14; + goto FAILED; + } + break; + + case '\\': + tempptr = ptr; + c = check_escape(&ptr, errorptr, *brackets, options, FALSE, + cd); + + if (c < 0) { + if (-c >= ESC_REF) { + int number = -c - ESC_REF; + previous = code; + *code++ = OP_REF; + *code++ = number >> 8; + *code++ = number & 255; + } + else { + previous = (-c > ESC_b && -c < ESC_Z) ? 
code : NULL; + *code++ = -c; + } + continue; + } + + ptr = tempptr; + c = '\\'; + + NORMAL_CHAR: + default: + previous = code; + *code = OP_CHARS; + code += 2; + length = 0; + + do { + if ((options & PCRE_EXTENDED) != 0) { + if ((cd->ctypes[c] & ctype_space) != 0) + continue; + if (c == '#') { + while ((c = *(++ptr)) != 0 && c != '\n') ; + if (c == 0) + break; + continue; + } + } + + if (c == '\\') { + tempptr = ptr; + c = check_escape(&ptr, errorptr, *brackets, options, + FALSE, cd); + if (c < 0) { + ptr = tempptr; + break; + } + +#ifdef RC_PCRE_SUPPORT_UTF8 + if (c > 127 && (options & PCRE_UTF8) != 0) { + uschar buffer[8]; + int len = ord2utf8(c, buffer); + for (c = 0; c < len; c++) + *code++ = buffer[c]; + length += len; + continue; + } +#endif + } + + *code++ = c; + length++; + } + + while (length < MAXLIT + && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0); + + prevreqchar = (length > 1) ? code[-2] : *reqchar; + *reqchar = code[-1]; + *countlits += length; + + previous[1] = length; + if (length < MAXLIT) + ptr--; + break; + } + } + + FAILED: + *ptrptr = ptr; + return FALSE; +} + +static BOOL +compile_regex(int options, int optchanged, int *brackets, uschar ** codeptr, + const uschar ** ptrptr, const char **errorptr, BOOL lookbehind, + int skipbytes, int *reqchar, int *countlits, compile_data * cd) +{ + const uschar *ptr = *ptrptr; + uschar *code = *codeptr; + uschar *last_branch = code; + uschar *start_bracket = code; + uschar *reverse_count = NULL; + int oldoptions = options & PCRE_IMS; + int branchreqchar, branchcountlits; + + *reqchar = -1; + *countlits = INT_MAX; + code += 3 + skipbytes; + + for (;;) { + int length; + + if (optchanged >= 0) { + *code++ = OP_OPT; + *code++ = optchanged; + options = (options & ~PCRE_IMS) | optchanged; + } + + if (lookbehind) { + *code++ = OP_REVERSE; + reverse_count = code; + *code++ = 0; + *code++ = 0; + } + + if (!compile_branch + (options, brackets, &code, &ptr, errorptr, &optchanged, + &branchreqchar, &branchcountlits, 
cd)) { + *ptrptr = ptr; + return FALSE; + } + + length = code - last_branch; + last_branch[1] = length >> 8; + last_branch[2] = length & 255; + + if (*reqchar != -2) { + if (branchreqchar >= 0) { + if (*reqchar == -1) + *reqchar = branchreqchar; + else if (*reqchar != branchreqchar) + *reqchar = -2; + } + else + *reqchar = -2; + } + + if (branchcountlits < *countlits) + *countlits = branchcountlits; + + if (lookbehind) { + *code = OP_END; + length = find_fixedlength(last_branch, options); + + if (length < 0) { + *errorptr = ERR25; + *ptrptr = ptr; + return FALSE; + } + reverse_count[0] = (length >> 8); + reverse_count[1] = length & 255; + } + + if (*ptr != '|') { + length = code - start_bracket; + *code++ = OP_KET; + *code++ = length >> 8; + *code++ = length & 255; + if (optchanged >= 0) { + *code++ = OP_OPT; + *code++ = oldoptions; + } + *codeptr = code; + *ptrptr = ptr; + return TRUE; + } + + *code = OP_ALT; + last_branch = code; + code += 3; + ptr++; + } +} + +static const uschar *first_significant_code(const uschar * code, int *options, + int optbit, BOOL optstop) +{ + for (;;) { + switch ((int)*code) { + case OP_OPT: + if (optbit > 0 + && ((int)code[1] & optbit) != (*options & optbit)) { + if (optstop) + return code; + *options = (int)code[1]; + } + code += 2; + break; + + case OP_CREF: + case OP_BRANUMBER: + code += 3; + break; + + case OP_WORD_BOUNDARY: + case OP_NOT_WORD_BOUNDARY: + code++; + break; + + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + do + code += (code[1] << 8) + code[2]; + while (*code == OP_ALT); + code += 3; + break; + + default: + return code; + } + } +} + +static BOOL is_anchored(register const uschar * code, int *options) +{ + do { + const uschar *scode = first_significant_code(code + 3, options, + PCRE_MULTILINE, FALSE); + register int op = *scode; + if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) { + if (!is_anchored(scode, options)) + return FALSE; + } + else if ((op == OP_TYPESTAR || op 
== OP_TYPEMINSTAR) && + (*options & PCRE_DOTALL) != 0) { + if (scode[1] != OP_ANY) + return FALSE; + } + else if (op != OP_SOD && + ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC)) + return FALSE; + code += (code[1] << 8) + code[2]; + } + while (*code == OP_ALT); + return TRUE; +} + +static BOOL is_startline(const uschar * code) +{ + do { + const uschar *scode = + first_significant_code(code + 3, NULL, 0, FALSE); + register int op = *scode; + if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) { + if (!is_startline(scode)) + return FALSE; + } + else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR) { + if (scode[1] != OP_ANY) + return FALSE; + } + else if (op != OP_CIRC) + return FALSE; + code += (code[1] << 8) + code[2]; + } + while (*code == OP_ALT); + return TRUE; +} + +static int find_firstchar(const uschar * code, int *options) +{ + register int c = -1; + do { + int d; + const uschar *scode = first_significant_code(code + 3, options, + PCRE_CASELESS, TRUE); + register int op = *scode; + + if (op >= OP_BRA) + op = OP_BRA; + + switch (op) { + default: + return -1; + + case OP_BRA: + case OP_ASSERT: + case OP_ONCE: + case OP_COND: + if ((d = find_firstchar(scode, options)) < 0) + return -1; + if (c < 0) + c = d; + else if (c != d) + return -1; + break; + + case OP_EXACT: + scode++; + + case OP_CHARS: + scode++; + + case OP_PLUS: + case OP_MINPLUS: + if (c < 0) + c = scode[1]; + else if (c != scode[1]) + return -1; + break; + } + + code += (code[1] << 8) + code[2]; + } + while (*code == OP_ALT); + return c; +} + +pcre *pcre_compile(const char *pattern, int options, const char **errorptr, + int *erroroffset, const unsigned char *tables) +{ + pcre_st *re; + int length = 3; + int runlength; + int c, reqchar, countlits; + int bracount = 0; + int top_backref = 0; + int branch_extra = 0; + int branch_newextra; + unsigned int brastackptr = 0; + size_t size; + uschar *code; + const uschar *ptr; + compile_data compile_block; + int 
brastack[BRASTACK_SIZE]; + uschar bralenstack[BRASTACK_SIZE]; + +#ifndef RC_PCRE_SUPPORT_UTF8 + if ((options & PCRE_UTF8) != 0) { + *errorptr = ERR32; + return NULL; + } +#endif + + if (errorptr == NULL) + return NULL; + *errorptr = NULL; + + if (erroroffset == NULL) { + *errorptr = ERR16; + return NULL; + } + *erroroffset = 0; + + if ((options & ~PUBLIC_OPTIONS) != 0) { + *errorptr = ERR17; + return NULL; + } + + if (tables == NULL) + tables = pcre_default_tables; + compile_block.lcc = tables + lcc_offset; + compile_block.fcc = tables + fcc_offset; + compile_block.cbits = tables + cbits_offset; + compile_block.ctypes = tables + ctypes_offset; + + ptr = (const uschar *)(pattern - 1); + while ((c = *(++ptr)) != 0) { + int min, max; + int class_charcount; + int bracket_length; + + if ((options & PCRE_EXTENDED) != 0) { + if ((compile_block.ctypes[c] & ctype_space) != 0) + continue; + if (c == '#') { + while ((c = *(++ptr)) != 0 && c != '\n') ; + continue; + } + } + + switch (c) { + case '\\': + { + const uschar *save_ptr = ptr; + c = check_escape(&ptr, errorptr, bracount, options, FALSE, + &compile_block); + if (*errorptr != NULL) + goto PCRE_ERROR_RETURN; + if (c >= 0) { + ptr = save_ptr; + c = '\\'; + goto NORMAL_CHAR; + } + } + length++; + + if (c <= -ESC_REF) { + int refnum = -c - ESC_REF; + if (refnum > top_backref) + top_backref = refnum; + length += 2; + if (ptr[1] == '{' + && is_counted_repeat(ptr + 2, &compile_block)) { + ptr = + read_repeat_counts(ptr + 2, &min, &max, errorptr, + &compile_block); + if (*errorptr != NULL) + goto PCRE_ERROR_RETURN; + if ((min == 0 && (max == 1 || max == -1)) || + (min == 1 && max == -1)) + length++; + else + length += 5; + if (ptr[1] == '?') + ptr++; + } + } + continue; + + case '^': + case '.': + case '$': + case '*': + case '+': + case '?': + length++; + continue; + + case '{': + if (!is_counted_repeat(ptr + 1, &compile_block)) + goto NORMAL_CHAR; + ptr = + read_repeat_counts(ptr + 1, &min, &max, errorptr, + &compile_block); 
+ if (*errorptr != NULL) + goto PCRE_ERROR_RETURN; + if ((min == 0 && (max == 1 || max == -1)) || + (min == 1 && max == -1)) + length++; + else { + length--; + if (min == 1) + length++; + else if (min > 0) + length += 4; + if (max > 0) + length += 4; + else + length += 2; + } + if (ptr[1] == '?') + ptr++; + continue; + + case '|': + length += 3 + branch_extra; + continue; + + case '[': + class_charcount = 0; + if (*(++ptr) == '^') + ptr++; + do { + if (*ptr == '\\') { + int ch = + check_escape(&ptr, errorptr, bracount, options, + TRUE, + &compile_block); + if (*errorptr != NULL) + goto PCRE_ERROR_RETURN; + if (-ch == ESC_b) + class_charcount++; + else + class_charcount = 10; + } + else + class_charcount++; + ptr++; + } + while (*ptr != 0 && *ptr != ']'); + + if (class_charcount == 1) + length += 3; + else { + length += 33; + + if (*ptr != 0 && ptr[1] == '{' + && is_counted_repeat(ptr + 2, &compile_block)) { + ptr = + read_repeat_counts(ptr + 2, &min, &max, errorptr, + &compile_block); + if (*errorptr != NULL) + goto PCRE_ERROR_RETURN; + if ((min == 0 && (max == 1 || max == -1)) || + (min == 1 && max == -1)) + length++; + else + length += 5; + if (ptr[1] == '?') + ptr++; + } + } + continue; + + case '(': + branch_newextra = 0; + bracket_length = 3; + + if (ptr[1] == '?') { + int set, unset; + int *optset; + + switch (c = ptr[2]) { + case '#': + ptr += 3; + while (*ptr != 0 && *ptr != ')') + ptr++; + if (*ptr == 0) { + *errorptr = ERR18; + goto PCRE_ERROR_RETURN; + } + continue; + + case ':': + case '=': + case '!': + case '>': + ptr += 2; + break; + + case 'R': + if (ptr[3] != ')') { + *errorptr = ERR29; + goto PCRE_ERROR_RETURN; + } + ptr += 3; + length += 1; + break; + + case '<': + if (ptr[3] == '=' || ptr[3] == '!') { + ptr += 3; + branch_newextra = 3; + length += 3; + break; + } + *errorptr = ERR24; + goto PCRE_ERROR_RETURN; + + case '(': + if ((compile_block. + ctypes[ptr[3]] & ctype_digit) != 0) { + ptr += 4; + length += 3; + while ((compile_block. 
+ ctypes[*ptr] & ctype_digit) != 0) + ptr++; + if (*ptr != ')') { + *errorptr = ERR26; + goto PCRE_ERROR_RETURN; + } + } + else { + ptr++; + if (ptr[2] != '?' || + (ptr[3] != '=' && ptr[3] != '!' + && ptr[3] != '<')) { + ptr += 2; + *errorptr = ERR28; + goto PCRE_ERROR_RETURN; + } + } + break; + + default: + set = unset = 0; + optset = &set; + ptr += 2; + + for (;; ptr++) { + c = *ptr; + switch (c) { + case 'i': + *optset |= PCRE_CASELESS; + continue; + + case 'm': + *optset |= PCRE_MULTILINE; + continue; + + case 's': + *optset |= PCRE_DOTALL; + continue; + + case 'x': + *optset |= PCRE_EXTENDED; + continue; + + case 'X': + *optset |= PCRE_EXTRA; + continue; + + case 'U': + *optset |= PCRE_UNGREEDY; + continue; + + case '-': + optset = &unset; + continue; + + case ')': + if (brastackptr == 0) { + options = + (options | set) & (~unset); + set = unset = 0; + } + + case ':': + if (((set | unset) & PCRE_IMS) != 0) { + length += 4; + branch_newextra = 2; + if (((set | unset) & + PCRE_CASELESS) != 0) + options |= PCRE_ICHANGED; + } + goto END_OPTIONS; + + default: + *errorptr = ERR12; + goto PCRE_ERROR_RETURN; + } + } + + END_OPTIONS: + if (c == ')') { + if (branch_newextra == 2 + && (branch_extra == 0 + || branch_extra == 3)) + branch_extra += branch_newextra; + continue; + } + + } + } + + else { + bracount++; + if (bracount > EXTRACT_BASIC_MAX) + bracket_length += 3; + } + + if (brastackptr >= sizeof (brastack) / sizeof (int)) { + *errorptr = ERR19; + goto PCRE_ERROR_RETURN; + } + + bralenstack[brastackptr] = branch_extra; + branch_extra = branch_newextra; + + brastack[brastackptr++] = length; + length += bracket_length; + continue; + + case ')': + length += 3; + { + int minval = 1; + int maxval = 1; + int duplength; + + if (brastackptr > 0) { + duplength = length - brastack[--brastackptr]; + branch_extra = bralenstack[brastackptr]; + } + else + duplength = 0; + + if ((c = ptr[1]) == '{' + && is_counted_repeat(ptr + 2, &compile_block)) { + ptr = + 
read_repeat_counts(ptr + 2, &minval, &maxval, + errorptr, &compile_block); + if (*errorptr != NULL) + goto PCRE_ERROR_RETURN; + } + else if (c == '*') { + minval = 0; + maxval = -1; + ptr++; + } + else if (c == '+') { + maxval = -1; + ptr++; + } + else if (c == '?') { + minval = 0; + ptr++; + } + + if (minval == 0) { + length++; + if (maxval > 0) + length += (maxval - 1) * (duplength + 7); + } + + else { + length += (minval - 1) * duplength; + if (maxval > minval) + length += (maxval - minval) * (duplength + 7) - 6; + } + } + continue; + + NORMAL_CHAR: + default: + length += 2; + runlength = 0; + do { + if ((options & PCRE_EXTENDED) != 0) { + if ((compile_block.ctypes[c] & ctype_space) != 0) + continue; + if (c == '#') { + while ((c = *(++ptr)) != 0 && c != '\n') ; + continue; + } + } + + if (c == '\\') { + const uschar *saveptr = ptr; + c = check_escape(&ptr, errorptr, bracount, options, + FALSE, &compile_block); + if (*errorptr != NULL) + goto PCRE_ERROR_RETURN; + if (c < 0) { + ptr = saveptr; + break; + } + +#ifdef RC_PCRE_SUPPORT_UTF8 + if (c > 127 && (options & PCRE_UTF8) != 0) { + int i; + for (i = 0; + i < sizeof (utf8_table1) / sizeof (int); i++) + if (c <= utf8_table1[i]) + break; + runlength += i; + } +#endif + } + + runlength++; + } + + while (runlength < MAXLIT && + (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == + 0); + + ptr--; + length += runlength; + continue; + } + } + + length += 4; + + if (length > 65539) { + *errorptr = ERR20; + return NULL; + } + + size = length + offsetof(pcre_st, code[0]); + re = (pcre_st *) (pcre_malloc) (size); + + if (re == NULL) { + *errorptr = ERR21; + return NULL; + } + + re->magic_number = MAGIC_NUMBER; + re->size = size; + re->options = options; + re->tables = tables; + + ptr = (const uschar *)pattern; + code = re->code; + *code = OP_BRA; + bracount = 0; + (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, + 0, &reqchar, &countlits, &compile_block); + re->top_bracket = bracount; + 
re->top_backref = top_backref; + + if (*errorptr == NULL && *ptr != 0) + *errorptr = ERR22; + + *code++ = OP_END; + if (code - re->code > length) + *errorptr = ERR23; + + if (top_backref > re->top_bracket) + *errorptr = ERR15; + + if (*errorptr != NULL) { + (pcre_free) (re); + PCRE_ERROR_RETURN: + *erroroffset = ptr - (const uschar *)pattern; + return NULL; + } + + if ((options & PCRE_ANCHORED) == 0) { + int temp_options = options; + if (is_anchored(re->code, &temp_options)) + re->options |= PCRE_ANCHORED; + else { + int ch = find_firstchar(re->code, &temp_options); + if (ch >= 0) { + re->first_char = ch; + re->options |= PCRE_FIRSTSET; + } + else if (is_startline(re->code)) + re->options |= PCRE_STARTLINE; + } + } + + if (reqchar >= 0 && (countlits > 1 || (re->options & PCRE_FIRSTSET) == 0)) { + re->req_char = reqchar; + re->options |= PCRE_REQCHSET; + } + + return (pcre *) re; +} + +static BOOL +match_ref(int offset, register const uschar * eptr, int length, + match_data * md, unsigned long int ims) +{ + const uschar *p = md->start_subject + md->offset_vector[offset]; + + if (length > md->end_subject - eptr) + return FALSE; + + if ((ims & PCRE_CASELESS) != 0) { + while (length-- > 0) + if (md->lcc[*p++] != md->lcc[*eptr++]) + return FALSE; + } + else { + while (length-- > 0) + if (*p++ != *eptr++) + return FALSE; + } + + return TRUE; +} + +static BOOL +match(register const uschar * eptr, register const uschar * ecode, + int offset_top, match_data * md, unsigned long int ims, + eptrblock * eptrb, int flags) +{ + unsigned long int original_ims = ims; + eptrblock newptrb; + + if ((flags & match_isgroup) != 0) { + newptrb.prev = eptrb; + newptrb.saved_eptr = eptr; + eptrb = &newptrb; + } + + for (;;) { + int op = (int)*ecode; + int min, max, ctype; + register int i; + register int c; + BOOL minimize = FALSE; + + if (op > OP_BRA) { + int offset; + int number = op - OP_BRA; + + if (number > EXTRACT_BASIC_MAX) + number = (ecode[4] << 8) | ecode[5]; + offset = number << 
1; + + if (offset < md->offset_max) { + int save_offset1 = md->offset_vector[offset]; + int save_offset2 = md->offset_vector[offset + 1]; + int save_offset3 = md->offset_vector[md->offset_end - number]; + + md->offset_vector[md->offset_end - number] = + eptr - md->start_subject; + + do { + if (match + (eptr, ecode + 3, offset_top, md, ims, eptrb, + match_isgroup)) + return TRUE; + ecode += (ecode[1] << 8) + ecode[2]; + } + while (*ecode == OP_ALT); + + md->offset_vector[offset] = save_offset1; + md->offset_vector[offset + 1] = save_offset2; + md->offset_vector[md->offset_end - number] = save_offset3; + + return FALSE; + } + + else + op = OP_BRA; + } + + switch (op) { + case OP_BRA: + + do { + if (match + (eptr, ecode + 3, offset_top, md, ims, eptrb, + match_isgroup)) + return TRUE; + ecode += (ecode[1] << 8) + ecode[2]; + } + while (*ecode == OP_ALT); + + return FALSE; + + case OP_COND: + if (ecode[3] == OP_CREF) { + int offset = (ecode[4] << 9) | (ecode[5] << 1); + return match(eptr, + ecode + + ((offset < offset_top + && md->offset_vector[offset] >= + 0) ? 
6 : 3 + (ecode[1] << 8) + ecode[2]), + offset_top, md, ims, eptrb, match_isgroup); + } + + else { + if (match(eptr, ecode + 3, offset_top, md, ims, NULL, + match_condassert | match_isgroup)) { + ecode += 3 + (ecode[4] << 8) + ecode[5]; + while (*ecode == OP_ALT) + ecode += (ecode[1] << 8) + ecode[2]; + } + else + ecode += (ecode[1] << 8) + ecode[2]; + return match(eptr, ecode + 3, offset_top, md, ims, eptrb, + match_isgroup); + } + + case OP_CREF: + case OP_BRANUMBER: + ecode += 3; + break; + + case OP_END: + if (md->notempty && eptr == md->start_match) + return FALSE; + md->end_match_ptr = eptr; + md->end_offset_top = offset_top; + return TRUE; + + case OP_OPT: + ims = ecode[1]; + ecode += 2; + + break; + + case OP_ASSERT: + case OP_ASSERTBACK: + do { + if (match + (eptr, ecode + 3, offset_top, md, ims, NULL, + match_isgroup)) + break; + ecode += (ecode[1] << 8) + ecode[2]; + } + while (*ecode == OP_ALT); + if (*ecode == OP_KET) + return FALSE; + + if ((flags & match_condassert) != 0) + return TRUE; + + do + ecode += (ecode[1] << 8) + ecode[2]; + while (*ecode == OP_ALT); + ecode += 3; + offset_top = md->end_offset_top; + continue; + + case OP_ASSERT_NOT: + case OP_ASSERTBACK_NOT: + do { + if (match + (eptr, ecode + 3, offset_top, md, ims, NULL, + match_isgroup)) + return FALSE; + ecode += (ecode[1] << 8) + ecode[2]; + } + while (*ecode == OP_ALT); + + if ((flags & match_condassert) != 0) + return TRUE; + + ecode += 3; + continue; + + case OP_REVERSE: +#ifdef RC_PCRE_SUPPORT_UTF8 + c = (ecode[1] << 8) + ecode[2]; + for (i = 0; i < c; i++) { + eptr--; + BACKCHAR(eptr) + } +#else + eptr -= (ecode[1] << 8) + ecode[2]; +#endif + + if (eptr < md->start_subject) + return FALSE; + ecode += 3; + break; + + case OP_RECURSE: + { + BOOL rc; + int *save; + int stacksave[15]; + + c = md->offset_max; + + if (c < 16) + save = stacksave; + else { + save = (int *)(pcre_malloc) ((c + 1) * sizeof (int)); + if (save == NULL) { + save = stacksave; + c = 15; + } + } + + for (i = 1; i 
<= c; i++) + save[i] = md->offset_vector[md->offset_end - i]; + rc = match(eptr, md->start_pattern, offset_top, md, ims, + eptrb, match_isgroup); + for (i = 1; i <= c; i++) + md->offset_vector[md->offset_end - i] = save[i]; + if (save != stacksave) + (pcre_free) (save); + if (!rc) + return FALSE; + + offset_top = md->end_offset_top; + eptr = md->end_match_ptr; + ecode++; + } + break; + + case OP_ONCE: + { + const uschar *prev = ecode; + const uschar *saved_eptr = eptr; + + do { + if (match + (eptr, ecode + 3, offset_top, md, ims, eptrb, + match_isgroup)) + break; + ecode += (ecode[1] << 8) + ecode[2]; + } + while (*ecode == OP_ALT); + + if (*ecode != OP_ONCE && *ecode != OP_ALT) + return FALSE; + + do + ecode += (ecode[1] << 8) + ecode[2]; + while (*ecode == OP_ALT); + + offset_top = md->end_offset_top; + eptr = md->end_match_ptr; + + if (*ecode == OP_KET || eptr == saved_eptr) { + ecode += 3; + break; + } + + if (ecode[3] == OP_OPT) { + ims = (ims & ~PCRE_IMS) | ecode[4]; + + } + + if (*ecode == OP_KETRMIN) { + if (match + (eptr, ecode + 3, offset_top, md, ims, eptrb, 0) + || match(eptr, prev, offset_top, md, ims, eptrb, + match_isgroup)) + return TRUE; + } + else { + if (match + (eptr, prev, offset_top, md, ims, eptrb, + match_isgroup) + || match(eptr, ecode + 3, offset_top, md, ims, + eptrb, 0)) + return TRUE; + } + } + return FALSE; + + case OP_ALT: + do + ecode += (ecode[1] << 8) + ecode[2]; + while (*ecode == OP_ALT); + break; + + case OP_BRAZERO: + { + const uschar *next = ecode + 1; + if (match + (eptr, next, offset_top, md, ims, eptrb, + match_isgroup)) + return TRUE; + do + next += (next[1] << 8) + next[2]; + while (*next == OP_ALT); + ecode = next + 3; + } + break; + + case OP_BRAMINZERO: + { + const uschar *next = ecode + 1; + do + next += (next[1] << 8) + next[2]; + while (*next == OP_ALT); + if (match + (eptr, next + 3, offset_top, md, ims, eptrb, + match_isgroup)) + return TRUE; + ecode++; + } + break; + + case OP_KET: + case OP_KETRMIN: + case 
OP_KETRMAX: + { + const uschar *prev = ecode - (ecode[1] << 8) - ecode[2]; + const uschar *saved_eptr = eptrb->saved_eptr; + + eptrb = eptrb->prev; + + if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || + *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT + || *prev == OP_ONCE) { + md->end_match_ptr = eptr; + md->end_offset_top = offset_top; + return TRUE; + } + + if (*prev != OP_COND) { + int offset; + int number = *prev - OP_BRA; + + if (number > EXTRACT_BASIC_MAX) + number = (prev[4] << 8) | prev[5]; + offset = number << 1; + + if (number > 0) { + if (offset >= md->offset_max) + md->offset_overflow = TRUE; + else { + md->offset_vector[offset] = + md->offset_vector[md->offset_end - + number]; + md->offset_vector[offset + 1] = + eptr - md->start_subject; + if (offset_top <= offset) + offset_top = offset + 2; + } + } + } + + ims = original_ims; + + if (*ecode == OP_KET || eptr == saved_eptr) { + ecode += 3; + break; + } + + if (*ecode == OP_KETRMIN) { + if (match + (eptr, ecode + 3, offset_top, md, ims, eptrb, 0) + || match(eptr, prev, offset_top, md, ims, eptrb, + match_isgroup)) + return TRUE; + } + else { + if (match + (eptr, prev, offset_top, md, ims, eptrb, + match_isgroup) + || match(eptr, ecode + 3, offset_top, md, ims, + eptrb, 0)) + return TRUE; + } + } + return FALSE; + + case OP_CIRC: + if (md->notbol && eptr == md->start_subject) + return FALSE; + if ((ims & PCRE_MULTILINE) != 0) { + if (eptr != md->start_subject && eptr[-1] != '\n') + return FALSE; + ecode++; + break; + } + + case OP_SOD: + if (eptr != md->start_subject) + return FALSE; + ecode++; + break; + + case OP_DOLL: + if ((ims & PCRE_MULTILINE) != 0) { + if (eptr < md->end_subject) { + if (*eptr != '\n') + return FALSE; + } + else { + if (md->noteol) + return FALSE; + } + ecode++; + break; + } + else { + if (md->noteol) + return FALSE; + if (!md->endonly) { + if (eptr < md->end_subject - 1 || + (eptr == md->end_subject - 1 && *eptr != '\n')) + return FALSE; + + ecode++; + break; + } + } + 
case OP_EOD: + if (eptr < md->end_subject) + return FALSE; + ecode++; + break; + + case OP_EODN: + if (eptr < md->end_subject - 1 || + (eptr == md->end_subject - 1 && *eptr != '\n')) + return FALSE; + ecode++; + break; + + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + { + BOOL prev_is_word = (eptr != md->start_subject) && + ((md->ctypes[eptr[-1]] & ctype_word) != 0); + BOOL cur_is_word = (eptr < md->end_subject) && + ((md->ctypes[*eptr] & ctype_word) != 0); + if ((*ecode++ == OP_WORD_BOUNDARY) ? + cur_is_word == prev_is_word : cur_is_word != + prev_is_word) + return FALSE; + } + break; + + case OP_ANY: + if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject + && *eptr == '\n') + return FALSE; + if (eptr++ >= md->end_subject) + return FALSE; +#ifdef RC_PCRE_SUPPORT_UTF8 + if (md->utf8) + while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) + eptr++; +#endif + ecode++; + break; + + case OP_NOT_DIGIT: + if (eptr >= md->end_subject || + (md->ctypes[*eptr++] & ctype_digit) != 0) + return FALSE; + ecode++; + break; + + case OP_DIGIT: + if (eptr >= md->end_subject || + (md->ctypes[*eptr++] & ctype_digit) == 0) + return FALSE; + ecode++; + break; + + case OP_NOT_WHITESPACE: + if (eptr >= md->end_subject || + (md->ctypes[*eptr++] & ctype_space) != 0) + return FALSE; + ecode++; + break; + + case OP_WHITESPACE: + if (eptr >= md->end_subject || + (md->ctypes[*eptr++] & ctype_space) == 0) + return FALSE; + ecode++; + break; + + case OP_NOT_WORDCHAR: + if (eptr >= md->end_subject || + (md->ctypes[*eptr++] & ctype_word) != 0) + return FALSE; + ecode++; + break; + + case OP_WORDCHAR: + if (eptr >= md->end_subject || + (md->ctypes[*eptr++] & ctype_word) == 0) + return FALSE; + ecode++; + break; + + case OP_REF: + { + int length; + int offset = (ecode[1] << 9) | (ecode[2] << 1); + ecode += 3; + + length = (offset >= offset_top + || md->offset_vector[offset] < + 0) ? 
md->end_subject - eptr + + 1 : md->offset_vector[offset + 1] - + md->offset_vector[offset]; + + switch (*ecode) { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + c = *ecode++ - OP_CRSTAR; + minimize = (c & 1) != 0; + min = rep_min[c]; + max = rep_max[c]; + if (max == 0) + max = INT_MAX; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + minimize = (*ecode == OP_CRMINRANGE); + min = (ecode[1] << 8) + ecode[2]; + max = (ecode[3] << 8) + ecode[4]; + if (max == 0) + max = INT_MAX; + ecode += 5; + break; + + default: + if (!match_ref(offset, eptr, length, md, ims)) + return FALSE; + eptr += length; + continue; + } + + if (length == 0) + continue; + + for (i = 1; i <= min; i++) { + if (!match_ref(offset, eptr, length, md, ims)) + return FALSE; + eptr += length; + } + + if (min == max) + continue; + + if (minimize) { + for (i = min;; i++) { + if (match + (eptr, ecode, offset_top, md, ims, eptrb, 0)) + return TRUE; + if (i >= max + || !match_ref(offset, eptr, length, md, ims)) + return FALSE; + eptr += length; + } + } + + else { + const uschar *pp = eptr; + for (i = min; i < max; i++) { + if (!match_ref(offset, eptr, length, md, ims)) + break; + eptr += length; + } + while (eptr >= pp) { + if (match + (eptr, ecode, offset_top, md, ims, eptrb, 0)) + return TRUE; + eptr -= length; + } + return FALSE; + } + } + + case OP_CLASS: + { + const uschar *data = ecode + 1; + ecode += 33; + + switch (*ecode) { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + c = *ecode++ - OP_CRSTAR; + minimize = (c & 1) != 0; + min = rep_min[c]; + max = rep_max[c]; + if (max == 0) + max = INT_MAX; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + minimize = (*ecode == OP_CRMINRANGE); + min = (ecode[1] << 8) + ecode[2]; + max = (ecode[3] << 8) + ecode[4]; + if (max == 0) + max = INT_MAX; + ecode += 5; + break; + + default: + min = max = 1; + break; + } 
+ + for (i = 1; i <= min; i++) { + if (eptr >= md->end_subject) + return FALSE; + GETCHARINC(c, eptr) +#ifdef RC_PCRE_SUPPORT_UTF8 + if (c > 255) + return FALSE; +#endif + + if ((data[c / 8] & (1 << (c & 7))) != 0) + continue; + return FALSE; + } + + if (min == max) + continue; + + if (minimize) { + for (i = min;; i++) { + if (match + (eptr, ecode, offset_top, md, ims, eptrb, 0)) + return TRUE; + if (i >= max || eptr >= md->end_subject) + return FALSE; + GETCHARINC(c, eptr) +#ifdef RC_PCRE_SUPPORT_UTF8 + if (c > 255) + return FALSE; +#endif + if ((data[c / 8] & (1 << (c & 7))) != 0) + continue; + return FALSE; + } + } + + else { + const uschar *pp = eptr; + int len = 1; + for (i = min; i < max; i++) { + if (eptr >= md->end_subject) + break; + GETCHARLEN(c, eptr, len) +#ifdef RC_PCRE_SUPPORT_UTF8 + if (c > 255) + break; +#endif + if ((data[c / 8] & (1 << (c & 7))) == 0) + break; + eptr += len; + } + + while (eptr >= pp) { + if (match + (eptr--, ecode, offset_top, md, ims, eptrb, + 0)) + return TRUE; + +#ifdef RC_PCRE_SUPPORT_UTF8 + BACKCHAR(eptr) +#endif + } + return FALSE; + } + } + + case OP_CHARS: + { + register int length = ecode[1]; + ecode += 2; + + if (length > md->end_subject - eptr) + return FALSE; + if ((ims & PCRE_CASELESS) != 0) { + while (length-- > 0) + if (md->lcc[*ecode++] != md->lcc[*eptr++]) + return FALSE; + } + else { + while (length-- > 0) + if (*ecode++ != *eptr++) + return FALSE; + } + } + break; + + case OP_EXACT: + min = max = (ecode[1] << 8) + ecode[2]; + ecode += 3; + goto REPEATCHAR; + + case OP_UPTO: + case OP_MINUPTO: + min = 0; + max = (ecode[1] << 8) + ecode[2]; + minimize = *ecode == OP_MINUPTO; + ecode += 3; + goto REPEATCHAR; + + case OP_STAR: + case OP_MINSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_QUERY: + case OP_MINQUERY: + c = *ecode++ - OP_STAR; + minimize = (c & 1) != 0; + min = rep_min[c]; + max = rep_max[c]; + if (max == 0) + max = INT_MAX; + + REPEATCHAR: + if (min > md->end_subject - eptr) + return FALSE; + c = 
*ecode++; + + if ((ims & PCRE_CASELESS) != 0) { + c = md->lcc[c]; + for (i = 1; i <= min; i++) + if (c != md->lcc[*eptr++]) + return FALSE; + if (min == max) + continue; + if (minimize) { + for (i = min;; i++) { + if (match + (eptr, ecode, offset_top, md, ims, eptrb, 0)) + return TRUE; + if (i >= max || eptr >= md->end_subject || + c != md->lcc[*eptr++]) + return FALSE; + } + } + else { + const uschar *pp = eptr; + for (i = min; i < max; i++) { + if (eptr >= md->end_subject + || c != md->lcc[*eptr]) + break; + eptr++; + } + while (eptr >= pp) + if (match + (eptr--, ecode, offset_top, md, ims, eptrb, + 0)) + return TRUE; + return FALSE; + } + } + + else { + for (i = 1; i <= min; i++) + if (c != *eptr++) + return FALSE; + if (min == max) + continue; + if (minimize) { + for (i = min;; i++) { + if (match + (eptr, ecode, offset_top, md, ims, eptrb, 0)) + return TRUE; + if (i >= max || eptr >= md->end_subject + || c != *eptr++) + return FALSE; + } + } + else { + const uschar *pp = eptr; + for (i = min; i < max; i++) { + if (eptr >= md->end_subject || c != *eptr) + break; + eptr++; + } + while (eptr >= pp) + if (match + (eptr--, ecode, offset_top, md, ims, eptrb, + 0)) + return TRUE; + return FALSE; + } + } + + case OP_NOT: + if (eptr >= md->end_subject) + return FALSE; + ecode++; + if ((ims & PCRE_CASELESS) != 0) { + if (md->lcc[*ecode++] == md->lcc[*eptr++]) + return FALSE; + } + else { + if (*ecode++ == *eptr++) + return FALSE; + } + break; + + case OP_NOTEXACT: + min = max = (ecode[1] << 8) + ecode[2]; + ecode += 3; + goto REPEATNOTCHAR; + + case OP_NOTUPTO: + case OP_NOTMINUPTO: + min = 0; + max = (ecode[1] << 8) + ecode[2]; + minimize = *ecode == OP_NOTMINUPTO; + ecode += 3; + goto REPEATNOTCHAR; + + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + c = *ecode++ - OP_NOTSTAR; + minimize = (c & 1) != 0; + min = rep_min[c]; + max = rep_max[c]; + if (max == 0) + max = INT_MAX; + + REPEATNOTCHAR: 
+ if (min > md->end_subject - eptr) + return FALSE; + c = *ecode++; + + if ((ims & PCRE_CASELESS) != 0) { + c = md->lcc[c]; + for (i = 1; i <= min; i++) + if (c == md->lcc[*eptr++]) + return FALSE; + if (min == max) + continue; + if (minimize) { + for (i = min;; i++) { + if (match + (eptr, ecode, offset_top, md, ims, eptrb, 0)) + return TRUE; + if (i >= max || eptr >= md->end_subject || + c == md->lcc[*eptr++]) + return FALSE; + } + } + else { + const uschar *pp = eptr; + for (i = min; i < max; i++) { + if (eptr >= md->end_subject + || c == md->lcc[*eptr]) + break; + eptr++; + } + while (eptr >= pp) + if (match + (eptr--, ecode, offset_top, md, ims, eptrb, + 0)) + return TRUE; + return FALSE; + } + } + + else { + for (i = 1; i <= min; i++) + if (c == *eptr++) + return FALSE; + if (min == max) + continue; + if (minimize) { + for (i = min;; i++) { + if (match + (eptr, ecode, offset_top, md, ims, eptrb, 0)) + return TRUE; + if (i >= max || eptr >= md->end_subject + || c == *eptr++) + return FALSE; + } + } + else { + const uschar *pp = eptr; + for (i = min; i < max; i++) { + if (eptr >= md->end_subject || c == *eptr) + break; + eptr++; + } + while (eptr >= pp) + if (match + (eptr--, ecode, offset_top, md, ims, eptrb, + 0)) + return TRUE; + return FALSE; + } + } + + case OP_TYPEEXACT: + min = max = (ecode[1] << 8) + ecode[2]; + minimize = TRUE; + ecode += 3; + goto REPEATTYPE; + + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + min = 0; + max = (ecode[1] << 8) + ecode[2]; + minimize = *ecode == OP_TYPEMINUPTO; + ecode += 3; + goto REPEATTYPE; + + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + c = *ecode++ - OP_TYPESTAR; + minimize = (c & 1) != 0; + min = rep_min[c]; + max = rep_max[c]; + if (max == 0) + max = INT_MAX; + + REPEATTYPE: + ctype = *ecode++; + + if (min > md->end_subject - eptr) + return FALSE; + if (min > 0) + switch (ctype) { + case OP_ANY: +#ifdef RC_PCRE_SUPPORT_UTF8 + if 
(md->utf8) { + for (i = 1; i <= min; i++) { + if (eptr >= md->end_subject || + (*eptr++ == '\n' + && (ims & PCRE_DOTALL) == 0)) + return FALSE; + while (eptr < md->end_subject + && (*eptr & 0xc0) == 0x80) + eptr++; + } + break; + } +#endif + if ((ims & PCRE_DOTALL) == 0) { + for (i = 1; i <= min; i++) + if (*eptr++ == '\n') + return FALSE; + } + else + eptr += min; + break; + + case OP_NOT_DIGIT: + for (i = 1; i <= min; i++) + if ((md->ctypes[*eptr++] & ctype_digit) != 0) + return FALSE; + break; + + case OP_DIGIT: + for (i = 1; i <= min; i++) + if ((md->ctypes[*eptr++] & ctype_digit) == 0) + return FALSE; + break; + + case OP_NOT_WHITESPACE: + for (i = 1; i <= min; i++) + if ((md->ctypes[*eptr++] & ctype_space) != 0) + return FALSE; + break; + + case OP_WHITESPACE: + for (i = 1; i <= min; i++) + if ((md->ctypes[*eptr++] & ctype_space) == 0) + return FALSE; + break; + + case OP_NOT_WORDCHAR: + for (i = 1; i <= min; i++) + if ((md->ctypes[*eptr++] & ctype_word) != 0) + return FALSE; + break; + + case OP_WORDCHAR: + for (i = 1; i <= min; i++) + if ((md->ctypes[*eptr++] & ctype_word) == 0) + return FALSE; + break; + } + + if (min == max) + continue; + + if (minimize) { + for (i = min;; i++) { + if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) + return TRUE; + if (i >= max || eptr >= md->end_subject) + return FALSE; + + c = *eptr++; + switch (ctype) { + case OP_ANY: + if ((ims & PCRE_DOTALL) == 0 && c == '\n') + return FALSE; +#ifdef RC_PCRE_SUPPORT_UTF8 + if (md->utf8) + while (eptr < md->end_subject + && (*eptr & 0xc0) == 0x80) + eptr++; +#endif + break; + + case OP_NOT_DIGIT: + if ((md->ctypes[c] & ctype_digit) != 0) + return FALSE; + break; + + case OP_DIGIT: + if ((md->ctypes[c] & ctype_digit) == 0) + return FALSE; + break; + + case OP_NOT_WHITESPACE: + if ((md->ctypes[c] & ctype_space) != 0) + return FALSE; + break; + + case OP_WHITESPACE: + if ((md->ctypes[c] & ctype_space) == 0) + return FALSE; + break; + + case OP_NOT_WORDCHAR: + if ((md->ctypes[c] & 
ctype_word) != 0) + return FALSE; + break; + + case OP_WORDCHAR: + if ((md->ctypes[c] & ctype_word) == 0) + return FALSE; + break; + } + } + } + + else { + const uschar *pp = eptr; + switch (ctype) { + case OP_ANY: + +#ifdef RC_PCRE_SUPPORT_UTF8 + if (md->utf8 && max < INT_MAX) { + if ((ims & PCRE_DOTALL) == 0) { + for (i = min; i < max; i++) { + if (eptr >= md->end_subject + || *eptr++ == '\n') + break; + while (eptr < md->end_subject + && (*eptr & 0xc0) == 0x80) + eptr++; + } + } + else { + for (i = min; i < max; i++) { + eptr++; + while (eptr < md->end_subject + && (*eptr & 0xc0) == 0x80) + eptr++; + } + } + break; + } +#endif + if ((ims & PCRE_DOTALL) == 0) { + for (i = min; i < max; i++) { + if (eptr >= md->end_subject + || *eptr == '\n') + break; + eptr++; + } + } + else { + c = max - min; + if (c > md->end_subject - eptr) + c = md->end_subject - eptr; + eptr += c; + } + break; + + case OP_NOT_DIGIT: + for (i = min; i < max; i++) { + if (eptr >= md->end_subject + || (md->ctypes[*eptr] & ctype_digit) != 0) + break; + eptr++; + } + break; + + case OP_DIGIT: + for (i = min; i < max; i++) { + if (eptr >= md->end_subject + || (md->ctypes[*eptr] & ctype_digit) == 0) + break; + eptr++; + } + break; + + case OP_NOT_WHITESPACE: + for (i = min; i < max; i++) { + if (eptr >= md->end_subject + || (md->ctypes[*eptr] & ctype_space) != 0) + break; + eptr++; + } + break; + + case OP_WHITESPACE: + for (i = min; i < max; i++) { + if (eptr >= md->end_subject + || (md->ctypes[*eptr] & ctype_space) == 0) + break; + eptr++; + } + break; + + case OP_NOT_WORDCHAR: + for (i = min; i < max; i++) { + if (eptr >= md->end_subject + || (md->ctypes[*eptr] & ctype_word) != 0) + break; + eptr++; + } + break; + + case OP_WORDCHAR: + for (i = min; i < max; i++) { + if (eptr >= md->end_subject + || (md->ctypes[*eptr] & ctype_word) == 0) + break; + eptr++; + } + break; + } + + while (eptr >= pp) { + if (match + (eptr--, ecode, offset_top, md, ims, eptrb, 0)) + return TRUE; +#ifdef 
/*
 * Match a compiled pattern against a subject string.
 *
 *   external_re     compiled pattern; must be non-NULL and carry MAGIC_NUMBER
 *   external_extra  optional study data (may be NULL); only its start_bits
 *                   map is consulted, and only if PCRE_STUDY_MAPPED is set
 *   subject         subject bytes; must be non-NULL
 *   length          number of bytes in the subject
 *   start_offset    offset into the subject at which matching starts
 *   options         exec-time option bits; any bit outside
 *                   PUBLIC_EXEC_OPTIONS is rejected
 *   offsets         vector receiving the match offsets; may be NULL only
 *                   when offsetcount is not positive
 *   offsetcount     number of ints available in offsets
 *
 * Returns, on a successful match, end_offset_top / 2 as left behind by
 * match() (presumably the number of offset pairs that were set -- confirm
 * against match()), or 0 if the offset vector overflowed or offsetcount is
 * less than 2.  Otherwise returns a negative PCRE_ERROR_* code: BADOPTION,
 * NULL, BADMAGIC, NOMEMORY, or whatever match() left in
 * match_block.errorcode (initialised to PCRE_ERROR_NOMATCH).
 *
 * NOTE(review): length and start_offset are not range-checked here; the
 * caller is trusted to pass values that lie within the subject.
 */
int
pcre_exec(const pcre * external_re, const pcre_extra * external_extra,
          const char *subject, int length, int start_offset, int options,
          int *offsets, int offsetcount)
{
    int resetcount, ocount;
    int first_char = -1;        /* -1 means "no known first character" */
    int req_char = -1;          /* -1 means "no known required character" */
    int req_char2 = -1;
    unsigned long int ims = 0;
    match_data match_block;
    const uschar *start_bits = NULL;
    const uschar *start_match = (const uschar *)subject + start_offset;
    const uschar *end_subject;
    /* Starts one below the first candidate so that the first "p >
       req_char_ptr" test in the main loop always succeeds. */
    const uschar *req_char_ptr = start_match - 1;
    const pcre_st *re = (const pcre_st *)external_re;
    const pcre_extra_st *extra = (const pcre_extra_st *)external_extra;
    BOOL using_temporary_offsets = FALSE;
    BOOL anchored;
    BOOL startline;

    /* Argument validation. */
    if ((options & ~PUBLIC_EXEC_OPTIONS) != 0)
        return PCRE_ERROR_BADOPTION;

    if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
        return PCRE_ERROR_NULL;
    if (re->magic_number != MAGIC_NUMBER)
        return PCRE_ERROR_BADMAGIC;

    /* Anchoring may be requested at compile time or at exec time;
       PCRE_STARTLINE is only taken from the compiled pattern. */
    anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
    startline = (re->options & PCRE_STARTLINE) != 0;

    /* Fill in the block that is handed down to match(). */
    match_block.start_pattern = re->code;
    match_block.start_subject = (const uschar *)subject;
    match_block.end_subject = match_block.start_subject + length;
    end_subject = match_block.end_subject;

    match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
    match_block.utf8 = (re->options & PCRE_UTF8) != 0;

    match_block.notbol = (options & PCRE_NOTBOL) != 0;
    match_block.noteol = (options & PCRE_NOTEOL) != 0;
    match_block.notempty = (options & PCRE_NOTEMPTY) != 0;

    match_block.errorcode = PCRE_ERROR_NOMATCH;

    match_block.lcc = re->tables + lcc_offset;
    match_block.ctypes = re->tables + ctypes_offset;

    /* Only the caseless/multiline/dotall bits travel with the match. */
    ims = re->options & (PCRE_CASELESS | PCRE_MULTILINE | PCRE_DOTALL);

    /* Round the usable vector size down to a multiple of three. */
    ocount = offsetcount - (offsetcount % 3);

    /* If the pattern has back references and the caller's vector cannot
       hold slots up to the highest referenced group, work in a temporary
       vector of top_backref * 3 + 3 ints instead. */
    if (re->top_backref > 0 && re->top_backref >= ocount / 3) {
        ocount = re->top_backref * 3 + 3;
        match_block.offset_vector =
            (int *)(pcre_malloc) (ocount * sizeof (int));
        if (match_block.offset_vector == NULL)
            return PCRE_ERROR_NOMEMORY;
        using_temporary_offsets = TRUE;

    }
    else
        match_block.offset_vector = offsets;

    match_block.offset_end = ocount;
    /* Two thirds of the vector; presumably the part match() may fill with
       captured offsets -- confirm against match(). */
    match_block.offset_max = (2 * ocount) / 3;
    match_block.offset_overflow = FALSE;

    /* Number of leading slots to reset before every match attempt: two per
       bracket plus two for the whole match, limited to the vector size. */
    resetcount = 2 + re->top_bracket * 2;
    if (resetcount > offsetcount)
        resetcount = ocount;

    /* One-off initialisation of the top of the vector: the slots from
       ocount - 1 down to ocount - resetcount / 2 + 1 are set to -1. */
    if (match_block.offset_vector != NULL) {
        register int *iptr = match_block.offset_vector + ocount;
        register int *iend = iptr - resetcount / 2 + 1;
        while (--iptr >= iend)
            *iptr = -1;
    }

    /* For unanchored patterns pick a fast pre-scan: a known first character
       (lower-cased through lcc when matching caselessly), or else the
       start-bit map from pcre_study() when the pattern is not
       line-start-only. */
    if (!anchored) {
        if ((re->options & PCRE_FIRSTSET) != 0) {
            first_char = re->first_char;
            if ((ims & PCRE_CASELESS) != 0)
                first_char = match_block.lcc[first_char];
        }
        else if (!startline && extra != NULL &&
                 (extra->options & PCRE_STUDY_MAPPED) != 0)
            start_bits = extra->start_bits;
    }

    /* A character that must occur somewhere in any match.  req_char2 is its
       counterpart from the table at fcc_offset (presumably the case-flipped
       form) when case may be ignored, otherwise the same character. */
    if ((re->options & PCRE_REQCHSET) != 0) {
        req_char = re->req_char;
        req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0) ?
            (re->tables + fcc_offset)[req_char] : req_char;
    }

    /* Main loop: one iteration per candidate start position. */
    do {
        int rc;
        register int *iptr = match_block.offset_vector;
        register int *iend = iptr + resetcount;

        /* Forget any offsets left over from a previous failed attempt. */
        while (iptr < iend)
            *iptr++ = -1;

        /* Advance to the next position that could start a match. */
        if (first_char >= 0) {
            if ((ims & PCRE_CASELESS) != 0)
                while (start_match < end_subject &&
                       match_block.lcc[*start_match] != first_char)
                    start_match++;
            else
                while (start_match < end_subject
                       && *start_match != first_char)
                    start_match++;
        }

        /* Line-start pattern: after the very first position, skip forward
           until just past a newline. */
        else if (startline) {
            if (start_match > match_block.start_subject + start_offset) {
                while (start_match < end_subject && start_match[-1] != '\n')
                    start_match++;
            }
        }

        /* Study data: skip bytes whose bit is clear in the 256-bit map. */
        else if (start_bits != NULL) {
            while (start_match < end_subject) {
                register int c = *start_match;
                if ((start_bits[c / 8] & (1 << (c & 7))) == 0)
                    start_match++;
                else
                    break;
            }
        }

        /* Required-character check.  The subject is scanned only beyond the
           position where the character was last found (req_char_ptr), so
           the same stretch is not re-scanned for every start position.  If
           the character does not occur at all, no match is possible. */
        if (req_char >= 0) {
            register const uschar *p =
                start_match + ((first_char >= 0) ? 1 : 0);

            if (p > req_char_ptr) {
                if (req_char == req_char2) {
                    while (p < end_subject) {
                        if (*p++ == req_char) {
                            p--;
                            break;
                        }
                    }
                }

                else {
                    while (p < end_subject) {
                        register int pp = *p++;
                        if (pp == req_char || pp == req_char2) {
                            p--;
                            break;
                        }
                    }
                }

                /* Ran off the end: leave the loop with NOMATCH. */
                if (p >= end_subject)
                    break;

                req_char_ptr = p;
            }
        }

        /* Try to match at this position; offset_top starts at 2 because
           slots 0 and 1 are filled in below for the whole match. */
        match_block.start_match = start_match;
        if (!match
            (start_match, re->code, 2, &match_block, ims, NULL,
             match_isgroup))
            continue;

        /* Success.  If a temporary vector was used, copy what fits (slots 2
           onwards) back to the caller's vector, note overflow, and release
           the temporary vector. */
        if (using_temporary_offsets) {
            if (offsetcount >= 4) {
                memcpy(offsets + 2, match_block.offset_vector + 2,
                       (offsetcount - 2) * sizeof (int));

            }
            if (match_block.end_offset_top > offsetcount)
                match_block.offset_overflow = TRUE;

            (pcre_free) (match_block.offset_vector);
        }

        rc = match_block.offset_overflow ? 0 : match_block.end_offset_top / 2;

        /* Slots 0 and 1 receive the start and end of the whole match as
           offsets from the start of the subject. */
        if (offsetcount < 2)
            rc = 0;
        else {
            offsets[0] = start_match - match_block.start_subject;
            offsets[1] =
                match_block.end_match_ptr - match_block.start_subject;
        }

        return rc;
    }

    /* Retry one byte further on, unless the pattern is anchored, match()
       reported something other than a plain no-match, or the subject is
       exhausted. */
    while (!anchored &&
           match_block.errorcode == PCRE_ERROR_NOMATCH &&
           start_match++ < end_subject);

    /* Failure path: release the temporary vector if one was allocated. */
    if (using_temporary_offsets) {

        (pcre_free) (match_block.offset_vector);
    }

    return match_block.errorcode;
}

/*
 * Set the bit for character c in the 256-bit map start_bits.  When caseless
 * is true and c is flagged as a letter in cd->ctypes, the bit for cd->fcc[c]
 * is set as well (fcc is presumably the case-flipping table -- confirm
 * against the table layout).
 */
static void
set_bit(uschar * start_bits, int c, BOOL caseless, compile_data * cd)
{
    start_bits[c / 8] |= (1 << (c & 7));
    if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
        start_bits[cd->fcc[c] / 8] |= (1 << (cd->fcc[c] & 7));
}
OP_PLUS: + case OP_MINPLUS: + set_bit(start_bits, tcode[1], caseless, cd); + try_next = FALSE; + break; + + case OP_NOT_DIGIT: + for (c = 0; c < 32; c++) + start_bits[c] |= ~cd->cbits[c + cbit_digit]; + try_next = FALSE; + break; + + case OP_DIGIT: + for (c = 0; c < 32; c++) + start_bits[c] |= cd->cbits[c + cbit_digit]; + try_next = FALSE; + break; + + case OP_NOT_WHITESPACE: + for (c = 0; c < 32; c++) + start_bits[c] |= ~cd->cbits[c + cbit_space]; + try_next = FALSE; + break; + + case OP_WHITESPACE: + for (c = 0; c < 32; c++) + start_bits[c] |= cd->cbits[c + cbit_space]; + try_next = FALSE; + break; + + case OP_NOT_WORDCHAR: + for (c = 0; c < 32; c++) + start_bits[c] |= ~cd->cbits[c + cbit_word]; + try_next = FALSE; + break; + + case OP_WORDCHAR: + for (c = 0; c < 32; c++) + start_bits[c] |= cd->cbits[c + cbit_word]; + try_next = FALSE; + break; + + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + tcode++; + break; + + case OP_TYPEEXACT: + tcode += 3; + break; + + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + tcode += 2; + + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + switch (tcode[1]) { + case OP_NOT_DIGIT: + for (c = 0; c < 32; c++) + start_bits[c] |= + ~cd->cbits[c + cbit_digit]; + break; + + case OP_DIGIT: + for (c = 0; c < 32; c++) + start_bits[c] |= + cd->cbits[c + cbit_digit]; + break; + + case OP_NOT_WHITESPACE: + for (c = 0; c < 32; c++) + start_bits[c] |= + ~cd->cbits[c + cbit_space]; + break; + + case OP_WHITESPACE: + for (c = 0; c < 32; c++) + start_bits[c] |= + cd->cbits[c + cbit_space]; + break; + + case OP_NOT_WORDCHAR: + for (c = 0; c < 32; c++) + start_bits[c] |= + ~cd->cbits[c + cbit_word]; + break; + + case OP_WORDCHAR: + for (c = 0; c < 32; c++) + start_bits[c] |= cd->cbits[c + cbit_word]; + break; + } + + tcode += 2; + break; + + case OP_CLASS: + { + tcode++; + for (c = 0; c < 32; c++) + start_bits[c] |= tcode[c]; + tcode += 32; + switch (*tcode) { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRQUERY: + 
case OP_CRMINQUERY: + tcode++; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + if (((tcode[1] << 8) + tcode[2]) == 0) + tcode += 5; + else + try_next = FALSE; + break; + + default: + try_next = FALSE; + break; + } + } + break; + + } + } + + code += (code[1] << 8) + code[2]; + } + while (*code == OP_ALT); + return TRUE; +} + +pcre_extra *pcre_study(const pcre * external_re, int options, + const char **errorptr) +{ + uschar start_bits[32]; + pcre_extra_st *extra; + const pcre_st *re = (const pcre_st *)external_re; + compile_data compile_block; + + *errorptr = NULL; + + if (re == NULL || re->magic_number != MAGIC_NUMBER) { + *errorptr = "argument is not a compiled regular expression"; + return NULL; + } + + if ((options & ~PUBLIC_STUDY_OPTIONS) != 0) { + *errorptr = "unknown or incorrect option bit(s) set"; + return NULL; + } + + if ((re->options & (PCRE_ANCHORED | PCRE_FIRSTSET | PCRE_STARTLINE)) != 0) + return NULL; + + compile_block.lcc = re->tables + lcc_offset; + compile_block.fcc = re->tables + fcc_offset; + compile_block.cbits = re->tables + cbits_offset; + compile_block.ctypes = re->tables + ctypes_offset; + + memset(start_bits, 0, 32 * sizeof (uschar)); + if (!set_start_bits + (re->code, start_bits, (re->options & PCRE_CASELESS) != 0, + &compile_block)) + return NULL; + + extra = (pcre_extra_st *) (pcre_malloc) (sizeof (pcre_extra_st)); + + if (extra == NULL) { + *errorptr = "failed to get memory"; + return NULL; + } + + extra->options = PCRE_STUDY_MAPPED; + memcpy(extra->start_bits, start_bits, sizeof (start_bits)); + + return (pcre_extra *) extra; +} + +int +pcre_copy_substring(const char *subject, int *ovector, int stringcount, + int stringnumber, char *buffer, int size) +{ + int yield; + if (stringnumber < 0 || stringnumber >= stringcount) + return PCRE_ERROR_NOSUBSTRING; + stringnumber *= 2; + yield = ovector[stringnumber + 1] - ovector[stringnumber]; + if (size < yield + 1) + return PCRE_ERROR_NOMEMORY; + memcpy(buffer, subject + 
ovector[stringnumber], yield); + buffer[yield] = 0; + return yield; +} + +int +pcre_get_substring_list(const char *subject, int *ovector, int stringcount, + const char ***listptr) +{ + int i; + int size = sizeof (char *); + int double_count = stringcount * 2; + char **stringlist; + char *p; + + for (i = 0; i < double_count; i += 2) + size += sizeof (char *) + ovector[i + 1] - ovector[i] + 1; + + stringlist = (char **)(pcre_malloc) (size); + if (stringlist == NULL) + return PCRE_ERROR_NOMEMORY; + + *listptr = (const char **)stringlist; + p = (char *)(stringlist + stringcount + 1); + + for (i = 0; i < double_count; i += 2) { + int len = ovector[i + 1] - ovector[i]; + memcpy(p, subject + ovector[i], len); + *stringlist++ = p; + p += len; + *p++ = 0; + } + + *stringlist = NULL; + return 0; +} + +void pcre_free_substring_list(const char **pointer) +{ + (pcre_free) ((void *)pointer); +} + +int +pcre_get_substring(const char *subject, int *ovector, int stringcount, + int stringnumber, const char **stringptr) +{ + int yield; + char *substring; + if (stringnumber < 0 || stringnumber >= stringcount) + return PCRE_ERROR_NOSUBSTRING; + stringnumber *= 2; + yield = ovector[stringnumber + 1] - ovector[stringnumber]; + substring = (char *)(pcre_malloc) (yield + 1); + if (substring == NULL) + return PCRE_ERROR_NOMEMORY; + memcpy(substring, subject + ovector[stringnumber], yield); + substring[yield] = 0; + *stringptr = substring; + return yield; +} + +void pcre_free_substring(const char *pointer) +{ + (pcre_free) ((void *)pointer); +} + +#endif + +const unsigned char *pcre_maketables(void) +{ + unsigned char *yield, *p; + int i; + +#ifndef RC_PCRE_TAB + yield = (unsigned char *)(pcre_malloc) (tables_length); +#else + yield = (unsigned char *)malloc(tables_length); +#endif + + if (yield == NULL) + return NULL; + p = yield; + + for (i = 0; i < 256; i++) + *p++ = tolower(i); + + for (i = 0; i < 256; i++) + *p++ = islower(i) ? 
toupper(i) : tolower(i); + + memset(p, 0, cbit_length); + for (i = 0; i < 256; i++) { + if (isdigit(i)) { + p[cbit_digit + i / 8] |= 1 << (i & 7); + p[cbit_word + i / 8] |= 1 << (i & 7); + } + if (isupper(i)) { + p[cbit_upper + i / 8] |= 1 << (i & 7); + p[cbit_word + i / 8] |= 1 << (i & 7); + } + if (islower(i)) { + p[cbit_lower + i / 8] |= 1 << (i & 7); + p[cbit_word + i / 8] |= 1 << (i & 7); + } + if (i == '_') + p[cbit_word + i / 8] |= 1 << (i & 7); + if (isspace(i)) + p[cbit_space + i / 8] |= 1 << (i & 7); + if (isxdigit(i)) + p[cbit_xdigit + i / 8] |= 1 << (i & 7); + if (isgraph(i)) + p[cbit_graph + i / 8] |= 1 << (i & 7); + if (isprint(i)) + p[cbit_print + i / 8] |= 1 << (i & 7); + if (ispunct(i)) + p[cbit_punct + i / 8] |= 1 << (i & 7); + if (iscntrl(i)) + p[cbit_cntrl + i / 8] |= 1 << (i & 7); + } + p += cbit_length; + + for (i = 0; i < 256; i++) { + int x = 0; + if (isspace(i)) + x += ctype_space; + if (isalpha(i)) + x += ctype_letter; + if (isdigit(i)) + x += ctype_digit; + if (isxdigit(i)) + x += ctype_xdigit; + if (isalnum(i) || i == '_') + x += ctype_word; + if (strchr("*+?{^.$|()[", i) != 0) + x += ctype_meta; + *p++ = x; + } + + return yield; +} + +#ifdef RC_PCRE_TAB + +#include +#include +#include + +int main(void) +{ + int i; + const unsigned char *tables = pcre_maketables(); + + printf("/*************************************************\n" + "* Perl-Compatible Regular Expressions *\n" + "*************************************************/\n\n" + "static unsigned char pcre_default_tables[] = {\n\n" + "/* This table is a lower casing table. */\n\n"); + + printf(" "); + for (i = 0; i < 256; i++) { + if ((i & 7) == 0 && i != 0) + printf("\n "); + printf("%3d", *tables++); + if (i != 255) + printf(","); + } + printf(",\n\n"); + + printf("/* This table is a case flipping table. 
*/\n\n"); + + printf(" "); + for (i = 0; i < 256; i++) { + if ((i & 7) == 0 && i != 0) + printf("\n "); + printf("%3d", *tables++); + if (i != 255) + printf(","); + } + printf(",\n\n"); + + printf("/* This table contains bit maps for various character classes.\n" + "Each map is 32 bytes long and the bits run from the least\n" + "significant end of each byte. The classes that have their own\n" + "maps are: space, xdigit, digit, upper, lower, word, graph\n" + "print, punct, and cntrl. Other classes are built from combinations. */\n\n"); + + printf(" "); + for (i = 0; i < cbit_length; i++) { + if ((i & 7) == 0 && i != 0) { + if ((i & 31) == 0) + printf("\n"); + printf("\n "); + } + printf("0x%02x", *tables++); + if (i != cbit_length - 1) + printf(","); + } + printf(",\n\n"); + + printf + ("/* This table identifies various classes of character by individual bits:\n" + " 0x%02x white space character\n" " 0x%02x letter\n" + " 0x%02x decimal digit\n" " 0x%02x hexadecimal digit\n" + " 0x%02x alphanumeric or '_'\n" + " 0x%02x regular expression metacharacter or binary zero\n*/\n\n", + ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word, + ctype_meta); + + printf(" "); + for (i = 0; i < 256; i++) { + if ((i & 7) == 0 && i != 0) { + printf(" \n "); + } + printf("0x%02x", *tables++); + if (i != 255) + printf(","); + } + + printf("};\n\n\n"); + + return 0; +} + +#endif Index: ossp-pkg/rc/rc_pcre.h RCS File: /v/ossp/cvs/ossp-pkg/rc/rc_pcre.h,v co -q -kk -p'1.1' '/v/ossp/cvs/ossp-pkg/rc/rc_pcre.h,v' | diff -u /dev/null - -L'ossp-pkg/rc/rc_pcre.h' 2>/dev/null --- ossp-pkg/rc/rc_pcre.h +++ - 2024-05-02 15:46:11.944664850 +0200 @@ -0,0 +1,141 @@ +/* + * Perl Compatible Regular Expression (PCRE) Library + * Copyright (c) 1997-2001 Philip Hazel + * Copyright (c) 1997-2001 University of Cambridge + * + * DO NOT EDIT THIS FILE, IT WAS AUTOMATICALLY GENERATED! 
+ * + * This is an automatically generated, extremely stripped down + * version of the PCRE 3.9 library from the Philip Hazel. + * This version is still distributed under the same original PCRE + * Open Source license, but Philip Hazel is no longer responsible + * for this version. + */ + +/* +This is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. See +the file Tech.Notes for some information on the internals. + +Written by: Philip Hazel + + Copyright (c) 1997-2001 University of Cambridge + +----------------------------------------------------------------------------- +Permission is granted to anyone to use this software for any purpose on any +computer system, and to redistribute it freely, subject to the following +restrictions: + +1. This software is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +2. The origin of this software must not be misrepresented, either by + explicit claim or by omission. + +3. Altered versions must be plainly marked as such, and must not be + misrepresented as being the original software. + +4. If PCRE is embedded in any software that is released under the GNU + General Purpose Licence (GPL), then the terms of that licence shall + supersede any condition above with which it is incompatible. 
+----------------------------------------------------------------------------- +*/ + +#ifndef __RC_PCRE_H__ +#define __RC_PCRE_H__ + +#define PCRE_PREFIX RC_ + +#define PCRE_MAJOR 3 +#define PCRE_MINOR 9 +#define PCRE_DATE 02-Jan-2002 + +#ifdef PCRE_PREFIX +#if defined(__STDC__) || defined(__cplusplus) +#define __PCRE_CONCAT(x,y) x ## y +#define PCRE_CONCAT(x,y) __PCRE_CONCAT(x,y) +#else +#define __PCRE_CONCAT(x) x +#define PCRE_CONCAT(x,y) __PCRE_CONCAT(x)y +#endif +#define pcre_malloc PCRE_CONCAT(PCRE_PREFIX,pcre_malloc) +#define pcre_free PCRE_CONCAT(PCRE_PREFIX,pcre_free) +#define pcre_compile PCRE_CONCAT(PCRE_PREFIX,pcre_compile) +#define pcre_copy_substring PCRE_CONCAT(PCRE_PREFIX,pcre_copy_substring) +#define pcre_exec PCRE_CONCAT(PCRE_PREFIX,pcre_exec) +#define pcre_free_substring PCRE_CONCAT(PCRE_PREFIX,pcre_free_substring) +#define pcre_free_substring_list PCRE_CONCAT(PCRE_PREFIX,pcre_free_substring_list) +#define pcre_get_substring PCRE_CONCAT(PCRE_PREFIX,pcre_get_substring) +#define pcre_get_substring_list PCRE_CONCAT(PCRE_PREFIX,pcre_get_substring_list) +#define pcre_info PCRE_CONCAT(PCRE_PREFIX,pcre_info) +#define pcre_fullinfo PCRE_CONCAT(PCRE_PREFIX,pcre_fullinfo) +#define pcre_maketables PCRE_CONCAT(PCRE_PREFIX,pcre_maketables) +#define pcre_study PCRE_CONCAT(PCRE_PREFIX,pcre_study) +#define pcre_version PCRE_CONCAT(PCRE_PREFIX,pcre_version) +#endif + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define PCRE_CASELESS 0x0001 +#define PCRE_MULTILINE 0x0002 +#define PCRE_DOTALL 0x0004 +#define PCRE_EXTENDED 0x0008 +#define PCRE_ANCHORED 0x0010 +#define PCRE_DOLLAR_ENDONLY 0x0020 +#define PCRE_EXTRA 0x0040 +#define PCRE_NOTBOL 0x0080 +#define PCRE_NOTEOL 0x0100 +#define PCRE_UNGREEDY 0x0200 +#define PCRE_NOTEMPTY 0x0400 +#define PCRE_UTF8 0x0800 + +#define PCRE_ERROR_NOMATCH (-1) +#define PCRE_ERROR_NULL (-2) +#define PCRE_ERROR_BADOPTION (-3) +#define PCRE_ERROR_BADMAGIC (-4) +#define PCRE_ERROR_UNKNOWN_NODE (-5) +#define 
PCRE_ERROR_NOMEMORY (-6) +#define PCRE_ERROR_NOSUBSTRING (-7) + +#define PCRE_INFO_OPTIONS 0 +#define PCRE_INFO_SIZE 1 +#define PCRE_INFO_CAPTURECOUNT 2 +#define PCRE_INFO_BACKREFMAX 3 +#define PCRE_INFO_FIRSTCHAR 4 +#define PCRE_INFO_FIRSTTABLE 5 +#define PCRE_INFO_LASTLITERAL 6 + +struct pcre_st; +struct pcre_extra_st; + +typedef struct pcre_st pcre; +typedef struct pcre_extra_st pcre_extra; + +extern void *(*pcre_malloc)(size_t); +extern void (*pcre_free)(void *); + +extern pcre *pcre_compile(const char *, int, const char **, int *, + const unsigned char *); +extern int pcre_copy_substring(const char *, int *, int, int, char *, int); +extern int pcre_exec(const pcre *, const pcre_extra *, const char *, + int, int, int, int *, int); +extern void pcre_free_substring(const char *); +extern void pcre_free_substring_list(const char **); +extern int pcre_get_substring(const char *, int *, int, int, const char **); +extern int pcre_get_substring_list(const char *, int *, int, const char ***); +extern int pcre_info(const pcre *, int *, int *); +extern int pcre_fullinfo(const pcre *, const pcre_extra *, int, void *); +extern const unsigned char *pcre_maketables(void); +extern pcre_extra *pcre_study(const pcre *, int, const char **); +extern const char *pcre_version(void); + +#ifdef __cplusplus +} +#endif + +#endif