ossp-pkg/l2/l2_ut_pcre.c
/*
* Perl Compatible Regular Expression (PCRE) Library
* Copyright (c) 1997-2001 Philip Hazel <ph10@cam.ac.uk>
* Copyright (c) 1997-2001 University of Cambridge
*
* DO NOT EDIT THIS FILE, IT WAS AUTOMATICALLY GENERATED!
*
* This is an automatically generated, extremely stripped down
* version of the PCRE 3.9 library by Philip Hazel.
* This version is still distributed under the same original PCRE
* Open Source license, but Philip Hazel is no longer responsible
* for this version.
*/
/*
This is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language. See
the file Tech.Notes for some information on the internals.
Written by: Philip Hazel <ph10@cam.ac.uk>
Copyright (c) 1997-2001 University of Cambridge
-----------------------------------------------------------------------------
Permission is granted to anyone to use this software for any purpose on any
computer system, and to redistribute it freely, subject to the following
restrictions:
1. This software is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.
2. The origin of this software must not be misrepresented, either by explicit
claim or by omission.
3. Altered versions must be plainly marked as such, and must not be
misrepresented as being the original software.
4. If PCRE is embedded in any software that is released under the GNU General
Purpose Licence (GPL), then the terms of that licence shall supersede any
condition above with which it is incompatible.
----------------------------------------------------------------------------- */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <ctype.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "l2_ut_pcre.h"
#ifndef offsetof
#define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))
#endif
/* The three option bits that an inline (?i), (?m) or (?s) setting can change;
 * the operand byte of OP_OPT holds exactly these bits. */
#define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL)
/* Private flag bits kept in the same options word as the public PCRE_* bits. */
/* Set when pcre_st.first_char holds a valid value (see pcre_info). */
#define PCRE_FIRSTSET 0x40000000
/* Set when pcre_st.req_char holds a valid value (see pcre_fullinfo). */
#define PCRE_REQCHSET 0x20000000
/* When set (and PCRE_FIRSTSET is not), pcre_info reports -1 rather than -2. */
#define PCRE_STARTLINE 0x10000000
/* Added by compile_branch to the options it passes down for a nested group. */
#define PCRE_INGROUP 0x08000000
/* NOTE(review): not referenced in the visible part of this file. */
#define PCRE_ICHANGED 0x04000000
/* Bit in pcre_extra_st.options: start_bits contains a usable bitmap. */
#define PCRE_STUDY_MAPPED 0x01
/* Mask applied to the options word before it is reported to callers. */
#define PUBLIC_OPTIONS \
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8)
/* NOTE(review): presumably the option bits accepted at match time; the user
 * of this mask is outside the visible part of the file. */
#define PUBLIC_EXEC_OPTIONS \
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY)
/* NOTE(review): presumably the option bits accepted when studying a pattern
 * (none); user is outside the visible part of the file. */
#define PUBLIC_STUDY_OPTIONS 0
/* 0x50435245 spells "PCRE" in ASCII; pcre_info and pcre_fullinfo reject any
 * block whose magic_number differs. */
#define MAGIC_NUMBER 0x50435245UL
/* Boolean type used throughout this file. */
typedef int BOOL;
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
/* Literal character values produced by \e, \f, \n, \r and \t; they appear as
 * positive entries in the escapes[] table. Each may be overridden from outside
 * because of the #ifndef guards. */
#ifndef ESC_E
#define ESC_E 27
#endif
#ifndef ESC_F
#define ESC_F '\f'
#endif
#ifndef ESC_N
#define ESC_N '\n'
#endif
#ifndef ESC_R
#define ESC_R '\r'
#endif
#ifndef ESC_T
#define ESC_T '\t'
#endif
/* Codes for escapes that are not plain characters. They are stored negated in
 * escapes[] and returned negated by check_escape.
 * The values ESC_A..ESC_z (1..11) line up with the opcodes OP_SOD..OP_EOD,
 * because compile_branch emits -c directly as the opcode.
 * ESC_REF must stay last: back reference n is returned as -(ESC_REF + n). */
enum { ESC_A = 1, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w,
ESC_Z, ESC_z, ESC_REF
};
/* Byte-code opcodes written by the compiler.
 * The ORDER of the enumerators is significant:
 *  - OP_SOD..OP_EOD have the same values as ESC_A..ESC_z;
 *  - each greedy repeat opcode is immediately followed by its minimizing
 *    twin, because the compiler adds 0 or 1 (repeat_type) to the greedy one;
 *  - the OP_NOT* and OP_TYPE* repeat families mirror the OP_STAR..OP_EXACT
 *    family, because the compiler adds (OP_NOTSTAR - OP_STAR) or
 *    (OP_TYPESTAR - OP_STAR) to select the family;
 *  - OP_BRA must be last: capturing bracket n is encoded as OP_BRA + n and
 *    recognised with "op >= OP_BRA". */
enum {
/* End of compiled pattern. */
OP_END,
/* Opcodes reachable directly from a negative escape code (\A \B \b \D \d
 * \S \s \W \w \Z \z, in that order). */
OP_SOD,
OP_NOT_WORD_BOUNDARY,
OP_WORD_BOUNDARY,
OP_NOT_DIGIT,
OP_DIGIT,
OP_NOT_WHITESPACE,
OP_WHITESPACE,
OP_NOT_WORDCHAR,
OP_WORDCHAR,
OP_EODN,
OP_EOD,
/* OP_OPT is followed by one byte holding the new PCRE_IMS bits. */
OP_OPT,
/* Emitted for '^', '$' and '.' respectively. */
OP_CIRC,
OP_DOLL,
OP_ANY,
/* OP_CHARS is followed by a one-byte length and that many literal bytes. */
OP_CHARS,
/* OP_NOT is followed by the single character that must not match. */
OP_NOT,
/* Repeats of a single literal character. */
OP_STAR,
OP_MINSTAR,
OP_PLUS,
OP_MINPLUS,
OP_QUERY,
OP_MINQUERY,
OP_UPTO,
OP_MINUPTO,
OP_EXACT,
/* Repeats of a single negated character (same layout as above). */
OP_NOTSTAR,
OP_NOTMINSTAR,
OP_NOTPLUS,
OP_NOTMINPLUS,
OP_NOTQUERY,
OP_NOTMINQUERY,
OP_NOTUPTO,
OP_NOTMINUPTO,
OP_NOTEXACT,
/* Repeats of a character type such as OP_DIGIT or OP_ANY (same layout). */
OP_TYPESTAR,
OP_TYPEMINSTAR,
OP_TYPEPLUS,
OP_TYPEMINPLUS,
OP_TYPEQUERY,
OP_TYPEMINQUERY,
OP_TYPEUPTO,
OP_TYPEMINUPTO,
OP_TYPEEXACT,
/* Repeat suffixes written after an OP_CLASS bitmap or an OP_REF. */
OP_CRSTAR,
OP_CRMINSTAR,
OP_CRPLUS,
OP_CRMINPLUS,
OP_CRQUERY,
OP_CRMINQUERY,
OP_CRRANGE,
OP_CRMINRANGE,
/* OP_CLASS is followed by a 32-byte bitmap; OP_REF by a 2-byte number. */
OP_CLASS,
OP_REF,
OP_RECURSE,
/* Group structure: OP_ALT starts a further alternative, OP_KET closes a
 * group; OP_KETRMAX / OP_KETRMIN replace OP_KET for unlimited repeats. */
OP_ALT,
OP_KET,
OP_KETRMAX,
OP_KETRMIN,
/* Assertions. These sort before OP_ONCE so that "bravalue >= OP_ONCE"
 * selects only the groups that may be followed by a quantifier. */
OP_ASSERT,
OP_ASSERT_NOT,
OP_ASSERTBACK,
OP_ASSERTBACK_NOT,
/* OP_REVERSE is followed by a 2-byte fixed length (lookbehind branches). */
OP_REVERSE,
OP_ONCE,
OP_COND,
/* OP_CREF is followed by a 2-byte group number, as is OP_BRANUMBER. */
OP_CREF,
OP_BRAZERO,
OP_BRAMINZERO,
OP_BRANUMBER,
/* Must remain the final enumerator (see above). */
OP_BRA
};
/* Highest capturing-bracket number that is encoded directly in the opcode as
 * OP_BRA + n. Higher numbers use OP_BRA + EXTRACT_BASIC_MAX + 1 followed by
 * an OP_BRANUMBER item carrying the real number. */
#define EXTRACT_BASIC_MAX 150
/* Compile-time error messages. The compile functions store one of these
 * through their errorptr argument when they fail. */
#define ERR1 "\\ at end of pattern"
#define ERR2 "\\c at end of pattern"
#define ERR3 "unrecognized character follows \\"
#define ERR4 "numbers out of order in {} quantifier"
#define ERR5 "number too big in {} quantifier"
#define ERR6 "missing terminating ] for character class"
#define ERR7 "invalid escape sequence in character class"
#define ERR8 "range out of order in character class"
#define ERR9 "nothing to repeat"
#define ERR10 "operand of unlimited repeat could match the empty string"
#define ERR11 "internal error: unexpected repeat"
#define ERR12 "unrecognized character after (?"
#define ERR13 "unused error"
#define ERR14 "missing )"
#define ERR15 "back reference to non-existent subpattern"
#define ERR16 "erroffset passed as NULL"
#define ERR17 "unknown option bit(s) set"
#define ERR18 "missing ) after comment"
#define ERR19 "parentheses nested too deeply"
#define ERR20 "regular expression too large"
#define ERR21 "failed to get memory"
#define ERR22 "unmatched parentheses"
#define ERR23 "internal error: code overflow"
#define ERR24 "unrecognized character after (?<"
#define ERR25 "lookbehind assertion is not fixed length"
#define ERR26 "malformed number after (?("
#define ERR27 "conditional group contains more than two branches"
#define ERR28 "assertion expected after (?("
#define ERR29 "(?p must be followed by )"
#define ERR30 "unknown POSIX class name"
#define ERR31 "POSIX collating elements are not supported"
#define ERR32 "this version of PCRE is not compiled with PCRE_UTF8 support"
#define ERR33 "characters with values > 255 are not yet supported in classes"
#define ERR34 "character value in \\x{...} sequence is too large"
#define ERR35 "invalid condition (?(0)"
/* Unsigned byte type used for pattern text, subject text and byte code. */
typedef unsigned char uschar;
/* Internal layout of the block that the public "pcre" pointer refers to. */
typedef struct pcre_st {
/* Must equal MAGIC_NUMBER; checked by pcre_info and pcre_fullinfo. */
unsigned long int magic_number;
/* Reported unchanged for PCRE_INFO_SIZE. */
size_t size;
/* NOTE(review): presumably the character tables used at compile time. */
const unsigned char *tables;
/* Public option bits plus the private PCRE_FIRSTSET/REQCHSET/... flags. */
unsigned long int options;
/* Returned by pcre_info and for PCRE_INFO_CAPTURECOUNT. */
unsigned short int top_bracket;
/* Returned for PCRE_INFO_BACKREFMAX. */
unsigned short int top_backref;
/* Meaningful only when PCRE_FIRSTSET is set in options. */
uschar first_char;
/* Meaningful only when PCRE_REQCHSET is set in options. */
uschar req_char;
/* NOTE(review): presumably the first byte of the compiled byte code, with the
 * rest allocated directly behind the struct - confirm in pcre_compile. */
uschar code[1];
} pcre_st;
/* Internal layout of the block that the public "pcre_extra" pointer refers to. */
typedef struct pcre_extra_st {
/* PCRE_STUDY_MAPPED is set here when start_bits is valid. */
uschar options;
/* 32 bytes = one bit per possible byte value. */
uschar start_bits[32];
} pcre_extra_st;
/* Pointers to the character tables used while compiling. */
typedef struct compile_data {
/* Lower-casing table (used when reading hexadecimal digits). */
const uschar *lcc;
/* Case table used to add the other-case character in caseless classes and
 * to convert a lower-case letter after \c. */
const uschar *fcc;
/* Class bitmaps, 32 bytes each, addressed with the cbit_* offsets. */
const uschar *cbits;
/* Per-character flag byte, tested with the ctype_* masks. */
const uschar *ctypes;
} compile_data;
/* NOTE(review): state block for the matcher; it is not referenced in the
 * visible part of this file, so the fields are left undocumented here. */
typedef struct match_data {
int errorcode;
int *offset_vector;
int offset_end;
int offset_max;
const uschar *lcc;
const uschar *ctypes;
BOOL offset_overflow;
BOOL notbol;
BOOL noteol;
BOOL utf8;
BOOL endonly;
BOOL notempty;
const uschar *start_pattern;
const uschar *start_subject;
const uschar *end_subject;
const uschar *start_match;
const uschar *end_match_ptr;
int end_offset_top;
} match_data;
/* Flag bits tested against entries of the ctypes table. */
#define ctype_space 0x01
#define ctype_letter 0x02
#define ctype_digit 0x04
#define ctype_xdigit 0x08
#define ctype_word 0x10
/* Marks characters that end a run of literal characters in compile_branch. */
#define ctype_meta 0x80
/* Byte offsets of the individual 32-byte class bitmaps inside the cbits
 * table; cbit_length is the total size (10 bitmaps of 32 bytes). */
#define cbit_space 0
#define cbit_xdigit 32
#define cbit_digit 64
#define cbit_upper 96
#define cbit_lower 128
#define cbit_word 160
#define cbit_graph 192
#define cbit_print 224
#define cbit_punct 256
#define cbit_cntrl 288
#define cbit_length 320
/* Offsets of the four sub-tables inside one contiguous table block:
 * lcc (256 bytes), fcc (256 bytes), cbits (cbit_length bytes), ctypes (256
 * bytes). tables_length is the size of the whole block. */
#define lcc_offset 0
#define fcc_offset 256
#define cbits_offset 512
#define ctypes_offset (cbits_offset + cbit_length)
#define tables_length (ctypes_offset + 256)
#ifndef L2_UT_PCRE_TAB
/* compile_branch uses "class" as a local variable name; rename it when this
 * file is compiled as C++, where "class" is a keyword. */
#ifdef __cplusplus
#define class pcre_class
#endif
/* NOTE(review): not referenced in the visible part of this file. */
#define BRASTACK_SIZE 200
/* Maximum number of bytes compile_branch puts into one OP_CHARS item (the
 * length is stored in a single byte). */
#ifdef L2_UT_PCRE_SUPPORT_UTF8
#define MAXLIT 250
#else
#define MAXLIT 255
#endif
/* NOTE(review): not referenced in the visible part of this file. */
static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
/* Escape lookup table, indexed by (character - '0') for '0' through 'z'.
 * A positive entry is the literal character the escape stands for, a negative
 * entry is a negated ESC_* code, and zero means check_escape must examine the
 * character itself (digits, \x, \c, and anything unrecognised). */
static const short int escapes[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, ':', ';', '<', '=', '>', '?',
'@', -ESC_A, -ESC_B, 0, -ESC_D, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, -ESC_S, 0, 0, 0, -ESC_W,
0, 0, -ESC_Z, '[', '\\', ']', '^', '_',
'`', 7, -ESC_b, 0, -ESC_d, ESC_E, ESC_F, 0,
0, 0, 0, 0, 0, 0, ESC_N, 0,
0, 0, ESC_R, -ESC_s, ESC_T, 0, 0, -ESC_w,
0, 0, -ESC_z
};
/* Names accepted inside [:...:]. The first three entries (alpha, lower,
 * upper) are deliberately first: compile_branch maps all of them to "alpha"
 * in caseless mode by testing "posix_class <= 2". */
static const char *posix_names[] = {
"alpha", "lower", "upper",
"alnum", "ascii", "cntrl", "digit", "graph",
"print", "punct", "space", "word", "xdigit"
};
/* Length of each name above, in the same order; the trailing 0 terminates
 * the search loop in check_posix_name. */
static const uschar posix_name_lengths[] = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0
};
/* Three entries per POSIX class, in the same order as posix_names: the cbit_*
 * offsets of the bitmaps to OR together, padded with -1. */
static const int posix_class_maps[] = {
cbit_lower, cbit_upper, -1,
cbit_lower, -1, -1,
cbit_upper, -1, -1,
cbit_digit, cbit_lower, cbit_upper,
cbit_print, cbit_cntrl, -1,
cbit_cntrl, -1, -1,
cbit_digit, -1, -1,
cbit_graph, -1, -1,
cbit_print, -1, -1,
cbit_punct, -1, -1,
cbit_space, -1, -1,
cbit_word, -1, -1,
cbit_xdigit, -1, -1
};
/* Forward declaration: compile_branch calls compile_regex for nested groups,
 * and compile_regex calls compile_branch for each alternative. */
static BOOL
compile_regex(int, int, int *, uschar **, const uschar **, const char **,
BOOL, int, int *, int *, compile_data *);
/* Singly linked list node holding a saved subject pointer.
 * NOTE(review): its users are outside the visible part of this file. */
typedef struct eptrblock {
struct eptrblock *prev;
const uschar *saved_eptr;
} eptrblock;
/* Flag bits. NOTE(review): their users are outside the visible part of this
 * file. */
#define match_condassert 0x01
#define match_isgroup 0x02
/* Non-static allocator function pointers, initialised to the C library's
 * malloc and free. */
void *(*pcre_malloc) (size_t) = malloc;
void (*pcre_free) (void *) = free;
/* Character-fetch macros.
 * Without UTF-8 support they read a single byte:
 *   GETCHARINC(c, eptr)      - c = next byte, eptr advanced by one;
 *   GETCHARLEN(c, eptr, len) - c = current byte, eptr and len left untouched;
 *   BACKCHAR(eptr)           - expands to nothing.
 * With UTF-8 support they additionally decode a multi-byte sequence when
 * md->utf8 is set and the first byte has its top two bits set. They therefore
 * require a variable named "md" in scope, expand to several statements (do
 * not use them as the sole body of an unbraced if/else), and rely on
 * utf8_table3 / utf8_table4 for the lead-byte mask and the number of
 * continuation bytes. GETCHARLEN sets len to the total byte count.
 * BACKCHAR steps eptr backwards over continuation bytes (10xxxxxx). */
#ifndef L2_UT_PCRE_SUPPORT_UTF8
#define GETCHARINC(c, eptr) c = *eptr++;
#define GETCHARLEN(c, eptr, len) c = *eptr;
#define BACKCHAR(eptr)
#else
#define GETCHARINC(c, eptr) \
c = *eptr++; \
if (md->utf8 && (c & 0xc0) == 0xc0) \
{ \
int a = utf8_table4[c & 0x3f]; \
int s = 6*a; \
c = (c & utf8_table3[a]) << s; \
while (a-- > 0) \
{ \
s -= 6; \
c |= (*eptr++ & 0x3f) << s; \
} \
}
#define GETCHARLEN(c, eptr, len) \
c = *eptr; \
len = 1; \
if (md->utf8 && (c & 0xc0) == 0xc0) \
{ \
int i; \
int a = utf8_table4[c & 0x3f]; \
int s = 6*a; \
c = (c & utf8_table3[a]) << s; \
for (i = 1; i <= a; i++) \
{ \
s -= 6; \
c |= (eptr[i] & 0x3f) << s; \
} \
len += a; \
}
#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--;
#endif
#include "l2_ut_pcre.tab"
#ifdef L2_UT_PCRE_SUPPORT_UTF8
/* Largest character value that fits in a UTF-8 sequence of 1, 2, ... 6 bytes;
 * ord2utf8 picks the first entry that is >= the value to encode. */
static int utf8_table1[] =
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff };
/* Marker bits OR-ed into the first byte, indexed by the number of
 * continuation bytes. */
static int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
/* Mask selecting the data bits of the first byte, indexed by the number of
 * continuation bytes. */
static int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
/* Number of continuation bytes, indexed by the low six bits of a first byte
 * whose top two bits are set. */
static uschar utf8_table4[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
};
/*
 * Encode a character value as a UTF-8 byte sequence.
 *
 *   cvalue  the character value to encode
 *   buffer  receives the encoded bytes (first byte at buffer[0])
 *
 * Returns the number of bytes written.
 */
static int ord2utf8(int cvalue, uschar * buffer)
{
    int extra = 0;              /* number of continuation bytes needed */
    int k;

    /* Find the shortest encoding whose range covers cvalue. */
    while (extra < sizeof (utf8_table1) / sizeof (int)
           && cvalue > utf8_table1[extra])
        extra++;

    /* Fill the continuation bytes from the last one back to the second,
     * six payload bits at a time. */
    for (k = extra; k > 0; k--) {
        buffer[k] = 0x80 | (cvalue & 0x3f);
        cvalue >>= 6;
    }

    /* What is left of the value goes into the first byte with its marker. */
    buffer[0] = utf8_table2[extra] | cvalue;
    return extra + 1;
}
#endif
/* Two-level stringification so that the argument is macro-expanded first. */
#define STRING(a) # a
#define XSTRING(s) STRING(s)
/*
 * Return the library version as a static string of the form
 * "<major>.<minor> <date>", built from PCRE_MAJOR, PCRE_MINOR and PCRE_DATE.
 */
const char *pcre_version(void)
{
    static const char version_text[] =
        XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);

    return version_text;
}
/*
 * Report basic facts about a compiled pattern.
 *
 *   external_re  the compiled pattern
 *   optptr       if not NULL, receives the public option bits
 *   first_char   if not NULL, receives the fixed first character, or -1 when
 *                PCRE_STARTLINE is set, or -2 otherwise
 *
 * Returns the highest capturing bracket number, PCRE_ERROR_NULL when
 * external_re is NULL, or PCRE_ERROR_BADMAGIC when the magic number is wrong.
 */
int pcre_info(const pcre * external_re, int *optptr, int *first_char)
{
    const pcre_st *compiled = (const pcre_st *)external_re;

    if (compiled == NULL)
        return PCRE_ERROR_NULL;
    if (compiled->magic_number != MAGIC_NUMBER)
        return PCRE_ERROR_BADMAGIC;

    if (optptr != NULL)
        *optptr = (int)(compiled->options & PUBLIC_OPTIONS);

    if (first_char != NULL) {
        if ((compiled->options & PCRE_FIRSTSET) != 0)
            *first_char = compiled->first_char;
        else if ((compiled->options & PCRE_STARTLINE) != 0)
            *first_char = -1;
        else
            *first_char = -2;
    }

    return compiled->top_bracket;
}
/*
 * Retrieve a single item of information about a compiled pattern.
 *
 *   external_re  the compiled pattern
 *   study_data   optional study block (may be NULL)
 *   what         one of the PCRE_INFO_* selectors
 *   where        address of the variable that receives the answer; its type
 *                depends on the selector (unsigned long, size_t, int or
 *                const unsigned char *)
 *
 * Returns 0 on success, PCRE_ERROR_NULL when external_re or where is NULL,
 * PCRE_ERROR_BADMAGIC when the magic number is wrong, and
 * PCRE_ERROR_BADOPTION for an unknown selector.
 */
int
pcre_fullinfo(const pcre * external_re, const pcre_extra * study_data,
              int what, void *where)
{
    const pcre_st *compiled = (const pcre_st *)external_re;
    const pcre_extra_st *extra = (const pcre_extra_st *)study_data;

    if (compiled == NULL || where == NULL)
        return PCRE_ERROR_NULL;
    if (compiled->magic_number != MAGIC_NUMBER)
        return PCRE_ERROR_BADMAGIC;

    if (what == PCRE_INFO_OPTIONS) {
        unsigned long int *out = (unsigned long int *)where;
        *out = compiled->options & PUBLIC_OPTIONS;
    }
    else if (what == PCRE_INFO_SIZE) {
        size_t *out = (size_t *) where;
        *out = compiled->size;
    }
    else if (what == PCRE_INFO_CAPTURECOUNT) {
        int *out = (int *)where;
        *out = compiled->top_bracket;
    }
    else if (what == PCRE_INFO_BACKREFMAX) {
        int *out = (int *)where;
        *out = compiled->top_backref;
    }
    else if (what == PCRE_INFO_FIRSTCHAR) {
        int *out = (int *)where;
        /* Same encoding as pcre_info: the character, or -1, or -2. */
        if ((compiled->options & PCRE_FIRSTSET) != 0)
            *out = compiled->first_char;
        else if ((compiled->options & PCRE_STARTLINE) != 0)
            *out = -1;
        else
            *out = -2;
    }
    else if (what == PCRE_INFO_FIRSTTABLE) {
        const uschar **out = (const uschar **)where;
        /* The bitmap is only handed out when a study block marks it valid. */
        if (extra != NULL && (extra->options & PCRE_STUDY_MAPPED) != 0)
            *out = extra->start_bits;
        else
            *out = NULL;
    }
    else if (what == PCRE_INFO_LASTLITERAL) {
        int *out = (int *)where;
        if ((compiled->options & PCRE_REQCHSET) != 0)
            *out = compiled->req_char;
        else
            *out = -1;
    }
    else {
        return PCRE_ERROR_BADOPTION;
    }

    return 0;
}
/*
 * Interpret the escape sequence that starts at the backslash *ptrptr.
 *
 *   ptrptr    points to the pointer at the backslash; on return it points to
 *             the last character consumed by the escape
 *   errorptr  receives an error message on failure (left untouched otherwise)
 *   bracount  number of capturing brackets opened so far
 *   options   current option bits
 *   isclass   TRUE when called from inside a character class
 *   cd        character tables
 *
 * Returns a non-negative value for a literal character, a negated ESC_* code
 * for a special escape, or -(ESC_REF + n) for a back reference to group n.
 *
 * Numeric accumulation is guarded against signed overflow. A decimal number
 * too large to be a back reference is treated like any other number greater
 * than bracount (octal / literal interpretation), and an over-large \x{...}
 * value reports ERR34 and yields 0.
 */
static int
check_escape(const uschar ** ptrptr, const char **errorptr, int bracount,
             int options, BOOL isclass, compile_data * cd)
{
    const uschar *ptr = *ptrptr;
    int c, i;

    c = *(++ptr);
    if (c == 0)
        *errorptr = ERR1;
    else if (c < '0' || c > 'z') {
        /* Outside the table range: the character stands for itself. */
    }
    else if ((i = escapes[c - '0']) != 0)
        c = i;
    else {
        /* The table entry was zero, so i == 0 here; the octal and hex loops
         * below rely on that as their digit counter. */
        switch (c) {
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            /* Outside a class, a decimal number is a back reference when it
             * is below 10 or refers to a group that has already been opened. */
            if (!isclass) {
                const uschar *oldptr = ptr;
                BOOL too_big = FALSE;

                c -= '0';
                while ((cd->ctypes[ptr[1]] & ctype_digit) != 0) {
                    int digit = ptr[1] - '0';

                    /* Stop before c * 10 + digit (or ESC_REF + c below)
                     * could exceed INT_MAX. */
                    if (c > (INT_MAX - ESC_REF - digit) / 10) {
                        too_big = TRUE;
                        break;
                    }
                    c = c * 10 + digit;
                    ptr++;
                }
                if (!too_big && (c < 10 || c <= bracount)) {
                    c = -(ESC_REF + c);
                    break;
                }
                ptr = oldptr;   /* not a back reference: rescan as octal */
            }
            /* \8 and \9 are not octal: yield a zero byte and leave the digit
             * to be read as an ordinary character. */
            if ((c = *ptr) >= '8') {
                ptr--;
                c = 0;
                break;
            }
            /* fall through */
        case '0':
            /* Up to two further octal digits; the result is one byte. */
            c -= '0';
            while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 &&
                   ptr[1] != '8' && ptr[1] != '9')
                c = c * 8 + *(++ptr) - '0';
            c &= 255;
            break;
        case 'x':
#ifdef L2_UT_PCRE_SUPPORT_UTF8
            /* \x{hhh...} is recognised in UTF-8 mode only. */
            if (ptr[1] == '{' && (options & PCRE_UTF8) != 0) {
                const uschar *pt = ptr + 2;
                int count = 0;
                unsigned int value = 0;

                while ((cd->ctypes[*pt] & ctype_xdigit) != 0) {
                    count++;
                    /* Only the first eight digits are accumulated (unsigned,
                     * so no overflow); more digits are an error anyway. */
                    if (count <= 8)
                        value = value * 16 + (unsigned int)(cd->lcc[*pt] -
                            (((cd->ctypes[*pt] & ctype_digit) !=
                              0) ? '0' : 'W'));
                    pt++;
                }
                if (*pt == '}') {
                    if (count > 8 || value > 0x7fffffffU) {
                        *errorptr = ERR34;
                        c = 0;
                    }
                    else
                        c = (int)value;
                    ptr = pt;
                    break;
                }
                /* No closing brace: fall back to the plain \xhh form. */
            }
#endif
            /* Plain \x: at most two hexadecimal digits. 'W' is 'a' - 10, so
             * a lower-cased letter digit maps to 10..15. */
            c = 0;
            while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0) {
                ptr++;
                c = c * 16 + cd->lcc[*ptr] -
                    (((cd->ctypes[*ptr] & ctype_digit) != 0) ? '0' : 'W');
            }
            break;
        case 'c':
            c = *(++ptr);
            if (c == 0) {
                /* Note: *ptrptr is deliberately left unchanged on this path. */
                *errorptr = ERR2;
                return 0;
            }
            /* Lower-case letters are converted through the fcc table before
             * bit 0x40 is flipped. */
            if (c >= 'a' && c <= 'z')
                c = cd->fcc[c];
            c ^= 0x40;
            break;
        default:
            /* Unknown alphanumeric escape: an error only under PCRE_EXTRA,
             * otherwise the character stands for itself. */
            if ((options & PCRE_EXTRA) != 0)
                *errorptr = ERR3;
            break;
        }
    }

    *ptrptr = ptr;
    return c;
}
/*
 * Decide whether the text after a '{' is a well-formed quantifier body:
 * "n}", "n,}" or "n,m}" where n and m are runs of decimal digits.
 *
 *   p   points to the character after the '{'
 *   cd  character tables
 *
 * Returns TRUE when the text is a quantifier, FALSE otherwise.
 */
static BOOL is_counted_repeat(const uschar * p, compile_data * cd)
{
    const uschar *types = cd->ctypes;

    /* The minimum: at least one digit is required. */
    if ((types[*p] & ctype_digit) == 0)
        return FALSE;
    do {
        p++;
    } while ((types[*p] & ctype_digit) != 0);

    if (*p == '}')
        return TRUE;            /* {n} */
    if (*p != ',')
        return FALSE;
    p++;
    if (*p == '}')
        return TRUE;            /* {n,} */

    /* The maximum: again at least one digit, then the closing brace. */
    if ((types[*p] & ctype_digit) == 0)
        return FALSE;
    do {
        p++;
    } while ((types[*p] & ctype_digit) != 0);

    return (*p == '}');
}
/*
 * Read the numbers of a {n}, {n,} or {n,m} quantifier. The caller must have
 * validated the syntax with is_counted_repeat first.
 *
 *   p         points to the first digit after the '{'
 *   minp      receives the minimum on success
 *   maxp      receives the maximum on success (-1 when there is no upper
 *             limit, equal to the minimum for the {n} form)
 *   errorptr  receives ERR5 when a number exceeds 65535, or ERR4 when the
 *             maximum is smaller than the minimum
 *   cd        character tables
 *
 * Returns a pointer to the closing '}' on success. On error it returns the
 * position reached, and *minp / *maxp are left untouched.
 *
 * Each number stops growing once it has passed 65535, so an arbitrarily long
 * digit string can no longer overflow "int" and wrap back into the accepted
 * range. The remaining digits are still consumed, so the reported position
 * is the same as before for any input that did not overflow.
 */
static const uschar *read_repeat_counts(const uschar * p, int *minp,
                                        int *maxp, const char **errorptr,
                                        compile_data * cd)
{
    enum { REPEAT_LIMIT = 65535 };
    int min = 0;
    int max = -1;

    while ((cd->ctypes[*p] & ctype_digit) != 0) {
        if (min <= REPEAT_LIMIT)
            min = min * 10 + (*p - '0');
        p++;
    }
    if (min > REPEAT_LIMIT) {
        *errorptr = ERR5;
        return p;
    }

    if (*p == '}') {
        max = min;              /* {n} */
    }
    else if (*(++p) != '}') {   /* step over the ','; "{n,}" keeps max == -1 */
        max = 0;
        while ((cd->ctypes[*p] & ctype_digit) != 0) {
            if (max <= REPEAT_LIMIT)
                max = max * 10 + (*p - '0');
            p++;
        }
        if (max > REPEAT_LIMIT) {
            *errorptr = ERR5;
            return p;
        }
        if (max < min) {
            *errorptr = ERR4;
            return p;
        }
    }

    *minp = min;
    *maxp = max;
    return p;
}
/*
 * Compute the fixed number of characters matched by a compiled group, as
 * needed for lookbehind assertions.
 *
 *   code     points to the opening item of the group (its first branch starts
 *            three bytes further on)
 *   options  passed through unchanged to recursive calls
 *
 * Returns the common length of all alternatives, or -1 when the alternatives
 * differ in length or contain an item whose length is not fixed.
 */
static int find_fixedlength(uschar * code, int options)
{
    int fixed = -1;             /* length agreed by the branches seen so far */
    int current = 0;            /* length of the branch being scanned */
    uschar *pc = code + 3;

    for (;;) {
        int n;
        int opcode = (*pc >= OP_BRA) ? OP_BRA : *pc;

        switch (opcode) {
        /* Nested groups contribute their own fixed length. */
        case OP_BRA:
        case OP_ONCE:
        case OP_COND:
            n = find_fixedlength(pc, options);
            if (n < 0)
                return -1;
            current += n;
            do {
                pc += (pc[1] << 8) + pc[2];
            } while (*pc == OP_ALT);
            pc += 3;
            break;

        /* End of a branch: compare with the previous branches, then either
         * start the next alternative or finish. */
        case OP_ALT:
        case OP_KET:
        case OP_KETRMAX:
        case OP_KETRMIN:
        case OP_END:
            if (fixed < 0)
                fixed = current;
            else if (fixed != current)
                return -1;
            if (*pc != OP_ALT)
                return fixed;
            pc += 3;
            current = 0;
            break;

        /* Assertions match no characters: skip them entirely. */
        case OP_ASSERT:
        case OP_ASSERT_NOT:
        case OP_ASSERTBACK:
        case OP_ASSERTBACK_NOT:
            do {
                pc += (pc[1] << 8) + pc[2];
            } while (*pc == OP_ALT);
            pc += 3;
            break;

        /* Zero-width items, skipped according to their operand size. */
        case OP_REVERSE:
        case OP_BRANUMBER:
        case OP_CREF:
            pc += 3;
            break;
        case OP_OPT:
            pc += 2;
            break;
        case OP_SOD:
        case OP_EOD:
        case OP_EODN:
        case OP_CIRC:
        case OP_DOLL:
        case OP_NOT_WORD_BOUNDARY:
        case OP_WORD_BOUNDARY:
            pc += 1;
            break;

        /* Literal string: length byte followed by the bytes themselves. */
        case OP_CHARS:
            pc++;
            n = *pc;
            current += n;
#ifdef L2_UT_PCRE_SUPPORT_UTF8
            {
                int k;
                /* Continuation bytes do not count as separate characters. */
                for (k = 1; k <= n; k++)
                    if ((pc[k] & 0xc0) == 0x80)
                        current--;
            }
#endif
            pc += n + 1;
            break;

        /* Exact repeats carry a 2-byte count and one operand byte. */
        case OP_EXACT:
        case OP_TYPEEXACT:
            current += (pc[1] << 8) + pc[2];
            pc += 4;
            break;

        /* Single-character types. */
        case OP_NOT_DIGIT:
        case OP_DIGIT:
        case OP_NOT_WHITESPACE:
        case OP_WHITESPACE:
        case OP_NOT_WORDCHAR:
        case OP_WORDCHAR:
        case OP_ANY:
            current++;
            pc++;
            break;

        /* Character class: opcode plus 32-byte bitmap, then an optional
         * repeat item. */
        case OP_CLASS:
            pc += 33;
            if (*pc == OP_CRSTAR || *pc == OP_CRMINSTAR ||
                *pc == OP_CRQUERY || *pc == OP_CRMINQUERY)
                return -1;
            if (*pc == OP_CRRANGE || *pc == OP_CRMINRANGE) {
                int lo = (pc[1] << 8) + pc[2];
                int hi = (pc[3] << 8) + pc[4];
                if (lo != hi)
                    return -1;
                current += lo;
                pc += 5;
            }
            else {
                /* No repeat (any other following opcode is examined on the
                 * next pass of the loop). */
                current++;
            }
            break;

        /* Anything else has no fixed length. */
        default:
            return -1;
        }
    }
}
/*
 * Check whether the text at ptr has the shape of a POSIX class item:
 * "[:" / "[." / "[=" followed by an optional '^', a run of letters, the same
 * punctuation character again, and ']'.
 *
 *   ptr     points to the opening '['
 *   endptr  on success receives a pointer to the terminating punctuation
 *           character (the one just before the final ']')
 *   cd      character tables
 *
 * Returns TRUE when the syntax matches, FALSE otherwise.
 */
static BOOL
check_posix_syntax(const uschar * ptr, const uschar ** endptr,
                   compile_data * cd)
{
    const int closer = ptr[1];          /* ':', '.' or '=' */
    const uschar *scan = ptr + 2;

    if (*scan == '^')
        scan++;
    for (; (cd->ctypes[*scan] & ctype_letter) != 0; scan++)
        ;

    if (*scan != closer || scan[1] != ']')
        return FALSE;

    *endptr = scan;
    return TRUE;
}
/*
 * Look up a POSIX class name.
 *
 *   ptr  points to the first character of the name
 *   len  length of the name
 *
 * Returns the index of the name in posix_names, or -1 when it is unknown.
 */
static int check_posix_name(const uschar * ptr, int len)
{
    int idx;

    /* posix_name_lengths ends with a zero entry. */
    for (idx = 0; posix_name_lengths[idx] != 0; idx++) {
        if (posix_name_lengths[idx] != len)
            continue;
        if (strncmp((const char *)ptr, posix_names[idx], len) == 0)
            return idx;
    }
    return -1;
}
/*
 * Compile one alternative (branch) of a pattern into byte code.
 *
 * Pattern text is read from *ptrptr and opcodes are written at *codeptr until
 * a NUL, '|' or ')' is reached.
 *
 *   options     option bits in force at the start of the branch
 *   brackets    running count of capturing brackets; incremented here
 *   codeptr     in: where to write; out: first free byte after the branch
 *   ptrptr      in: start of the branch text; out: the terminating character,
 *               or the error position on failure
 *   errorptr    receives an error message on failure
 *   optchanged  set to the new PCRE_IMS bits when an option setting such as
 *               (?i) is seen inside a group
 *   reqchar     out: last literal character emitted, or -1
 *   countlits   out: count of literal characters
 *   cd          character tables
 *
 * Returns TRUE on success, FALSE on error.
 */
static BOOL
compile_branch(int options, int *brackets, uschar ** codeptr,
const uschar ** ptrptr, const char **errorptr, int *optchanged,
int *reqchar, int *countlits, compile_data * cd)
{
int repeat_type, op_type;
int repeat_min, repeat_max;
int bravalue, length;
int greedy_default, greedy_non_default;
int prevreqchar;
int condcount = 0;
int subcountlits = 0;
register int c;
register uschar *code = *codeptr;
uschar *tempcode;
const uschar *ptr = *ptrptr;
const uschar *tempptr;
/* Start of the most recent item that a quantifier may apply to, or NULL. */
uschar *previous = NULL;
/* Bitmap under construction for a character class: one bit per byte value. */
uschar class[32];
/* A repeat type of 0 selects the greedy opcode and 1 the minimizing one
 * (they are adjacent in the opcode enum); PCRE_UNGREEDY swaps the default. */
greedy_default = ((options & PCRE_UNGREEDY) != 0);
greedy_non_default = greedy_default ^ 1;
*reqchar = prevreqchar = -1;
*countlits = 0;
for (;; ptr++) {
BOOL negate_class;
int class_charcount;
int class_lastchar;
int newoptions;
int skipbytes;
int subreqchar;
c = *ptr;
/* In extended mode, skip white space and '#' comments up to a newline. */
if ((options & PCRE_EXTENDED) != 0) {
if ((cd->ctypes[c] & ctype_space) != 0)
continue;
if (c == '#') {
while ((c = *(++ptr)) != 0 && c != '\n') ;
continue;
}
}
switch (c) {
/* End of the branch: hand back the updated pointers. */
case 0:
case '|':
case ')':
*codeptr = code;
*ptrptr = ptr;
return TRUE;
case '^':
previous = NULL;
*code++ = OP_CIRC;
break;
case '$':
previous = NULL;
*code++ = OP_DOLL;
break;
case '.':
previous = code;
*code++ = OP_ANY;
break;
/* Character class. The items are accumulated into the class[] bitmap. */
case '[':
previous = code;
*code++ = OP_CLASS;
if ((c = *(++ptr)) == '^') {
negate_class = TRUE;
c = *(++ptr);
}
else
negate_class = FALSE;
/* class_charcount / class_lastchar detect a class that consists of exactly
 * one character, which is compiled more compactly below. */
class_charcount = 0;
class_lastchar = -1;
memset(class, 0, 32 * sizeof (uschar));
do {
if (c == 0) {
*errorptr = ERR6;
goto FAILED;
}
/* POSIX item such as [:alpha:]; [. .] and [= =] are recognised only so that
 * they can be rejected with ERR31. */
if (c == '[' &&
(ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
check_posix_syntax(ptr, &tempptr, cd)) {
BOOL local_negate = FALSE;
int posix_class, i;
register const uschar *cbits = cd->cbits;
if (ptr[1] != ':') {
*errorptr = ERR31;
goto FAILED;
}
ptr += 2;
if (*ptr == '^') {
local_negate = TRUE;
ptr++;
}
posix_class = check_posix_name(ptr, tempptr - ptr);
if (posix_class < 0) {
*errorptr = ERR30;
goto FAILED;
}
/* Indexes 0..2 are alpha, lower and upper: when matching caselessly all
 * three are treated as alpha. */
if ((options & PCRE_CASELESS) != 0
&& posix_class <= 2)
posix_class = 0;
/* posix_class_maps holds three bitmap offsets per class, padded with -1. */
posix_class *= 3;
for (i = 0; i < 3; i++) {
int taboffset = posix_class_maps[posix_class + i];
if (taboffset < 0)
break;
if (local_negate)
for (c = 0; c < 32; c++)
class[c] |= ~cbits[c + taboffset];
else
for (c = 0; c < 32; c++)
class[c] |= cbits[c + taboffset];
}
ptr = tempptr + 1;
/* Any value other than 1 prevents the single-character optimisation. */
class_charcount = 10;
continue;
}
/* Backslash inside a class: \b means backspace, the type escapes OR in their
 * bitmap, any other special escape is an error. */
if (c == '\\') {
c = check_escape(&ptr, errorptr, *brackets, options,
TRUE, cd);
if (-c == ESC_b)
c = '\b';
else if (c < 0) {
register const uschar *cbits = cd->cbits;
class_charcount = 10;
switch (-c) {
case ESC_d:
for (c = 0; c < 32; c++)
class[c] |= cbits[c + cbit_digit];
continue;
case ESC_D:
for (c = 0; c < 32; c++)
class[c] |= ~cbits[c + cbit_digit];
continue;
case ESC_w:
for (c = 0; c < 32; c++)
class[c] |= cbits[c + cbit_word];
continue;
case ESC_W:
for (c = 0; c < 32; c++)
class[c] |= ~cbits[c + cbit_word];
continue;
case ESC_s:
for (c = 0; c < 32; c++)
class[c] |= cbits[c + cbit_space];
continue;
case ESC_S:
for (c = 0; c < 32; c++)
class[c] |= ~cbits[c + cbit_space];
continue;
default:
*errorptr = ERR7;
goto FAILED;
}
}
#ifdef L2_UT_PCRE_SUPPORT_UTF8
if (c > 255) {
*errorptr = ERR33;
goto FAILED;
}
#endif
}
/* Range "c-d". A '-' directly before the closing ']' is a literal hyphen. */
if (ptr[1] == '-' && ptr[2] != ']') {
int d;
ptr += 2;
d = *ptr;
if (d == 0) {
*errorptr = ERR6;
goto FAILED;
}
if (d == '\\') {
const uschar *oldptr = ptr;
d = check_escape(&ptr, errorptr, *brackets,
options, TRUE, cd);
#ifdef L2_UT_PCRE_SUPPORT_UTF8
if (d > 255) {
*errorptr = ERR33;
goto FAILED;
}
#endif
if (d < 0) {
if (d == -ESC_b)
d = '\b';
else {
/* The range end is a type escape such as \d: back up to just before the
 * '-' so that c is added as a single character and the rest is rescanned. */
ptr = oldptr - 2;
goto SINGLE_CHARACTER;
}
}
}
if (d < c) {
*errorptr = ERR8;
goto FAILED;
}
for (; c <= d; c++) {
class[c / 8] |= (1 << (c & 7));
if ((options & PCRE_CASELESS) != 0) {
int uc = cd->fcc[c];
class[uc / 8] |= (1 << (uc & 7));
}
class_charcount++;
class_lastchar = c;
}
continue;
}
SINGLE_CHARACTER:
class[c / 8] |= (1 << (c & 7));
if ((options & PCRE_CASELESS) != 0) {
c = cd->fcc[c];
class[c / 8] |= (1 << (c & 7));
}
class_charcount++;
class_lastchar = c;
}
while ((c = *(++ptr)) != ']');
/* A class holding exactly one character becomes OP_NOT c (negated) or a
 * one-character OP_CHARS; otherwise the 32-byte bitmap is emitted, inverted
 * for a negated class. */
if (class_charcount == 1 && class_lastchar >= 0) {
if (negate_class) {
code[-1] = OP_NOT;
}
else {
code[-1] = OP_CHARS;
*code++ = 1;
}
*code++ = class_lastchar;
}
else {
if (negate_class)
for (c = 0; c < 32; c++)
code[c] = ~class[c];
else
memcpy(code, class, 32);
code += 32;
}
break;
/* Quantifiers. A '{' that does not start a valid quantifier is a literal. */
case '{':
if (!is_counted_repeat(ptr + 1, cd))
goto NORMAL_CHAR;
ptr =
read_repeat_counts(ptr + 1, &repeat_min, &repeat_max,
errorptr, cd);
if (*errorptr != NULL)
goto FAILED;
goto REPEAT;
case '*':
repeat_min = 0;
repeat_max = -1;
goto REPEAT;
case '+':
repeat_min = 1;
repeat_max = -1;
goto REPEAT;
case '?':
repeat_min = 0;
repeat_max = 1;
/* Common quantifier handling; repeat_max == -1 means "no upper limit". */
REPEAT:
if (previous == NULL) {
*errorptr = ERR9;
goto FAILED;
}
/* A '?' after the quantifier selects the non-default greediness. */
if (ptr[1] == '?') {
repeat_type = greedy_non_default;
ptr++;
}
else
repeat_type = greedy_default;
/* Repeat of a literal string: only its last character is repeated, so it is
 * taken off the end of the string (or the whole one-character item is
 * replaced). */
if (*previous == OP_CHARS) {
int len = previous[1];
if (repeat_min == 0)
*reqchar = prevreqchar;
*countlits += repeat_min - 1;
if (len == 1) {
c = previous[2];
code = previous;
}
else {
c = previous[len + 1];
previous[1]--;
code--;
}
op_type = 0;
goto OUTPUT_SINGLE_REPEAT;
}
/* Repeat of a negated single character: use the OP_NOT* opcode family. */
else if ((int)*previous == OP_NOT) {
op_type = OP_NOTSTAR - OP_STAR;
c = previous[1];
code = previous;
goto OUTPUT_SINGLE_REPEAT;
}
/* Repeat of a character type (\d, \w, ... or '.'): use the OP_TYPE* family. */
else if ((int)*previous < OP_EODN || *previous == OP_ANY) {
op_type = OP_TYPESTAR - OP_STAR;
c = *previous;
code = previous;
/* Emit the repeat opcode(s) for a single-character item, followed by the
 * character or type code c. */
OUTPUT_SINGLE_REPEAT:
/* {0} or {0,0}: the item is dropped altogether. */
if (repeat_max == 0)
goto END_REPEAT;
repeat_type += op_type;
if (repeat_min == 0) {
if (repeat_max == -1)
*code++ = OP_STAR + repeat_type;
else if (repeat_max == 1)
*code++ = OP_QUERY + repeat_type;
else {
*code++ = OP_UPTO + repeat_type;
*code++ = repeat_max >> 8;
*code++ = (repeat_max & 255);
}
}
else if (repeat_min == 1 && repeat_max == -1)
*code++ = OP_PLUS + repeat_type;
else {
/* General case: an exact repeat for the minimum (or, for a minimum of 1,
 * the original single item is kept), then a star or "up to" item for the
 * remainder. */
if (repeat_min != 1) {
*code++ = OP_EXACT + op_type;
*code++ = repeat_min >> 8;
*code++ = (repeat_min & 255);
}
else if (*previous == OP_CHARS) {
if (code == previous)
code += 2;
else
previous[1]++;
}
else if (*previous == OP_NOT)
code++;
if (repeat_max < 0) {
*code++ = c;
*code++ = OP_STAR + repeat_type;
}
else if (repeat_max != repeat_min) {
*code++ = c;
repeat_max -= repeat_min;
*code++ = OP_UPTO + repeat_type;
*code++ = repeat_max >> 8;
*code++ = (repeat_max & 255);
}
}
*code++ = c;
}
/* Repeat of a class or back reference: a repeat item is appended after it.
 * In OP_CRRANGE a maximum of 0 encodes "no upper limit". */
else if (*previous == OP_CLASS || *previous == OP_REF) {
if (repeat_max == 0) {
code = previous;
goto END_REPEAT;
}
if (repeat_min == 0 && repeat_max == -1)
*code++ = OP_CRSTAR + repeat_type;
else if (repeat_min == 1 && repeat_max == -1)
*code++ = OP_CRPLUS + repeat_type;
else if (repeat_min == 0 && repeat_max == 1)
*code++ = OP_CRQUERY + repeat_type;
else {
*code++ = OP_CRRANGE + repeat_type;
*code++ = repeat_min >> 8;
*code++ = repeat_min & 255;
if (repeat_max == -1)
repeat_max = 0;
*code++ = repeat_max >> 8;
*code++ = repeat_max & 255;
}
}
/* Repeat of a bracketed group: the compiled group is replicated as often as
 * needed, with OP_BRAZERO marking optional copies. */
else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE
|| (int)*previous == OP_COND) {
register int i;
int ketoffset = 0;
/* Size in bytes of the compiled group that is to be repeated. */
int len = code - previous;
/* Most recently opened wrapper bracket still awaiting its OP_KET; the
 * wrappers are chained through their (not yet final) offset bytes. */
uschar *bralink = NULL;
/* For an unlimited repeat, remember where the group's final OP_KET is so
 * that it can be turned into OP_KETRMAX / OP_KETRMIN at the end. */
if (repeat_max == -1) {
register uschar *ket = previous;
do
ket += (ket[1] << 8) + ket[2];
while (*ket != OP_KET);
ketoffset = code - ket;
}
if (repeat_min == 0) {
/* An optional group cannot supply a required character. */
if (subcountlits > 0) {
*reqchar = prevreqchar;
*countlits -= subcountlits;
}
if (repeat_max == 0) {
code = previous;
goto END_REPEAT;
}
/* Make room in front of the group for OP_BRAZERO (and, when more copies
 * follow, for a wrapping OP_BRA with its two offset bytes). */
if (repeat_max <= 1) {
memmove(previous + 1, previous, len);
code++;
*previous++ = OP_BRAZERO + repeat_type;
}
else {
int offset;
memmove(previous + 4, previous, len);
code += 4;
*previous++ = OP_BRAZERO + repeat_type;
*previous++ = OP_BRA;
offset =
(bralink == NULL) ? 0 : previous - bralink;
bralink = previous;
*previous++ = offset >> 8;
*previous++ = offset & 255;
}
repeat_max--;
}
else {
/* Minimum above zero: the first copy already exists, add the others. */
for (i = 1; i < repeat_min; i++) {
memcpy(code, previous, len);
code += len;
}
if (repeat_max > 0)
repeat_max -= repeat_min;
}
/* Limited maximum: append the remaining optional copies, each nested inside
 * a wrapper bracket except the last, then close all the wrappers. */
if (repeat_max >= 0) {
for (i = repeat_max - 1; i >= 0; i--) {
*code++ = OP_BRAZERO + repeat_type;
if (i != 0) {
int offset;
*code++ = OP_BRA;
offset =
(bralink == NULL) ? 0 : code - bralink;
bralink = code;
*code++ = offset >> 8;
*code++ = offset & 255;
}
memcpy(code, previous, len);
code += len;
}
/* Walk the chain of open wrappers, emitting an OP_KET for each and replacing
 * the temporary link value with the real bracket length. */
while (bralink != NULL) {
int oldlinkoffset;
int offset = code - bralink + 1;
uschar *bra = code - offset;
oldlinkoffset = (bra[1] << 8) + bra[2];
bralink =
(oldlinkoffset ==
0) ? NULL : bralink - oldlinkoffset;
*code++ = OP_KET;
*code++ = bra[1] = offset >> 8;
*code++ = bra[2] = (offset & 255);
}
}
/* Unlimited maximum: convert the group's closing OP_KET into a repeating ket. */
else
code[-ketoffset] = OP_KETRMAX + repeat_type;
}
else {
*errorptr = ERR11;
goto FAILED;
}
/* A quantified item cannot be quantified again. */
END_REPEAT:
previous = NULL;
break;
/* Opening parenthesis: work out which kind of group this is, then compile
 * its contents with compile_regex. */
case '(':
newoptions = options;
skipbytes = 0;
if (*(++ptr) == '?') {
int set, unset;
int *optset;
switch (*(++ptr)) {
/* (?# ... ) comment.
 * NOTE(review): this loop does not test for the end of the pattern;
 * presumably an earlier validation pass guarantees the ')' - confirm. */
case '#':
ptr++;
while (*ptr != ')')
ptr++;
continue;
case ':':
bravalue = OP_BRA;
ptr++;
break;
/* Conditional group. "(?(digits)" tests a capturing group: an OP_CREF item
 * is placed right after the 3-byte group header and skipbytes tells
 * compile_regex to start after it. Otherwise the condition must be an
 * assertion, which is compiled as the first item of the group. */
case '(':
bravalue = OP_COND;
if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0) {
int condref = *ptr - '0';
while (*(++ptr) != ')')
condref = condref * 10 + *ptr - '0';
if (condref == 0) {
*errorptr = ERR35;
goto FAILED;
}
ptr++;
code[3] = OP_CREF;
code[4] = condref >> 8;
code[5] = condref & 255;
skipbytes = 3;
}
else
ptr--;
break;
case '=':
bravalue = OP_ASSERT;
ptr++;
break;
case '!':
bravalue = OP_ASSERT_NOT;
ptr++;
break;
case '<':
switch (*(++ptr)) {
case '=':
bravalue = OP_ASSERTBACK;
ptr++;
break;
case '!':
bravalue = OP_ASSERTBACK_NOT;
ptr++;
break;
default:
*errorptr = ERR24;
goto FAILED;
}
break;
case '>':
bravalue = OP_ONCE;
ptr++;
break;
case 'R':
*code++ = OP_RECURSE;
ptr++;
continue;
/* Option letters, e.g. (?i), (?i-m) or (?x: ... ). Letters before a '-' are
 * collected in "set", letters after it in "unset". */
default:
set = unset = 0;
optset = &set;
while (*ptr != ')' && *ptr != ':') {
switch (*ptr++) {
case '-':
optset = &unset;
break;
case 'i':
*optset |= PCRE_CASELESS;
break;
case 'm':
*optset |= PCRE_MULTILINE;
break;
case 's':
*optset |= PCRE_DOTALL;
break;
case 'x':
*optset |= PCRE_EXTENDED;
break;
case 'U':
*optset |= PCRE_UNGREEDY;
break;
case 'X':
*optset |= PCRE_EXTRA;
break;
default:
*errorptr = ERR12;
goto FAILED;
}
}
newoptions = (options | set) & (~unset);
/* Bare option setting "(?...)": it applies to the rest of the current
 * branch. Inside a group a changed i/m/s setting is also emitted as OP_OPT
 * and reported back through *optchanged. */
if (*ptr == ')') {
if ((options & PCRE_INGROUP) != 0 &&
(options & PCRE_IMS) !=
(newoptions & PCRE_IMS)) {
*code++ = OP_OPT;
*code++ = *optchanged =
newoptions & PCRE_IMS;
}
options = newoptions;
previous = NULL;
continue;
}
/* "(?opts: ... )": a non-capturing group with its own options. */
bravalue = OP_BRA;
ptr++;
}
}
/* Plain '(' : a capturing group. Numbers above EXTRACT_BASIC_MAX do not fit
 * in the opcode and are stored in a following OP_BRANUMBER item instead. */
else {
if (++(*brackets) > EXTRACT_BASIC_MAX) {
bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1;
code[3] = OP_BRANUMBER;
code[4] = *brackets >> 8;
code[5] = *brackets & 255;
skipbytes = 3;
}
else
bravalue = OP_BRA + *brackets;
}
/* Assertions sort below OP_ONCE in the opcode enum, so they are not
 * recorded as something a quantifier can apply to. */
previous = (bravalue >= OP_ONCE) ? code : NULL;
*code = bravalue;
tempcode = code;
/* Compile the group body. The second argument is the new i/m/s setting
 * when it differs from the current one, else -1; the seventh is TRUE for
 * lookbehind assertions. */
if (!compile_regex(options | PCRE_INGROUP,
((options & PCRE_IMS) !=
(newoptions & PCRE_IMS)) ? newoptions &
PCRE_IMS : -1, brackets, &tempcode, &ptr,
errorptr, (bravalue == OP_ASSERTBACK
|| bravalue ==
OP_ASSERTBACK_NOT), skipbytes,
&subreqchar, &subcountlits, cd))
goto FAILED;
/* A conditional group may have at most two alternatives. */
else if (bravalue == OP_COND) {
uschar *tc = code;
condcount = 0;
do {
condcount++;
tc += (tc[1] << 8) | tc[2];
}
while (*tc != OP_KET);
if (condcount > 2) {
*errorptr = ERR27;
goto FAILED;
}
}
/* Take over the group's required character for those group kinds whose
 * contents are always matched; a positive assertion contributes the
 * character but not its literal count. */
if (subreqchar > 0 &&
(bravalue >= OP_BRA || bravalue == OP_ONCE
|| bravalue == OP_ASSERT || (bravalue == OP_COND
&& condcount == 2))) {
prevreqchar = *reqchar;
*reqchar = subreqchar;
if (bravalue != OP_ASSERT)
*countlits += subcountlits;
}
code = tempcode;
if (*ptr != ')') {
*errorptr = ERR14;
goto FAILED;
}
break;
/* Backslash outside a class. A negative result is either a back reference
 * or an escape whose negated code is emitted directly as the opcode. Only
 * the character-type escapes (\D \d \S \s \W \w) may be quantified. A
 * non-negative result is a literal: rewind and let the literal-run code
 * below decode it again. */
case '\\':
tempptr = ptr;
c = check_escape(&ptr, errorptr, *brackets, options, FALSE,
cd);
if (c < 0) {
if (-c >= ESC_REF) {
int number = -c - ESC_REF;
previous = code;
*code++ = OP_REF;
*code++ = number >> 8;
*code++ = number & 255;
}
else {
previous = (-c > ESC_b && -c < ESC_Z) ? code : NULL;
*code++ = -c;
}
continue;
}
ptr = tempptr;
c = '\\';
/* Run of literal characters: OP_CHARS, a length byte (filled in at the end)
 * and the characters. The run ends at a meta character, at an escape that is
 * not a literal, or after MAXLIT bytes. */
NORMAL_CHAR:
default:
previous = code;
*code = OP_CHARS;
code += 2;
length = 0;
do {
if ((options & PCRE_EXTENDED) != 0) {
if ((cd->ctypes[c] & ctype_space) != 0)
continue;
if (c == '#') {
while ((c = *(++ptr)) != 0 && c != '\n') ;
if (c == 0)
break;
continue;
}
}
if (c == '\\') {
tempptr = ptr;
c = check_escape(&ptr, errorptr, *brackets, options,
FALSE, cd);
if (c < 0) {
ptr = tempptr;
break;
}
#ifdef L2_UT_PCRE_SUPPORT_UTF8
if (c > 127 && (options & PCRE_UTF8) != 0) {
uschar buffer[8];
int len = ord2utf8(c, buffer);
for (c = 0; c < len; c++)
*code++ = buffer[c];
length += len;
continue;
}
#endif
}
*code++ = c;
length++;
}
while (length < MAXLIT
&& (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);
/* Remember the last literal as the required character, and the one before
 * it in case a following quantifier makes the last one optional. */
prevreqchar = (length > 1) ? code[-2] : *reqchar;
*reqchar = code[-1];
*countlits += length;
previous[1] = length;
/* Unless the run stopped because it was full, ptr is on the terminating
 * character; step back so the outer loop's ptr++ lands on it again. */
if (length < MAXLIT)
ptr--;
break;
}
}
/* Error exit: report how far the scan got. */
FAILED:
*ptrptr = ptr;
return FALSE;
}
/*
 * Compile a group: a sequence of alternatives separated by '|'.
 *
 *   options     option bits in force on entry
 *   optchanged  new PCRE_IMS bits to announce with OP_OPT at the start of
 *               each branch, or negative for none
 *   brackets    running count of capturing brackets
 *   codeptr     in: the group's opening item; out: first byte after the group
 *   ptrptr      in: start of the group text; out: the character that ended
 *               the group, or the error position on failure
 *   errorptr    receives an error message on failure
 *   lookbehind  TRUE when every branch must have a fixed length, which is
 *               stored in an OP_REVERSE item at the start of the branch
 *   skipbytes   extra bytes already placed after the 3-byte group header
 *   reqchar     out: character required by every branch, -1 when there were
 *               no branches to decide it, -2 when the branches disagree or
 *               one of them has none
 *   countlits   out: smallest literal count of any branch
 *   cd          character tables
 *
 * Returns TRUE on success, FALSE on error.
 */
static BOOL
compile_regex(int options, int optchanged, int *brackets, uschar ** codeptr,
              const uschar ** ptrptr, const char **errorptr, BOOL lookbehind,
              int skipbytes, int *reqchar, int *countlits, compile_data * cd)
{
    const uschar *pattern = *ptrptr;
    uschar *group_start = *codeptr;
    uschar *branch_start = group_start;
    uschar *out = group_start + 3 + skipbytes;
    uschar *reverse_slot = NULL;
    const int saved_ims = options & PCRE_IMS;
    int branch_req, branch_lits;
    int span;

    *reqchar = -1;
    *countlits = INT_MAX;

    for (;;) {
        /* Announce a changed i/m/s setting at the head of the branch. */
        if (optchanged >= 0) {
            out[0] = OP_OPT;
            out[1] = optchanged;
            out += 2;
            options = (options & ~PCRE_IMS) | optchanged;
        }

        /* Reserve the OP_REVERSE item; its length is filled in below. */
        if (lookbehind) {
            out[0] = OP_REVERSE;
            out[1] = 0;
            out[2] = 0;
            reverse_slot = out + 1;
            out += 3;
        }

        if (!compile_branch(options, brackets, &out, &pattern, errorptr,
                            &optchanged, &branch_req, &branch_lits, cd)) {
            *ptrptr = pattern;
            return FALSE;
        }

        /* Link the start of this branch to the item that follows it. */
        span = out - branch_start;
        branch_start[1] = span >> 8;
        branch_start[2] = span & 255;

        /* Merge this branch's required character into the group's. */
        if (*reqchar != -2) {
            if (branch_req < 0)
                *reqchar = -2;
            else if (*reqchar == -1)
                *reqchar = branch_req;
            else if (*reqchar != branch_req)
                *reqchar = -2;
        }
        if (branch_lits < *countlits)
            *countlits = branch_lits;

        /* Lookbehind: terminate the branch temporarily so that its length
         * can be measured, and record that length. */
        if (lookbehind) {
            *out = OP_END;
            span = find_fixedlength(branch_start, options);
            if (span < 0) {
                *errorptr = ERR25;
                *ptrptr = pattern;
                return FALSE;
            }
            reverse_slot[0] = (span >> 8);
            reverse_slot[1] = span & 255;
        }

        /* No further alternative: close the group and return. */
        if (*pattern != '|') {
            span = out - group_start;
            out[0] = OP_KET;
            out[1] = span >> 8;
            out[2] = span & 255;
            out += 3;
            /* Restore the i/m/s setting that was in force on entry. */
            if (optchanged >= 0) {
                out[0] = OP_OPT;
                out[1] = saved_ims;
                out += 2;
            }
            *codeptr = out;
            *ptrptr = pattern;
            return TRUE;
        }

        /* Another alternative follows: start it with OP_ALT. */
        *out = OP_ALT;
        branch_start = out;
        out += 3;
        pattern++;
    }
}
/*
 * Skip over items at the start of a branch that do not consume a character:
 * option settings, OP_CREF / OP_BRANUMBER items, word-boundary tests, and
 * negative-lookahead and lookbehind assertions.
 *
 *   code     where to start scanning
 *   options  current option bits; updated when an OP_OPT changes optbit
 *            (not accessed when optbit is 0, so it may then be NULL)
 *   optbit   the option bit the caller cares about, or 0 for none
 *   optstop  when TRUE, stop at an OP_OPT that changes optbit instead of
 *            applying it
 *
 * Returns a pointer to the first item that was not skipped.
 */
static const uschar *first_significant_code(const uschar * code, int *options,
                                            int optbit, BOOL optstop)
{
    for (;;) {
        const int op = *code;

        if (op == OP_OPT) {
            const int newbits = code[1];

            if (optbit > 0 && (newbits & optbit) != (*options & optbit)) {
                if (optstop)
                    return code;
                *options = newbits;
            }
            code += 2;
        }
        else if (op == OP_CREF || op == OP_BRANUMBER) {
            code += 3;
        }
        else if (op == OP_WORD_BOUNDARY || op == OP_NOT_WORD_BOUNDARY) {
            code += 1;
        }
        else if (op == OP_ASSERT_NOT || op == OP_ASSERTBACK ||
                 op == OP_ASSERTBACK_NOT) {
            /* Step over every alternative of the assertion, then its ket. */
            do {
                code += (code[1] << 8) + code[2];
            } while (*code == OP_ALT);
            code += 3;
        }
        else {
            return code;
        }
    }
}
/*
 * Decide whether every alternative of a compiled group can only match at the
 * start of the subject.
 *
 *   code     points to the opening item of the group
 *   options  current option bits; may be updated while scanning
 *
 * A branch qualifies when it starts with OP_SOD, with OP_CIRC while
 * PCRE_MULTILINE is off, with an unlimited repeat of OP_ANY while PCRE_DOTALL
 * is on, or with a nested group that itself qualifies.
 *
 * Returns TRUE when all branches qualify, FALSE otherwise.
 */
static BOOL is_anchored(register const uschar * code, int *options)
{
    do {
        const uschar *first = first_significant_code(code + 3, options,
                                                     PCRE_MULTILINE, FALSE);
        const int op = *first;
        BOOL ok;

        if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE ||
            op == OP_COND) {
            ok = is_anchored(first, options);
        }
        else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&
                 (*options & PCRE_DOTALL) != 0) {
            ok = (first[1] == OP_ANY);
        }
        else {
            ok = (op == OP_SOD) ||
                 ((*options & PCRE_MULTILINE) == 0 && op == OP_CIRC);
        }
        if (!ok)
            return FALSE;

        code += (code[1] << 8) + code[2];
    } while (*code == OP_ALT);

    return TRUE;
}
/*
 * Decide whether every alternative of a compiled bracket group begins
 * with OP_CIRC, with OP_TYPESTAR / OP_TYPEMINSTAR applied to OP_ANY, or
 * with a nested group (bracket, positive assertion, once-only or
 * conditional) for which the same holds.
 *
 * code  points at the opening item of the group
 *
 * Returns TRUE if all alternatives qualify, FALSE otherwise.
 */
static BOOL is_startline(const uschar * code)
{
    for (;;) {
        /* Options are irrelevant here, hence the NULL / 0 arguments. */
        const uschar *first =
            first_significant_code(code + 3, NULL, 0, FALSE);
        const int op = *first;

        if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE
            || op == OP_COND) {
            if (!is_startline(first))
                return FALSE;
        }
        else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR) {
            if (first[1] != OP_ANY)
                return FALSE;
        }
        else if (op != OP_CIRC) {
            return FALSE;
        }

        /* Move on to the next alternative, if there is one. */
        code += (code[1] << 8) + code[2];
        if (*code != OP_ALT)
            break;
    }
    return TRUE;
}
/*
 * Look for a single character with which every alternative of a compiled
 * bracket group must start.
 *
 * Each alternative must begin (after skipping non-consuming items, and
 * stopping at any change of the PCRE_CASELESS option) with a literal
 * item - OP_EXACT, OP_CHARS, OP_PLUS or OP_MINPLUS - or with a nested
 * group that itself has a fixed first character, and all alternatives
 * must agree on that character.
 *
 * code     points at the opening item of the group
 * options  current option bits, passed through to first_significant_code
 *
 * Returns the common first character, or -1 if there is none.
 */
static int find_firstchar(const uschar * code, int *options)
{
    register int found = -1;

    do {
        const uschar *first = first_significant_code(code + 3, options,
                                                     PCRE_CASELESS, TRUE);
        const int op = *first;
        int candidate;

        if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE
            || op == OP_COND) {
            candidate = find_firstchar(first, options);
            if (candidate < 0)
                return -1;
        }
        else if (op == OP_EXACT) {
            candidate = first[3];       /* opcode, 2-byte count, char */
        }
        else if (op == OP_CHARS) {
            candidate = first[2];       /* opcode, length, first char */
        }
        else if (op == OP_PLUS || op == OP_MINPLUS) {
            candidate = first[1];       /* opcode, char */
        }
        else {
            return -1;
        }

        if (found < 0)
            found = candidate;
        else if (found != candidate)
            return -1;

        code += (code[1] << 8) + code[2];
    } while (*code == OP_ALT);

    return found;
}
pcre *pcre_compile(const char *pattern, int options, const char **errorptr,
int *erroroffset, const unsigned char *tables)
{
pcre_st *re;
int length = 3;
int runlength;
int c, reqchar, countlits;
int bracount = 0;
int top_backref = 0;
int branch_extra = 0;
int branch_newextra;
unsigned int brastackptr = 0;
size_t size;
uschar *code;
const uschar *ptr;
compile_data compile_block;
int brastack[BRASTACK_SIZE];
uschar bralenstack[BRASTACK_SIZE];
#ifndef L2_UT_PCRE_SUPPORT_UTF8
if ((options & PCRE_UTF8) != 0) {
*errorptr = ERR32;
return NULL;
}
#endif
if (errorptr == NULL)
return NULL;
*errorptr = NULL;
if (erroroffset == NULL) {
*errorptr = ERR16;
return NULL;
}
*erroroffset = 0;
if ((options & ~PUBLIC_OPTIONS) != 0) {
*errorptr = ERR17;
return NULL;
}
if (tables == NULL)
tables = pcre_default_tables;
compile_block.lcc = tables + lcc_offset;
compile_block.fcc = tables + fcc_offset;
compile_block.cbits = tables + cbits_offset;
compile_block.ctypes = tables + ctypes_offset;
ptr = (const uschar *)(pattern - 1);
while ((c = *(++ptr)) != 0) {
int min, max;
int class_charcount;
int bracket_length;
if ((options & PCRE_EXTENDED) != 0) {
if ((compile_block.ctypes[c] & ctype_space) != 0)
continue;
if (c == '#') {
while ((c = *(++ptr)) != 0 && c != '\n') ;
continue;
}
}
switch (c) {
case '\\':
{
const uschar *save_ptr = ptr;
c = check_escape(&ptr, errorptr, bracount, options, FALSE,
&compile_block);
if (*errorptr != NULL)
goto PCRE_ERROR_RETURN;
if (c >= 0) {
ptr = save_ptr;
c = '\\';
goto NORMAL_CHAR;
}
}
length++;
if (c <= -ESC_REF) {
int refnum = -c - ESC_REF;
if (refnum > top_backref)
top_backref = refnum;
length += 2;
if (ptr[1] == '{'
&& is_counted_repeat(ptr + 2, &compile_block)) {
ptr =
read_repeat_counts(ptr + 2, &min, &max, errorptr,
&compile_block);
if (*errorptr != NULL)
goto PCRE_ERROR_RETURN;
if ((min == 0 && (max == 1 || max == -1)) ||
(min == 1 && max == -1))
length++;
else
length += 5;
if (ptr[1] == '?')
ptr++;
}
}
continue;
case '^':
case '.':
case '$':
case '*':
case '+':
case '?':
length++;
continue;
case '{':
if (!is_counted_repeat(ptr + 1, &compile_block))
goto NORMAL_CHAR;
ptr =
read_repeat_counts(ptr + 1, &min, &max, errorptr,
&compile_block);
if (*errorptr != NULL)
goto PCRE_ERROR_RETURN;
if ((min == 0 && (max == 1 || max == -1)) ||
(min == 1 && max == -1))
length++;
else {
length--;
if (min == 1)
length++;
else if (min > 0)
length += 4;
if (max > 0)
length += 4;
else
length += 2;
}
if (ptr[1] == '?')
ptr++;
continue;
case '|':
length += 3 + branch_extra;
continue;
case '[':
class_charcount = 0;
if (*(++ptr) == '^')
ptr++;
do {
if (*ptr == '\\') {
int ch =
check_escape(&ptr, errorptr, bracount, options,
TRUE,
&compile_block);
if (*errorptr != NULL)
goto PCRE_ERROR_RETURN;
if (-ch == ESC_b)
class_charcount++;
else
class_charcount = 10;
}
else
class_charcount++;
ptr++;
}
while (*ptr != 0 && *ptr != ']');
if (class_charcount == 1)
length += 3;
else {
length += 33;
if (*ptr != 0 && ptr[1] == '{'
&& is_counted_repeat(ptr + 2, &compile_block)) {
ptr =
read_repeat_counts(ptr + 2, &min, &max, errorptr,
&compile_block);
if (*errorptr != NULL)
goto PCRE_ERROR_RETURN;
if ((min == 0 && (max == 1 || max == -1)) ||
(min == 1 && max == -1))
length++;
else
length += 5;
if (ptr[1] == '?')
ptr++;
}
}
continue;
case '(':
branch_newextra = 0;
bracket_length = 3;
if (ptr[1] == '?') {
int set, unset;
int *optset;
switch (c = ptr[2]) {
case '#':
ptr += 3;
while (*ptr != 0 && *ptr != ')')
ptr++;
if (*ptr == 0) {
*errorptr = ERR18;
goto PCRE_ERROR_RETURN;
}
continue;
case ':':
case '=':
case '!':
case '>':
ptr += 2;
break;
case 'R':
if (ptr[3] != ')') {
*errorptr = ERR29;
goto PCRE_ERROR_RETURN;
}
ptr += 3;
length += 1;
break;
case '<':
if (ptr[3] == '=' || ptr[3] == '!') {
ptr += 3;
branch_newextra = 3;
length += 3;
break;
}
*errorptr = ERR24;
goto PCRE_ERROR_RETURN;
case '(':
if ((compile_block.
ctypes[ptr[3]] & ctype_digit) != 0) {
ptr += 4;
length += 3;
while ((compile_block.
ctypes[*ptr] & ctype_digit) != 0)
ptr++;
if (*ptr != ')') {
*errorptr = ERR26;
goto PCRE_ERROR_RETURN;
}
}
else {
ptr++;
if (ptr[2] != '?' ||
(ptr[3] != '=' && ptr[3] != '!'
&& ptr[3] != '<')) {
ptr += 2;
*errorptr = ERR28;
goto PCRE_ERROR_RETURN;
}
}
break;
default:
set = unset = 0;
optset = &set;
ptr += 2;
for (;; ptr++) {
c = *ptr;
switch (c) {
case 'i':
*optset |= PCRE_CASELESS;
continue;
case 'm':
*optset |= PCRE_MULTILINE;
continue;
case 's':
*optset |= PCRE_DOTALL;
continue;
case 'x':
*optset |= PCRE_EXTENDED;
continue;
case 'X':
*optset |= PCRE_EXTRA;
continue;
case 'U':
*optset |= PCRE_UNGREEDY;
continue;
case '-':
optset = &unset;
continue;
case ')':
if (brastackptr == 0) {
options =
(options | set) & (~unset);
set = unset = 0;
}
case ':':
if (((set | unset) & PCRE_IMS) != 0) {
length += 4;
branch_newextra = 2;
if (((set | unset) &
PCRE_CASELESS) != 0)
options |= PCRE_ICHANGED;
}
goto END_OPTIONS;
default:
*errorptr = ERR12;
goto PCRE_ERROR_RETURN;
}
}
END_OPTIONS:
if (c == ')') {
if (branch_newextra == 2
&& (branch_extra == 0
|| branch_extra == 3))
branch_extra += branch_newextra;
continue;
}
}
}
else {
bracount++;
if (bracount > EXTRACT_BASIC_MAX)
bracket_length += 3;
}
if (brastackptr >= sizeof (brastack) / sizeof (int)) {
*errorptr = ERR19;
goto PCRE_ERROR_RETURN;
}
bralenstack[brastackptr] = branch_extra;
branch_extra = branch_newextra;
brastack[brastackptr++] = length;
length += bracket_length;
continue;
case ')':
length += 3;
{
int minval = 1;
int maxval = 1;
int duplength;
if (brastackptr > 0) {
duplength = length - brastack[--brastackptr];
branch_extra = bralenstack[brastackptr];
}
else
duplength = 0;
if ((c = ptr[1]) == '{'
&& is_counted_repeat(ptr + 2, &compile_block)) {
ptr =
read_repeat_counts(ptr + 2, &minval, &maxval,
errorptr, &compile_block);
if (*errorptr != NULL)
goto PCRE_ERROR_RETURN;
}
else if (c == '*') {
minval = 0;
maxval = -1;
ptr++;
}
else if (c == '+') {
maxval = -1;
ptr++;
}
else if (c == '?') {
minval = 0;
ptr++;
}
if (minval == 0) {
length++;
if (maxval > 0)
length += (maxval - 1) * (duplength + 7);
}
else {
length += (minval - 1) * duplength;
if (maxval > minval)
length += (maxval - minval) * (duplength + 7) - 6;
}
}
continue;
NORMAL_CHAR:
default:
length += 2;
runlength = 0;
do {
if ((options & PCRE_EXTENDED) != 0) {
if ((compile_block.ctypes[c] & ctype_space) != 0)
continue;
if (c == '#') {
while ((c = *(++ptr)) != 0 && c != '\n') ;
continue;
}
}
if (c == '\\') {
const uschar *saveptr = ptr;
c = check_escape(&ptr, errorptr, bracount, options,
FALSE, &compile_block);
if (*errorptr != NULL)
goto PCRE_ERROR_RETURN;
if (c < 0) {
ptr = saveptr;
break;
}
#ifdef L2_UT_PCRE_SUPPORT_UTF8
if (c > 127 && (options & PCRE_UTF8) != 0) {
int i;
for (i = 0;
i < sizeof (utf8_table1) / sizeof (int); i++)
if (c <= utf8_table1[i])
break;
runlength += i;
}
#endif
}
runlength++;
}
while (runlength < MAXLIT &&
(compile_block.ctypes[c = *(++ptr)] & ctype_meta) ==
0);
ptr--;
length += runlength;
continue;
}
}
length += 4;
if (length > 65539) {
*errorptr = ERR20;
return NULL;
}
size = length + offsetof(pcre_st, code[0]);
re = (pcre_st *) (pcre_malloc) (size);
if (re == NULL) {
*errorptr = ERR21;
return NULL;
}
re->magic_number = MAGIC_NUMBER;
re->size = size;
re->options = options;
re->tables = tables;
ptr = (const uschar *)pattern;
code = re->code;
*code = OP_BRA;
bracount = 0;
(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE,
0, &reqchar, &countlits, &compile_block);
re->top_bracket = bracount;
re->top_backref = top_backref;
if (*errorptr == NULL && *ptr != 0)
*errorptr = ERR22;
*code++ = OP_END;
if (code - re->code > length)
*errorptr = ERR23;
if (top_backref > re->top_bracket)
*errorptr = ERR15;
if (*errorptr != NULL) {
(pcre_free) (re);
PCRE_ERROR_RETURN:
*erroroffset = ptr - (const uschar *)pattern;
return NULL;
}
if ((options & PCRE_ANCHORED) == 0) {
int temp_options = options;
if (is_anchored(re->code, &temp_options))
re->options |= PCRE_ANCHORED;
else {
int ch = find_firstchar(re->code, &temp_options);
if (ch >= 0) {
re->first_char = ch;
re->options |= PCRE_FIRSTSET;
}
else if (is_startline(re->code))
re->options |= PCRE_STARTLINE;
}
}
if (reqchar >= 0 && (countlits > 1 || (re->options & PCRE_FIRSTSET) == 0)) {
re->req_char = reqchar;
re->options |= PCRE_REQCHSET;
}
return (pcre *) re;
}
/*
 * Compare a previously captured substring with the subject at the
 * current position.
 *
 * offset  index into md->offset_vector of the capture's start offset
 * eptr    current position in the subject
 * length  number of characters to compare
 * md      match data (subject bounds, offset vector, lower-casing table)
 * ims     current option bits; PCRE_CASELESS selects comparison through
 *         md->lcc
 *
 * Returns TRUE if "length" characters match, FALSE if they differ or if
 * fewer than "length" characters remain in the subject.
 */
static BOOL
match_ref(int offset, register const uschar * eptr, int length,
          match_data * md, unsigned long int ims)
{
    const uschar *captured = md->start_subject + md->offset_vector[offset];
    const BOOL fold_case = (ims & PCRE_CASELESS) != 0;
    int i;

    if (length > md->end_subject - eptr)
        return FALSE;

    for (i = 0; i < length; i++) {
        if (fold_case) {
            if (md->lcc[captured[i]] != md->lcc[eptr[i]])
                return FALSE;
        }
        else if (captured[i] != eptr[i]) {
            return FALSE;
        }
    }
    return TRUE;
}
/*
 * Recursive backtracking matcher: try to match the compiled code starting
 * at ecode against the subject starting at eptr.
 *
 * eptr        current position in the subject
 * ecode       current position in the compiled pattern
 * offset_top  current high-water mark in md->offset_vector
 * md          match data block (subject bounds, offset vector, flags,
 *             character tables)
 * ims         current PCRE_CASELESS / PCRE_MULTILINE / PCRE_DOTALL bits
 * eptrb       chain of subject positions saved at the start of each
 *             enclosing group
 * flags       match_isgroup when ecode starts a group (a new eptrb entry
 *             is pushed), match_condassert when evaluating the assertion
 *             of a conditional group
 *
 * Returns TRUE when OP_END (or the end of an assertion / once-only group)
 * is reached, in which case md->end_match_ptr and md->end_offset_top are
 * set; FALSE otherwise. An unrecognised opcode sets md->errorcode to
 * PCRE_ERROR_UNKNOWN_NODE.
 *
 * Changes relative to the previous version of this function:
 *  - OP_RECURSE: the on-stack save area held 15 ints but is indexed
 *    1..15 on the allocation-failure fallback; it now holds 16.
 *  - UTF-8 greedy '.' with PCRE_DOTALL and a bounded maximum no longer
 *    advances past md->end_subject.
 */
static BOOL
match(register const uschar * eptr, register const uschar * ecode,
int offset_top, match_data * md, unsigned long int ims,
eptrblock * eptrb, int flags)
{
unsigned long int original_ims = ims;
eptrblock newptrb;
/* At the start of a group remember where in the subject it began; the
closing item uses this to detect an iteration that matched nothing. */
if ((flags & match_isgroup) != 0) {
newptrb.prev = eptrb;
newptrb.saved_eptr = eptr;
eptrb = &newptrb;
}
for (;;) {
int op = (int)*ecode;
int min, max, ctype;
register int i;
register int c;
BOOL minimize = FALSE;
/* Opcodes above OP_BRA are capturing brackets; the group number is
encoded in the opcode, or in the following OP_BRANUMBER operand when it
exceeds EXTRACT_BASIC_MAX. */
if (op > OP_BRA) {
int offset;
int number = op - OP_BRA;
if (number > EXTRACT_BASIC_MAX)
number = (ecode[4] << 8) | ecode[5];
offset = number << 1;
if (offset < md->offset_max) {
/* Save the slots this group may overwrite, record the tentative start
at the top end of the vector, and try each alternative in turn. */
int save_offset1 = md->offset_vector[offset];
int save_offset2 = md->offset_vector[offset + 1];
int save_offset3 = md->offset_vector[md->offset_end - number];
md->offset_vector[md->offset_end - number] =
eptr - md->start_subject;
do {
if (match
(eptr, ecode + 3, offset_top, md, ims, eptrb,
match_isgroup))
return TRUE;
ecode += (ecode[1] << 8) + ecode[2];
}
while (*ecode == OP_ALT);
/* Every alternative failed: restore the saved slots. */
md->offset_vector[offset] = save_offset1;
md->offset_vector[offset + 1] = save_offset2;
md->offset_vector[md->offset_end - number] = save_offset3;
return FALSE;
}
else
/* No room to record this capture: treat it as a plain bracket. */
op = OP_BRA;
}
switch (op) {
/* Non-capturing bracket: try each alternative. */
case OP_BRA:
do {
if (match
(eptr, ecode + 3, offset_top, md, ims, eptrb,
match_isgroup))
return TRUE;
ecode += (ecode[1] << 8) + ecode[2];
}
while (*ecode == OP_ALT);
return FALSE;
/* Conditional group. The condition is either a reference to a capture
(taken as true when that capture is set) or an assertion. */
case OP_COND:
if (ecode[3] == OP_CREF) {
int offset = (ecode[4] << 9) | (ecode[5] << 1);
return match(eptr,
ecode +
((offset < offset_top
&& md->offset_vector[offset] >=
0) ? 6 : 3 + (ecode[1] << 8) + ecode[2]),
offset_top, md, ims, eptrb, match_isgroup);
}
else {
/* Assertion condition: on success skip over the assertion's own
alternatives; on failure move to the second branch of the group. */
if (match(eptr, ecode + 3, offset_top, md, ims, NULL,
match_condassert | match_isgroup)) {
ecode += 3 + (ecode[4] << 8) + ecode[5];
while (*ecode == OP_ALT)
ecode += (ecode[1] << 8) + ecode[2];
}
else
ecode += (ecode[1] << 8) + ecode[2];
return match(eptr, ecode + 3, offset_top, md, ims, eptrb,
match_isgroup);
}
/* These carry a 2-byte operand and match nothing by themselves. */
case OP_CREF:
case OP_BRANUMBER:
ecode += 3;
break;
/* End of pattern: succeed unless an empty match is disallowed. */
case OP_END:
if (md->notempty && eptr == md->start_match)
return FALSE;
md->end_match_ptr = eptr;
md->end_offset_top = offset_top;
return TRUE;
/* Change of the ims option bits. */
case OP_OPT:
ims = ecode[1];
ecode += 2;
break;
/* Positive assertion (lookahead or lookbehind): some alternative must
match. eptr itself is not advanced. */
case OP_ASSERT:
case OP_ASSERTBACK:
do {
if (match
(eptr, ecode + 3, offset_top, md, ims, NULL,
match_isgroup))
break;
ecode += (ecode[1] << 8) + ecode[2];
}
while (*ecode == OP_ALT);
/* Reaching OP_KET means no alternative matched. */
if (*ecode == OP_KET)
return FALSE;
if ((flags & match_condassert) != 0)
return TRUE;
/* Skip the remaining alternatives and the closing item. */
do
ecode += (ecode[1] << 8) + ecode[2];
while (*ecode == OP_ALT);
ecode += 3;
offset_top = md->end_offset_top;
continue;
/* Negative assertion: fails as soon as any alternative matches. */
case OP_ASSERT_NOT:
case OP_ASSERTBACK_NOT:
do {
if (match
(eptr, ecode + 3, offset_top, md, ims, NULL,
match_isgroup))
return FALSE;
ecode += (ecode[1] << 8) + ecode[2];
}
while (*ecode == OP_ALT);
if ((flags & match_condassert) != 0)
return TRUE;
ecode += 3;
continue;
/* Move back in the subject by the count stored in the 2-byte operand;
fail if that goes before the start of the subject. */
case OP_REVERSE:
#ifdef L2_UT_PCRE_SUPPORT_UTF8
c = (ecode[1] << 8) + ecode[2];
for (i = 0; i < c; i++) {
eptr--;
BACKCHAR(eptr)
}
#else
eptr -= (ecode[1] << 8) + ecode[2];
#endif
if (eptr < md->start_subject)
return FALSE;
ecode += 3;
break;
/* Recursion into the whole pattern. The tentative capture starts kept
at the top end of the offset vector are saved around the recursive
call and restored afterwards. */
case OP_RECURSE:
{
BOOL rc;
int *save;
/* save[] is indexed 1..c and c can be 15 (allocation-failure fallback
below), so 16 elements are required. */
int stacksave[16];
c = md->offset_max;
if (c < 16)
save = stacksave;
else {
save = (int *)(pcre_malloc) ((c + 1) * sizeof (int));
if (save == NULL) {
save = stacksave;
c = 15;
}
}
for (i = 1; i <= c; i++)
save[i] = md->offset_vector[md->offset_end - i];
rc = match(eptr, md->start_pattern, offset_top, md, ims,
eptrb, match_isgroup);
for (i = 1; i <= c; i++)
md->offset_vector[md->offset_end - i] = save[i];
if (save != stacksave)
(pcre_free) (save);
if (!rc)
return FALSE;
offset_top = md->end_offset_top;
eptr = md->end_match_ptr;
ecode++;
}
break;
/* Once-only group: take the first alternative that matches and carry
on from where it ended, without trying other ways through the group. */
case OP_ONCE:
{
const uschar *prev = ecode;
const uschar *saved_eptr = eptr;
do {
if (match
(eptr, ecode + 3, offset_top, md, ims, eptrb,
match_isgroup))
break;
ecode += (ecode[1] << 8) + ecode[2];
}
while (*ecode == OP_ALT);
/* Neither OP_ONCE nor OP_ALT here means every alternative failed. */
if (*ecode != OP_ONCE && *ecode != OP_ALT)
return FALSE;
do
ecode += (ecode[1] << 8) + ecode[2];
while (*ecode == OP_ALT);
offset_top = md->end_offset_top;
eptr = md->end_match_ptr;
/* Not repeated, or the group matched an empty string: continue after
the closing item. */
if (*ecode == OP_KET || eptr == saved_eptr) {
ecode += 3;
break;
}
/* Repeated group: pick up an option reset that follows the closing
item, then try the continuation and another iteration in the order
required by the minimizing / maximizing closing item. */
if (ecode[3] == OP_OPT) {
ims = (ims & ~PCRE_IMS) | ecode[4];
}
if (*ecode == OP_KETRMIN) {
if (match
(eptr, ecode + 3, offset_top, md, ims, eptrb, 0)
|| match(eptr, prev, offset_top, md, ims, eptrb,
match_isgroup))
return TRUE;
}
else {
if (match
(eptr, prev, offset_top, md, ims, eptrb,
match_isgroup)
|| match(eptr, ecode + 3, offset_top, md, ims,
eptrb, 0))
return TRUE;
}
}
return FALSE;
/* Reached the end of an alternative: skip the remaining ones. */
case OP_ALT:
do
ecode += (ecode[1] << 8) + ecode[2];
while (*ecode == OP_ALT);
break;
/* Optional group, greedy: try the group first, then skip it. */
case OP_BRAZERO:
{
const uschar *next = ecode + 1;
if (match
(eptr, next, offset_top, md, ims, eptrb,
match_isgroup))
return TRUE;
do
next += (next[1] << 8) + next[2];
while (*next == OP_ALT);
ecode = next + 3;
}
break;
/* Optional group, minimizing: try skipping it first, then enter it. */
case OP_BRAMINZERO:
{
const uschar *next = ecode + 1;
do
next += (next[1] << 8) + next[2];
while (*next == OP_ALT);
if (match
(eptr, next + 3, offset_top, md, ims, eptrb,
match_isgroup))
return TRUE;
ecode++;
}
break;
/* End of a group. "prev" is the item that opened it. */
case OP_KET:
case OP_KETRMIN:
case OP_KETRMAX:
{
const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
const uschar *saved_eptr = eptrb->saved_eptr;
eptrb = eptrb->prev;
/* Assertions and once-only groups end the recursive call here. */
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT
|| *prev == OP_ONCE) {
md->end_match_ptr = eptr;
md->end_offset_top = offset_top;
return TRUE;
}
/* For a capturing group, record the captured range, or note an
overflow when the offset vector has no room for it. */
if (*prev != OP_COND) {
int offset;
int number = *prev - OP_BRA;
if (number > EXTRACT_BASIC_MAX)
number = (prev[4] << 8) | prev[5];
offset = number << 1;
if (number > 0) {
if (offset >= md->offset_max)
md->offset_overflow = TRUE;
else {
md->offset_vector[offset] =
md->offset_vector[md->offset_end -
number];
md->offset_vector[offset + 1] =
eptr - md->start_subject;
if (offset_top <= offset)
offset_top = offset + 2;
}
}
}
/* Option changes made inside the group end with it. */
ims = original_ims;
/* Not repeated, or this iteration matched an empty string: continue
after the group. */
if (*ecode == OP_KET || eptr == saved_eptr) {
ecode += 3;
break;
}
/* Repeated group: minimizing tries the continuation first, maximizing
tries another iteration first. */
if (*ecode == OP_KETRMIN) {
if (match
(eptr, ecode + 3, offset_top, md, ims, eptrb, 0)
|| match(eptr, prev, offset_top, md, ims, eptrb,
match_isgroup))
return TRUE;
}
else {
if (match
(eptr, prev, offset_top, md, ims, eptrb,
match_isgroup)
|| match(eptr, ecode + 3, offset_top, md, ims,
eptrb, 0))
return TRUE;
}
}
return FALSE;
/* '^': in multiline mode matches at the subject start or after a
newline; otherwise behaves like OP_SOD below. */
case OP_CIRC:
if (md->notbol && eptr == md->start_subject)
return FALSE;
if ((ims & PCRE_MULTILINE) != 0) {
if (eptr != md->start_subject && eptr[-1] != '\n')
return FALSE;
ecode++;
break;
}
/* fall through */
/* Start of subject only. */
case OP_SOD:
if (eptr != md->start_subject)
return FALSE;
ecode++;
break;
/* '$': in multiline mode matches before any newline or at the very
end; otherwise at the end or before a final newline, unless endonly is
set, in which case it behaves like OP_EOD below. */
case OP_DOLL:
if ((ims & PCRE_MULTILINE) != 0) {
if (eptr < md->end_subject) {
if (*eptr != '\n')
return FALSE;
}
else {
if (md->noteol)
return FALSE;
}
ecode++;
break;
}
else {
if (md->noteol)
return FALSE;
if (!md->endonly) {
if (eptr < md->end_subject - 1 ||
(eptr == md->end_subject - 1 && *eptr != '\n'))
return FALSE;
ecode++;
break;
}
}
/* fall through */
/* Very end of subject only. */
case OP_EOD:
if (eptr < md->end_subject)
return FALSE;
ecode++;
break;
/* End of subject, or just before a newline that ends the subject. */
case OP_EODN:
if (eptr < md->end_subject - 1 ||
(eptr == md->end_subject - 1 && *eptr != '\n'))
return FALSE;
ecode++;
break;
/* Word boundary tests compare the "word" type of the characters on
either side of the current position. */
case OP_NOT_WORD_BOUNDARY:
case OP_WORD_BOUNDARY:
{
BOOL prev_is_word = (eptr != md->start_subject) &&
((md->ctypes[eptr[-1]] & ctype_word) != 0);
BOOL cur_is_word = (eptr < md->end_subject) &&
((md->ctypes[*eptr] & ctype_word) != 0);
if ((*ecode++ == OP_WORD_BOUNDARY) ?
cur_is_word == prev_is_word : cur_is_word !=
prev_is_word)
return FALSE;
}
break;
/* Any character; newline only when PCRE_DOTALL is set. In UTF-8 mode
the following continuation bytes (10xxxxxx) are skipped as well. */
case OP_ANY:
if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject
&& *eptr == '\n')
return FALSE;
if (eptr++ >= md->end_subject)
return FALSE;
#ifdef L2_UT_PCRE_SUPPORT_UTF8
if (md->utf8)
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80)
eptr++;
#endif
ecode++;
break;
/* Single character-type tests against the ctypes table. */
case OP_NOT_DIGIT:
if (eptr >= md->end_subject ||
(md->ctypes[*eptr++] & ctype_digit) != 0)
return FALSE;
ecode++;
break;
case OP_DIGIT:
if (eptr >= md->end_subject ||
(md->ctypes[*eptr++] & ctype_digit) == 0)
return FALSE;
ecode++;
break;
case OP_NOT_WHITESPACE:
if (eptr >= md->end_subject ||
(md->ctypes[*eptr++] & ctype_space) != 0)
return FALSE;
ecode++;
break;
case OP_WHITESPACE:
if (eptr >= md->end_subject ||
(md->ctypes[*eptr++] & ctype_space) == 0)
return FALSE;
ecode++;
break;
case OP_NOT_WORDCHAR:
if (eptr >= md->end_subject ||
(md->ctypes[*eptr++] & ctype_word) != 0)
return FALSE;
ecode++;
break;
case OP_WORDCHAR:
if (eptr >= md->end_subject ||
(md->ctypes[*eptr++] & ctype_word) == 0)
return FALSE;
ecode++;
break;
/* Back reference, optionally followed by a repeat item. */
case OP_REF:
{
int length;
int offset = (ecode[1] << 9) | (ecode[2] << 1);
ecode += 3;
/* An unset capture gets a length one greater than what is left of the
subject, so that match_ref always fails for it. */
length = (offset >= offset_top
|| md->offset_vector[offset] <
0) ? md->end_subject - eptr +
1 : md->offset_vector[offset + 1] -
md->offset_vector[offset];
switch (*ecode) {
/* min/max come from the rep_min/rep_max tables; a max of 0 stands for
"no limit" and is replaced by INT_MAX. */
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRPLUS:
case OP_CRMINPLUS:
case OP_CRQUERY:
case OP_CRMINQUERY:
c = *ecode++ - OP_CRSTAR;
minimize = (c & 1) != 0;
min = rep_min[c];
max = rep_max[c];
if (max == 0)
max = INT_MAX;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
minimize = (*ecode == OP_CRMINRANGE);
min = (ecode[1] << 8) + ecode[2];
max = (ecode[3] << 8) + ecode[4];
if (max == 0)
max = INT_MAX;
ecode += 5;
break;
/* No repeat: match the reference once. */
default:
if (!match_ref(offset, eptr, length, md, ims))
return FALSE;
eptr += length;
continue;
}
/* Repeating an empty string cannot make progress. */
if (length == 0)
continue;
for (i = 1; i <= min; i++) {
if (!match_ref(offset, eptr, length, md, ims))
return FALSE;
eptr += length;
}
if (min == max)
continue;
if (minimize) {
for (i = min;; i++) {
if (match
(eptr, ecode, offset_top, md, ims, eptrb, 0))
return TRUE;
if (i >= max
|| !match_ref(offset, eptr, length, md, ims))
return FALSE;
eptr += length;
}
}
else {
/* Greedy: take as many copies as possible, then back off one copy at
a time. */
const uschar *pp = eptr;
for (i = min; i < max; i++) {
if (!match_ref(offset, eptr, length, md, ims))
break;
eptr += length;
}
while (eptr >= pp) {
if (match
(eptr, ecode, offset_top, md, ims, eptrb, 0))
return TRUE;
eptr -= length;
}
return FALSE;
}
}
/* Character class: a 32-byte bit map follows the opcode, optionally
followed by a repeat item. */
case OP_CLASS:
{
const uschar *data = ecode + 1;
ecode += 33;
switch (*ecode) {
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRPLUS:
case OP_CRMINPLUS:
case OP_CRQUERY:
case OP_CRMINQUERY:
c = *ecode++ - OP_CRSTAR;
minimize = (c & 1) != 0;
min = rep_min[c];
max = rep_max[c];
if (max == 0)
max = INT_MAX;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
minimize = (*ecode == OP_CRMINRANGE);
min = (ecode[1] << 8) + ecode[2];
max = (ecode[3] << 8) + ecode[4];
if (max == 0)
max = INT_MAX;
ecode += 5;
break;
default:
min = max = 1;
break;
}
/* Mandatory part. */
for (i = 1; i <= min; i++) {
if (eptr >= md->end_subject)
return FALSE;
GETCHARINC(c, eptr)
#ifdef L2_UT_PCRE_SUPPORT_UTF8
if (c > 255)
return FALSE;
#endif
if ((data[c / 8] & (1 << (c & 7))) != 0)
continue;
return FALSE;
}
if (min == max)
continue;
if (minimize) {
for (i = min;; i++) {
if (match
(eptr, ecode, offset_top, md, ims, eptrb, 0))
return TRUE;
if (i >= max || eptr >= md->end_subject)
return FALSE;
GETCHARINC(c, eptr)
#ifdef L2_UT_PCRE_SUPPORT_UTF8
if (c > 255)
return FALSE;
#endif
if ((data[c / 8] & (1 << (c & 7))) != 0)
continue;
return FALSE;
}
}
else {
/* Greedy: consume while characters are in the class, then back off. */
const uschar *pp = eptr;
int len = 1;
for (i = min; i < max; i++) {
if (eptr >= md->end_subject)
break;
GETCHARLEN(c, eptr, len)
#ifdef L2_UT_PCRE_SUPPORT_UTF8
if (c > 255)
break;
#endif
if ((data[c / 8] & (1 << (c & 7))) == 0)
break;
eptr += len;
}
while (eptr >= pp) {
if (match
(eptr--, ecode, offset_top, md, ims, eptrb,
0))
return TRUE;
#ifdef L2_UT_PCRE_SUPPORT_UTF8
BACKCHAR(eptr)
#endif
}
return FALSE;
}
}
/* Literal string: a length byte followed by the characters. */
case OP_CHARS:
{
register int length = ecode[1];
ecode += 2;
if (length > md->end_subject - eptr)
return FALSE;
if ((ims & PCRE_CASELESS) != 0) {
while (length-- > 0)
if (md->lcc[*ecode++] != md->lcc[*eptr++])
return FALSE;
}
else {
while (length-- > 0)
if (*ecode++ != *eptr++)
return FALSE;
}
}
break;
/* Repeated single character. The different opcodes only set up
min/max/minimize and then share the code at REPEATCHAR. */
case OP_EXACT:
min = max = (ecode[1] << 8) + ecode[2];
ecode += 3;
goto REPEATCHAR;
case OP_UPTO:
case OP_MINUPTO:
min = 0;
max = (ecode[1] << 8) + ecode[2];
minimize = *ecode == OP_MINUPTO;
ecode += 3;
goto REPEATCHAR;
case OP_STAR:
case OP_MINSTAR:
case OP_PLUS:
case OP_MINPLUS:
case OP_QUERY:
case OP_MINQUERY:
c = *ecode++ - OP_STAR;
minimize = (c & 1) != 0;
min = rep_min[c];
max = rep_max[c];
if (max == 0)
max = INT_MAX;
REPEATCHAR:
/* This check guarantees the mandatory loops below stay inside the
subject. */
if (min > md->end_subject - eptr)
return FALSE;
c = *ecode++;
if ((ims & PCRE_CASELESS) != 0) {
c = md->lcc[c];
for (i = 1; i <= min; i++)
if (c != md->lcc[*eptr++])
return FALSE;
if (min == max)
continue;
if (minimize) {
for (i = min;; i++) {
if (match
(eptr, ecode, offset_top, md, ims, eptrb, 0))
return TRUE;
if (i >= max || eptr >= md->end_subject ||
c != md->lcc[*eptr++])
return FALSE;
}
}
else {
const uschar *pp = eptr;
for (i = min; i < max; i++) {
if (eptr >= md->end_subject
|| c != md->lcc[*eptr])
break;
eptr++;
}
while (eptr >= pp)
if (match
(eptr--, ecode, offset_top, md, ims, eptrb,
0))
return TRUE;
return FALSE;
}
}
else {
for (i = 1; i <= min; i++)
if (c != *eptr++)
return FALSE;
if (min == max)
continue;
if (minimize) {
for (i = min;; i++) {
if (match
(eptr, ecode, offset_top, md, ims, eptrb, 0))
return TRUE;
if (i >= max || eptr >= md->end_subject
|| c != *eptr++)
return FALSE;
}
}
else {
const uschar *pp = eptr;
for (i = min; i < max; i++) {
if (eptr >= md->end_subject || c != *eptr)
break;
eptr++;
}
while (eptr >= pp)
if (match
(eptr--, ecode, offset_top, md, ims, eptrb,
0))
return TRUE;
return FALSE;
}
}
/* Single character that must NOT be the one given. */
case OP_NOT:
if (eptr >= md->end_subject)
return FALSE;
ecode++;
if ((ims & PCRE_CASELESS) != 0) {
if (md->lcc[*ecode++] == md->lcc[*eptr++])
return FALSE;
}
else {
if (*ecode++ == *eptr++)
return FALSE;
}
break;
/* Repeated negated character; same structure as REPEATCHAR with the
comparisons inverted. */
case OP_NOTEXACT:
min = max = (ecode[1] << 8) + ecode[2];
ecode += 3;
goto REPEATNOTCHAR;
case OP_NOTUPTO:
case OP_NOTMINUPTO:
min = 0;
max = (ecode[1] << 8) + ecode[2];
minimize = *ecode == OP_NOTMINUPTO;
ecode += 3;
goto REPEATNOTCHAR;
case OP_NOTSTAR:
case OP_NOTMINSTAR:
case OP_NOTPLUS:
case OP_NOTMINPLUS:
case OP_NOTQUERY:
case OP_NOTMINQUERY:
c = *ecode++ - OP_NOTSTAR;
minimize = (c & 1) != 0;
min = rep_min[c];
max = rep_max[c];
if (max == 0)
max = INT_MAX;
REPEATNOTCHAR:
if (min > md->end_subject - eptr)
return FALSE;
c = *ecode++;
if ((ims & PCRE_CASELESS) != 0) {
c = md->lcc[c];
for (i = 1; i <= min; i++)
if (c == md->lcc[*eptr++])
return FALSE;
if (min == max)
continue;
if (minimize) {
for (i = min;; i++) {
if (match
(eptr, ecode, offset_top, md, ims, eptrb, 0))
return TRUE;
if (i >= max || eptr >= md->end_subject ||
c == md->lcc[*eptr++])
return FALSE;
}
}
else {
const uschar *pp = eptr;
for (i = min; i < max; i++) {
if (eptr >= md->end_subject
|| c == md->lcc[*eptr])
break;
eptr++;
}
while (eptr >= pp)
if (match
(eptr--, ecode, offset_top, md, ims, eptrb,
0))
return TRUE;
return FALSE;
}
}
else {
for (i = 1; i <= min; i++)
if (c == *eptr++)
return FALSE;
if (min == max)
continue;
if (minimize) {
for (i = min;; i++) {
if (match
(eptr, ecode, offset_top, md, ims, eptrb, 0))
return TRUE;
if (i >= max || eptr >= md->end_subject
|| c == *eptr++)
return FALSE;
}
}
else {
const uschar *pp = eptr;
for (i = min; i < max; i++) {
if (eptr >= md->end_subject || c == *eptr)
break;
eptr++;
}
while (eptr >= pp)
if (match
(eptr--, ecode, offset_top, md, ims, eptrb,
0))
return TRUE;
return FALSE;
}
}
/* Repeated character type (OP_ANY, digit, space, word and their
negations); shared code at REPEATTYPE. */
case OP_TYPEEXACT:
min = max = (ecode[1] << 8) + ecode[2];
minimize = TRUE;
ecode += 3;
goto REPEATTYPE;
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
min = 0;
max = (ecode[1] << 8) + ecode[2];
minimize = *ecode == OP_TYPEMINUPTO;
ecode += 3;
goto REPEATTYPE;
case OP_TYPESTAR:
case OP_TYPEMINSTAR:
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
c = *ecode++ - OP_TYPESTAR;
minimize = (c & 1) != 0;
min = rep_min[c];
max = rep_max[c];
if (max == 0)
max = INT_MAX;
REPEATTYPE:
ctype = *ecode++;
/* Guarantees at least min bytes remain for the mandatory part. */
if (min > md->end_subject - eptr)
return FALSE;
/* Mandatory part. */
if (min > 0)
switch (ctype) {
case OP_ANY:
#ifdef L2_UT_PCRE_SUPPORT_UTF8
if (md->utf8) {
for (i = 1; i <= min; i++) {
if (eptr >= md->end_subject ||
(*eptr++ == '\n'
&& (ims & PCRE_DOTALL) == 0))
return FALSE;
while (eptr < md->end_subject
&& (*eptr & 0xc0) == 0x80)
eptr++;
}
break;
}
#endif
if ((ims & PCRE_DOTALL) == 0) {
for (i = 1; i <= min; i++)
if (*eptr++ == '\n')
return FALSE;
}
else
eptr += min;
break;
case OP_NOT_DIGIT:
for (i = 1; i <= min; i++)
if ((md->ctypes[*eptr++] & ctype_digit) != 0)
return FALSE;
break;
case OP_DIGIT:
for (i = 1; i <= min; i++)
if ((md->ctypes[*eptr++] & ctype_digit) == 0)
return FALSE;
break;
case OP_NOT_WHITESPACE:
for (i = 1; i <= min; i++)
if ((md->ctypes[*eptr++] & ctype_space) != 0)
return FALSE;
break;
case OP_WHITESPACE:
for (i = 1; i <= min; i++)
if ((md->ctypes[*eptr++] & ctype_space) == 0)
return FALSE;
break;
case OP_NOT_WORDCHAR:
for (i = 1; i <= min; i++)
if ((md->ctypes[*eptr++] & ctype_word) != 0)
return FALSE;
break;
case OP_WORDCHAR:
for (i = 1; i <= min; i++)
if ((md->ctypes[*eptr++] & ctype_word) == 0)
return FALSE;
break;
}
if (min == max)
continue;
/* Minimizing: try the rest of the pattern before taking each further
character. */
if (minimize) {
for (i = min;; i++) {
if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
return TRUE;
if (i >= max || eptr >= md->end_subject)
return FALSE;
c = *eptr++;
switch (ctype) {
case OP_ANY:
if ((ims & PCRE_DOTALL) == 0 && c == '\n')
return FALSE;
#ifdef L2_UT_PCRE_SUPPORT_UTF8
if (md->utf8)
while (eptr < md->end_subject
&& (*eptr & 0xc0) == 0x80)
eptr++;
#endif
break;
case OP_NOT_DIGIT:
if ((md->ctypes[c] & ctype_digit) != 0)
return FALSE;
break;
case OP_DIGIT:
if ((md->ctypes[c] & ctype_digit) == 0)
return FALSE;
break;
case OP_NOT_WHITESPACE:
if ((md->ctypes[c] & ctype_space) != 0)
return FALSE;
break;
case OP_WHITESPACE:
if ((md->ctypes[c] & ctype_space) == 0)
return FALSE;
break;
case OP_NOT_WORDCHAR:
if ((md->ctypes[c] & ctype_word) != 0)
return FALSE;
break;
case OP_WORDCHAR:
if ((md->ctypes[c] & ctype_word) == 0)
return FALSE;
break;
}
}
}
/* Maximizing: consume as much as allowed, then back off one position
at a time, trying the rest of the pattern at each step. */
else {
const uschar *pp = eptr;
switch (ctype) {
case OP_ANY:
#ifdef L2_UT_PCRE_SUPPORT_UTF8
if (md->utf8 && max < INT_MAX) {
if ((ims & PCRE_DOTALL) == 0) {
for (i = min; i < max; i++) {
if (eptr >= md->end_subject
|| *eptr++ == '\n')
break;
while (eptr < md->end_subject
&& (*eptr & 0xc0) == 0x80)
eptr++;
}
}
else {
for (i = min; i < max; i++) {
/* Stop at the end of the subject; without this test eptr would be
advanced beyond md->end_subject. */
if (eptr >= md->end_subject)
break;
eptr++;
while (eptr < md->end_subject
&& (*eptr & 0xc0) == 0x80)
eptr++;
}
}
break;
}
#endif
if ((ims & PCRE_DOTALL) == 0) {
for (i = min; i < max; i++) {
if (eptr >= md->end_subject
|| *eptr == '\n')
break;
eptr++;
}
}
else {
/* With DOTALL every byte matches: jump ahead directly, clamped to what
is left of the subject. */
c = max - min;
if (c > md->end_subject - eptr)
c = md->end_subject - eptr;
eptr += c;
}
break;
case OP_NOT_DIGIT:
for (i = min; i < max; i++) {
if (eptr >= md->end_subject
|| (md->ctypes[*eptr] & ctype_digit) != 0)
break;
eptr++;
}
break;
case OP_DIGIT:
for (i = min; i < max; i++) {
if (eptr >= md->end_subject
|| (md->ctypes[*eptr] & ctype_digit) == 0)
break;
eptr++;
}
break;
case OP_NOT_WHITESPACE:
for (i = min; i < max; i++) {
if (eptr >= md->end_subject
|| (md->ctypes[*eptr] & ctype_space) != 0)
break;
eptr++;
}
break;
case OP_WHITESPACE:
for (i = min; i < max; i++) {
if (eptr >= md->end_subject
|| (md->ctypes[*eptr] & ctype_space) == 0)
break;
eptr++;
}
break;
case OP_NOT_WORDCHAR:
for (i = min; i < max; i++) {
if (eptr >= md->end_subject
|| (md->ctypes[*eptr] & ctype_word) != 0)
break;
eptr++;
}
break;
case OP_WORDCHAR:
for (i = min; i < max; i++) {
if (eptr >= md->end_subject
|| (md->ctypes[*eptr] & ctype_word) == 0)
break;
eptr++;
}
break;
}
while (eptr >= pp) {
if (match
(eptr--, ecode, offset_top, md, ims, eptrb, 0))
return TRUE;
#ifdef L2_UT_PCRE_SUPPORT_UTF8
if (md->utf8)
while (eptr > pp && (*eptr & 0xc0) == 0x80)
eptr--;
#endif
}
return FALSE;
}
/* Anything else is not a known opcode. */
default:
md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
return FALSE;
}
}
}
int
pcre_exec(const pcre * external_re, const pcre_extra * external_extra,
const char *subject, int length, int start_offset, int options,
int *offsets, int offsetcount)
{
int resetcount, ocount;
int first_char = -1;
int req_char = -1;
int req_char2 = -1;
unsigned long int ims = 0;
match_data match_block;
const uschar *start_bits = NULL;
const uschar *start_match = (const uschar *)subject + start_offset;
const uschar *end_subject;
const uschar *req_char_ptr = start_match - 1;
const pcre_st *re = (const pcre_st *)external_re;
const pcre_extra_st *extra = (const pcre_extra_st *)external_extra;
BOOL using_temporary_offsets = FALSE;
BOOL anchored;
BOOL startline;
if ((options & ~PUBLIC_EXEC_OPTIONS) != 0)
return PCRE_ERROR_BADOPTION;
if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
return PCRE_ERROR_NULL;
if (re->magic_number != MAGIC_NUMBER)
return PCRE_ERROR_BADMAGIC;
anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
startline = (re->options & PCRE_STARTLINE) != 0;
match_block.start_pattern = re->code;
match_block.start_subject = (const uschar *)subject;
match_block.end_subject = match_block.start_subject + length;
end_subject = match_block.end_subject;
match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
match_block.utf8 = (re->options & PCRE_UTF8) != 0;
match_block.notbol = (options & PCRE_NOTBOL) != 0;
match_block.noteol = (options & PCRE_NOTEOL) != 0;
match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
match_block.errorcode = PCRE_ERROR_NOMATCH;
match_block.lcc = re->tables + lcc_offset;
match_block.ctypes = re->tables + ctypes_offset;
ims = re->options & (PCRE_CASELESS | PCRE_MULTILINE | PCRE_DOTALL);
ocount = offsetcount - (offsetcount % 3);
if (re->top_backref > 0 && re->top_backref >= ocount / 3) {
ocount = re->top_backref * 3 + 3;
match_block.offset_vector =
(int *)(pcre_malloc) (ocount * sizeof (int));
if (match_block.offset_vector == NULL)
return PCRE_ERROR_NOMEMORY;
using_temporary_offsets = TRUE;
}
else
match_block.offset_vector = offsets;
match_block.offset_end = ocount;
match_block.offset_max = (2 * ocount) / 3;
match_block.offset_overflow = FALSE;
resetcount = 2 + re->top_bracket * 2;
if (resetcount > offsetcount)
resetcount = ocount;
if (match_block.offset_vector != NULL) {
register int *iptr = match_block.offset_vector + ocount;
register int *iend = iptr - resetcount / 2 + 1;
while (--iptr >= iend)
*iptr = -1;
}
if (!anchored) {
if ((re->options & PCRE_FIRSTSET) != 0) {
first_char = re->first_char;
if ((ims & PCRE_CASELESS) != 0)
first_char = match_block.lcc[first_char];
}
else if (!startline && extra != NULL &&
(extra->options & PCRE_STUDY_MAPPED) != 0)
start_bits = extra->start_bits;
}
if ((re->options & PCRE_REQCHSET) != 0) {
req_char = re->req_char;
req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0) ?
(re->tables + fcc_offset)[req_char] : req_char;
}
do {
int rc;
register int *iptr = match_block.offset_vector;
register int *iend = iptr + resetcount;
while (iptr < iend)
*iptr++ = -1;
if (first_char >= 0) {
if ((ims & PCRE_CASELESS) != 0)
while (start_match < end_subject &&
match_block.lcc[*start_match] != first_char)
start_match++;
else
while (start_match < end_subject
&& *start_match != first_char)
start_match++;
}
else if (startline) {
if (start_match > match_block.start_subject + start_offset) {
while (start_match < end_subject && start_match[-1] != '\n')
start_match++;
}
}
else if (start_bits != NULL) {
while (start_match < end_subject) {
register int c = *start_match;
if ((start_bits[c / 8] & (1 << (c & 7))) == 0)
start_match++;
else
break;
}
}
if (req_char >= 0) {
register const uschar *p =
start_match + ((first_char >= 0) ? 1 : 0);
if (p > req_char_ptr) {
if (req_char == req_char2) {
while (p < end_subject) {
if (*p++ == req_char) {
p--;
break;
}
}
}
else {
while (p < end_subject) {
register int pp = *p++;
if (pp == req_char || pp == req_char2) {
p--;
break;
}
}
}
if (p >= end_subject)
break;
req_char_ptr = p;
}
}
match_block.start_match = start_match;
if (!match
(start_match, re->code, 2, &match_block, ims, NULL,
match_isgroup))
continue;
if (using_temporary_offsets) {
if (offsetcount >= 4) {
memcpy(offsets + 2, match_block.offset_vector + 2,
(offsetcount - 2) * sizeof (int));
}
if (match_block.end_offset_top > offsetcount)
match_block.offset_overflow = TRUE;
(pcre_free) (match_block.offset_vector);
}
rc = match_block.offset_overflow ? 0 : match_block.end_offset_top / 2;
if (offsetcount < 2)
rc = 0;
else {
offsets[0] = start_match - match_block.start_subject;
offsets[1] =
match_block.end_match_ptr - match_block.start_subject;
}
return rc;
}
while (!anchored &&
match_block.errorcode == PCRE_ERROR_NOMATCH &&
start_match++ < end_subject);
if (using_temporary_offsets) {
(pcre_free) (match_block.offset_vector);
}
return match_block.errorcode;
}
/*
 * Record character c in the start-character bit map.
 *
 *   start_bits  bit map indexed by character value (bit c&7 of byte c/8)
 *   c           character value to record
 *   caseless    when TRUE, also record the other-case form of a letter
 *   cd          supplies the ctypes table (letter test) and the fcc table
 *               (case-flipped value) used for the caseless variant
 */
static void
set_bit(uschar * start_bits, int c, BOOL caseless, compile_data * cd)
{
    int flipped;

    start_bits[c / 8] |= (1 << (c & 7));

    /* Nothing more to do unless matching is caseless and c is a letter. */
    if (!caseless)
        return;
    if ((cd->ctypes[c] & ctype_letter) == 0)
        return;

    flipped = cd->fcc[c];
    start_bits[flipped / 8] |= (1 << (flipped & 7));
}
/*
 * Collect, for the bracketed group starting at `code`, the set of characters
 * that can begin a match, OR-ing them into the bit map `start_bits`.
 *
 * Every alternative of the group is examined in turn.  Within an alternative
 * the leading items are scanned: items that may match the empty string
 * contribute their characters and scanning continues with the next item;
 * the first item that must consume a character contributes its characters
 * and ends the scan of that alternative.
 *
 *   code        first byte of the group (its opcode); bytes 1-2 hold the
 *               two-byte offset to the next alternative / end of group
 *   start_bits  bit map to update (the caller in this file passes 32 bytes)
 *   caseless    TRUE when caseless matching is in force at this point; may be
 *               changed locally by an OP_OPT item
 *   cd          character tables (cbits, ctypes, fcc)
 *
 * Returns TRUE when a complete set was built, FALSE as soon as an item is met
 * whose possible first characters cannot be represented; the caller must then
 * discard the map.
 */
static BOOL
set_start_bits(const uschar * code, uschar * start_bits, BOOL caseless,
    compile_data * cd)
{
    register int c;
    /* NOTE(review): `dummy` is only ever written, never read.  It looks like
       a workaround for a compiler code-generation problem -- confirm before
       removing. */
    volatile int dummy;

    do {
        /* Skip the group's opcode and two-byte link. */
        const uschar *tcode = code + 3;
        BOOL try_next = TRUE;

        while (try_next) {
            /* Nested group (or positive lookahead): its start characters are
               ours too, and it ends the scan of this alternative. */
            if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT) {
                if (!set_start_bits(tcode, start_bits, caseless, cd))
                    return FALSE;
                try_next = FALSE;
            }
            else
                switch (*tcode) {
                /* Anything not handled below defeats the analysis. */
                default:
                    return FALSE;

                /* Three-byte item that consumes nothing: step over it. */
                case OP_BRANUMBER:
                    tcode += 3;
                    break;

                /* These assertions consume no characters: step over all of
                   their alternatives and the closing three-byte item. */
                case OP_ASSERT_NOT:
                case OP_ASSERTBACK:
                case OP_ASSERTBACK_NOT:
                    do
                        tcode += (tcode[1] << 8) + tcode[2];
                    while (*tcode == OP_ALT);
                    tcode += 3;
                    break;

                /* Option change: pick up the new caseless setting. */
                case OP_OPT:
                    caseless = (tcode[1] & PCRE_CASELESS) != 0;
                    tcode += 2;
                    break;

                /* Optional group: take its start characters, then carry on
                   with whatever follows the group. */
                case OP_BRAZERO:
                case OP_BRAMINZERO:
                    if (!set_start_bits(++tcode, start_bits, caseless, cd))
                        return FALSE;
                    dummy = 1;
                    do
                        tcode += (tcode[1] << 8) + tcode[2];
                    while (*tcode == OP_ALT);
                    tcode += 3;
                    break;

                /* Optional single character: record it and carry on. */
                case OP_STAR:
                case OP_MINSTAR:
                case OP_QUERY:
                case OP_MINQUERY:
                    set_bit(start_bits, tcode[1], caseless, cd);
                    tcode += 2;
                    break;

                /* As above, but the character sits after two extra bytes
                   (presumably the repeat count). */
                case OP_UPTO:
                case OP_MINUPTO:
                    set_bit(start_bits, tcode[3], caseless, cd);
                    tcode += 4;
                    break;

                /* Mandatory character(s).  The increments line tcode up so
                   that tcode[1] is the first character in every variant. */
                case OP_EXACT:
                    tcode++;
                    /* fall through */
                case OP_CHARS:
                    tcode++;
                    /* fall through */
                case OP_PLUS:
                case OP_MINPLUS:
                    set_bit(start_bits, tcode[1], caseless, cd);
                    try_next = FALSE;
                    break;

                /* Mandatory single character type: OR in the class bit map
                   (complemented for the negated types) and stop. */
                case OP_NOT_DIGIT:
                    for (c = 0; c < 32; c++)
                        start_bits[c] |= ~cd->cbits[c + cbit_digit];
                    try_next = FALSE;
                    break;

                case OP_DIGIT:
                    for (c = 0; c < 32; c++)
                        start_bits[c] |= cd->cbits[c + cbit_digit];
                    try_next = FALSE;
                    break;

                case OP_NOT_WHITESPACE:
                    for (c = 0; c < 32; c++)
                        start_bits[c] |= ~cd->cbits[c + cbit_space];
                    try_next = FALSE;
                    break;

                case OP_WHITESPACE:
                    for (c = 0; c < 32; c++)
                        start_bits[c] |= cd->cbits[c + cbit_space];
                    try_next = FALSE;
                    break;

                case OP_NOT_WORDCHAR:
                    for (c = 0; c < 32; c++)
                        start_bits[c] |= ~cd->cbits[c + cbit_word];
                    try_next = FALSE;
                    break;

                case OP_WORDCHAR:
                    for (c = 0; c < 32; c++)
                        start_bits[c] |= cd->cbits[c + cbit_word];
                    try_next = FALSE;
                    break;

                /* One-or-more / exact repeats of a type: step to the type
                   opcode itself so the next pass handles it as mandatory. */
                case OP_TYPEPLUS:
                case OP_TYPEMINPLUS:
                    tcode++;
                    break;

                case OP_TYPEEXACT:
                    tcode += 3;
                    break;

                /* Optional repeats of a type: OR in the class and carry on.
                   The "upto" forms carry two extra bytes before the type. */
                case OP_TYPEUPTO:
                case OP_TYPEMINUPTO:
                    tcode += 2;
                    /* fall through */
                case OP_TYPESTAR:
                case OP_TYPEMINSTAR:
                case OP_TYPEQUERY:
                case OP_TYPEMINQUERY:
                    switch (tcode[1]) {
                    /* A repeated type that is not one of the six classes
                       below cannot be expressed through the cbits maps.
                       Continuing without adding bits would leave the map
                       incomplete and let a studied match skip valid start
                       positions, so give up -- exactly what the outer
                       default does for the mandatory form of such a type. */
                    default:
                        return FALSE;

                    case OP_NOT_DIGIT:
                        for (c = 0; c < 32; c++)
                            start_bits[c] |= ~cd->cbits[c + cbit_digit];
                        break;

                    case OP_DIGIT:
                        for (c = 0; c < 32; c++)
                            start_bits[c] |= cd->cbits[c + cbit_digit];
                        break;

                    case OP_NOT_WHITESPACE:
                        for (c = 0; c < 32; c++)
                            start_bits[c] |= ~cd->cbits[c + cbit_space];
                        break;

                    case OP_WHITESPACE:
                        for (c = 0; c < 32; c++)
                            start_bits[c] |= cd->cbits[c + cbit_space];
                        break;

                    case OP_NOT_WORDCHAR:
                        for (c = 0; c < 32; c++)
                            start_bits[c] |= ~cd->cbits[c + cbit_word];
                        break;

                    case OP_WORDCHAR:
                        for (c = 0; c < 32; c++)
                            start_bits[c] |= cd->cbits[c + cbit_word];
                        break;
                    }
                    tcode += 2;
                    break;

                /* Character class: the opcode is followed by a 32-byte bit
                   map which is OR-ed in directly.  Whether scanning goes on
                   depends on the repeat item after the map. */
                case OP_CLASS:
                    {
                        tcode++;
                        for (c = 0; c < 32; c++)
                            start_bits[c] |= tcode[c];
                        tcode += 32;
                        switch (*tcode) {
                        /* Class may match nothing: carry on. */
                        case OP_CRSTAR:
                        case OP_CRMINSTAR:
                        case OP_CRQUERY:
                        case OP_CRMINQUERY:
                            tcode++;
                            break;

                        /* Range repeat: optional only when the first
                           two-byte value (presumably the minimum) is 0. */
                        case OP_CRRANGE:
                        case OP_CRMINRANGE:
                            if (((tcode[1] << 8) + tcode[2]) == 0)
                                tcode += 5;
                            else
                                try_next = FALSE;
                            break;

                        default:
                            try_next = FALSE;
                            break;
                        }
                    }
                    break;
                }
        }

        /* Advance to the next alternative of this group, if any. */
        code += (code[1] << 8) + code[2];
    }
    while (*code == OP_ALT);
    return TRUE;
}
pcre_extra *pcre_study(const pcre * external_re, int options,
const char **errorptr)
{
uschar start_bits[32];
pcre_extra_st *extra;
const pcre_st *re = (const pcre_st *)external_re;
compile_data compile_block;
*errorptr = NULL;
if (re == NULL || re->magic_number != MAGIC_NUMBER) {
*errorptr = "argument is not a compiled regular expression";
return NULL;
}
if ((options & ~PUBLIC_STUDY_OPTIONS) != 0) {
*errorptr = "unknown or incorrect option bit(s) set";
return NULL;
}
if ((re->options & (PCRE_ANCHORED | PCRE_FIRSTSET | PCRE_STARTLINE)) != 0)
return NULL;
compile_block.lcc = re->tables + lcc_offset;
compile_block.fcc = re->tables + fcc_offset;
compile_block.cbits = re->tables + cbits_offset;
compile_block.ctypes = re->tables + ctypes_offset;
memset(start_bits, 0, 32 * sizeof (uschar));
if (!set_start_bits
(re->code, start_bits, (re->options & PCRE_CASELESS) != 0,
&compile_block))
return NULL;
extra = (pcre_extra_st *) (pcre_malloc) (sizeof (pcre_extra_st));
if (extra == NULL) {
*errorptr = "failed to get memory";
return NULL;
}
extra->options = PCRE_STUDY_MAPPED;
memcpy(extra->start_bits, start_bits, sizeof (start_bits));
return (pcre_extra *) extra;
}
/*
 * Copy one captured substring into a caller-supplied buffer and
 * NUL-terminate it.
 *
 *   subject       the subject string the offsets refer to
 *   ovector       vector of start/end offset pairs
 *   stringcount   number of pairs available in ovector
 *   stringnumber  index of the wanted pair (0-based)
 *   buffer, size  destination and its capacity in bytes
 *
 * Returns the substring length (terminator not counted),
 * PCRE_ERROR_NOSUBSTRING when stringnumber is out of range, or
 * PCRE_ERROR_NOMEMORY when the buffer cannot hold the text plus terminator.
 */
int
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
    int stringnumber, char *buffer, int size)
{
    const int *pair;
    int length;

    if (stringnumber < 0 || stringnumber >= stringcount)
        return PCRE_ERROR_NOSUBSTRING;

    pair = ovector + 2 * stringnumber;
    length = pair[1] - pair[0];

    if (size < length + 1)
        return PCRE_ERROR_NOMEMORY;

    memcpy(buffer, subject + pair[0], length);
    buffer[length] = 0;
    return length;
}
/*
 * Build a NULL-terminated list holding copies of all captured substrings.
 *
 * One block is obtained from pcre_malloc: it starts with stringcount + 1
 * pointers, followed by the NUL-terminated substring texts.  The block
 * address is stored in *listptr.
 *
 *   subject      the subject string the offsets refer to
 *   ovector      vector of start/end offset pairs
 *   stringcount  number of pairs to copy
 *   listptr      receives the list
 *
 * Returns 0 on success or PCRE_ERROR_NOMEMORY when the allocation fails.
 */
int
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
    const char ***listptr)
{
    int total = sizeof (char *);        /* room for the terminating NULL */
    int limit = stringcount * 2;
    int k;
    char **slot;
    char *text;

    /* First pass: one pointer plus text and terminator per substring. */
    k = 0;
    while (k < limit) {
        total += sizeof (char *) + ovector[k + 1] - ovector[k] + 1;
        k += 2;
    }

    slot = (char **)(pcre_malloc) (total);
    if (slot == NULL)
        return PCRE_ERROR_NOMEMORY;
    *listptr = (const char **)slot;

    /* Text storage begins right after the pointer table. */
    text = (char *)(slot + stringcount + 1);

    /* Second pass: copy each substring and record where it went. */
    for (k = 0; k < limit; k += 2) {
        int start = ovector[k];
        int len = ovector[k + 1] - start;

        memcpy(text, subject + start, len);
        *slot++ = text;
        text[len] = 0;
        text += len + 1;
    }

    *slot = NULL;
    return 0;
}
/*
 * Hand a substring list back to pcre_free.  The list built by
 * pcre_get_substring_list() is one block, so a single call suffices.
 */
void pcre_free_substring_list(const char **pointer)
{
    void *block = (void *)pointer;

    (pcre_free) (block);
}
/*
 * Return a freshly allocated, NUL-terminated copy of one captured substring.
 *
 *   subject       the subject string the offsets refer to
 *   ovector       vector of start/end offset pairs
 *   stringcount   number of pairs available in ovector
 *   stringnumber  index of the wanted pair (0-based)
 *   stringptr     receives the copy (memory obtained from pcre_malloc)
 *
 * Returns the substring length (terminator not counted),
 * PCRE_ERROR_NOSUBSTRING when stringnumber is out of range, or
 * PCRE_ERROR_NOMEMORY when the allocation fails.
 */
int
pcre_get_substring(const char *subject, int *ovector, int stringcount,
    int stringnumber, const char **stringptr)
{
    const int *pair;
    int length;
    char *copy;

    if (stringnumber < 0 || stringnumber >= stringcount)
        return PCRE_ERROR_NOSUBSTRING;

    pair = ovector + 2 * stringnumber;
    length = pair[1] - pair[0];

    copy = (char *)(pcre_malloc) (length + 1);
    if (copy == NULL)
        return PCRE_ERROR_NOMEMORY;

    memcpy(copy, subject + pair[0], length);
    copy[length] = 0;
    *stringptr = copy;
    return length;
}
/*
 * Hand a substring obtained from pcre_get_substring() back to pcre_free.
 */
void pcre_free_substring(const char *pointer)
{
    void *block = (void *)pointer;

    (pcre_free) (block);
}
#endif
/*
 * Build the character tables from the <ctype.h> classification functions
 * as they behave at the time of the call.
 *
 * The block of tables_length bytes is laid out as:
 *   256 bytes   lower-casing table
 *   256 bytes   case-flipping table
 *   cbit_length bytes of class bit maps (digit, upper, lower, word, space,
 *               xdigit, graph, print, punct, cntrl)
 *   256 bytes   per-character type flags (ctype_* values)
 *
 * Memory comes from pcre_malloc, or from plain malloc when the file is
 * built as the stand-alone table generator (L2_UT_PCRE_TAB).
 * Returns the table block, or NULL when the allocation fails.
 */
const unsigned char *pcre_maketables(void)
{
    unsigned char *tables;
    unsigned char *out;
    int ch;

#ifndef L2_UT_PCRE_TAB
    tables = (unsigned char *)(pcre_malloc) (tables_length);
#else
    tables = (unsigned char *)malloc(tables_length);
#endif
    if (tables == NULL)
        return NULL;
    out = tables;

    /* Lower-casing table. */
    for (ch = 0; ch < 256; ch++)
        *out++ = tolower(ch);

    /* Case-flipping table. */
    for (ch = 0; ch < 256; ch++) {
        if (islower(ch))
            *out++ = toupper(ch);
        else
            *out++ = tolower(ch);
    }

    /* Class bit maps: bit (ch & 7) of byte (ch / 8) within each map. */
    memset(out, 0, cbit_length);
    for (ch = 0; ch < 256; ch++) {
        const int slot = ch / 8;
        const int mask = 1 << (ch & 7);

        if (isdigit(ch)) {
            out[cbit_digit + slot] |= mask;
            out[cbit_word + slot] |= mask;
        }
        if (isupper(ch)) {
            out[cbit_upper + slot] |= mask;
            out[cbit_word + slot] |= mask;
        }
        if (islower(ch)) {
            out[cbit_lower + slot] |= mask;
            out[cbit_word + slot] |= mask;
        }
        if (ch == '_')
            out[cbit_word + slot] |= mask;
        if (isspace(ch))
            out[cbit_space + slot] |= mask;
        if (isxdigit(ch))
            out[cbit_xdigit + slot] |= mask;
        if (isgraph(ch))
            out[cbit_graph + slot] |= mask;
        if (isprint(ch))
            out[cbit_print + slot] |= mask;
        if (ispunct(ch))
            out[cbit_punct + slot] |= mask;
        if (iscntrl(ch))
            out[cbit_cntrl + slot] |= mask;
    }
    out += cbit_length;

    /* Per-character type flags. */
    for (ch = 0; ch < 256; ch++) {
        int flags = 0;

        if (isspace(ch))
            flags += ctype_space;
        if (isalpha(ch))
            flags += ctype_letter;
        if (isdigit(ch))
            flags += ctype_digit;
        if (isxdigit(ch))
            flags += ctype_xdigit;
        if (isalnum(ch) || ch == '_')
            flags += ctype_word;
        /* strchr also finds the terminating NUL, so character 0 is flagged
           as meta along with the listed metacharacters. */
        if (strchr("*+?{^.$|()[", ch) != 0)
            flags += ctype_meta;
        *out++ = flags;
    }

    return tables;
}
#ifdef L2_UT_PCRE_TAB
#include <ctype.h>
#include <stdio.h>
#include <string.h>
/*
 * Stand-alone table generator: builds the tables with pcre_maketables() and
 * writes them to stdout as the C source of a `pcre_default_tables` array.
 *
 * Returns 0 on success, 1 when the tables could not be allocated.
 */
int main(void)
{
    int i;
    const unsigned char *tables = pcre_maketables();

    /* pcre_maketables() returns NULL when its allocation fails; without
       this check the loops below would dereference a null pointer. */
    if (tables == NULL) {
        fprintf(stderr, "pcre_maketables: failed to get memory\n");
        return 1;
    }

    printf("/*************************************************\n"
           "* Perl-Compatible Regular Expressions *\n"
           "*************************************************/\n\n"
           "static unsigned char pcre_default_tables[] = {\n\n"
           "/* This table is a lower casing table. */\n\n");

    /* Lower-casing table: 256 decimal entries, eight per line. */
    printf(" ");
    for (i = 0; i < 256; i++) {
        if ((i & 7) == 0 && i != 0)
            printf("\n ");
        printf("%3d", *tables++);
        if (i != 255)
            printf(",");
    }
    printf(",\n\n");

    /* Case-flipping table: same layout. */
    printf("/* This table is a case flipping table. */\n\n");
    printf(" ");
    for (i = 0; i < 256; i++) {
        if ((i & 7) == 0 && i != 0)
            printf("\n ");
        printf("%3d", *tables++);
        if (i != 255)
            printf(",");
    }
    printf(",\n\n");

    /* Class bit maps: hex entries, eight per line, with an extra blank line
       after every 32 bytes (one map). */
    printf("/* This table contains bit maps for various character classes.\n"
           "Each map is 32 bytes long and the bits run from the least\n"
           "significant end of each byte. The classes that have their own\n"
           "maps are: space, xdigit, digit, upper, lower, word, graph\n"
           "print, punct, and cntrl. Other classes are built from combinations. */\n\n");
    printf(" ");
    for (i = 0; i < cbit_length; i++) {
        if ((i & 7) == 0 && i != 0) {
            if ((i & 31) == 0)
                printf("\n");
            printf("\n ");
        }
        printf("0x%02x", *tables++);
        if (i != cbit_length - 1)
            printf(",");
    }
    printf(",\n\n");

    /* Character type flags; the legend prints the actual ctype_* values. */
    printf
        ("/* This table identifies various classes of character by individual bits:\n"
         " 0x%02x white space character\n" " 0x%02x letter\n"
         " 0x%02x decimal digit\n" " 0x%02x hexadecimal digit\n"
         " 0x%02x alphanumeric or '_'\n"
         " 0x%02x regular expression metacharacter or binary zero\n*/\n\n",
         ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word,
         ctype_meta);
    printf(" ");
    for (i = 0; i < 256; i++) {
        if ((i & 7) == 0 && i != 0) {
            printf(" \n ");
        }
        printf("0x%02x", *tables++);
        if (i != 255)
            printf(",");
    }
    printf("};\n\n\n");
    return 0;
}
#endif