OSSP: CVS Repository: Check-in [1518]

Check-in Number:

1518

Date:

2002-Jan-07 16:21:06 (local)
2002-Jan-07 15:21:06 (UTC)

User:

rse

Branch:

Comment:

upgrade to PCRE 3.8

Tickets:

Inspections:

Files:

ossp-pkg/pcre/.cvsignore	added-> 1.5
ossp-pkg/pcre/ChangeLog	1.8 -> 1.9	30 inserted, 0 deleted
ossp-pkg/pcre/Makefile.in	1.6 -> 1.7	3 inserted, 3 deleted
ossp-pkg/pcre/pcre.3	1.5 -> 1.6	209 inserted, 28 deleted
ossp-pkg/pcre/pcre.c	1.7 -> 1.8	115 inserted, 91 deleted
ossp-pkg/pcre/pcre.h	1.7 -> 1.8	9 inserted, 6 deleted
ossp-pkg/pcre/pcre_dftables.c	added-> 1.4
ossp-pkg/pcre/pcre_get.c	1.2 -> 1.3	1 inserted, 1 deleted
ossp-pkg/pcre/pcre_internal.h	1.6 -> 1.7	42 inserted, 11 deleted
ossp-pkg/pcre/pcre_maketables.c	added-> 1.3
ossp-pkg/pcre/pcre_study.c	added-> 1.3
ossp-pkg/pcre/pcre_test.c	1.5 -> 1.6	115 inserted, 78 deleted
ossp-pkg/pcre/pcre_test.d/testinput1	1.3 -> 1.4	33 inserted, 4 deleted
ossp-pkg/pcre/pcre_test.d/testinput2	1.5 -> 1.6	11 inserted, 0 deleted
ossp-pkg/pcre/pcre_test.d/testinput6	1.1 -> 1.2	26 inserted, 0 deleted
ossp-pkg/pcre/pcre_test.d/testoutput1	1.7 -> 1.8	63 inserted, 10 deleted
ossp-pkg/pcre/pcre_test.d/testoutput2	1.7 -> 1.8	306 inserted, 1 deleted
ossp-pkg/pcre/pcre_test.d/testoutput3	1.7 -> 1.8	1 inserted, 1 deleted
ossp-pkg/pcre/pcre_test.d/testoutput4	1.7 -> 1.8	1 inserted, 1 deleted
ossp-pkg/pcre/pcre_test.d/testoutput5	1.2 -> 1.3	1 inserted, 1 deleted
ossp-pkg/pcre/pcre_test.d/testoutput6	1.2 -> 1.3	157 inserted, 23 deleted
ossp-pkg/pcre/pcre_test.sh	added-> 1.2
ossp-pkg/pcre/pcregrep.1	added-> 1.1
ossp-pkg/pcre/pcregrep.c	added-> 1.1
ossp-pkg/pcre/pcreposix.3	1.2 -> 1.3	5 inserted, 5 deleted
ossp-pkg/pcre/pcreposix.c	1.4 -> 1.5	4 inserted, 4 deleted
ossp-pkg/pcre/pcreposix.h	added-> 1.3
ossp-pkg/pcre/pgrep.1	1.2->removed
ossp-pkg/pcre/pgrep.c	1.3->removed

ossp-pkg/pcre/.cvsignore -> 1.5

*** /dev/null Thu Apr 17 04:32:15 2025 --- - Thu Apr 17 04:32:42 2025 *************** *** 0 **** --- 1,22 ---- + Makefile + config.log + config.cache + config.status + config.h + pcre-config + pcre_dftables + pcre_test + pcre_chartables.c + libtool + .libs + *.o + *.la + *.lo + pcregrep + shtool + config.guess + config.sub + ltmain.sh + libtool.m4 + configure + config.h.in

ossp-pkg/pcre/ChangeLog 1.8 -> 1.9

--- ChangeLog 2002/01/07 14:36:54 1.8 +++ ChangeLog 2002/01/07 15:21:06 1.9 @@ -1,6 +1,36 @@ ChangeLog for PCRE ------------------ +Version 3.8 18-Dec-01 +--------------------- + +1. The experimental UTF-8 code was completely screwed up. It was packing the +bytes in the wrong order. How dumb can you get? + + +Version 3.7 29-Oct-01 +--------------------- + +1. In updating pcretest to check change 1 of version 3.6, I screwed up. +This caused pcretest, when used on the test data, to segfault. Unfortunately, +this didn't happen under Solaris 8, where I normally test things. + +2. The Makefile had to be changed to make it work on BSD systems, where 'make' +doesn't seem to recognize that ./xxx and xxx are the same file. (This entry +isn't in ChangeLog distributed with 3.7 because I forgot when I hastily made +this fix an hour or so after the initial 3.7 release.) + + +Version 3.6 23-Oct-01 +--------------------- + +1. Crashed with /(sens|respons)e and \1ibility/ and "sense and sensibility" if +offsets passed as NULL with zero offset count. + +2. The config.guess and config.sub files had not been updated when I moved to +the latest autoconf. + + Version 3.5 15-Aug-01 ---------------------

ossp-pkg/pcre/Makefile.in 1.6 -> 1.7

--- Makefile.in 2001/08/16 10:08:06 1.6 +++ Makefile.in 2002/01/07 15:21:06 1.7 @@ -56,8 +56,8 @@ pcre_test: pcre_test.lo libpcre.la libpcreposix.la $(LIBTOOL) --quiet --mode=link $(CC) $(LDFLAGS) -o pcre_test pcre_test.lo libpcre.la libpcreposix.la -pgrep: pgrep.lo libpcre.la - $(LIBTOOL) --quiet --mode=link $(CC) $(LDFLAGS) -o pgrep pgrep.lo libpcre.la +pcregrep: pcregrep.lo libpcre.la + $(LIBTOOL) --quiet --mode=link $(CC) $(LDFLAGS) -o pcregrep pcregrep.lo libpcre.la check: test test: all pcre_test @@ -78,7 +78,7 @@ clean: $(RM) *.la *.lo *.o - $(RM) pgrep + $(RM) pcregrep distclean: clean $(RM) -r .libs

ossp-pkg/pcre/pcre.3 1.5 -> 1.6

--- pcre.3 2000/08/29 19:24:17 1.5 +++ pcre.3 2002/01/07 15:21:06 1.6 @@ -92,7 +92,9 @@ use these to include support for different releases. The functions \fBpcre_compile()\fR, \fBpcre_study()\fR, and \fBpcre_exec()\fR -are used for compiling and matching regular expressions. +are used for compiling and matching regular expressions. A sample program that +demonstrates the simplest way of using them is given in the file +\fIpcredemo.c\fR. The last section of this man page describes how to run it. The functions \fBpcre_copy_substring()\fR, \fBpcre_get_substring()\fR, and \fBpcre_get_substring_list()\fR are convenience functions for extracting @@ -129,18 +131,22 @@ The function \fBpcre_compile()\fR is called to compile a pattern into an internal form. The pattern is a C string terminated by a binary zero, and is passed in the argument \fIpattern\fR. A pointer to a single block of memory -that is obtained via \fBpcre_malloc\fR is returned. This contains the -compiled code and related data. The \fBpcre\fR type is defined for this for -convenience, but in fact \fBpcre\fR is just a typedef for \fBvoid\fR, since the -contents of the block are not externally defined. It is up to the caller to -free the memory when it is no longer required. -.PP +that is obtained via \fBpcre_malloc\fR is returned. This contains the compiled +code and related data. The \fBpcre\fR type is defined for the returned block; +this is a typedef for a structure whose contents are not externally defined. It +is up to the caller to free the memory when it is no longer required. + +Although the compiled code of a PCRE regex is relocatable, that is, it does not +depend on memory location, the complete \fBpcre\fR data block is not +fully relocatable, because it contains a copy of the \fItableptr\fR argument, +which is an address (see below). 
+ The size of a compiled pattern is roughly proportional to the length of the pattern string, except that each character class (other than those containing just a single character, negated or not) requires 33 bytes, and repeat quantifiers with a minimum greater than one or a bounded maximum cause the relevant portions of the compiled pattern to be replicated. -.PP + The \fIoptions\fR argument contains independent bits that affect the compilation. It should be zero if no options are required. Some of the options, in particular, those that are compatible with Perl, can also be set and unset @@ -149,19 +155,31 @@ their initial settings at the start of compilation and execution. The PCRE_ANCHORED option can be set at the time of matching as well as at compile time. -.PP + If \fIerrptr\fR is NULL, \fBpcre_compile()\fR returns NULL immediately. Otherwise, if compilation of a pattern fails, \fBpcre_compile()\fR returns NULL, and sets the variable pointed to by \fIerrptr\fR to point to a textual error message. The offset from the start of the pattern to the character where the error was discovered is placed in the variable pointed to by \fIerroffset\fR, which must not be NULL. If it is, an immediate error is given. -.PP + If the final argument, \fItableptr\fR, is NULL, PCRE uses a default set of character tables which are built when it is compiled, using the default C locale. Otherwise, \fItableptr\fR must be the result of a call to \fBpcre_maketables()\fR. See the section on locale support below. 
-.PP + +This code fragment shows a typical straightforward call to \fBpcre_compile()\fR: + + pcre *re; + const char *error; + int erroffset; + re = pcre_compile( + "^A.*Z", /* the pattern */ + 0, /* default options */ + &error, /* for error message */ + &erroffset, /* for error offset */ + NULL); /* use default character tables */ + The following option bits are defined in the header file: PCRE_ANCHORED @@ -248,10 +266,10 @@ When a pattern is going to be used several times, it is worth spending more time analyzing it in order to speed up the time taken for matching. The function \fBpcre_study()\fR takes a pointer to a compiled pattern as its first -argument, and returns a pointer to a \fBpcre_extra\fR block (another \fBvoid\fR -typedef) containing additional information about the pattern; this can be -passed to \fBpcre_exec()\fR. If no additional information is available, NULL -is returned. +argument, and returns a pointer to a \fBpcre_extra\fR block (another typedef +for a structure with hidden contents) containing additional information about +the pattern; this can be passed to \fBpcre_exec()\fR. If no additional +information is available, NULL is returned. The second argument contains option bits. At present, no options are defined for \fBpcre_study()\fR, and this argument should always be zero. @@ -260,6 +278,14 @@ studying succeeds (even if no data is returned), the variable it points to is set to NULL. Otherwise it points to a textual error message. +This is a typical call to \fBpcre_study\fR(): + + pcre_extra *pe; + pe = pcre_study( + re, /* result of pcre_compile() */ + 0, /* no options exist */ + &error); /* set to NULL or points to a message */ + At present, studying a pattern is useful only for non-anchored patterns that do not have a single fixed starting character. A bitmap of possible starting characters is created. 
@@ -309,13 +335,24 @@ PCRE_ERROR_BADMAGIC the "magic number" was not found PCRE_ERROR_BADOPTION the value of \fIwhat\fR was invalid +Here is a typical call of \fBpcre_fullinfo()\fR, to obtain the length of the +compiled pattern: + + int rc; + unsigned long int length; + rc = pcre_fullinfo( + re, /* result of pcre_compile() */ + pe, /* result of pcre_study(), or NULL */ + PCRE_INFO_SIZE, /* what is required */ + &length); /* where to put the data */ + The possible values for the third argument are defined in \fBpcre.h\fR, and are as follows: PCRE_INFO_OPTIONS Return a copy of the options with which the pattern was compiled. The fourth -argument should point to au \fBunsigned long int\fR variable. These option bits +argument should point to an \fBunsigned long int\fR variable. These option bits are those specified in the call to \fBpcre_compile()\fR, modified by any top-level option settings within the pattern itself, and with the PCRE_ANCHORED bit forcibly set if the form of the pattern implies that it can match only at @@ -396,6 +433,20 @@ pattern has been studied, the result of the study should be passed in the \fIextra\fR argument. Otherwise this must be NULL. +Here is an example of a simple call to \fBpcre_exec()\fR: + + int rc; + int ovector[30]; + rc = pcre_exec( + re, /* result of pcre_compile() */ + NULL, /* we didn't study the pattern */ + "some string", /* the subject string */ + 11, /* the length of the subject string */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + ovector, /* vector for substring information */ + 30); /* number of elements in the vector */ + The PCRE_ANCHORED option can be passed in the \fIoptions\fR argument, whose unused bits must be zero. However, if a pattern was compiled with PCRE_ANCHORED, or turned out to be anchored by virtue of its contents, it @@ -437,9 +488,9 @@ The subject string is passed as a pointer in \fIsubject\fR, a length in \fIlength\fR, and a starting offset in \fIstartoffset\fR. 
Unlike the pattern -string, it may contain binary zero characters. When the starting offset is -zero, the search for a match starts at the beginning of the subject, and this -is by far the most common case. +string, the subject may contain binary zero characters. When the starting +offset is zero, the search for a match starts at the beginning of the subject, +and this is by far the most common case. A non-zero starting offset is useful when searching for another match in the same subject by calling \fBpcre_exec()\fR again after a previous success. @@ -626,8 +677,9 @@ practice be relevant. The maximum length of a compiled pattern is 65539 (sic) bytes. All values in repeating quantifiers must be less than 65536. -The maximum number of capturing subpatterns is 99. -The maximum number of all parenthesized subpatterns, including capturing +The maximum number of capturing subpatterns is 65535. +There is no limit to the number of non-capturing subpatterns, but the maximum +depth of nesting of all kinds of parenthesized subpattern, including capturing subpatterns, assertions, and other types of subpattern, is 200. The maximum length of a subject string is the largest positive number that an @@ -949,7 +1001,7 @@ Note that the sequences \\A, \\Z, and \\z can be used to match the start and end of the subject in both modes, and if all branches of a pattern start with -\\A is it always anchored, whether PCRE_MULTILINE is set or not. +\\A it is always anchored, whether PCRE_MULTILINE is set or not. .SH FULL STOP (PERIOD, DOT) @@ -1053,7 +1105,7 @@ [12[:^digit:]] -matches "1", "2", or any non-digit. PCRE (and Perl) also recogize the POSIX +matches "1", "2", or any non-digit. PCRE (and Perl) also recognize the POSIX syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not supported, and an error is given if they are encountered. 
@@ -1151,7 +1203,7 @@ the ((red|white) (king|queen)) the captured substrings are "red king", "red", and "king", and are numbered 1, -2, and 3. +2, and 3, respectively. The fact that plain parentheses fulfil two functions is not always helpful. There are often times when a grouping subpattern is required without a @@ -1792,6 +1844,137 @@ 2. The use of Unicode tables and properties and escapes \\p, \\P, and \\X. + +.SH SAMPLE PROGRAM +The code below is a simple, complete demonstration program, to get you started +with using PCRE. This code is also supplied in the file \fIpcredemo.c\fR in the +PCRE distribution. + +The program compiles the regular expression that is its first argument, and +matches it against the subject string in its second argument. No options are +set, and default character tables are used. If matching succeeds, the program +outputs the portion of the subject that matched, together with the contents of +any captured substrings. + +On a Unix system that has PCRE installed in \fI/usr/local\fR, you can compile +the demonstration program using a command like this: + + gcc -o pcredemo pcredemo.c -I/usr/local/include -L/usr/local/lib -lpcre + +Then you can run simple tests like this: + + ./pcredemo 'cat|dog' 'the cat sat on the mat' + +Note that there is a much more comprehensive test program, called +\fBpcretest\fR, which supports many more facilities for testing regular +expressions. The \fBpcredemo\fR program is provided as a simple coding example. + +On some operating systems (e.g. Solaris) you may get an error like this when +you try to run \fBpcredemo\fR: + + ld.so.1: a.out: fatal: libpcre.so.0: open failed: No such file or directory + +This is caused by the way shared library support works on those systems. You +need to add + + -R/usr/local/lib + +to the compile command to get round this problem. 
Here's the code: + + #include <stdio.h> + #include <string.h> + #include <pcre.h> + + #define OVECCOUNT 30 /* should be a multiple of 3 */ + + int main(int argc, char **argv) + { + pcre *re; + const char *error; + int erroffset; + int ovector[OVECCOUNT]; + int rc, i; + + if (argc != 3) + { + printf("Two arguments required: a regex and a " + "subject string\\n"); + return 1; + } + + /* Compile the regular expression in the first argument */ + + re = pcre_compile( + argv[1], /* the pattern */ + 0, /* default options */ + &error, /* for error message */ + &erroffset, /* for error offset */ + NULL); /* use default character tables */ + + /* Compilation failed: print the error message and exit */ + + if (re == NULL) + { + printf("PCRE compilation failed at offset %d: %s\\n", + erroffset, error); + return 1; + } + + /* Compilation succeeded: match the subject in the second + argument */ + + rc = pcre_exec( + re, /* the compiled pattern */ + NULL, /* we didn't study the pattern */ + argv[2], /* the subject string */ + (int)strlen(argv[2]), /* the length of the subject */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + ovector, /* vector for substring information */ + OVECCOUNT); /* number of elements in the vector */ + + /* Matching failed: handle error cases */ + + if (rc < 0) + { + switch(rc) + { + case PCRE_ERROR_NOMATCH: printf("No match\\n"); break; + /* + Handle other special cases if you like + */ + default: printf("Matching error %d\\n", rc); break; + } + return 1; + } + + /* Match succeeded */ + + printf("Match succeeded\\n"); + + /* The output vector wasn't big enough */ + + if (rc == 0) + { + rc = OVECCOUNT/3; + printf("ovector only has room for %d captured " + substrings\\n", rc - 1); + } + + /* Show substrings stored in the output vector */ + + for (i = 0; i < rc; i++) + { + char *substring_start = argv[2] + ovector[2*i]; + int substring_length = ovector[2*i+1] - ovector[2*i]; + printf("%2d: %.*s\\n", i, substring_length, + 
substring_start); + } + + return 0; + } + + .SH AUTHOR Philip Hazel <ph10@cam.ac.uk> .br @@ -1803,8 +1986,6 @@ .br Phone: +44 1223 334714 -Last updated: 28 August 2000, -.br - the 250th anniversary of the death of J.S. Bach. +Last updated: 15 August 2001 .br -Copyright (c) 1997-2000 University of Cambridge. +Copyright (c) 1997-2001 University of Cambridge.

ossp-pkg/pcre/pcre.c 1.7 -> 1.8

--- pcre.c 2000/08/29 19:24:17 1.7 +++ pcre.c 2002/01/07 15:21:06 1.8 @@ -9,7 +9,7 @@ Written by: Philip Hazel <ph10@cam.ac.uk> - Copyright (c) 1997-2000 University of Cambridge + Copyright (c) 1997-2001 University of Cambridge ----------------------------------------------------------------------------- Permission is granted to anyone to use this software for any purpose on any @@ -60,8 +60,11 @@ #endif -/* Number of items on the nested bracket stacks at compile time. This should -not be set greater than 200. */ +/* Maximum number of items on the nested bracket stacks at compile time. This +applies to the nesting of all kinds of parentheses. It does not limit +un-nested, non-capturing parentheses. This number can be made bigger if +necessary - it is used to dimension one int and one unsigned char vector at +compile time. */ #define BRASTACK_SIZE 200 @@ -95,7 +98,7 @@ "class", "Ref", "Recurse", "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref", - "Brazero", "Braminzero", "Bra" + "Brazero", "Braminzero", "Branumber", "Bra" }; #endif @@ -111,9 +114,9 @@ 0, 0, 0, 0, 0, 0, 0, 0, /* H - O */ 0, 0, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */ 0, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */ - '`', 7, -ESC_b, 0, -ESC_d, 27, '\f', 0, /* ` - g */ - 0, 0, 0, 0, 0, 0, '\n', 0, /* h - o */ - 0, 0, '\r', -ESC_s, '\t', 0, 0, -ESC_w, /* p - w */ + '`', 7, -ESC_b, 0, -ESC_d, ESC_E, ESC_F, 0, /* ` - g */ + 0, 0, 0, 0, 0, 0, ESC_N, 0, /* h - o */ + 0, 0, ESC_R, -ESC_s, ESC_T, 0, 0, -ESC_w, /* p - w */ 0, 0, -ESC_z /* x - z */ }; @@ -208,12 +211,12 @@ if (md->utf8 && (c & 0xc0) == 0xc0) \ { \ int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ - int s = 6 - a; /* Amount to shift next byte */ \ - c &= utf8_table3[a]; /* Low order bits from first byte */ \ + int s = 6*a; \ + c = (c & utf8_table3[a]) << s; \ while (a-- > 0) \ { \ + s -= 6; \ c |= (*eptr++ & 0x3f) << s; \ - s += 6; \ } \ } @@ -224,14 
+227,14 @@ len = 1; \ if (md->utf8 && (c & 0xc0) == 0xc0) \ { \ - int _i; \ + int i; \ int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ - int s = 6 - a; /* Amount to shift next byte */ \ - c &= utf8_table3[a]; /* Low order bits from first byte */ \ - for (_i = 1; _i <= a; _i++) \ + int s = 6*a; \ + c = (c & utf8_table3[a]) << s; \ + for (i = 1; i <= a; i++) \ { \ + s -= 6; \ c |= (eptr[i] & 0x3f) << s; \ - s += 6; \ } \ len += a; \ } @@ -258,6 +261,7 @@ #include "pcre_chartables.c" + #ifdef SUPPORT_UTF8 /************************************************* * Tables for UTF-8 support * @@ -305,13 +309,13 @@ register int i, j; for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++) if (cvalue <= utf8_table1[i]) break; -*buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]); -cvalue >>= 6 - i; -for (j = 0; j < i; j++) - { - *buffer++ = 0x80 | (cvalue & 0x3f); - cvalue >>= 6; - } +buffer += i; +for (j = i; j > 0; j--) + { + *buffer-- = 0x80 | (cvalue & 0x3f); + cvalue >>= 6; + } +*buffer = utf8_table2[i] | cvalue; return i + 1; } #endif @@ -813,10 +817,11 @@ /* Skip over things that don't match chars */ case OP_REVERSE: + case OP_BRANUMBER: + case OP_CREF: cc++; /* Fall through */ - case OP_CREF: case OP_OPT: cc++; /* Fall through */ @@ -870,7 +875,7 @@ /* Check a class for variable quantification */ case OP_CLASS: - cc += (*cc == OP_REF)? 2 : 33; + cc += 33; switch (*cc) { @@ -977,7 +982,7 @@ Arguments: options the option bits - brackets points to number of brackets used + brackets points to number of extracting brackets used code points to the pointer to the current code point ptrptr points to the current pattern pointer errorptr points to pointer to error message @@ -1028,7 +1033,7 @@ int class_charcount; int class_lastchar; int newoptions; - int condref; + int skipbytes; int subreqchar; c = *ptr; @@ -1577,7 +1582,7 @@ OP_BRAZERO in front of it, and because the group appears once in the data, whereas in other cases it appears the minimum number of times. 
For this reason, it is simplest to treat this case separately, as otherwise - the code gets far too mess. There are several special subcases when the + the code gets far too messy. There are several special subcases when the minimum is zero. */ if (repeat_min == 0) @@ -1728,7 +1733,7 @@ case '(': newoptions = options; - condref = -1; + skipbytes = 0; if (*(++ptr) == '?') { @@ -1751,7 +1756,7 @@ bravalue = OP_COND; /* Conditional group */ if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0) { - condref = *ptr - '0'; + int condref = *ptr - '0'; while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; if (condref == 0) { @@ -1759,6 +1764,10 @@ goto FAILED; } ptr++; + code[3] = OP_CREF; + code[4] = condref >> 8; + code[5] = condref & 255; + skipbytes = 3; } else ptr--; break; @@ -1861,16 +1870,21 @@ } } - /* Else we have a referencing group; adjust the opcode. */ + /* Else we have a referencing group; adjust the opcode. If the bracket + number is greater than EXTRACT_BASIC_MAX, we set the opcode one higher, and + arrange for the true number to follow later, in an OP_BRANUMBER item. */ else { - if (++(*brackets) > EXTRACT_MAX) + if (++(*brackets) > EXTRACT_BASIC_MAX) { - *errorptr = ERR13; - goto FAILED; + bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1; + code[3] = OP_BRANUMBER; + code[4] = *brackets >> 8; + code[5] = *brackets & 255; + skipbytes = 3; } - bravalue = OP_BRA + *brackets; + else bravalue = OP_BRA + *brackets; } /* Process nested bracketed re. Assertions may not be repeated, but other @@ -1886,13 +1900,13 @@ options | PCRE_INGROUP, /* Set for all nested groups */ ((options & PCRE_IMS) != (newoptions & PCRE_IMS))? 
newoptions & PCRE_IMS : -1, /* Pass ims options if changed */ - brackets, /* Bracket level */ + brackets, /* Extracting bracket count */ &tempcode, /* Where to put code (updated) */ &ptr, /* Input pointer (updated) */ errorptr, /* Where to put an error message */ (bravalue == OP_ASSERTBACK || bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ - condref, /* Condition reference number */ + skipbytes, /* Skip over OP_COND/OP_BRANUMBER */ &subreqchar, /* For possible last char */ &subcountlits, /* For literal count */ cd)) /* Tables block */ @@ -1906,7 +1920,7 @@ /* If this is a conditional bracket, check that there are no more than two branches in the group. */ - if (bravalue == OP_COND) + else if (bravalue == OP_COND) { uschar *tc = code; condcount = 0; @@ -1973,9 +1987,11 @@ { if (-c >= ESC_REF) { + int number = -c - ESC_REF; previous = code; *code++ = OP_REF; - *code++ = -c - ESC_REF; + *code++ = number >> 8; + *code++ = number & 255; } else { @@ -2099,7 +2115,7 @@ ptrptr -> the address of the current pattern pointer errorptr -> pointer to error message lookbehind TRUE if this is a lookbehind assertion - condref >= 0 for OPT_CREF setting at start of conditional group + skipbytes skip this many bytes at start (for OP_COND, OP_BRANUMBER) reqchar -> place to put the last required character, or a negative number countlits -> place to put the shortest literal count of any branch cd points to the data block with tables pointers @@ -2109,7 +2125,7 @@ static BOOL compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, - const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref, + const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int skipbytes, int *reqchar, int *countlits, compile_data *cd) { const uschar *ptr = *ptrptr; @@ -2122,16 +2138,7 @@ *reqchar = -1; *countlits = INT_MAX; -code += 3; - -/* At the start of a reference-based conditional group, insert the reference -number as an OP_CREF item. 
*/ - -if (condref >= 0) - { - *code++ = OP_CREF; - *code++ = condref; - } +code += 3 + skipbytes; /* Loop for each alternative branch */ @@ -2283,7 +2290,8 @@ break; case OP_CREF: - code += 2; + case OP_BRANUMBER: + code += 3; break; case OP_WORD_BOUNDARY: @@ -2546,6 +2554,7 @@ { int min, max; int class_charcount; + int bracket_length; if ((options & PCRE_EXTENDED) != 0) { @@ -2580,7 +2589,7 @@ } length++; - /* A back reference needs an additional char, plus either one or 5 + /* A back reference needs an additional 2 bytes, plus either one or 5 bytes for a repeat. We also need to keep the value of the highest back reference. */ @@ -2588,7 +2597,7 @@ { int refnum = -c - ESC_REF; if (refnum > top_backref) top_backref = refnum; - length++; /* For single back reference */ + length += 2; /* For single back reference */ if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) { ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); @@ -2686,6 +2695,7 @@ case '(': branch_newextra = 0; + bracket_length = 3; /* Handle special forms of bracket, which all start (? */ @@ -2753,7 +2763,7 @@ if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0) { ptr += 4; - length += 2; + length += 3; while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++; if (*ptr != ')') { @@ -2880,15 +2890,19 @@ } /* Extracting brackets must be counted so we can process escapes in a - Perlish way. */ + Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to + need an additional 3 bytes of store per extracting bracket. */ - else bracount++; + else + { + bracount++; + if (bracount > EXTRACT_BASIC_MAX) bracket_length += 3; + } - /* Non-special forms of bracket. Save length for computing whole length - at end if there's a repeat that requires duplication of the group. Also - save the current value of branch_extra, and start the new group with - the new value. If non-zero, this will either be 2 for a (?imsx: group, or 3 - for a lookbehind assertion. 
*/ + /* Save length for computing whole length at end if there's a repeat that + requires duplication of the group. Also save the current value of + branch_extra, and start the new group with the new value. If non-zero, this + will either be 2 for a (?imsx: group, or 3 for a lookbehind assertion. */ if (brastackptr >= sizeof(brastack)/sizeof(int)) { @@ -2900,7 +2914,7 @@ branch_extra = branch_newextra; brastack[brastackptr++] = length; - length += 3; + length += bracket_length; continue; /* Handle ket. Look for subsequent max/min; for certain sets of values we @@ -3061,7 +3075,7 @@ code = re->code; *code = OP_BRA; bracount = 0; -(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1, +(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, 0, &reqchar, &countlits, &compile_block); re->top_bracket = bracount; re->top_backref = top_backref; @@ -3175,7 +3189,10 @@ if (*code >= OP_BRA) { - printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA); + if (*code - OP_BRA > EXTRACT_BASIC_MAX) + printf("%3d Bra extra", (code[1] << 8) + code[2]); + else + printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA); code += 2; } @@ -3186,16 +3203,6 @@ code++; break; - case OP_COND: - printf("%3d Cond", (code[1] << 8) + code[2]); - code += 2; - break; - - case OP_CREF: - printf(" %.2d %s", code[1], OP_names[*code]); - code++; - break; - case OP_CHARS: charlength = *(++code); printf("%3d ", charlength); @@ -3212,11 +3219,10 @@ case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ONCE: - printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]); - code += 2; - break; - case OP_REVERSE: + case OP_BRANUMBER: + case OP_COND: + case OP_CREF: printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]); code += 2; break; @@ -3289,8 +3295,8 @@ break; case OP_REF: - printf(" \\%d", *(++code)); - code ++; + printf(" \\%d", (code[1] << 8) | code[2]); + code += 3; goto CLASS_REF_REPEAT; case OP_CLASS: @@ -3503,8 +3509,14 @@ if (op > OP_BRA) 
{ + int offset; int number = op - OP_BRA; - int offset = number << 1; + + /* For extended extraction brackets (large number), we have to fish out the + number from a dummy opcode at the start. */ + + if (number > EXTRACT_BASIC_MAX) number = (ecode[4] << 8) | ecode[5]; + offset = number << 1; #ifdef DEBUG printf("start bracket %d subject=", number); @@ -3534,6 +3546,7 @@ md->offset_vector[offset] = save_offset1; md->offset_vector[offset+1] = save_offset2; md->offset_vector[md->offset_end - number] = save_offset3; + return FALSE; } @@ -3566,10 +3579,10 @@ case OP_COND: if (ecode[3] == OP_CREF) /* Condition is extraction test */ { - int offset = ecode[4] << 1; /* Doubled reference number */ + int offset = (ecode[4] << 9) | (ecode[5] << 1); /* Doubled ref number */ return match(eptr, ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)? - 5 : 3 + (ecode[1] << 8) + ecode[2]), + 6 : 3 + (ecode[1] << 8) + ecode[2]), offset_top, md, ims, eptrb, match_isgroup); } @@ -3589,10 +3602,12 @@ } /* Control never reaches here */ - /* Skip over conditional reference data if encountered (should not be) */ + /* Skip over conditional reference or large extraction number data if + encountered. */ case OP_CREF: - ecode += 2; + case OP_BRANUMBER: + ecode += 3; break; /* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched @@ -3858,8 +3873,14 @@ if (*prev != OP_COND) { + int offset; int number = *prev - OP_BRA; - int offset = number << 1; + + /* For extended extraction brackets (large number), we have to fish out + the number from a dummy opcode at the start. 
*/ + + if (number > EXTRACT_BASIC_MAX) number = (prev[4] << 8) | prev[5]; + offset = number << 1; #ifdef DEBUG printf("end bracket %d", number); @@ -4053,8 +4074,8 @@ case OP_REF: { int length; - int offset = ecode[1] << 1; /* Doubled reference number */ - ecode += 2; /* Advance past the item */ + int offset = (ecode[1] << 9) | (ecode[2] << 1); /* Doubled ref number */ + ecode += 3; /* Advance past item */ /* If the reference is unset, set the length to be longer than the amount of subject left; this ensures that every attempt at a match fails. We @@ -4878,8 +4899,8 @@ const real_pcre *re = (const real_pcre *)external_re; const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; BOOL using_temporary_offsets = FALSE; -BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0; -BOOL startline = (re->options & PCRE_STARTLINE) != 0; +BOOL anchored; +BOOL startline; if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; @@ -4887,6 +4908,9 @@ (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; +anchored = ((re->options | options) & PCRE_ANCHORED) != 0; +startline = (re->options & PCRE_STARTLINE) != 0; + match_block.start_pattern = re->code; match_block.start_subject = (const uschar *)subject; match_block.end_subject = match_block.start_subject + length; @@ -5120,7 +5144,7 @@ rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2; - if (match_block.offset_end < 2) rc = 0; else + if (offsetcount < 2) rc = 0; else { offsets[0] = start_match - match_block.start_subject; offsets[1] = match_block.end_match_ptr - match_block.start_subject;

ossp-pkg/pcre/pcre.h 1.7 -> 1.8

--- pcre.h 2000/08/29 19:24:17 1.7 +++ pcre.h 2002/01/07 15:21:06 1.8 @@ -2,14 +2,14 @@ * Perl-Compatible Regular Expressions * *************************************************/ -/* Copyright (c) 1997-2000 University of Cambridge */ +/* Copyright (c) 1997-2001 University of Cambridge */ #ifndef _PCRE_H #define _PCRE_H #define PCRE_MAJOR 3 -#define PCRE_MINOR 4 -#define PCRE_DATE 22-Aug-2000 +#define PCRE_MINOR 8 +#define PCRE_DATE 18-Dec-2001 #define __PCRE_STRING(a) #a #define __PCRE_XSTRING(s) _STRING(s) @@ -77,8 +77,11 @@ /* Types */ -typedef void pcre; -typedef void pcre_extra; +struct real_pcre; /* declaration; the definition is private */ +struct real_pcre_extra; /* declaration; the definition is private */ + +typedef struct real_pcre pcre; +typedef struct real_pcre_extra pcre_extra; /* Store get and free functions. These can be set to alternative malloc/free functions if required. Some magic is required for Win32 DLL; it is null on @@ -102,7 +105,7 @@ extern int pcre_get_substring_list(const char *, int *, int, const char ***); extern int pcre_info(const pcre *, int *, int *); extern int pcre_fullinfo(const pcre *, const pcre_extra *, int, void *); -extern unsigned const char *pcre_maketables(void); +extern const unsigned char *pcre_maketables(void); extern pcre_extra *pcre_study(const pcre *, int, const char **); extern const char *pcre_version(void); extern int pcre_match(const char *, const char *, ...);

ossp-pkg/pcre/pcre_dftables.c -> 1.4

*** /dev/null Thu Apr 17 04:32:15 2025 --- - Thu Apr 17 04:32:42 2025 *************** *** 0 **** --- 1,148 ---- + /************************************************* + * Perl-Compatible Regular Expressions * + *************************************************/ + + /* + PCRE is a library of functions to support regular expressions whose syntax + and semantics are as close as possible to those of the Perl 5 language. + + Written by: Philip Hazel <ph10@cam.ac.uk> + + Copyright (c) 1997-2001 University of Cambridge + + ----------------------------------------------------------------------------- + Permission is granted to anyone to use this software for any purpose on any + computer system, and to redistribute it freely, subject to the following + restrictions: + + 1. This software is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + 2. The origin of this software must not be misrepresented, either by + explicit claim or by omission. + + 3. Altered versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 4. If PCRE is embedded in any software that is released under the GNU + General Purpose Licence (GPL), then the terms of that licence shall + supersede any condition above with which it is incompatible. + ----------------------------------------------------------------------------- + + See the file Tech.Notes for some information on the internals. + */ + + + /* This is a support program to generate the file chartables.c, containing + character tables of various kinds. They are built according to the default C + locale and used as the default tables by PCRE. Now that pcre_maketables is + a function visible to the outside world, we make use of its code from here in + order to be consistent. 
*/ + + #include <ctype.h> + #include <stdio.h> + #include <string.h> + + #include "pcre_internal.h" + + #define DFTABLES /* maketables.c notices this */ + #include "pcre_maketables.c" + + + int main(void) + { + int i; + const unsigned char *tables = pcre_maketables(); + + printf( + "/*************************************************\n" + "* Perl-Compatible Regular Expressions *\n" + "*************************************************/\n\n" + "/* This file is automatically written by the dftables auxiliary \n" + "program. If you edit it by hand, you might like to edit the Makefile to \n" + "prevent its ever being regenerated.\n\n" + "This file is #included in the compilation of pcre.c to build the default\n" + "character tables which are used when no tables are passed to the compile\n" + "function. */\n\n" + "static unsigned char pcre_default_tables[] = {\n\n" + "/* This table is a lower casing table. */\n\n"); + + printf(" "); + for (i = 0; i < 256; i++) + { + if ((i & 7) == 0 && i != 0) printf("\n "); + printf("%3d", *tables++); + if (i != 255) printf(","); + } + printf(",\n\n"); + + printf("/* This table is a case flipping table. */\n\n"); + + printf(" "); + for (i = 0; i < 256; i++) + { + if ((i & 7) == 0 && i != 0) printf("\n "); + printf("%3d", *tables++); + if (i != 255) printf(","); + } + printf(",\n\n"); + + printf( + "/* This table contains bit maps for various character classes.\n" + "Each map is 32 bytes long and the bits run from the least\n" + "significant end of each byte. The classes that have their own\n" + "maps are: space, xdigit, digit, upper, lower, word, graph\n" + "print, punct, and cntrl. Other classes are built from combinations. 
*/\n\n"); + + printf(" "); + for (i = 0; i < cbit_length; i++) + { + if ((i & 7) == 0 && i != 0) + { + if ((i & 31) == 0) printf("\n"); + printf("\n "); + } + printf("0x%02x", *tables++); + if (i != cbit_length - 1) printf(","); + } + printf(",\n\n"); + + printf( + "/* This table identifies various classes of character by individual bits:\n" + " 0x%02x white space character\n" + " 0x%02x letter\n" + " 0x%02x decimal digit\n" + " 0x%02x hexadecimal digit\n" + " 0x%02x alphanumeric or '_'\n" + " 0x%02x regular expression metacharacter or binary zero\n*/\n\n", + ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word, + ctype_meta); + + printf(" "); + for (i = 0; i < 256; i++) + { + if ((i & 7) == 0 && i != 0) + { + printf(" /* "); + if (isprint(i-8)) printf(" %c -", i-8); + else printf("%3d-", i-8); + if (isprint(i-1)) printf(" %c ", i-1); + else printf("%3d", i-1); + printf(" */\n "); + } + printf("0x%02x", *tables++); + if (i != 255) printf(","); + } + + printf("};/* "); + if (isprint(i-8)) printf(" %c -", i-8); + else printf("%3d-", i-8); + if (isprint(i-1)) printf(" %c ", i-1); + else printf("%3d", i-1); + printf(" */\n\n/* End of chartables.c */\n"); + + return 0; + } + + /* End of dftables.c */

ossp-pkg/pcre/pcre_get.c 1.2 -> 1.3

--- pcre_get.c 2000/08/02 09:46:06 1.2 +++ pcre_get.c 2002/01/07 15:21:06 1.3 @@ -9,7 +9,7 @@ Written by: Philip Hazel <ph10@cam.ac.uk> - Copyright (c) 1997-2000 University of Cambridge + Copyright (c) 1997-2001 University of Cambridge ----------------------------------------------------------------------------- Permission is granted to anyone to use this software for any purpose on any

ossp-pkg/pcre/pcre_internal.h 1.6 -> 1.7

--- pcre_internal.h 2000/08/29 19:24:17 1.6 +++ pcre_internal.h 2002/01/07 15:21:06 1.7 @@ -9,7 +9,7 @@ Written by: Philip Hazel <ph10@cam.ac.uk> - Copyright (c) 1997-2000 University of Cambridge + Copyright (c) 1997-2001 University of Cambridge ----------------------------------------------------------------------------- Permission is granted to anyone to use this software for any purpose on any @@ -123,12 +123,36 @@ #define FALSE 0 #define TRUE 1 +/* Escape items that are just an encoding of a particular data value. Note that +ESC_N is defined as yet another macro, which is set in config.h to either \n +(the default) or \r (which some people want). */ + +#ifndef ESC_E +#define ESC_E 27 +#endif + +#ifndef ESC_F +#define ESC_F '\f' +#endif + +#ifndef ESC_N +#define ESC_N '\n' +#endif + +#ifndef ESC_R +#define ESC_R '\r' +#endif + +#ifndef ESC_T +#define ESC_T '\t' +#endif + /* These are escaped items that aren't just an encoding of a particular data value such as \n. They must have non-zero values, as check_escape() returns their negation. Also, they must appear in the same order as in the opcode definitions below, up to ESC_z. The final one must be ESC_REF as subsequent values are used for \1, \2, \3, etc. There is a test in the code for an escape -greater than ESC_b and less than ESC_X to detect the types that may be +greater than ESC_b and less than ESC_Z to detect the types that may be repeated. If any new escapes are put in-between that don't consume a character, that code will have to change. */ @@ -224,19 +248,26 @@ OP_ONCE, /* Once matched, don't back up into the subpattern */ OP_COND, /* Conditional group */ - OP_CREF, /* Used to hold an extraction string number */ + OP_CREF, /* Used to hold an extraction string number (cond ref) */ OP_BRAZERO, /* These two must remain together and in this */ OP_BRAMINZERO, /* order. */ + OP_BRANUMBER, /* Used for extracting brackets whose number is greater + than can fit into an opcode. 
*/ + OP_BRA /* This and greater values are used for brackets that - extract substrings. */ + extract substrings up to a basic limit. After that, + use is made of OP_BRANUMBER. */ }; -/* The highest extraction number. This is limited by the number of opcodes -left after OP_BRA, i.e. 255 - OP_BRA. We actually set it somewhat lower. */ +/* The highest extraction number before we have to start using additional +bytes. (Originally PCRE didn't have support for extraction counts highter than +this number.) The value is limited by the number of opcodes left after OP_BRA, +i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional +opcodes. */ -#define EXTRACT_MAX 99 +#define EXTRACT_BASIC_MAX 150 /* The texts of compile-time error messages are defined as macros here so that they can be accessed by the POSIX wrapper and converted into error codes. Yes, @@ -255,13 +286,13 @@ #define ERR10 "operand of unlimited repeat could match the empty string" #define ERR11 "internal error: unexpected repeat" #define ERR12 "unrecognized character after (?" -#define ERR13 "too many capturing parenthesized sub-patterns" +#define ERR13 "unused error" #define ERR14 "missing )" #define ERR15 "back reference to non-existent subpattern" #define ERR16 "erroffset passed as NULL" #define ERR17 "unknown option bit(s) set" #define ERR18 "missing ) after comment" -#define ERR19 "too many sets of parentheses" +#define ERR19 "parentheses nested too deeply" #define ERR20 "regular expression too large" #define ERR21 "failed to get memory" #define ERR22 "unmatched parentheses" @@ -296,8 +327,8 @@ size_t size; const unsigned char *tables; unsigned long int options; - uschar top_bracket; - uschar top_backref; + unsigned short int top_bracket; + unsigned short int top_backref; uschar first_char; uschar req_char; uschar code[1];

ossp-pkg/pcre/pcre_maketables.c -> 1.3

*** /dev/null Thu Apr 17 04:32:15 2025 --- - Thu Apr 17 04:32:42 2025 *************** *** 0 **** --- 1,132 ---- + /************************************************* + * Perl-Compatible Regular Expressions * + *************************************************/ + + /* + PCRE is a library of functions to support regular expressions whose syntax + and semantics are as close as possible to those of the Perl 5 language. + + Written by: Philip Hazel <ph10@cam.ac.uk> + + Copyright (c) 1997-2001 University of Cambridge + + ----------------------------------------------------------------------------- + Permission is granted to anyone to use this software for any purpose on any + computer system, and to redistribute it freely, subject to the following + restrictions: + + 1. This software is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + 2. The origin of this software must not be misrepresented, either by + explicit claim or by omission. + + 3. Altered versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 4. If PCRE is embedded in any software that is released under the GNU + General Purpose Licence (GPL), then the terms of that licence shall + supersede any condition above with which it is incompatible. + ----------------------------------------------------------------------------- + + See the file Tech.Notes for some information on the internals. + */ + + + /* This file is compiled on its own as part of the PCRE library. However, + it is also included in the compilation of dftables.c, in which case the macro + DFTABLES is defined. 
*/ + + #ifndef DFTABLES + #include "pcre_internal.h" + #endif + + + + /************************************************* + * Create PCRE character tables * + *************************************************/ + + /* This function builds a set of character tables for use by PCRE and returns + a pointer to them. They are build using the ctype functions, and consequently + their contents will depend upon the current locale setting. When compiled as + part of the library, the store is obtained via pcre_malloc(), but when compiled + inside dftables, use malloc(). + + Arguments: none + Returns: pointer to the contiguous block of data + */ + + const unsigned char * + pcre_maketables(void) + { + unsigned char *yield, *p; + int i; + + #ifndef DFTABLES + yield = (unsigned char*)(pcre_malloc)(tables_length); + #else + yield = (unsigned char*)malloc(tables_length); + #endif + + if (yield == NULL) return NULL; + p = yield; + + /* First comes the lower casing table */ + + for (i = 0; i < 256; i++) *p++ = tolower(i); + + /* Next the case-flipping table */ + + for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i); + + /* Then the character class tables. Don't try to be clever and save effort + on exclusive ones - in some locales things may be different. 
*/ + + memset(p, 0, cbit_length); + for (i = 0; i < 256; i++) + { + if (isdigit(i)) + { + p[cbit_digit + i/8] |= 1 << (i&7); + p[cbit_word + i/8] |= 1 << (i&7); + } + if (isupper(i)) + { + p[cbit_upper + i/8] |= 1 << (i&7); + p[cbit_word + i/8] |= 1 << (i&7); + } + if (islower(i)) + { + p[cbit_lower + i/8] |= 1 << (i&7); + p[cbit_word + i/8] |= 1 << (i&7); + } + if (i == '_') p[cbit_word + i/8] |= 1 << (i&7); + if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7); + if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7); + if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7); + if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7); + if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7); + if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7); + } + p += cbit_length; + + /* Finally, the character type table */ + + for (i = 0; i < 256; i++) + { + int x = 0; + if (isspace(i)) x += ctype_space; + if (isalpha(i)) x += ctype_letter; + if (isdigit(i)) x += ctype_digit; + if (isxdigit(i)) x += ctype_xdigit; + if (isalnum(i) || i == '_') x += ctype_word; + if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta; + *p++ = x; + } + + return yield; + } + + /* End of maketables.c */

ossp-pkg/pcre/pcre_study.c -> 1.3

/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/*
This is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language. See
the file Tech.Notes for some information on the internals.

Written by: Philip Hazel <ph10@cam.ac.uk>

           Copyright (c) 1997-2001 University of Cambridge

-----------------------------------------------------------------------------
Permission is granted to anyone to use this software for any purpose on any
computer system, and to redistribute it freely, subject to the following
restrictions:

1. This software is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

2. The origin of this software must not be misrepresented, either by
   explicit claim or by omission.

3. Altered versions must be plainly marked as such, and must not be
   misrepresented as being the original software.

4. If PCRE is embedded in any software that is released under the GNU
   General Purpose Licence (GPL), then the terms of that licence shall
   supersede any condition above with which it is incompatible.
-----------------------------------------------------------------------------
*/


/* Include the internals header, which itself includes Standard C headers plus
the external pcre header. */

#include "pcre_internal.h"



/*************************************************
*      Set a bit and maybe its alternate case    *
*************************************************/

/* Given a character, set its bit in the table, and also the bit for the other
version of a letter if we are caseless.

Arguments:
  start_bits    points to the bit map
  c             is the character; it is used directly as a bit number and as
                  an index into the ctypes and fcc tables (the callers in this
                  file pass bytes read from the compiled pattern)
  caseless      the caseless flag
  cd            the block with char table pointers

Returns:        nothing
*/

static void
set_bit(uschar *start_bits, int c, BOOL caseless, compile_data *cd)
{
start_bits[c/8] |= (1 << (c&7));
/* When caseless, a letter also gets the bit of its case-flipped partner,
looked up in the fcc table. */
if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
  start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
}



/*************************************************
*          Create bitmap of starting chars       *
*************************************************/

/* This function scans a compiled unanchored expression and attempts to build a
bitmap of the set of initial characters. If it can't, it returns FALSE. As time
goes by, we may be able to get more clever at doing this.

Each alternative branch of the group at "code" is examined in turn. Within a
branch, items are inspected from the front for as long as they can match
nothing at all (so that the item after them may also start a match); the scan
of a branch stops at the first item that must match at least one character.
Any item the switch below does not know about makes the whole function give up
and return FALSE.

Arguments:
  code         points to an expression
  start_bits   points to a 32-byte table, initialized to 0
  caseless     the current state of the caseless flag
  cd           the block with char table pointers

Returns:       TRUE if table built, FALSE otherwise
*/

static BOOL
set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
  compile_data *cd)
{
register int c;

/* This next statement and the later reference to dummy are here in order to
trick the optimizer of the IBM C compiler for OS/2 into generating correct
code. Apparently IBM isn't going to fix the problem, and we would rather not
disable optimization (in this module it actually makes a big difference, and
the pcre module can use all the optimization it can get). */

volatile int dummy;

do
  {
  /* Step over the three bytes that head the branch. The second and third of
  them form the offset that is used at the bottom of this loop to reach the
  next branch. */

  const uschar *tcode = code + 3;
  BOOL try_next = TRUE;

  while (try_next)
    {
    /* If a branch starts with a bracket or a positive lookahead assertion,
    recurse to set bits from within them. That's all for this branch. */

    if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
      {
      if (!set_start_bits(tcode, start_bits, caseless, cd))
        return FALSE;
      try_next = FALSE;
      }

    else switch(*tcode)
      {
      /* Anything not explicitly handled below: no bitmap can be built */

      default:
      return FALSE;

      /* Skip over extended extraction bracket number */

      case OP_BRANUMBER:
      tcode += 3;
      break;

      /* Skip over lookbehind and negative lookahead assertions */

      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
      tcode += 3;
      break;

      /* Skip over an option setting, changing the caseless flag */

      case OP_OPT:
      caseless = (tcode[1] & PCRE_CASELESS) != 0;
      tcode += 2;
      break;

      /* BRAZERO does the bracket, but carries on. The bracket's own starting
      characters are collected by the recursive call, then the bracket (all of
      its alternatives) is skipped so that the following item is examined as
      well. */

      case OP_BRAZERO:
      case OP_BRAMINZERO:
      if (!set_start_bits(++tcode, start_bits, caseless, cd))
        return FALSE;
      dummy = 1;
      do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
      tcode += 3;
      break;

      /* Single-char * or ? sets the bit and tries the next item */

      case OP_STAR:
      case OP_MINSTAR:
      case OP_QUERY:
      case OP_MINQUERY:
      set_bit(start_bits, tcode[1], caseless, cd);
      tcode += 2;
      break;

      /* Single-char upto sets the bit and tries the next */

      case OP_UPTO:
      case OP_MINUPTO:
      set_bit(start_bits, tcode[3], caseless, cd);
      tcode += 4;
      break;

      /* At least one single char sets the bit and stops. The deliberate
      fall-throughs advance tcode so that, by the time the shared code is
      reached, tcode[1] is the character to record: OP_EXACT moves on by two,
      OP_CHARS by one, and the PLUS opcodes by none. */

      case OP_EXACT:       /* Fall through */
      tcode++;

      case OP_CHARS:       /* Fall through */
      tcode++;

      case OP_PLUS:
      case OP_MINPLUS:
      set_bit(start_bits, tcode[1], caseless, cd);
      try_next = FALSE;
      break;

      /* Single character type sets the bits and stops. The positive types OR
      in the corresponding 32-byte class map; the negated types OR in its
      complement. */

      case OP_NOT_DIGIT:
      for (c = 0; c < 32; c++)
        start_bits[c] |= ~cd->cbits[c+cbit_digit];
      try_next = FALSE;
      break;

      case OP_DIGIT:
      for (c = 0; c < 32; c++)
        start_bits[c] |= cd->cbits[c+cbit_digit];
      try_next = FALSE;
      break;

      case OP_NOT_WHITESPACE:
      for (c = 0; c < 32; c++)
        start_bits[c] |= ~cd->cbits[c+cbit_space];
      try_next = FALSE;
      break;

      case OP_WHITESPACE:
      for (c = 0; c < 32; c++)
        start_bits[c] |= cd->cbits[c+cbit_space];
      try_next = FALSE;
      break;

      case OP_NOT_WORDCHAR:
      for (c = 0; c < 32; c++)
        start_bits[c] |= ~cd->cbits[c+cbit_word];
      try_next = FALSE;
      break;

      case OP_WORDCHAR:
      for (c = 0; c < 32; c++)
        start_bits[c] |= cd->cbits[c+cbit_word];
      try_next = FALSE;
      break;

      /* One or more character type fudges the pointer and restarts, knowing
      it will hit a single character type and stop there. */

      case OP_TYPEPLUS:
      case OP_TYPEMINPLUS:
      tcode++;
      break;

      case OP_TYPEEXACT:
      tcode += 3;
      break;

      /* Zero or more repeats of character types set the bits and then
      try again. For the UPTO forms the pointer is first moved on by two so
      that tcode[1] is the character type, as it already is for the others. */

      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      tcode += 2;               /* Fall through */

      case OP_TYPESTAR:
      case OP_TYPEMINSTAR:
      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
      switch(tcode[1])
        {
        case OP_NOT_DIGIT:
        for (c = 0; c < 32; c++)
          start_bits[c] |= ~cd->cbits[c+cbit_digit];
        break;

        case OP_DIGIT:
        for (c = 0; c < 32; c++)
          start_bits[c] |= cd->cbits[c+cbit_digit];
        break;

        case OP_NOT_WHITESPACE:
        for (c = 0; c < 32; c++)
          start_bits[c] |= ~cd->cbits[c+cbit_space];
        break;

        case OP_WHITESPACE:
        for (c = 0; c < 32; c++)
          start_bits[c] |= cd->cbits[c+cbit_space];
        break;

        case OP_NOT_WORDCHAR:
        for (c = 0; c < 32; c++)
          start_bits[c] |= ~cd->cbits[c+cbit_word];
        break;

        case OP_WORDCHAR:
        for (c = 0; c < 32; c++)
          start_bits[c] |= cd->cbits[c+cbit_word];
        break;
        }

      tcode += 2;
      break;

      /* Character class: set the bits and either carry on or not,
      according to the repeat count. The class's own 32-byte map follows the
      opcode and is ORed straight in. */

      case OP_CLASS:
        {
        tcode++;
        for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
        tcode += 32;
        switch (*tcode)
          {
          case OP_CRSTAR:
          case OP_CRMINSTAR:
          case OP_CRQUERY:
          case OP_CRMINQUERY:
          tcode++;
          break;

          /* A range repeat only lets the scan continue when the two-byte
          value that follows the opcode is zero. */

          case OP_CRRANGE:
          case OP_CRMINRANGE:
          if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
            else try_next = FALSE;
          break;

          default:
          try_next = FALSE;
          break;
          }
        }
      break; /* End of class handling */

      }      /* End of switch */
    }        /* End of try_next loop */

  code += (code[1] << 8) + code[2];   /* Advance to next branch */
  }
while (*code == OP_ALT);
return TRUE;
}



/*************************************************
*          Study a compiled expression           *
*************************************************/

/* This function is handed a compiled expression that it must study to produce
information that will speed up the matching. It returns a pcre_extra block
which then gets handed back to pcre_exec(). The block is obtained through
pcre_malloc(). At present the only information produced is a bitmap of
possible starting characters.

Arguments:
  re        points to the compiled expression
  options   contains option bits
  errorptr  points to where to place error messages;
            set NULL unless error; it is written to unconditionally, so it
            must not itself be NULL

Returns:    pointer to a pcre_extra block,
            NULL on error or if no optimization possible
*/

pcre_extra *
pcre_study(const pcre *external_re, int options, const char **errorptr)
{
uschar start_bits[32];
real_pcre_extra *extra;
const real_pcre *re = (const real_pcre *)external_re;
compile_data compile_block;

*errorptr = NULL;

if (re == NULL || re->magic_number != MAGIC_NUMBER)
  {
  *errorptr = "argument is not a compiled regular expression";
  return NULL;
  }

if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
  {
  *errorptr = "unknown or incorrect option bit(s) set";
  return NULL;
  }

/* For an anchored pattern, or an unanchored pattern that has a first char, or
a multiline pattern that matches only at "line starts", no further processing
at present. This is not an error: NULL is returned with *errorptr left NULL. */

if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
  return NULL;

/* Set the character tables in the block which is passed around */

compile_block.lcc = re->tables + lcc_offset;
compile_block.fcc = re->tables + fcc_offset;
compile_block.cbits = re->tables + cbits_offset;
compile_block.ctypes = re->tables + ctypes_offset;

/* See if we can find a fixed set of initial characters for the pattern. If
not, there is nothing to record; again this is not reported as an error. */

memset(start_bits, 0, 32 * sizeof(uschar));
if (!set_start_bits(re->code, start_bits, (re->options & PCRE_CASELESS) != 0,
  &compile_block)) return NULL;

/* Get an "extra" block and put the information therein. */

extra = (real_pcre_extra *)(pcre_malloc)(sizeof(real_pcre_extra));

if (extra == NULL)
  {
  *errorptr = "failed to get memory";
  return NULL;
  }

extra->options = PCRE_STUDY_MAPPED;
memcpy(extra->start_bits, start_bits, sizeof(start_bits));

return (pcre_extra *)extra;
}

/* End of study.c */

ossp-pkg/pcre/pcre_test.c 1.5 -> 1.6

--- pcre_test.c 2000/08/02 09:46:06 1.5 +++ pcre_test.c 2002/01/07 15:21:06 1.6 @@ -73,13 +73,14 @@ if (cvalue <= utf8_table1[i]) break; if (i >= sizeof(utf8_table1)/sizeof(int)) return 0; if (cvalue < 0) return -1; -*buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]); -cvalue >>= 6 - i; -for (j = 0; j < i; j++) - { - *buffer++ = 0x80 | (cvalue & 0x3f); - cvalue >>= 6; - } + +buffer += i; +for (j = i; j > 0; j--) + { + *buffer-- = 0x80 | (cvalue & 0x3f); + cvalue >>= 6; + } +*buffer = utf8_table2[i] | cvalue; return i + 1; } @@ -99,7 +100,7 @@ -6 to 0 => malformed UTF-8 character at offset = (-return) */ -static int +int utf82ord(unsigned char *buffer, int *vptr) { int c = *buffer++; @@ -117,15 +118,15 @@ /* i now has a value in the range 1-5 */ -d = c & utf8_table3[i]; -s = 6 - i; +s = 6*i; +d = (c & utf8_table3[i]) << s; for (j = 0; j < i; j++) { c = *buffer++; if ((c & 0xc0) != 0x80) return -(j+1); + s -= 6; d |= (c & 0x3f) << s; - s += 6; } /* Check that encoding was the correct unique one */ @@ -159,7 +160,7 @@ "class", "Ref", "Recurse", "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref", - "Brazero", "Braminzero", "Bra" + "Brazero", "Braminzero", "Branumber", "Bra" }; @@ -178,7 +179,10 @@ if (*code >= OP_BRA) { - fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA); + if (*code - OP_BRA > EXTRACT_BASIC_MAX) + fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]); + else + fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA); code += 2; } @@ -194,16 +198,6 @@ code++; break; - case OP_COND: - fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]); - code += 2; - break; - - case OP_CREF: - fprintf(outfile, " %.2d %s", code[1], OP_names[*code]); - code++; - break; - case OP_CHARS: charlength = *(++code); fprintf(outfile, "%3d ", charlength); @@ -221,11 +215,10 @@ case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ONCE: - fprintf(outfile, "%3d 
%s", (code[1] << 8) + code[2], OP_names[*code]); - code += 2; - break; - + case OP_COND: + case OP_BRANUMBER: case OP_REVERSE: + case OP_CREF: fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]); code += 2; break; @@ -298,8 +291,8 @@ break; case OP_REF: - fprintf(outfile, " \\%d", *(++code)); - code++; + fprintf(outfile, " \\%d", (code[1] << 8) | code[2]); + code += 3; goto CLASS_REF_REPEAT; case OP_CLASS: @@ -441,7 +434,12 @@ int timeit = 0; int showinfo = 0; int showstore = 0; +int size_offsets = 45; +int size_offsets_max; +int *offsets; +#if !defined NOPOSIX int posix = 0; +#endif int debug = 0; int done = 0; unsigned char buffer[30000]; @@ -455,27 +453,51 @@ while (argc > 1 && argv[op][0] == '-') { + char *endptr; + if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0) showstore = 1; else if (strcmp(argv[op], "-t") == 0) timeit = 1; else if (strcmp(argv[op], "-i") == 0) showinfo = 1; else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1; + else if (strcmp(argv[op], "-o") == 0 && argc > 2 && + ((size_offsets = (int)strtoul(argv[op+1], &endptr, 10)), *endptr == 0)) + { + op++; + argc--; + } +#if !defined NOPOSIX else if (strcmp(argv[op], "-p") == 0) posix = 1; +#endif else { - printf("*** Unknown option %s\n", argv[op]); - printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n"); - printf(" -d debug: show compiled code; implies -i\n" - " -i show information about compiled pattern\n" - " -p use POSIX interface\n" - " -s output store information\n" - " -t time compilation and execution\n"); + printf("** Unknown or malformed option %s\n", argv[op]); + printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n"); + printf(" -d debug: show compiled code; implies -i\n" + " -i show information about compiled pattern\n" + " -o <n> set size of offsets vector to <n>\n"); +#if !defined NOPOSIX + printf(" -p use POSIX interface\n"); +#endif + printf(" -s output store information\n" + " -t time 
compilation and execution\n"); return 1; } op++; argc--; } +/* Get the store for the offsets vector, and remember what it was */ + +size_offsets_max = size_offsets; +offsets = malloc(size_offsets_max * sizeof(int)); +if (offsets == NULL) + { + printf("** Failed to get %d bytes of memory for offsets vector\n", + size_offsets_max * sizeof(int)); + return 1; + } + /* Sort out the input and output files */ if (argc > 1) @@ -515,18 +537,18 @@ #if !defined NOPOSIX /* There are still compilers that require no indent */ regex_t preg; + int do_posix = 0; #endif const char *error; unsigned char *p, *pp, *ppp; - unsigned const char *tables = NULL; + const unsigned char *tables = NULL; int do_study = 0; int do_debug = debug; int do_G = 0; int do_g = 0; int do_showinfo = showinfo; int do_showrest = 0; - int do_posix = 0; int utf8 = 0; int erroroffset, len, delimiter; @@ -720,13 +742,14 @@ if (do_showinfo) { + unsigned long int get_options; int old_first_char, old_options, old_count; int count, backrefmax, first_char, need_char; size_t size; if (do_debug) print_internals(re); - new_info(re, NULL, PCRE_INFO_OPTIONS, &options); + new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options); new_info(re, NULL, PCRE_INFO_SIZE, &size); new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count); new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax); @@ -746,9 +769,9 @@ "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n", first_char, old_first_char); - if (old_options != options) fprintf(outfile, - "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options, - old_options); + if (old_options != (int)get_options) fprintf(outfile, + "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n", + get_options, old_options); } if (size != gotten_store) fprintf(outfile, @@ -758,17 +781,17 @@ fprintf(outfile, "Capturing subpattern count = %d\n", count); if (backrefmax > 0) fprintf(outfile, "Max back reference = %d\n", backrefmax); - if (options == 0) fprintf(outfile, "No options\n"); + if (get_options 
== 0) fprintf(outfile, "No options\n"); else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n", - ((options & PCRE_ANCHORED) != 0)? " anchored" : "", - ((options & PCRE_CASELESS) != 0)? " caseless" : "", - ((options & PCRE_EXTENDED) != 0)? " extended" : "", - ((options & PCRE_MULTILINE) != 0)? " multiline" : "", - ((options & PCRE_DOTALL) != 0)? " dotall" : "", - ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "", - ((options & PCRE_EXTRA) != 0)? " extra" : "", - ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "", - ((options & PCRE_UTF8) != 0)? " utf8" : ""); + ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "", + ((get_options & PCRE_CASELESS) != 0)? " caseless" : "", + ((get_options & PCRE_EXTENDED) != 0)? " extended" : "", + ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "", + ((get_options & PCRE_DOTALL) != 0)? " dotall" : "", + ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "", + ((get_options & PCRE_EXTRA) != 0)? " extra" : "", + ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "", + ((get_options & PCRE_UTF8) != 0)? " utf8" : ""); if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0) fprintf(outfile, "Case state changes\n"); @@ -871,6 +894,8 @@ { unsigned char *q; unsigned char *bptr = dbuffer; + int *use_offsets = offsets; + int use_size_offsets = size_offsets; int count, c; int copystrings = 0; int getstrings = 0; @@ -878,8 +903,6 @@ int gmatched = 0; int start_offset = 0; int g_notempty = 0; - int offsets[45]; - int size_offsets = sizeof(offsets)/sizeof(int); options = 0; @@ -934,11 +957,11 @@ c = c * 16 + tolower(*pt) - ((isdigit(*pt))? 
'0' : 'W'); if (*pt == '}') { - unsigned char utf8_buffer[8]; + unsigned char buffer[8]; int ii, utn; - utn = ord2utf8(c, utf8_buffer); - for (ii = 0; ii < utn - 1; ii++) *q++ = utf8_buffer[ii]; - c = utf8_buffer[ii]; /* Last byte */ + utn = ord2utf8(c, buffer); + for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii]; + c = buffer[ii]; /* Last byte */ p = pt + 1; break; } @@ -987,7 +1010,20 @@ case 'O': while(isdigit(*p)) n = n * 10 + *p++ - '0'; - if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n; + if (n > size_offsets_max) + { + size_offsets_max = n; + free(offsets); + use_offsets = offsets = malloc(size_offsets_max * sizeof(int)); + if (offsets == NULL) + { + printf("** Failed to get %d bytes of memory for offsets vector\n", + size_offsets_max * sizeof(int)); + return 1; + } + } + use_size_offsets = n; + if (n == 0) use_offsets = NULL; continue; case 'Z': @@ -1007,11 +1043,11 @@ { int rc; int eflags = 0; - regmatch_t pmatch[sizeof(offsets)/sizeof(int)]; + regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets); if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL; if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL; - rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags); + rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags); if (rc != 0) { @@ -1021,7 +1057,7 @@ else { size_t i; - for (i = 0; i < size_offsets; i++) + for (i = 0; i < use_size_offsets; i++) { if (pmatch[i].rm_so >= 0) { @@ -1038,6 +1074,7 @@ } } } + free(pmatch); } /* Handle matching via the native interface - repeats for /g and /G */ @@ -1054,7 +1091,7 @@ clock_t start_time = clock(); for (i = 0; i < LOOPREPEAT; i++) count = pcre_exec(re, extra, (char *)bptr, len, - start_offset, options | g_notempty, offsets, size_offsets); + start_offset, options | g_notempty, use_offsets, use_size_offsets); time_taken = clock() - start_time; fprintf(outfile, "Execute time %.3f milliseconds\n", ((double)time_taken * 1000.0)/ @@ -1062,12 +1099,12 @@ 
} count = pcre_exec(re, extra, (char *)bptr, len, - start_offset, options | g_notempty, offsets, size_offsets); + start_offset, options | g_notempty, use_offsets, use_size_offsets); if (count == 0) { fprintf(outfile, "Matched, but too many substrings\n"); - count = size_offsets/3; + count = use_size_offsets/3; } /* Matched */ @@ -1077,19 +1114,19 @@ int i; for (i = 0; i < count * 2; i += 2) { - if (offsets[i] < 0) + if (use_offsets[i] < 0) fprintf(outfile, "%2d: <unset>\n", i/2); else { fprintf(outfile, "%2d: ", i/2); - pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8); + pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8); fprintf(outfile, "\n"); if (i == 0) { if (do_showrest) { fprintf(outfile, " 0+ "); - pchars(bptr + offsets[i+1], len - offsets[i+1], utf8); + pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8); fprintf(outfile, "\n"); } } @@ -1101,7 +1138,7 @@ if ((copystrings & (1 << i)) != 0) { char copybuffer[16]; - int rc = pcre_copy_substring((char *)bptr, offsets, count, + int rc = pcre_copy_substring((char *)bptr, use_offsets, count, i, copybuffer, sizeof(copybuffer)); if (rc < 0) fprintf(outfile, "copy substring %d failed %d\n", i, rc); @@ -1115,7 +1152,7 @@ if ((getstrings & (1 << i)) != 0) { const char *substring; - int rc = pcre_get_substring((char *)bptr, offsets, count, + int rc = pcre_get_substring((char *)bptr, use_offsets, count, i, &substring); if (rc < 0) fprintf(outfile, "get substring %d failed %d\n", i, rc); @@ -1131,7 +1168,7 @@ if (getlist) { const char **stringlist; - int rc = pcre_get_substring_list((char *)bptr, offsets, count, + int rc = pcre_get_substring_list((char *)bptr, use_offsets, count, &stringlist); if (rc < 0) fprintf(outfile, "get substring list failed %d\n", rc); @@ -1157,8 +1194,8 @@ { if (g_notempty != 0) { - offsets[0] = start_offset; - offsets[1] = start_offset + 1; + use_offsets[0] = start_offset; + use_offsets[1] = start_offset + 1; } else { @@ -1183,22 +1220,22 @@ character. 
*/ g_notempty = 0; - if (offsets[0] == offsets[1]) + if (use_offsets[0] == use_offsets[1]) { - if (offsets[0] == len) break; + if (use_offsets[0] == len) break; g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED; } /* For /g, update the start offset, leaving the rest alone */ - if (do_g) start_offset = offsets[1]; + if (do_g) start_offset = use_offsets[1]; /* For /G, update the pointer and length */ else { - bptr += offsets[1]; - len -= offsets[1]; + bptr += use_offsets[1]; + len -= use_offsets[1]; } } /* End of loop for /g and /G */ } /* End of loop for data lines */

ossp-pkg/pcre/pcre_test.d/testinput1 1.3 -> 1.4

--- testinput1 2000/08/02 09:46:09 1.3 +++ testinput1 2002/01/07 15:21:07 1.4 @@ -1441,10 +1441,6 @@ ABCabc abcABC -/(main(O)?)+/ - mainmain - mainOmain - /ab{3cd/ ab{3cd @@ -1918,4 +1914,37 @@ acb a\nb +/^(b+?|a){1,2}?c/ + bac + bbac + bbbac + bbbbac + bbbbbac + +/^(b+|a){1,2}?c/ + bac + bbac + bbbac + bbbbac + bbbbbac + +/(?!\A)x/m + x\nb\n + a\bx\n + +/\x0{ab}/ + \0{ab} + +/(A|B)*?CD/ + CD + +/(A|B)*CD/ + CD + +/(AB)*?\1/ + ABABAB + +/(AB)*\1/ + ABABAB + / End of testinput1 /

ossp-pkg/pcre/pcre_test.d/testinput2 1.5 -> 1.6

--- testinput2 2000/08/29 19:24:19 1.5 +++ testinput2 2002/01/07 15:21:07 1.6 @@ -709,4 +709,15 @@ /^(?(0)f|b)oo/ +/This one's here because of the large output vector needed/ + +/(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(
\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\w+)\s+(\270)/ + \O900 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 ABC ABC + +/This one's here because Perl does this differently and PCRE can't at present/ + +/(main(O)?)+/ + mainmain + mainOmain + / End of testinput2 /

ossp-pkg/pcre/pcre_test.d/testinput6 1.1 -> 1.2

--- testinput6 2000/08/02 09:46:09 1.1 +++ testinput6 2002/01/07 15:21:07 1.2 @@ -27,6 +27,32 @@ /\xff/8D +/\x{0041}\x{2262}\x{0391}\x{002e}/D8 + \x{0041}\x{2262}\x{0391}\x{002e} + +/\x{D55c}\x{ad6d}\x{C5B4}/D8 + \x{D55c}\x{ad6d}\x{C5B4} + +/\x{65e5}\x{672c}\x{8a9e}/D8 + \x{65e5}\x{672c}\x{8a9e} + +/\x{80}/D8 + +/\x{084}/D8 + +/\x{104}/D8 + +/\x{861}/D8 + +/\x{212ab}/D8 + +/.{3,5}X/D8 + \x{212ab}\x{212ab}\x{212ab}\x{861}X + + +/.{3,5}?/D8 + \x{212ab}\x{212ab}\x{212ab}\x{861} + /-- These tests are here rather than in testinput5 because Perl 5.6 has --/ /-- some problems with UTF-8 support, in the area of \x{..} where the --/ /-- value is < 255. It grumbles about invalid UTF-8 strings. --/

ossp-pkg/pcre/pcre_test.d/testoutput1 1.7 -> 1.8

--- testoutput1 2000/08/29 19:24:19 1.7 +++ testoutput1 2002/01/07 15:21:07 1.8 @@ -1,4 +1,4 @@ -PCRE version 3.4 22-Aug-2000 +PCRE version 3.8 18-Dec-2001 /the quick brown fox/ the quick brown fox @@ -2079,15 +2079,6 @@ 0: abcABC 1: abc -/(main(O)?)+/ - mainmain - 0: mainmain - 1: main - mainOmain - 0: mainOmain - 1: main - 2: O - /ab{3cd/ ab{3cd 0: ab{3cd @@ -2961,5 +2952,67 @@ a\nb 0: a\x0ab +/^(b+?|a){1,2}?c/ + bac + 0: bac + 1: a + bbac + 0: bbac + 1: a + bbbac + 0: bbbac + 1: a + bbbbac + 0: bbbbac + 1: a + bbbbbac + 0: bbbbbac + 1: a + +/^(b+|a){1,2}?c/ + bac + 0: bac + 1: a + bbac + 0: bbac + 1: a + bbbac + 0: bbbac + 1: a + bbbbac + 0: bbbbac + 1: a + bbbbbac + 0: bbbbbac + 1: a + +/(?!\A)x/m + x\nb\n +No match + a\bx\n + 0: x + +/\x0{ab}/ + \0{ab} + 0: \x00{ab} + +/(A|B)*?CD/ + CD + 0: CD + +/(A|B)*CD/ + CD + 0: CD + +/(AB)*?\1/ + ABABAB + 0: ABAB + 1: AB + +/(AB)*\1/ + ABABAB + 0: ABABAB + 1: AB + / End of testinput1 /

ossp-pkg/pcre/pcre_test.d/testoutput2 1.7 -> 1.8

--- testoutput2 2000/08/29 19:24:19 1.7 +++ testoutput2 2002/01/07 15:21:07 1.8 @@ -1,4 +1,4 @@ -PCRE version 3.4 22-Aug-2000 +PCRE version 3.8 18-Dec-2001 /(a)b|/ Capturing subpattern count = 1 @@ -2067,6 +2067,311 @@ /^(?(0)f|b)oo/ Failed: invalid condition (?(0) at offset 5 +/This one's here because of the large output vector needed/ +Capturing subpattern count = 0 +No options +First char = 'T' +Need char = 'd' + +/(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(
?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\w+)\s+(\270)/ +Capturing subpattern count = 271 +Max back reference = 270 +No 
options +No first char +No need char + \O900 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 ABC ABC + 0: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 ABC 
ABC + 1: 1 + 2: 2 + 3: 3 + 4: 4 + 5: 5 + 6: 6 + 7: 7 + 8: 8 + 9: 9 +10: 10 +11: 11 +12: 12 +13: 13 +14: 14 +15: 15 +16: 16 +17: 17 +18: 18 +19: 19 +20: 20 +21: 21 +22: 22 +23: 23 +24: 24 +25: 25 +26: 26 +27: 27 +28: 28 +29: 29 +30: 30 +31: 31 +32: 32 +33: 33 +34: 34 +35: 35 +36: 36 +37: 37 +38: 38 +39: 39 +40: 40 +41: 41 +42: 42 +43: 43 +44: 44 +45: 45 +46: 46 +47: 47 +48: 48 +49: 49 +50: 50 +51: 51 +52: 52 +53: 53 +54: 54 +55: 55 +56: 56 +57: 57 +58: 58 +59: 59 +60: 60 +61: 61 +62: 62 +63: 63 +64: 64 +65: 65 +66: 66 +67: 67 +68: 68 +69: 69 +70: 70 +71: 71 +72: 72 +73: 73 +74: 74 +75: 75 +76: 76 +77: 77 +78: 78 +79: 79 +80: 80 +81: 81 +82: 82 +83: 83 +84: 84 +85: 85 +86: 86 +87: 87 +88: 88 +89: 89 +90: 90 +91: 91 +92: 92 +93: 93 +94: 94 +95: 95 +96: 96 +97: 97 +98: 98 +99: 99 +100: 100 +101: 101 +102: 102 +103: 103 +104: 104 +105: 105 +106: 106 +107: 107 +108: 108 +109: 109 +110: 110 +111: 111 +112: 112 +113: 113 +114: 114 +115: 115 +116: 116 +117: 117 +118: 118 +119: 119 +120: 120 +121: 121 +122: 122 +123: 123 +124: 124 +125: 125 +126: 126 +127: 127 +128: 128 +129: 129 +130: 130 +131: 131 +132: 132 +133: 133 +134: 134 +135: 135 +136: 136 +137: 137 +138: 138 +139: 139 +140: 140 +141: 141 +142: 142 +143: 143 +144: 144 +145: 145 +146: 146 +147: 147 +148: 148 +149: 149 +150: 150 +151: 151 +152: 152 +153: 153 +154: 154 +155: 155 +156: 156 +157: 157 +158: 158 +159: 159 +160: 160 +161: 161 +162: 162 +163: 163 +164: 164 +165: 165 +166: 166 +167: 167 +168: 168 +169: 169 +170: 170 +171: 171 +172: 172 +173: 173 +174: 174 +175: 175 +176: 176 +177: 177 +178: 178 +179: 179 +180: 180 +181: 181 +182: 182 +183: 183 +184: 184 +185: 185 +186: 186 +187: 187 +188: 188 +189: 189 +190: 190 +191: 191 +192: 192 +193: 193 +194: 194 +195: 195 +196: 196 +197: 197 +198: 198 +199: 199 +200: 200 +201: 201 +202: 202 +203: 203 +204: 204 +205: 205 +206: 206 +207: 207 +208: 208 +209: 209 +210: 210 +211: 211 +212: 212 +213: 213 +214: 214 +215: 215 +216: 216 +217: 217 +218: 218 +219: 219 +220: 220 
+221: 221 +222: 222 +223: 223 +224: 224 +225: 225 +226: 226 +227: 227 +228: 228 +229: 229 +230: 230 +231: 231 +232: 232 +233: 233 +234: 234 +235: 235 +236: 236 +237: 237 +238: 238 +239: 239 +240: 240 +241: 241 +242: 242 +243: 243 +244: 244 +245: 245 +246: 246 +247: 247 +248: 248 +249: 249 +250: 250 +251: 251 +252: 252 +253: 253 +254: 254 +255: 255 +256: 256 +257: 257 +258: 258 +259: 259 +260: 260 +261: 261 +262: 262 +263: 263 +264: 264 +265: 265 +266: 266 +267: 267 +268: 268 +269: 269 +270: ABC +271: ABC + +/This one's here because Perl does this differently and PCRE can't at present/ +Capturing subpattern count = 0 +No options +First char = 'T' +Need char = 't' + +/(main(O)?)+/ +Capturing subpattern count = 2 +No options +First char = 'm' +Need char = 'n' + mainmain + 0: mainmain + 1: main + mainOmain + 0: mainOmain + 1: main + 2: O + / End of testinput2 / Capturing subpattern count = 0 No options

ossp-pkg/pcre/pcre_test.d/testoutput3 1.7 -> 1.8

--- testoutput3 2000/08/29 19:24:19 1.7 +++ testoutput3 2002/01/07 15:21:08 1.8 @@ -1,4 +1,4 @@ -PCRE version 3.4 22-Aug-2000 +PCRE version 3.8 18-Dec-2001 /(?<!bar)foo/ foo

ossp-pkg/pcre/pcre_test.d/testoutput4 1.7 -> 1.8

--- testoutput4 2000/08/29 19:24:19 1.7 +++ testoutput4 2002/01/07 15:21:08 1.8 @@ -1,4 +1,4 @@ -PCRE version 3.4 22-Aug-2000 +PCRE version 3.8 18-Dec-2001 /^[\w]+/ *** Failers

ossp-pkg/pcre/pcre_test.d/testoutput5 1.2 -> 1.3

--- testoutput5 2000/08/29 19:24:19 1.2 +++ testoutput5 2002/01/07 15:21:08 1.3 @@ -1,4 +1,4 @@ -PCRE version 3.4 22-Aug-2000 +PCRE version 3.8 18-Dec-2001 /-- Because of problems with Perl 5.6 in handling UTF-8 vs non UTF-8 --/ /-- strings automatically, do not use the \x{} construct except with --/

ossp-pkg/pcre/pcre_test.d/testoutput6 1.2 -> 1.3

--- testoutput6 2000/08/29 19:24:19 1.2 +++ testoutput6 2002/01/07 15:21:08 1.3 @@ -1,82 +1,82 @@ -PCRE version 3.4 22-Aug-2000 +PCRE version 3.8 18-Dec-2001 /\x{100}/8DM Memory allocation (code space): 11 ------------------------------------------------------------------ 0 7 Bra 0 - 3 2 \xc0\x88 + 3 2 \xc4\x80 7 7 Ket 10 End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 -First char = 192 -Need char = 136 +First char = 196 +Need char = 128 /\x{1000}/8DM Memory allocation (code space): 12 ------------------------------------------------------------------ 0 8 Bra 0 - 3 3 \xe0\x80\x84 + 3 3 \xe1\x80\x80 8 8 Ket 11 End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 -First char = 224 -Need char = 132 +First char = 225 +Need char = 128 /\x{10000}/8DM Memory allocation (code space): 13 ------------------------------------------------------------------ 0 9 Bra 0 - 3 4 \xf0\x80\x80\x82 + 3 4 \xf0\x90\x80\x80 9 9 Ket 12 End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 First char = 240 -Need char = 130 +Need char = 128 /\x{100000}/8DM Memory allocation (code space): 13 ------------------------------------------------------------------ 0 9 Bra 0 - 3 4 \xf0\x80\x80\xa0 + 3 4 \xf4\x80\x80\x80 9 9 Ket 12 End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 -First char = 240 -Need char = 160 +First char = 244 +Need char = 128 /\x{1000000}/8DM Memory allocation (code space): 14 ------------------------------------------------------------------ 0 10 Bra 0 - 3 5 \xf8\x80\x80\x80\x90 + 3 5 \xf9\x80\x80\x80\x80 10 10 Ket 13 End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 -First char = 248 -Need char = 144 +First char = 249 +Need char = 128 /\x{4000000}/8DM Memory 
allocation (code space): 15 ------------------------------------------------------------------ 0 11 Bra 0 - 3 6 \xfc\x80\x80\x80\x80\x82 + 3 6 \xfc\x84\x80\x80\x80\x80 11 11 Ket 14 End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 First char = 252 -Need char = 130 +Need char = 128 /\x{7fffFFFF}/8DM Memory allocation (code space): 15 @@ -121,26 +121,160 @@ /\x80/8D ------------------------------------------------------------------ 0 7 Bra 0 - 3 2 \xc0\x84 + 3 2 \xc2\x80 7 7 Ket 10 End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 -First char = 192 -Need char = 132 +First char = 194 +Need char = 128 /\xff/8D ------------------------------------------------------------------ 0 7 Bra 0 - 3 2 \xdf\x87 + 3 2 \xc3\xbf + 7 7 Ket + 10 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 195 +Need char = 191 + +/\x{0041}\x{2262}\x{0391}\x{002e}/D8 +------------------------------------------------------------------ + 0 12 Bra 0 + 3 7 A\xe2\x89\xa2\xce\x91. + 12 12 Ket + 15 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 'A' +Need char = '.' + \x{0041}\x{2262}\x{0391}\x{002e} + 0: A\x{2262}\x{391}. 
+ +/\x{D55c}\x{ad6d}\x{C5B4}/D8 +------------------------------------------------------------------ + 0 14 Bra 0 + 3 9 \xed\x95\x9c\xea\xb5\xad\xec\x96\xb4 + 14 14 Ket + 17 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 237 +Need char = 180 + \x{D55c}\x{ad6d}\x{C5B4} + 0: \x{d55c}\x{ad6d}\x{c5b4} + +/\x{65e5}\x{672c}\x{8a9e}/D8 +------------------------------------------------------------------ + 0 14 Bra 0 + 3 9 \xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e + 14 14 Ket + 17 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 230 +Need char = 158 + \x{65e5}\x{672c}\x{8a9e} + 0: \x{65e5}\x{672c}\x{8a9e} + +/\x{80}/D8 +------------------------------------------------------------------ + 0 7 Bra 0 + 3 2 \xc2\x80 7 7 Ket 10 End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 -First char = 223 -Need char = 135 +First char = 194 +Need char = 128 + +/\x{084}/D8 +------------------------------------------------------------------ + 0 7 Bra 0 + 3 2 \xc2\x84 + 7 7 Ket + 10 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 194 +Need char = 132 + +/\x{104}/D8 +------------------------------------------------------------------ + 0 7 Bra 0 + 3 2 \xc4\x84 + 7 7 Ket + 10 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 196 +Need char = 132 + +/\x{861}/D8 +------------------------------------------------------------------ + 0 8 Bra 0 + 3 3 \xe0\xa1\xa1 + 8 8 Ket + 11 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 224 +Need char = 161 + +/\x{212ab}/D8 +------------------------------------------------------------------ 
+ 0 9 Bra 0 + 3 4 \xf0\xa1\x8a\xab + 9 9 Ket + 12 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 240 +Need char = 171 + +/.{3,5}X/D8 +------------------------------------------------------------------ + 0 14 Bra 0 + 3 Any{3} + 7 Any{0,2} + 11 1 X + 14 14 Ket + 17 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +No first char +Need char = 'X' + \x{212ab}\x{212ab}\x{212ab}\x{861}X + 0: \x{212ab}\x{212ab}\x{212ab}\x{861}X + + +/.{3,5}?/D8 +------------------------------------------------------------------ + 0 11 Bra 0 + 3 Any{3} + 7 Any{0,2}? + 11 11 Ket + 14 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +No first char +No need char + \x{212ab}\x{212ab}\x{212ab}\x{861} + 0: \x{212ab}\x{212ab}\x{212ab} /-- These tests are here rather than in testinput5 because Perl 5.6 has --/ /-- some problems with UTF-8 support, in the area of \x{..} where the --/

ossp-pkg/pcre/pcre_test.sh -> 1.2

*** /dev/null Thu Apr 17 04:32:15 2025 --- - Thu Apr 17 04:32:42 2025 *************** *** 0 **** --- 1,149 ---- + #! /bin/sh + + # This file is generated by configure from RunTest.in. Make any changes + # to that file. + + # Run PCRE tests + + cf=diff + testdata=pcre_test.d + + # Select which tests to run; if no selection, run all + + do1=no + do2=no + do3=no + do4=no + do5=no + do6=no + + while [ $# -gt 0 ] ; do + case $1 in + 1) do1=yes;; + 2) do2=yes;; + 3) do3=yes;; + 4) do4=yes;; + 5) do5=yes;; + 6) do6=yes;; + *) echo "Unknown test number $1"; exit 1;; + esac + shift + done + + if [ "@UTF8@" = "" ] ; then + if [ $do5 = yes ] ; then + echo "Can't run test 5 because UFT8 support is not configured" + exit 1 + fi + if [ $do6 = yes ] ; then + echo "Can't run test 6 because UFT8 support is not configured" + exit 1 + fi + fi + + if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a\ + $do5 = no -a $do6 = no ] ; then + do1=yes + do2=yes + do3=yes + do4=yes + if [ "@UTF8@" != "" ] ; then do5=yes; fi + if [ "@UTF8@" != "" ] ; then do6=yes; fi + fi + + # Primary test, Perl-compatible + + if [ $do1 = yes ] ; then + echo "Testing main functionality (Perl compatible)" + ./pcre_test $testdata/testinput1 testtry + if [ $? = 0 ] ; then + $cf testtry $testdata/testoutput1 + if [ $? != 0 ] ; then exit 1; fi + else exit 1 + fi + fi + + # PCRE tests that are not Perl-compatible - API & error tests, mostly + + if [ $do2 = yes ] ; then + echo "Testing API and error handling (not Perl compatible)" + ./pcre_test -i $testdata/testinput2 testtry + if [ $? = 0 ] ; then + $cf testtry $testdata/testoutput2 + if [ $? != 0 ] ; then exit 1; fi + else exit 1 + fi + fi + + # Additional Perl-compatible tests for Perl 5.005's new features + + if [ $do3 = yes ] ; then + echo "Testing Perl 5.005 features (Perl 5.005 compatible)" + ./pcre_test $testdata/testinput3 testtry + if [ $? = 0 ] ; then + $cf testtry $testdata/testoutput3 + if [ $? 
!= 0 ] ; then exit 1; fi + else exit 1 + fi + fi + + if [ $do1 = yes -a $do2 = yes -a $do3 = yes ] ; then + echo " " + echo "The three main tests all ran OK" + echo " " + fi + + # Locale-specific tests, provided the "fr" locale is available + + if [ $do4 = yes ] ; then + locale -a | grep '^fr$' >/dev/null + if [ $? -eq 0 ] ; then + echo "Testing locale-specific features (using 'fr' locale)" + ./pcre_test $testdata/testinput4 testtry + if [ $? = 0 ] ; then + $cf testtry $testdata/testoutput4 + if [ $? != 0 ] ; then + echo " " + echo "Locale test did not run entirely successfully." + echo "This usually means that there is a problem with the locale" + echo "settings rather than a bug in PCRE." + else + echo "Locale test ran OK" + fi + echo " " + else exit 1 + fi + else + echo "Cannot test locale-specific features - 'fr' locale not found," + echo "or the \"locale\" command is not available to check for it." + echo " " + fi + fi + + # Additional tests for UTF8 support + + if [ $do5 = yes ] ; then + echo "Testing experimental, incomplete UTF8 support (Perl compatible)" + ./pcre_test $testdata/testinput5 testtry + if [ $? = 0 ] ; then + $cf testtry $testdata/testoutput5 + if [ $? != 0 ] ; then exit 1; fi + else exit 1 + fi + echo "UTF8 test ran OK" + echo " " + fi + + if [ $do6 = yes ] ; then + echo "Testing API and internals for UTF8 support (not Perl compatible)" + ./pcre_test $testdata/testinput6 testtry + if [ $? = 0 ] ; then + $cf testtry $testdata/testoutput6 + if [ $? != 0 ] ; then exit 1; fi + else exit 1 + fi + echo "UTF8 internals test ran OK" + echo " " + fi + + # End

ossp-pkg/pcre/pcregrep.1 -> 1.1

*** /dev/null Thu Apr 17 04:32:15 2025 --- - Thu Apr 17 04:32:42 2025 *************** *** 0 **** --- 1,98 ---- + .TH PCREGREP 1 + .SH NAME + pcregrep - a grep with Perl-compatible regular expressions. + .SH SYNOPSIS + .B pcregrep [-Vcfhilnrsvx] pattern [file] ... + + + .SH DESCRIPTION + \fBpcregrep\fR searches files for character patterns, in the same way as other + grep commands do, but it uses the PCRE regular expression library to support + patterns that are compatible with the regular expressions of Perl 5. See + \fBpcre(3)\fR for a full description of syntax and semantics. + + If no files are specified, \fBpcregrep\fR reads the standard input. By default, + each line that matches the pattern is copied to the standard output, and if + there is more than one file, the file name is printed before each line of + output. However, there are options that can change how \fBpcregrep\fR behaves. + + Lines are limited to BUFSIZ characters. BUFSIZ is defined in \fB<stdio.h>\fR. + The newline character is removed from the end of each line before it is matched + against the pattern. + + + .SH OPTIONS + .TP 10 + \fB-V\fR + Write the version number of the PCRE library being used to the standard error + stream. + .TP + \fB-c\fR + Do not print individual lines; instead just print a count of the number of + lines that would otherwise have been printed. If several files are given, a + count is printed for each of them. + .TP + + versity of Cambridge for use on Unix systems connected to + the Internet. It is freely available under the terms of + the GNU General Public Licence. In style it is similar to + Smail 3, but its facilities are more extensive, and in + particular it has some defences against mail bombs and + unsolicited junk mail, in the form of options for refusing + messages from particular hosts, networks, or senders. 
+ + Exim's command line takes the standard Unix form of a + sequence of options, each starting with a hyphen charac\fB-f\fIfilename\fR + Read patterns from the file, one per line, and match all patterns against each + line. There is a maximum of 100 patterns. Trailing white space is removed, and + blank lines are ignored. An empty file contains no patterns and therefore + matches nothing. + .TP + \fB-h\fR + Suppress printing of filenames when searching multiple files. + .TP + \fB-i\fR + Ignore upper/lower case distinctions during comparisons. + .TP + \fB-l\fR + Instead of printing lines from the files, just print the names of the files + containing lines that would have been printed. Each file name is printed + once, on a separate line. + .TP + \fB-n\fR + Precede each line by its line number in the file. + .TP + \fB-r\fR + If any file is a directory, recursively scan the files it contains. Without + \fB-r\fR a directory is scanned as a normal file. + .TP + \fB-s\fR + Work silently, that is, display nothing except error messages. + The exit status indicates whether any matches were found. + .TP + \fB-v\fR + Invert the sense of the match, so that lines which do \fInot\fR match the + pattern are now the ones that are found. + .TP + \fB-x\fR + Force the pattern to be anchored (it must start matching at the beginning of + the line) and in addition, require it to match the entire line. This is + equivalent to having ^ and $ characters at the start and end of each + alternative branch in the regular expression. + + + .SH SEE ALSO + \fBpcre(3)\fR, Perl 5 documentation + + + .SH DIAGNOSTICS + Exit status is 0 if any matches were found, 1 if no matches were found, and 2 + for syntax errors or inaccessible files (even if matches were found). + + + .SH AUTHOR + Philip Hazel <ph10@cam.ac.uk> + + Last updated: 15 August 2001 + .br + Copyright (c) 1997-2001 University of Cambridge.

ossp-pkg/pcre/pcregrep.c -> 1.1

*** /dev/null Thu Apr 17 04:32:15 2025 --- - Thu Apr 17 04:32:42 2025 *************** *** 0 **** --- 1,540 ---- + /************************************************* + * pcregrep program * + *************************************************/ + + /* This is a grep program that uses the PCRE regular expression library to do + its pattern matching. On a Unix system it can recurse into directories. */ + + #include <ctype.h> + #include <stdio.h> + #include <string.h> + #include <stdlib.h> + #include <errno.h> + #include "config.h" + #include "pcre.h" + + #define FALSE 0 + #define TRUE 1 + + typedef int BOOL; + + #define VERSION "2.0 01-Aug-2001" + #define MAX_PATTERN_COUNT 100 + + + /************************************************* + * Global variables * + *************************************************/ + + static char *pattern_filename = NULL; + static int pattern_count = 0; + static pcre **pattern_list; + static pcre_extra **hints_list; + + static BOOL count_only = FALSE; + static BOOL filenames = TRUE; + static BOOL filenames_only = FALSE; + static BOOL invert = FALSE; + static BOOL number = FALSE; + static BOOL recurse = FALSE; + static BOOL silent = FALSE; + static BOOL whole_lines = FALSE; + + /* Structure for options and list of them */ + + typedef struct option_item { + int one_char; + char *long_name; + char *help_text; + } option_item; + + static option_item optionlist[] = { + { -1, "help", "display this help and exit" }, + { 'c', "count", "print only a count of matching lines per FILE" }, + { 'h', "no-filename", "suppress the prefixing filename on output" }, + { 'i', "ignore-case", "ignore case distinctions" }, + { 'l', "files-with-matches", "print only FILE names containing matches" }, + { 'n', "line-number", "print line number with output lines" }, + { 'r', "recursive", "recursively scan sub-directories" }, + { 's', "no-messages", "suppress error messages" }, + { 'V', "version", "print version information and exit" }, + { 'v', "invert-match", "select 
non-matching lines" }, + { 'x', "line-regex", "force PATTERN to match only whole lines" }, + { 'x', "line-regexp", "force PATTERN to match only whole lines" }, + { 0, NULL, NULL } + }; + + + /************************************************* + * Functions for directory scanning * + *************************************************/ + + /* These functions are defined so that they can be made system specific, + although at present the only ones are for Unix, and for "no directory recursion + support". */ + + + /************* Directory scanning in Unix ***********/ + + #if IS_UNIX + #include <sys/types.h> + #include <sys/stat.h> + #include <dirent.h> + + typedef DIR directory_type; + + int + isdirectory(char *filename) + { + struct stat statbuf; + if (stat(filename, &statbuf) < 0) + return 0; /* In the expectation that opening as a file will fail */ + return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0; + } + + directory_type * + opendirectory(char *filename) + { + return opendir(filename); + } + + char * + readdirectory(directory_type *dir) + { + for (;;) + { + struct dirent *dent = readdir(dir); + if (dent == NULL) return NULL; + if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0) + return dent->d_name; + } + return NULL; /* Keep compiler happy; never executed */ + } + + void + closedirectory(directory_type *dir) + { + closedir(dir); + } + + + #else + + + /************* Directory scanning when we can't do it ***********/ + + /* The type is void, and apart from isdirectory(), the functions do nothing. */ + + typedef void directory_type; + + int isdirectory(char *filename) { return FALSE; } + directory_type * opendirectory(char *filename) {} + char *readdirectory(directory_type *dir) {} + void closedirectory(directory_type *dir) {} + + #endif + + + + #if ! 
HAVE_STRERROR + /************************************************* + * Provide strerror() for non-ANSI libraries * + *************************************************/ + + /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror() + in their libraries, but can provide the same facility by this simple + alternative function. */ + + extern int sys_nerr; + extern char *sys_errlist[]; + + char * + strerror(int n) + { + if (n < 0 || n >= sys_nerr) return "unknown error number"; + return sys_errlist[n]; + } + #endif /* HAVE_STRERROR */ + + + + /************************************************* + * Grep an individual file * + *************************************************/ + + static int + pcregrep(FILE *in, char *name) + { + int rc = 1; + int linenumber = 0; + int count = 0; + int offsets[99]; + char buffer[BUFSIZ]; + + while (fgets(buffer, sizeof(buffer), in) != NULL) + { + BOOL match = FALSE; + int i; + int length = (int)strlen(buffer); + if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0; + linenumber++; + + for (i = 0; !match && i < pattern_count; i++) + { + match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0, + offsets, 99) >= 0; + if (match && whole_lines && offsets[1] != length) match = FALSE; + } + + if (match != invert) + { + if (count_only) count++; + + else if (filenames_only) + { + fprintf(stdout, "%s\n", (name == NULL)? 
"<stdin>" : name); + return 0; + } + + else if (silent) return 0; + + else + { + if (name != NULL) fprintf(stdout, "%s:", name); + if (number) fprintf(stdout, "%d:", linenumber); + fprintf(stdout, "%s\n", buffer); + } + + rc = 0; + } + } + + if (count_only) + { + if (name != NULL) fprintf(stdout, "%s:", name); + fprintf(stdout, "%d\n", count); + } + + return rc; + } + + + + + /************************************************* + * Grep a file or recurse into a directory * + *************************************************/ + + static int + grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames, + BOOL only_one_at_top) + { + int rc = 1; + int sep; + FILE *in; + + /* If the file is a directory and we are recursing, scan each file within it. + The scanning code is localized so it can be made system-specific. */ + + if ((sep = isdirectory(filename)) != 0 && recurse) + { + char buffer[1024]; + char *nextfile; + directory_type *dir = opendirectory(filename); + + if (dir == NULL) + { + fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename, + strerror(errno)); + return 2; + } + + while ((nextfile = readdirectory(dir)) != NULL) + { + int frc; + sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile); + frc = grep_or_recurse(buffer, recurse, TRUE, FALSE); + if (frc == 0 && rc == 1) rc = 0; + } + + closedirectory(dir); + return rc; + } + + /* If the file is not a directory, or we are not recursing, scan it. If this is + the first and only argument at top level, we don't show the file name. + Otherwise, control is via the show_filenames variable. */ + + in = fopen(filename, "r"); + if (in == NULL) + { + fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno)); + return 2; + } + + rc = pcregrep(in, (show_filenames && !only_one_at_top)? 
filename : NULL); + fclose(in); + return rc; + } + + + + + /************************************************* + * Usage function * + *************************************************/ + + static int + usage(int rc) + { + fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n"); + fprintf(stderr, "Type `pcregrep --help' for more information.\n"); + return rc; + } + + + + + /************************************************* + * Help function * + *************************************************/ + + static void + help(void) + { + option_item *op; + + printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n"); + printf("Search for PATTERN in each FILE or standard input.\n"); + printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n"); + + printf("Options:\n"); + + for (op = optionlist; op->one_char != 0; op++) + { + int n; + char s[4]; + if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " "); + printf(" %s --%s%n", s, op->long_name, &n); + n = 30 - n; + if (n < 1) n = 1; + printf("%.*s%s\n", n, " ", op->help_text); + } + + printf("\n -f<filename> or --file=<filename>\n"); + printf(" Read patterns from <filename> instead of using a command line option.\n"); + printf(" Trailing white space is removed; blanks lines are ignored.\n"); + printf(" There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT); + + printf("\nWith no FILE, read standard input. 
If fewer than two FILEs given, assume -h.\n"); + printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n"); + } + + + + + /************************************************* + * Handle an option * + *************************************************/ + + static int + handle_option(int letter, int options) + { + switch(letter) + { + case -1: help(); exit(0); + case 'c': count_only = TRUE; break; + case 'h': filenames = FALSE; break; + case 'i': options |= PCRE_CASELESS; break; + case 'l': filenames_only = TRUE; + case 'n': number = TRUE; break; + case 'r': recurse = TRUE; break; + case 's': silent = TRUE; break; + case 'v': invert = TRUE; break; + case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break; + + case 'V': + fprintf(stderr, "pcregrep version %s using ", VERSION); + fprintf(stderr, "PCRE version %s\n", pcre_version()); + exit(0); + break; + + default: + fprintf(stderr, "pcregrep: Unknown option -%c\n", letter); + exit(usage(2)); + } + + return options; + } + + + + + /************************************************* + * Main program * + *************************************************/ + + int + main(int argc, char **argv) + { + int i, j; + int rc = 1; + int options = 0; + int errptr; + const char *error; + BOOL only_one_at_top; + + /* Process the options */ + + for (i = 1; i < argc; i++) + { + if (argv[i][0] != '-') break; + + /* Long name options */ + + if (argv[i][1] == '-') + { + option_item *op; + + if (strncmp(argv[i]+2, "file=", 5) == 0) + { + pattern_filename = argv[i] + 7; + continue; + } + + for (op = optionlist; op->one_char != 0; op++) + { + if (strcmp(argv[i]+2, op->long_name) == 0) + { + options = handle_option(op->one_char, options); + break; + } + } + if (op->one_char == 0) + { + fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]); + exit(usage(2)); + } + } + + /* One-char options */ + + else + { + char *s = argv[i] + 1; + while (*s != 0) + { + if (*s == 'f') + { + pattern_filename = s + 1; + if 
(pattern_filename[0] == 0) + { + if (i >= argc - 1) + { + fprintf(stderr, "pcregrep: File name missing after -f\n"); + exit(usage(2)); + } + pattern_filename = argv[++i]; + } + break; + } + else options = handle_option(*s++, options); + } + } + } + + pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *)); + hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *)); + + if (pattern_list == NULL || hints_list == NULL) + { + fprintf(stderr, "pcregrep: malloc failed\n"); + return 2; + } + + /* Compile the regular expression(s). */ + + if (pattern_filename != NULL) + { + FILE *f = fopen(pattern_filename, "r"); + char buffer[BUFSIZ]; + if (f == NULL) + { + fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename, + strerror(errno)); + return 2; + } + while (fgets(buffer, sizeof(buffer), f) != NULL) + { + char *s = buffer + (int)strlen(buffer); + if (pattern_count >= MAX_PATTERN_COUNT) + { + fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n", + MAX_PATTERN_COUNT); + return 2; + } + while (s > buffer && isspace((unsigned char)(s[-1]))) s--; + if (s == buffer) continue; + *s = 0; + pattern_list[pattern_count] = pcre_compile(buffer, options, &error, + &errptr, NULL); + if (pattern_list[pattern_count++] == NULL) + { + fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n", + pattern_count, errptr, error); + return 2; + } + } + fclose(f); + } + + /* If no file name, a single regex must be given inline */ + + else + { + if (i >= argc) return usage(0); + pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL); + if (pattern_list[0] == NULL) + { + fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr, + error); + return 2; + } + pattern_count++; + } + + /* Study the regular expressions, as we will be running them may times */ + + for (j = 0; j < pattern_count; j++) + { + hints_list[j] = pcre_study(pattern_list[j], 0, &error); + if (error != NULL) + { + char s[16]; + if (pattern_count == 1) s[0] = 
0; else sprintf(s, " number %d", j); + fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error); + return 2; + } + } + + /* If there are no further arguments, do the business on stdin and exit */ + + if (i >= argc) return pcregrep(stdin, NULL); + + /* Otherwise, work through the remaining arguments as files or directories. + Pass in the fact that there is only one argument at top level - this suppresses + the file name if the argument is not a directory. */ + + only_one_at_top = (i == argc - 1); + if (filenames_only) filenames = TRUE; + + for (; i < argc; i++) + { + int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top); + if (frc == 0 && rc == 1) rc = 0; + } + + return rc; + } + + /* End */

ossp-pkg/pcre/pcreposix.3 1.2 -> 1.3

--- pcreposix.3 2000/08/02 09:46:06 1.2 +++ pcreposix.3 2002/01/07 15:21:06 1.3 @@ -36,11 +36,11 @@ application which uses them. Because the POSIX functions call the native ones, it is also necessary to add \fR-lpcre\fR. -As I am pretty ignorant about POSIX, these functions must be considered as -experimental. I have implemented only those option bits that can be reasonably -mapped to PCRE native options. Other POSIX options are not even defined. It may -be that it is useful to define, but ignore, other options. Feedback from more -knowledgeable folk may cause this kind of detail to change. +I have implemented only those option bits that can be reasonably mapped to PCRE +native options. In addition, the options REG_EXTENDED and REG_NOSUB are defined +with the value zero. They have no effect, but since programs that are written +to the POSIX interface often use them, this makes it easier to slot in PCRE as +a replacement library. Other POSIX options are not even defined. When PCRE is called via these functions, it is only the API that is POSIX-like in style. The syntax and semantics of the regular expressions themselves are

ossp-pkg/pcre/pcreposix.c 1.4 -> 1.5

--- pcreposix.c 2000/08/29 19:24:17 1.4 +++ pcreposix.c 2002/01/07 15:21:06 1.5 @@ -12,7 +12,7 @@ Written by: Philip Hazel <ph10@cam.ac.uk> - Copyright (c) 1997-2000 University of Cambridge + Copyright (c) 1997-2001 University of Cambridge ----------------------------------------------------------------------------- Permission is granted to anyone to use this software for any purpose on any @@ -37,8 +37,8 @@ #include "pcre_internal.h" #include "pcreposix.h" +#include "stdlib.h" -#include <stdlib.h> /* Corresponding tables of PCRE error messages and POSIX error codes. */ @@ -62,13 +62,13 @@ REG_BADRPT, /* "operand of unlimited repeat could match the empty string" */ REG_ASSERT, /* "internal error: unexpected repeat" */ REG_BADPAT, /* "unrecognized character after (?" */ - REG_ESIZE, /* "too many capturing parenthesized sub-patterns" */ + REG_ASSERT, /* "unused error" */ REG_EPAREN, /* "missing )" */ REG_ESUBREG, /* "back reference to non-existent subpattern" */ REG_INVARG, /* "erroffset passed as NULL" */ REG_INVARG, /* "unknown option bit(s) set" */ REG_EPAREN, /* "missing ) after comment" */ - REG_ESIZE, /* "too many sets of parentheses" */ + REG_ESIZE, /* "parentheses nested too deeply" */ REG_ESIZE, /* "regular expression too large" */ REG_ESPACE, /* "failed to get memory" */ REG_EPAREN, /* "unmatched brackets" */

ossp-pkg/pcre/pcreposix.h -> 1.3

/*************************************************
*       Perl-Compatible Regular Expressions      *
*************************************************/

/* Copyright (c) 1997-2001 University of Cambridge */

#ifndef _PCREPOSIX_H
#define _PCREPOSIX_H

/* This is the header for the POSIX wrapper interface to the PCRE Perl-
Compatible Regular Expression library. It defines the things POSIX says should
be there. I hope. */

/* Have to include stdlib.h in order to ensure that size_t is defined. */

#include <stdlib.h>

/* Allow for C++ users */

#ifdef __cplusplus
extern "C" {
#endif

/* Options defined by POSIX. These are distinct bits, so they can be ORed
together. */

#define REG_ICASE     0x01
#define REG_NEWLINE   0x02
#define REG_NOTBOL    0x04
#define REG_NOTEOL    0x08

/* These are not used by PCRE, but by defining them we make it easier
to slot PCRE into existing programs that make POSIX calls. Being zero,
ORing them into an options word changes nothing. */

#define REG_EXTENDED  0
#define REG_NOSUB     0

/* Error values. Not all these are relevant or used by the wrapper. The
numbering starts at 1, so zero is never an error value. */

enum {
  REG_ASSERT = 1,  /* internal error ? */
  REG_BADBR,       /* invalid repeat counts in {} */
  REG_BADPAT,      /* pattern error */
  REG_BADRPT,      /* ? * + invalid */
  REG_EBRACE,      /* unbalanced {} */
  REG_EBRACK,      /* unbalanced [] */
  REG_ECOLLATE,    /* collation error - not relevant */
  REG_ECTYPE,      /* bad class */
  REG_EESCAPE,     /* bad escape sequence */
  REG_EMPTY,       /* empty expression */
  REG_EPAREN,      /* unbalanced () */
  REG_ERANGE,      /* bad range inside [] */
  REG_ESIZE,       /* expression too big */
  REG_ESPACE,      /* failed to get memory */
  REG_ESUBREG,     /* bad back reference */
  REG_INVARG,      /* bad argument */
  REG_NOMATCH      /* match failed */
};


/* The structure representing a compiled regular expression. */

typedef struct {
  void *re_pcre;
  size_t re_nsub;
  size_t re_erroffset;
} regex_t;

/* The structure in which a captured offset is returned. */

typedef int regoff_t;

typedef struct {
  regoff_t rm_so;
  regoff_t rm_eo;
} regmatch_t;

/* The functions. The parameter names are for documentation only and follow
the POSIX descriptions of these calls. */

extern int    regcomp(regex_t *preg, const char *pattern, int cflags);
extern int    regexec(regex_t *preg, const char *string, size_t nmatch,
                regmatch_t *pmatch, int eflags);
extern size_t regerror(int errcode, const regex_t *preg, char *errbuf,
                size_t errbuf_size);
extern void   regfree(regex_t *preg);

#ifdef __cplusplus
}   /* extern "C" */
#endif

#endif /* End of pcreposix.h */

ossp-pkg/pcre/pgrep.1 1.2 -> 1.3

ossp-pkg/pcre/pgrep.c 1.3 -> 1.4

OSSP CVS Repository