OSSP: CVS Repository: Check-in [1959]

Check-in Number:

1959

Date:

2002-Mar-07 10:14:05 (local)
2002-Mar-07 09:14:05 (UTC)

User:

rse

Branch:

Comment:

Major bugfixing and enhancing of search & replace operation:
- finally fix ${name:s/$/foo/} type substitutions (zero-length matching!)
- add s/.../.../mg matching support (Perl-style multiline)
- make non-multiline matching the default

Tickets:

Inspections:

Files:

ossp-pkg/var/TODO	1.25 -> 1.26	0 inserted, 4 deleted
ossp-pkg/var/var.c	1.84 -> 1.85	51 inserted, 27 deleted
ossp-pkg/var/var.pod	1.27 -> 1.28	4 inserted, 2 deleted
ossp-pkg/var/var_test.c	1.41 -> 1.42	1 inserted, 1 deleted

ossp-pkg/var/TODO 1.25 -> 1.26

--- TODO 2002/03/06 15:30:35 1.25 +++ TODO 2002/03/07 09:14:05 1.26 @@ -7,10 +7,6 @@ and other dynamic stuff into the template inside variable constructs. Think about var_printf("${array[%d]}", i); - o ./var_play '${HOME:s/$/foo/}' does nothing because - it seems that internally a zero-size pattern match - does is not handled correctly. - o with PCRE active, ./var_play '${HOME:s/x$/foo/}' fails horribly with an impossible error invalid character class!

ossp-pkg/var/var.c 1.84 -> 1.85

--- var.c 2002/03/07 09:08:11 1.84 +++ var.c 2002/03/07 09:14:05 1.85 @@ -845,6 +845,7 @@ tokenbuf_t tmp; const char *p; int case_insensitive = 0; + int multiline = 0; int global = 0; int no_regex = 0; int rc; @@ -852,19 +853,22 @@ if (search->begin == search->end) return VAR_ERR_EMPTY_SEARCH_STRING; - for (p = flags->begin; p != flags->end; ++p) { + for (p = flags->begin; p != flags->end; p++) { switch (tolower(*p)) { - case 'i': - case_insensitive = 1; - break; - case 'g': - global = 1; - break; - case 't': - no_regex = 1; - break; - default: - return VAR_ERR_UNKNOWN_REPLACE_FLAG; + case 'm': + multiline = 1; + break; + case 'i': + case_insensitive = 1; + break; + case 'g': + global = 1; + break; + case 't': + no_regex = 1; + break; + default: + return VAR_ERR_UNKNOWN_REPLACE_FLAG; } } @@ -916,7 +920,10 @@ } /* compile the pattern. */ - rc = regcomp(&preg, tmp.begin, REG_NEWLINE|REG_EXTENDED|((case_insensitive)?REG_ICASE:0)); + rc = regcomp(&preg, tmp.begin, + ( REG_EXTENDED + | (multiline ? REG_NEWLINE : 0) + | (case_insensitive ? 
REG_ICASE : 0))); tokenbuf_free(&tmp); if (rc != 0) { tokenbuf_free(&mydata); @@ -925,31 +932,41 @@ /* match the pattern and create the result string in the tmp buffer */ tokenbuf_append(&tmp, "", 0); - for (p = mydata.begin; p != mydata.end; ) { + for (p = mydata.begin; p < mydata.end; ) { if (p == mydata.begin || p[-1] == '\n') regexec_flag = 0; else regexec_flag = REG_NOTBOL; rc = regexec(&preg, p, sizeof(pmatch) / sizeof(regmatch_t), pmatch, regexec_flag); - if (rc != 0 || p + pmatch[0].rm_so == mydata.end) { + /* XXX */ + if (rc != 0) { + /* no (more) matching */ tokenbuf_append(&tmp, p, mydata.end - p); break; - } else { - /* create replace string */ - rc = parse_regex_replace(var, ctx, p, replace, pmatch, &myreplace); - if (rc != VAR_OK) { + } + else if ( multiline + && (p + pmatch[0].rm_so) == mydata.end + && (pmatch[0].rm_eo - pmatch[0].rm_so) == 0) { + /* special case: found empty pattern (usually /^/ or /$/ only) + in multi-line at end of data (after the last newline) */ + tokenbuf_append(&tmp, p, mydata.end - p); + break; + } + else { + /* append prolog string */ + if (!tokenbuf_append(&tmp, p, pmatch[0].rm_so)) { regfree(&preg); tokenbuf_free(&tmp); tokenbuf_free(&mydata); - return rc; + return VAR_ERR_OUT_OF_MEMORY; } - /* append prolog string */ - if (!tokenbuf_append(&tmp, p, pmatch[0].rm_so)) { + /* create replace string */ + rc = parse_regex_replace(var, ctx, p, replace, pmatch, &myreplace); + if (rc != VAR_OK) { regfree(&preg); tokenbuf_free(&tmp); tokenbuf_free(&mydata); - tokenbuf_free(&myreplace); - return VAR_ERR_OUT_OF_MEMORY; + return rc; } /* append replace string */ if (!tokenbuf_append(&tmp, myreplace.begin, myreplace.end - myreplace.begin)) { @@ -959,19 +976,26 @@ tokenbuf_free(&myreplace); return VAR_ERR_OUT_OF_MEMORY; } + tokenbuf_free(&myreplace); + /* skip now processed data */ p += pmatch[0].rm_eo; - /* XXX??? 
*/ + /* if pattern matched an empty part (think about + anchor-only regular expressions like /^/ or /$/) we + skip the next character to make sure we do not enter + an infinitive loop in matching */ if ((pmatch[0].rm_eo - pmatch[0].rm_so) == 0) { + if (p >= mydata.end) + break; if (!tokenbuf_append(&tmp, p, 1)) { regfree(&preg); tokenbuf_free(&tmp); tokenbuf_free(&mydata); - tokenbuf_free(&myreplace); return VAR_ERR_OUT_OF_MEMORY; } p++; } - tokenbuf_free(&myreplace); + /* append prolog string and stop processing if we + do not perform the search & replace globally */ if (!global) { if (!tokenbuf_append(&tmp, p, mydata.end - p)) { regfree(&preg);

ossp-pkg/var/var.pod 1.27 -> 1.28

--- var.pod 2002/03/06 10:18:19 1.27 +++ var.pod 2002/03/07 09:14:05 1.28 @@ -159,7 +159,9 @@ performed and only the first occurance of I<pattern> is replaced. Flag "C<i>" switches to case insensitive matching; flag "C<t>" switches to plain text pattern; flag "C<g>" switches to replacements of all -occurances. +occurances; flag "C<m>" switches to multi-line matching (That is, change +"C<^>" and "C<$>" from matching the start or end of the string to +matching the start or end of any line). =item C<${>I<name>C<:y/>I<ochars>C</>I<nchars>C</}> @@ -223,7 +225,7 @@ | '*' (TEXT_EXP|variable)+ | 's' '/' (TEXT_PATTERN)+ '/' (variable|TEXT_SUBST)* - '/' ('g'|'i'|'t')* + '/' ('m'|'g'|'i'|'t')* | 'y' '/' (variable|TEXT_SUBST)+ '/' (variable|TEXT_SUBST)* '/'

ossp-pkg/var/var_test.c 1.41 -> 1.42

--- var_test.c 2002/03/06 11:09:12 1.41 +++ var_test.c 2002/03/07 09:14:05 1.42 @@ -221,7 +221,7 @@ { "[${ARRAY[#+1]}-]", "entry1-entry2-entry3-" }, { "-[${ARRAY[#]}:]{1,$NUMBER}-", "-entry1:entry2:-" }, { "-[${ARRAY[#]}:]{1,3,5}-", "-entry1::-" }, - { "${MULTILINE:s/^/ | /g}", " | line1\n | line2\n" }, + { "${MULTILINE:s/^/ | /gm}", " | line1\n | line2\n" }, { "${HOME:%upper}", "/HOME/REGRESSION-TESTS" }, { "${HOME:%upper:%lower}", "/home/regression-tests" }, { "${EMPTY:%return($HOME)}", "/home/regression-tests" },

OSSP CVS Repository