OSSP CVS Repository

ossp - Check-in [1959]
Not logged in
[Honeypot]  [Browse]  [Home]  [Login]  [Reports]
[Search]  [Ticket]  [Timeline]
  [Patchset]  [Tagging/Branching]

Check-in Number: 1959
Date: 2002-Mar-07 10:14:05 (local)
2002-Mar-07 09:14:05 (UTC)
User:rse
Branch:
Comment: Major bugfixing and enhancing of search & replace operation:
  - finally fix ${name:s/$/foo/} type substitutions (zero-length matching!)
  - add s/.../.../mg matching support (Perl-style multiline)
  - make non-multiline matching the default
Tickets:
Inspections:
Files:
ossp-pkg/var/TODO      1.25 -> 1.26     0 inserted, 4 deleted
ossp-pkg/var/var.c      1.84 -> 1.85     51 inserted, 27 deleted
ossp-pkg/var/var.pod      1.27 -> 1.28     4 inserted, 2 deleted
ossp-pkg/var/var_test.c      1.41 -> 1.42     1 inserted, 1 deleted

ossp-pkg/var/TODO 1.25 -> 1.26

--- TODO 2002/03/06 15:30:35     1.25
+++ TODO 2002/03/07 09:14:05     1.26
@@ -7,10 +7,6 @@
    and other dynamic stuff into the template inside variable constructs.
    Think about var_printf("${array[%d]}", i); 
 
- o ./var_play '${HOME:s/$/foo/}' does nothing because
-   it seems that internally a zero-size pattern match
-   does is not handled correctly.
-
  o with PCRE active, ./var_play '${HOME:s/x$/foo/}' fails
    horribly with an impossible error invalid character class!
 


ossp-pkg/var/var.c 1.84 -> 1.85

--- var.c        2002/03/07 09:08:11     1.84
+++ var.c        2002/03/07 09:14:05     1.85
@@ -845,6 +845,7 @@
     tokenbuf_t tmp;
     const char *p;
     int case_insensitive = 0;
+    int multiline = 0;
     int global = 0;
     int no_regex = 0;
     int rc;
@@ -852,19 +853,22 @@
     if (search->begin == search->end)
         return VAR_ERR_EMPTY_SEARCH_STRING;
 
-    for (p = flags->begin; p != flags->end; ++p) {
+    for (p = flags->begin; p != flags->end; p++) {
         switch (tolower(*p)) {
-        case 'i':
-            case_insensitive = 1;
-            break;
-        case 'g':
-            global = 1;
-            break;
-        case 't':
-            no_regex = 1;
-            break;
-        default:
-            return VAR_ERR_UNKNOWN_REPLACE_FLAG;
+            case 'm':
+                multiline = 1;
+                break;
+            case 'i':
+                case_insensitive = 1;
+                break;
+            case 'g':
+                global = 1;
+                break;
+            case 't':
+                no_regex = 1;
+                break;
+            default:
+                return VAR_ERR_UNKNOWN_REPLACE_FLAG;
         }
     }
 
@@ -916,7 +920,10 @@
         }
 
         /* compile the pattern. */
-        rc = regcomp(&preg, tmp.begin, REG_NEWLINE|REG_EXTENDED|((case_insensitive)?REG_ICASE:0));
+        rc = regcomp(&preg, tmp.begin, 
+                     (  REG_EXTENDED
+                      | (multiline ? REG_NEWLINE : 0)
+                      | (case_insensitive ? REG_ICASE : 0)));
         tokenbuf_free(&tmp);
         if (rc != 0) {
             tokenbuf_free(&mydata);
@@ -925,31 +932,41 @@
 
         /* match the pattern and create the result string in the tmp buffer */
         tokenbuf_append(&tmp, "", 0);
-        for (p = mydata.begin; p != mydata.end; ) {
+        for (p = mydata.begin; p < mydata.end; ) {
             if (p == mydata.begin || p[-1] == '\n')
                 regexec_flag = 0;
             else
                 regexec_flag = REG_NOTBOL;
             rc = regexec(&preg, p, sizeof(pmatch) / sizeof(regmatch_t), pmatch, regexec_flag);
-            if (rc != 0 || p + pmatch[0].rm_so == mydata.end) {
+            /* XXX */
+            if (rc != 0) {
+                /* no (more) matching */
                 tokenbuf_append(&tmp, p, mydata.end - p);
                 break;
-            } else {
-                /* create replace string */
-                rc = parse_regex_replace(var, ctx, p, replace, pmatch, &myreplace);
-                if (rc != VAR_OK) {
+            }
+            else if (   multiline 
+                     && (p + pmatch[0].rm_so) == mydata.end 
+                     && (pmatch[0].rm_eo - pmatch[0].rm_so) == 0) {
+                /* special case: found empty pattern (usually /^/ or /$/ only)
+                   in multi-line at end of data (after the last newline) */
+                tokenbuf_append(&tmp, p, mydata.end - p);
+                break;
+            }
+            else {
+                /* append prolog string */
+                if (!tokenbuf_append(&tmp, p, pmatch[0].rm_so)) {
                     regfree(&preg);
                     tokenbuf_free(&tmp);
                     tokenbuf_free(&mydata);
-                    return rc;
+                    return VAR_ERR_OUT_OF_MEMORY;
                 }
-                /* append prolog string */
-                if (!tokenbuf_append(&tmp, p, pmatch[0].rm_so)) {
+                /* create replace string */
+                rc = parse_regex_replace(var, ctx, p, replace, pmatch, &myreplace);
+                if (rc != VAR_OK) {
                     regfree(&preg);
                     tokenbuf_free(&tmp);
                     tokenbuf_free(&mydata);
-                    tokenbuf_free(&myreplace);
-                    return VAR_ERR_OUT_OF_MEMORY;
+                    return rc;
                 }
                 /* append replace string */
                 if (!tokenbuf_append(&tmp, myreplace.begin, myreplace.end - myreplace.begin)) {
@@ -959,19 +976,26 @@
                     tokenbuf_free(&myreplace);
                     return VAR_ERR_OUT_OF_MEMORY;
                 }
+                tokenbuf_free(&myreplace);
+                /* skip now processed data */
                 p += pmatch[0].rm_eo;
-                /* XXX??? */
+                /* if pattern matched an empty part (think about
+                   anchor-only regular expressions like /^/ or /$/) we
+                   skip the next character to make sure we do not enter
+                   an infinitive loop in matching */
                 if ((pmatch[0].rm_eo - pmatch[0].rm_so) == 0) {
+                    if (p >= mydata.end)
+                        break;
                     if (!tokenbuf_append(&tmp, p, 1)) {
                         regfree(&preg);
                         tokenbuf_free(&tmp);
                         tokenbuf_free(&mydata);
-                        tokenbuf_free(&myreplace);
                         return VAR_ERR_OUT_OF_MEMORY;
                     }
                     p++;
                 }
-                tokenbuf_free(&myreplace);
+                /* append prolog string and stop processing if we 
+                   do not perform the search & replace globally */
                 if (!global) {
                     if (!tokenbuf_append(&tmp, p, mydata.end - p)) {
                         regfree(&preg);


ossp-pkg/var/var.pod 1.27 -> 1.28

--- var.pod      2002/03/06 10:18:19     1.27
+++ var.pod      2002/03/07 09:14:05     1.28
@@ -159,7 +159,9 @@
 performed and only the first occurance of I<pattern> is replaced. Flag
 "C<i>" switches to case insensitive matching; flag "C<t>" switches
 to plain text pattern; flag "C<g>" switches to replacements of all
-occurances.
+occurances; flag "C<m>" switches to multi-line matching (That is, change
+"C<^>" and "C<$>" from matching the start or end of the string to
+matching the start or end of any line).
 
 =item C<${>I<name>C<:y/>I<ochars>C</>I<nchars>C</}>
 
@@ -223,7 +225,7 @@
                | '*' (TEXT_EXP|variable)+
                | 's' '/' (TEXT_PATTERN)+ 
                      '/' (variable|TEXT_SUBST)* 
-                     '/' ('g'|'i'|'t')*
+                     '/' ('m'|'g'|'i'|'t')*
                | 'y' '/' (variable|TEXT_SUBST)+ 
                      '/' (variable|TEXT_SUBST)* 
                      '/'


ossp-pkg/var/var_test.c 1.41 -> 1.42

--- var_test.c   2002/03/06 11:09:12     1.41
+++ var_test.c   2002/03/07 09:14:05     1.42
@@ -221,7 +221,7 @@
         { "[${ARRAY[#+1]}-]",             "entry1-entry2-entry3-"                          },
         { "-[${ARRAY[#]}:]{1,$NUMBER}-",  "-entry1:entry2:-"                               },
         { "-[${ARRAY[#]}:]{1,3,5}-",      "-entry1::-"                                     },
-        { "${MULTILINE:s/^/ | /g}",       " | line1\n | line2\n"                           },
+        { "${MULTILINE:s/^/ | /gm}",      " | line1\n | line2\n"                           },
         { "${HOME:%upper}",               "/HOME/REGRESSION-TESTS"                         },
         { "${HOME:%upper:%lower}",        "/home/regression-tests"                         },
         { "${EMPTY:%return($HOME)}",      "/home/regression-tests"                         },

CVSTrac 2.0.1