OSSP CVS Repository

ossp - Check-in [4868]
Not logged in
[Honeypot]  [Browse]  [Home]  [Login]  [Reports
[Search]  [Ticket]  [Timeline
  [Patchset]  [Tagging/Branching

Check-in Number: 4868
Date: 2004-Nov-27 20:40:33 (local)
2004-Nov-27 19:40:33 (UTC)
User:rse
Branch:
Comment: Ok, let's rock: add four optional post-processing functions which allow one to adjust an unpacked data structure in a way it can be used more conveniently and intuitive.
Tickets:
Inspections:
Files:
ossp-pkg/cfg/perl/cfg.pm      1.7 -> 1.8     129 inserted, 6 deleted
ossp-pkg/cfg/perl/cfg.pod      1.3 -> 1.4     52 inserted, 1 deleted

ossp-pkg/cfg/perl/cfg.pm 1.7 -> 1.8

--- cfg.pm       2004/11/20 17:09:35     1.7
+++ cfg.pm       2004/11/27 19:40:33     1.8
@@ -464,17 +464,17 @@
 }
 
 #   unpack Perl complex data structure from node tree
-sub unpack ($) {
-    my ($self) = @_;
+sub unpack ($;%) {
+    my ($self, %arg) = @_;
     my $cfg = $self->{-cfg};
 
     #   start traversing the node tree from the root node
     my $root = $cfg->node_root();
-    my $tree = &traverse($cfg, $root);
+    my $tree = &traverse($cfg, \%arg, $root);
 
     #   node processing function
     sub traverse {
-        my ($cfg, $node) = @_;
+        my ($cfg, $arg, $node) = @_;
         return if (not(ref($node) and $$node != 0));
         my $type = $cfg->node_get($node, $cfg->CFG_NODE_ATTR_TYPE);
         if ($type == $cfg->CFG_NODE_TYPE_SEQ) {
@@ -482,10 +482,15 @@
             my $seq = [];
             $node = $cfg->node_get($node, $cfg->CFG_NODE_ATTR_CHILD1);
             while (ref($node) and $$node != 0) {
-                my $tree = &traverse($cfg, $node); # recursion
+                my $tree = &traverse($cfg, $arg, $node); # recursion
                 push(@{$seq}, $tree) if (defined($tree));
                 $node = $cfg->node_get($node, $cfg->CFG_NODE_ATTR_RBROTH);
             }
+            #   optional post-processing of data structure
+            $seq = &unpack_postproc_merge   ($seq, $arg->{-merge})   if (defined($arg->{-merge}));
+            $seq = &unpack_postproc_index   ($seq, $arg->{-index})   if (defined($arg->{-index}));
+            $seq = &unpack_postproc_strip   ($seq, $arg->{-strip})   if (defined($arg->{-strip}));
+            $seq = &unpack_postproc_flatten ($seq, $arg->{-flatten}) if (defined($arg->{-flatten}));
             return $seq;
         }
         elsif ($type == $cfg->CFG_NODE_TYPE_DIR) {
@@ -493,7 +498,7 @@
             my $dir = [];
             $node = $cfg->node_get($node, $cfg->CFG_NODE_ATTR_CHILD1);
             while (ref($node) and $$node != 0) {
-                my $tree = &traverse($cfg, $node); # recursion
+                my $tree = &traverse($cfg, $arg, $node); # recursion
                 push(@{$dir}, $tree) if (defined($tree));
                 $node = $cfg->node_get($node, $cfg->CFG_NODE_ATTR_RBROTH);
             }
@@ -508,5 +513,123 @@
     return $tree;
 }
 
+#   unpack helper function:
+#   optionally merge arguments of DIRectives starting with same TOKen.
+#   complexity: O(n) (actually 3*n)
+sub unpack_postproc_merge {
+    my ($seq, $re) = @_;
+    
+    #   pass 1: determine directive names and their 
+    #   occurences in sequence. complexity: O(n)
+    my $dir = {};
+    for (my $i = 0; $i < @{$seq}; $i++) {
+        if (    ref($seq->[$i]) eq 'ARRAY' 
+            and not ref($seq->[$i]->[0])
+            and $seq->[$i]->[0] =~ m/^$re$/s) {
+            my $name = $seq->[$i]->[0];
+            $dir->{$name} = [] if (not defined($dir->{$name}));
+            push(@{$dir->{$name}}, $i);
+        }
+    }
+
+    #   pass 2: append arguments of second and following sequences
+    #   to arguments of first directive. complexity: O(n)
+    foreach my $name (keys(%{$dir})) {
+        if (@{$dir->{$name}} > 1) {
+            my $f = $dir->{$name}->[0];
+            foreach my $n (@{$dir->{$name}}[1..$#{$dir->{$name}}]) {
+                foreach my $tok (@{$seq->[$n]}[1..$#{$seq->[$n]}]) {
+                    push(@{$seq->[$f]}, $tok);
+                }
+                $seq->[$n] = undef;
+            }
+        }
+    }
+
+    #   pass 3: shrink sequence by eliminating removing entries
+    #   complexity: O(n)
+    for (my $i = 0; $i < @{$seq}; $i++) {
+        if (not defined($seq->[$i])) {
+            splice(@{$seq}, $i, 1);
+            $i--; # compensate for splice operation
+        }
+    }
+
+    return $seq;
+}
+
+#   unpack helper function:
+#   optionally annotate SEQuence nodes with a DIRective name indexing hash
+#   complexity: O(n)
+sub unpack_postproc_index {
+    my ($seq, $re) = @_;
+
+    #   create an index hash
+    my $idx = {};
+    for (my $i = 0; $i < @{$seq}; $i++) {
+        if (ref($seq->[$i]) eq 'ARRAY') {
+            #   determine name of directive, i.e.,
+            #   first non-sub-sequence element
+            my $name = {};
+            for (my $j = 0; $j < @{$seq->[$j]}; $j++) {
+                $name = $seq->[$i]->[$j];
+                last if (not ref($name));
+            }
+            if (    not ref($name)
+                and $name =~ m/^$re$/s 
+                and not defined($idx->{$name})) {
+                #    remember position of directive's
+                #    first occurrence in sequence
+                $idx->{$name} = ($i + 1);
+            }
+        }
+    }
+
+    #   insert index at first position of sequence array following
+    #   perlref(1)'s "Pseudo-hashes: Using an array as a hash" approach.
+    unshift(@{$seq}, $idx);
+
+    return $seq;
+}
+
+#   unpack helper function:
+#   strip directive command/start token
+#   complexity: O(n)
+sub unpack_postproc_strip {
+    my ($seq, $re) = @_;
+
+    foreach my $dir (@{$seq}) {
+        if (    ref($dir) eq 'ARRAY'
+            and $dir->[0] =~ m/^$re$/s) {
+            #   remove first token of directive array
+            shift(@{$dir});
+        }
+    }
+
+    return $seq;
+}
+
+#   unpack helper function:
+#   flatten empty and single token directives
+sub unpack_postproc_flatten {
+    my ($seq, $empty) = @_;
+
+    for (my $i = 0; $i < @{$seq}; $i++) {
+        if (ref($seq->[$i]) eq 'ARRAY') {
+            if (@{$seq->[$i]} == 0) {
+                #   replace empty directive array
+                #   (can exist after "dirstrip" processing!)
+                $seq->[$i] = $empty;
+            }
+            elsif (@{$seq->[$i]} == 1) {
+                #   simplify single token directive array
+                $seq->[$i] = $seq->[$i]->[0];
+            }
+        }
+    }
+
+    return $seq;
+}
+
 1;
 


ossp-pkg/cfg/perl/cfg.pod 1.3 -> 1.4

--- cfg.pod      2004/11/20 17:09:35     1.3
+++ cfg.pod      2004/11/27 19:40:33     1.4
@@ -59,12 +59,63 @@
 
 =item C<$txt  = $cfg-E<gt>>B<format>C<();>
 
-=item C<$tree = $cfg-E<gt>>B<unpack>C<();>
+=item C<$tree = $cfg-E<gt>>B<unpack>C<(>[I<options]C<);>
 
 =item C<undef $cfg;>
 
 =back
 
+The B<unpack()> I<options> are:
+
+=over 4
+
+=item C<-merge =E<gt> >I<dir-regex>
+
+Merge all directives with a start token matching against I<dir-regex> by
+appending the argument tokens of second and following directives to the
+first directive. The resulting data structure is changed as following:
+ 
+  before: [['foo','foo1'],['bar'],['foo','foo2']]
+  after:  [['foo','foo1','foo2'],['bar']]
+
+=item C<-index =E<gt> >I<dir-regex>
+
+Add all directives start tokens matching against I<dir-regex> to a
+pseudo-directive element pointing to the first occurrence of the
+directive.
+
+  before: [['foo','foo1'],['bar','bar1'],['quux','quux1']]
+  after:  [{'foo'=>1,'bar'=>2,'quux'=>3},
+           ['foo','foo1'],['bar','bar1'],['quux','quux1']]
+
+This leverages perlref(1)'s "Pseudo-hashes: Using an array as a hash"
+approach to allow one to directly access directives by name. The
+following are then equivalent:
+
+  ...->{'bar'}
+  ...->[2]
+
+=item C<-strip =E<gt> >I<dir-regex>
+
+Strip start token of all directives where it matches the I<dir-regex>.
+This is useful in combination with B<-index> only.
+
+  before: [['foo','foo1'],['bar','bar1'],['quux','quux1']]
+  after:  [['foo1'],['bar1'],['quux1']]
+
+=item C<-flatten =E<gt> >I<boolean-value>
+
+Flatten the tokens of all directives with a start token matching
+against I<dir-regex> by replacing empty arrays with I<boolean-value>,
+arrays containing a single token with just the token and leaving arrays
+containing more than one token as is. This is useful in combination with
+B<-index> and B<-strip>.
+
+  before: [['foo','foo1'],['bar'],['']]
+  after:  [['foo','foo1'],'bar',1]
+
+=back
+
 =head2 OO-STYLE API
 
 The OO-style API is a wrapper around the C-style API and intended for

CVSTrac 2.0.1