OSSP CVS Repository |
|
#!/usr/bin/perl ## ## srml2sxml -- Sugar SRML to SXML Translation ## Copyright (c) 1999-2002 Ralf S. Engelschall <rse@engelschall.com> ## require 5.005; $|++; my $tags1d = { '==' => [ '<span class=h1>', '</span>' ], '--' => [ '<span class=h2>', '</span>' ], '~~' => [ '<span class=h3>', '</span>' ], '..' => [ '<span class=h4>', '</span>' ], '**' => [ '<b>', '</b>' ], '//' => [ '<i>', '</i>' ], '__' => [ '<ul>', '</ul>' ], '[[' => [ '<boxed>', '</boxed>' ], '||' => [ '<tt>', '</tt>' ], '!!' => [ '<verbatim>', '</verbatim>' ], '(+' => [ '<anchor>', '</anchor>' ], '->' => [ '<xref>', '</xref>' ], }; my $alias1d = { '+)' => '(+', '<-' => '->', ']]' => '[[', }; my $tags2d = { '==' => [ '<h1>', '</h1>' ], '--' => [ '<h2>', '</h2>' ], '~~' => [ '<h3>', '</h3>' ], '..' => [ '<h4>', '</h4>' ], '**' => [ '<b>', '</b>' ], '//' => [ '<i>', '</i>' ], '__' => [ '<ul>', '</ul>' ], '[[' => [ '<boxed>', '</boxed>' ], '||' => [ '<pre>', '</pre>' ], '!!' => [ '<verbatim>', '</verbatim>' ], '>>' => [ '<blockquote>', '</blockquote>' ], '((' => [ '<align to=left>', '</align>' ], '%%' => [ '<align to=center>', '</align>' ], '))' => [ '<align to=right>', '</align>' ], '++' => [ '<table>', '</table>' ], '##' => [ '<comment>', '</comment>' ], '$$' => [ '<list>', '</list>' ], 'o ' => [ '<item type=ul>', '</item>' ], '- ' => [ '<item type=ul>', '</item>' ], 'o.' => [ '<item type=ol>', '</item>' ], '-.' => [ '<item type=ol>', '</item>' ], '::' => [ '<item type=dl>', '</item>' ], 'Title:' => [ '<title>', '</title>' ], 'Author:' => [ '<author>', '</author>' ], 'Genesis:' => [ '<genesis>', '</genesis>' ], 'Date:' => [ '<date>', '</date>' ], 'Abstract:' => [ '<abstract>', '</abstract>' ], }; my $alias2d = { '[]' => '[[', }; my $escapes = { '&' => '&', '<' => '<', '>' => '>', }; # assemble mega-regex which matches all 1d-tags my $tag1d = &assemble_regex((keys(%{$tags1d}),keys(%{$alias1d}))); my $tag2d = &assemble_regex((keys(%{$tags2d}),keys(%{$alias2d}))); sub assemble_regex { my (@tags) = @_; my $regex = '(?:'; foreach $tag (sort(@tags)) { $regex .= quotemeta($tag)."+|"; } $regex =~ s/\+\|$//; $regex .= ')'; return $regex; } my $escape = '(?:'; foreach $e (sort(%{$escapes})) { $escape .= quotemeta($e)."|"; } $escape =~ s/\|$//; $escape .= ')'; sub expandtab { my ($ts, $str) = @_; my $i = 0; my $k; $str =~ s|(.) |($1 eq "\t" ? ($k = $i % $ts, $i += ($ts - $k), "*" x ($ts - $k) ) : ($i++, $1) ) |sgex; return $str; } # <document> ::= <blockgroup>* # <blockgroup> ::= <block>* # <block> ::= <para> | <2d-block> # <2d-block> ::= <2d-tag> <document> # <2d-tag> ::= m/^\s*XY+\s+/ # <para> ::= <1d-block>* # # <blockgroup> und <para> koennen degenerieren # auf genau einen, d.h. z.B. # <blockgroup> == 1 <para> oder <para> == 1 <1d-block> # # 1d-block: # foo bar # **baz # quux # # 2d-block # ** baz # quux # # -> blank lines zwischen 2d-blocks koennen entfallen! # -> blank lines wenn da sind markieren paragraphen # und schliessen 1d block ab sub parse_srml { my ($srml) = @_; my $sxml = ''; while ($srml ne '') { # stop at end of input if ($srml =~ m|^(\s*)$|s) { $sxml .= $1; $srml = ''; last; } # pass through blank lines elsif ($srml =~ m|^((?: *\n)+)(.*)$|s) { $sxml .= $1; $srml = $2; } # recognize 2-dimensional block start elsif ($srml =~ m|^( *)($tag2d)( +[^\n]*)?(.*)$|s) { my ($prefix, $tag, $block) = ($1, $2, $3); $block = $prefix . $block; $srml = $4; # reduce tag to canonical 2-character form $tag =~ s|^(.)(.)\2+$|$1$2|s; if (defined($alias2d->{$tag})) { $tag = $alias2d->{$tag}; } # gather block lines # (same or less indent indicates end) my $minindent = length($prefix)+1; while ($srml =~ m/^((?: *| {$minindent,}[^\n]+)\n)(.*)$/s) { $block .= $1; $srml = $2; } $block = &parse_srml($block); $block =~ s|^(\s*)(.*?)(\s*)$| $1 . $tags2d->{$tag}->[0] . $2 . $tags2d->{$tag}->[1] . $3|se; $sxml .= $block; } # anything else we treat as a 1-dimensional block start else { # gather block lines # (a blank line indicates end) my $block = ''; while ($srml =~ m/^( *[^ \n][^\n]*\n)(.*)$/s) { $block .= $1; $srml = $2; } print STDERR "1D<<$block>>\n"; # escape special XML characters $block =~ s|($escape)|$escapes->{$1}|sge; # sequencially walk through 1-dimensional block my $sxml_local = ''; my @active = (); while ($block =~ m|^(.*?)($tag1d)(.*)$|s) { my ($prolog, $tag, $epilog) = ($1, $2, $3); $sxml_local .= $prolog; $block = $epilog; # reduce tag to canonical 2-character form $tag =~ s|^(.)(.)\2+$|$1$2|s; if (defined($alias1d->{$tag})) { $tag = $alias1d->{$tag}; } # look on stack of still active tags my $active = ''; if ($#active > -1) { $active = pop(@active); } if ($active eq $tag) { # close the currently active tag $sxml_local .= $tags1d->{$tag}->[1]; } else { # open a new active tag if ($block =~ m|^\s*$|s) { # tag wraps around $sxml_local =~ s|^(\s*)(.*?)(\s*)$| $1 . $tags1d->{$tag}->[0] . $2 . $tags1d->{$tag}->[1] . $3|se; } else { $sxml_local .= $tags1d->{$tag}->[0]; push(@active, $active) if ($active ne ''); push(@active, $tag); } } } # post-processing for implicitly to-be-closed tags my $trailer = ''; if ($block =~ m|^(.*)(\s+)$|s) { ($block, $trailer) = ($1, $2); } $sxml_local .= $block; while ($#active > -1) { my $active = pop(@active); $sxml_local .= $tags1d->{$active}->[1]; } $sxml_local .= $trailer; $sxml .= $sxml_local; } } return $sxml; } my $srml = ''; $srml .= $_ while (<STDIN>); $srml =~ s|\n$||s; $srml .= "\n"; $srml = &expandtab(8, $srml); my $sxml = ''; $sxml .= "<sugar>"; $sxml .= &parse_srml($srml); $sxml .= "</sugar>\n"; print $sxml;