#!/usr/bin/perl ## ## srml2sxml -- Sugar SRML to SXML Translation ## Copyright (c) 1999-2002 Ralf S. Engelschall ## require 5.005; $|++; my $tags1d = { '==' => [ '', '' ], '--' => [ '', '' ], '~~' => [ '', '' ], '..' => [ '', '' ], '**' => [ '', '' ], '//' => [ '', '' ], '__' => [ '' ], '[[' => [ '', '' ], '||' => [ '', '' ], '!!' => [ '', '' ], '(+' => [ '', '' ], '->' => [ '', '' ], }; my $alias1d = { '+)' => '(+', '<-' => '->', ']]' => '[[', }; my $tags2d = { '==' => [ '

', '

' ], '--' => [ '

', '

' ], '~~' => [ '

', '

' ], '..' => [ '

', '

' ], '**' => [ '', '' ], '//' => [ '', '' ], '__' => [ '' ], '[[' => [ '', '' ], '||' => [ '
',             '
' ], '!!' => [ '', '' ], '>>' => [ '
', '
' ], '((' => [ '', '' ], '%%' => [ '', '' ], '))' => [ '', '' ], '++' => [ '', '
' ], '##' => [ '', '' ], '$$' => [ '', '' ], 'o ' => [ '', '' ], '- ' => [ '', '' ], 'o.' => [ '', '' ], '-.' => [ '', '' ], '::' => [ '', '' ], 'Title:' => [ '', '' ], 'Author:' => [ '', '' ], 'Genesis:' => [ '', '' ], 'Date:' => [ '', '' ], 'Abstract:' => [ '', '' ], }; my $alias2d = { '[]' => '[[', }; my $escapes = { '&' => '&', '<' => '<', '>' => '>', }; # assemble mega-regex which matches all 1d-tags my $tag1d = &assemble_regex((keys(%{$tags1d}),keys(%{$alias1d}))); my $tag2d = &assemble_regex((keys(%{$tags2d}),keys(%{$alias2d}))); sub assemble_regex { my (@tags) = @_; my $regex = '(?:'; foreach $tag (sort(@tags)) { $regex .= quotemeta($tag)."+|"; } $regex =~ s/\+\|$//; $regex .= ')'; return $regex; } my $escape = '(?:'; foreach $e (sort(%{$escapes})) { $escape .= quotemeta($e)."|"; } $escape =~ s/\|$//; $escape .= ')'; sub expandtab { my ($ts, $str) = @_; my $i = 0; my $k; $str =~ s|(.) |($1 eq "\t" ? ($k = $i % $ts, $i += ($ts - $k), "*" x ($ts - $k) ) : ($i++, $1) ) |sgex; return $str; } # ::= * # ::= * # ::= | <2d-block> # <2d-block> ::= <2d-tag> # <2d-tag> ::= m/^\s*XY+\s+/ # ::= <1d-block>* # # und koennen degenerieren # auf genau einen, d.h. z.B. # == 1 oder == 1 <1d-block> # # 1d-block: # foo bar # **baz # quux # # 2d-block # ** baz # quux # # -> blank lines zwischen 2d-blocks koennen entfallen! # -> blank lines wenn da sind markieren paragraphen # und schliessen 1d block ab sub parse_srml { my ($srml) = @_; my $sxml = ''; while ($srml ne '') { # stop at end of input if ($srml =~ m|^(\s*)$|s) { $sxml .= $1; $srml = ''; last; } # pass through blank lines elsif ($srml =~ m|^((?: *\n)+)(.*)$|s) { $sxml .= $1; $srml = $2; } # recognize 2-dimensional block start elsif ($srml =~ m|^( *)($tag2d)( +[^\n]*)?(.*)$|s) { my ($prefix, $tag, $block) = ($1, $2, $3); $block = $prefix . $block; $srml = $4; # reduce tag to canonical 2-character form $tag =~ s|^(.)(.)\2+$|$1$2|s; if (defined($alias2d->{$tag})) { $tag = $alias2d->{$tag}; } # gather block lines # (same or less indent indicates end) my $minindent = length($prefix)+1; while ($srml =~ m/^((?: *| {$minindent,}[^\n]+)\n)(.*)$/s) { $block .= $1; $srml = $2; } $block = &parse_srml($block); $block =~ s|^(\s*)(.*?)(\s*)$| $1 . $tags2d->{$tag}->[0] . $2 . $tags2d->{$tag}->[1] . $3|se; $sxml .= $block; } # anything else we treat as a 1-dimensional block start else { # gather block lines # (a blank line indicates end) my $block = ''; while ($srml =~ m/^( *[^ \n][^\n]*\n)(.*)$/s) { $block .= $1; $srml = $2; } print STDERR "1D<<$block>>\n"; # escape special XML characters $block =~ s|($escape)|$escapes->{$1}|sge; # sequencially walk through 1-dimensional block my $sxml_local = ''; my @active = (); while ($block =~ m|^(.*?)($tag1d)(.*)$|s) { my ($prolog, $tag, $epilog) = ($1, $2, $3); $sxml_local .= $prolog; $block = $epilog; # reduce tag to canonical 2-character form $tag =~ s|^(.)(.)\2+$|$1$2|s; if (defined($alias1d->{$tag})) { $tag = $alias1d->{$tag}; } # look on stack of still active tags my $active = ''; if ($#active > -1) { $active = pop(@active); } if ($active eq $tag) { # close the currently active tag $sxml_local .= $tags1d->{$tag}->[1]; } else { # open a new active tag if ($block =~ m|^\s*$|s) { # tag wraps around $sxml_local =~ s|^(\s*)(.*?)(\s*)$| $1 . $tags1d->{$tag}->[0] . $2 . $tags1d->{$tag}->[1] . $3|se; } else { $sxml_local .= $tags1d->{$tag}->[0]; push(@active, $active) if ($active ne ''); push(@active, $tag); } } } # post-processing for implicitly to-be-closed tags my $trailer = ''; if ($block =~ m|^(.*)(\s+)$|s) { ($block, $trailer) = ($1, $2); } $sxml_local .= $block; while ($#active > -1) { my $active = pop(@active); $sxml_local .= $tags1d->{$active}->[1]; } $sxml_local .= $trailer; $sxml .= $sxml_local; } } return $sxml; } my $srml = ''; $srml .= $_ while (); $srml =~ s|\n$||s; $srml .= "\n"; $srml = &expandtab(8, $srml); my $sxml = ''; $sxml .= ""; $sxml .= &parse_srml($srml); $sxml .= "\n"; print $sxml;