*** /dev/null Sat Nov 23 01:37:31 2024
--- - Sat Nov 23 01:37:37 2024
***************
*** 0 ****
--- 1,265 ----
+ #!/usr/bin/perl
+ ##
+ ## srml2sxml -- Sugar SRML to SXML Translation
+ ## Copyright (c) 1999-2002 Ralf S. Engelschall <rse@engelschall.com>
+ ##
+
+ require 5.005;
+ $|++;
+
+ my $tags1d = {
+ '==' => [ '<span class=h1>', '</span>' ],
+ '--' => [ '<span class=h2>', '</span>' ],
+ '~~' => [ '<span class=h3>', '</span>' ],
+ '..' => [ '<span class=h4>', '</span>' ],
+
+ '**' => [ '<b>', '</b>' ],
+ '//' => [ '<i>', '</i>' ],
+ '__' => [ '<ul>', '</ul>' ],
+ '[[' => [ '<boxed>', '</boxed>' ],
+ '||' => [ '<tt>', '</tt>' ],
+ '!!' => [ '<verbatim>', '</verbatim>' ],
+
+ '(+' => [ '<anchor>', '</anchor>' ],
+ '->' => [ '<xref>', '</xref>' ],
+ };
+
+ my $alias1d = {
+ '+)' => '(+',
+ '<-' => '->',
+ ']]' => '[[',
+ };
+
+ my $tags2d = {
+ '==' => [ '<h1>', '</h1>' ],
+ '--' => [ '<h2>', '</h2>' ],
+ '~~' => [ '<h3>', '</h3>' ],
+ '..' => [ '<h4>', '</h4>' ],
+ '**' => [ '<b>', '</b>' ],
+ '//' => [ '<i>', '</i>' ],
+ '__' => [ '<ul>', '</ul>' ],
+ '[[' => [ '<boxed>', '</boxed>' ],
+ '||' => [ '<pre>', '</pre>' ],
+ '!!' => [ '<verbatim>', '</verbatim>' ],
+
+ '>>' => [ '<blockquote>', '</blockquote>' ],
+ '((' => [ '<align to=left>', '</align>' ],
+ '%%' => [ '<align to=center>', '</align>' ],
+ '))' => [ '<align to=right>', '</align>' ],
+
+ '++' => [ '<table>', '</table>' ],
+ '##' => [ '<comment>', '</comment>' ],
+
+ '$$' => [ '<list>', '</list>' ],
+ 'o ' => [ '<item type=ul>', '</item>' ],
+ '- ' => [ '<item type=ul>', '</item>' ],
+ 'o.' => [ '<item type=ol>', '</item>' ],
+ '-.' => [ '<item type=ol>', '</item>' ],
+ '::' => [ '<item type=dl>', '</item>' ],
+
+ 'Title:' => [ '<title>', '</title>' ],
+ 'Author:' => [ '<author>', '</author>' ],
+ 'Genesis:' => [ '<genesis>', '</genesis>' ],
+ 'Date:' => [ '<date>', '</date>' ],
+ 'Abstract:' => [ '<abstract>', '</abstract>' ],
+ };
+
+ my $alias2d = {
+ '[]' => '[[',
+ };
+
+ my $escapes = {
+ '&' => '&',
+ '<' => '<',
+ '>' => '>',
+ };
+
+ # assemble mega-regex which matches all 1d-tags
+ my $tag1d = &assemble_regex((keys(%{$tags1d}),keys(%{$alias1d})));
+ my $tag2d = &assemble_regex((keys(%{$tags2d}),keys(%{$alias2d})));
+ sub assemble_regex {
+ my (@tags) = @_;
+ my $regex = '(?:';
+ foreach $tag (sort(@tags)) {
+ $regex .= quotemeta($tag)."+|";
+ }
+ $regex =~ s/\+\|$//;
+ $regex .= ')';
+ return $regex;
+ }
+
+ my $escape = '(?:';
+ foreach $e (sort(%{$escapes})) {
+ $escape .= quotemeta($e)."|";
+ }
+ $escape =~ s/\|$//;
+ $escape .= ')';
+
+ sub expandtab {
+ my ($ts, $str) = @_;
+ my $i = 0;
+ my $k;
+ $str =~ s|(.)
+ |($1 eq "\t"
+ ? ($k = $i % $ts,
+ $i += ($ts - $k),
+ "*" x ($ts - $k) )
+ : ($i++, $1)
+ )
+ |sgex;
+ return $str;
+ }
+
+ # <document> ::= <blockgroup>*
+ # <blockgroup> ::= <block>*
+ # <block> ::= <para> | <2d-block>
+ # <2d-block> ::= <2d-tag> <document>
+ # <2d-tag> ::= m/^\s*XY+\s+/
+ # <para> ::= <1d-block>*
+ #
+ # <blockgroup> und <para> koennen degenerieren
+ # auf genau einen, d.h. z.B.
+ # <blockgroup> == 1 <para> oder <para> == 1 <1d-block>
+ #
+ # 1d-block:
+ # foo bar
+ # **baz
+ # quux
+ #
+ # 2d-block
+ # ** baz
+ # quux
+ #
+ # -> blank lines zwischen 2d-blocks koennen entfallen!
+ # -> blank lines wenn da sind markieren paragraphen
+ # und schliessen 1d block ab
+
+ sub parse_srml {
+ my ($srml) = @_;
+ my $sxml = '';
+
+ while ($srml ne '') {
+ # stop at end of input
+ if ($srml =~ m|^(\s*)$|s) {
+ $sxml .= $1;
+ $srml = '';
+ last;
+ }
+
+ # pass through blank lines
+ elsif ($srml =~ m|^((?: *\n)+)(.*)$|s) {
+ $sxml .= $1;
+ $srml = $2;
+ }
+
+ # recognize 2-dimensional block start
+ elsif ($srml =~ m|^( *)($tag2d)( +[^\n]*)?(.*)$|s) {
+ my ($prefix, $tag, $block) = ($1, $2, $3);
+ $block = $prefix . $block;
+ $srml = $4;
+
+ # reduce tag to canonical 2-character form
+ $tag =~ s|^(.)(.)\2+$|$1$2|s;
+ if (defined($alias2d->{$tag})) {
+ $tag = $alias2d->{$tag};
+ }
+
+ # gather block lines
+ # (same or less indent indicates end)
+ my $minindent = length($prefix)+1;
+ while ($srml =~ m/^((?: *| {$minindent,}[^\n]+)\n)(.*)$/s) {
+ $block .= $1;
+ $srml = $2;
+ }
+ $block = &parse_srml($block);
+ $block =~ s|^(\s*)(.*?)(\s*)$|
+ $1 . $tags2d->{$tag}->[0] .
+ $2 . $tags2d->{$tag}->[1] . $3|se;
+ $sxml .= $block;
+ }
+
+ # anything else we treat as a 1-dimensional block start
+ else {
+ # gather block lines
+ # (a blank line indicates end)
+ my $block = '';
+ while ($srml =~ m/^( *[^ \n][^\n]*\n)(.*)$/s) {
+ $block .= $1;
+ $srml = $2;
+ }
+
+ print STDERR "1D<<$block>>\n";
+
+ # escape special XML characters
+ $block =~ s|($escape)|$escapes->{$1}|sge;
+
+ # sequencially walk through 1-dimensional block
+ my $sxml_local = '';
+ my @active = ();
+ while ($block =~ m|^(.*?)($tag1d)(.*)$|s) {
+ my ($prolog, $tag, $epilog) = ($1, $2, $3);
+ $sxml_local .= $prolog;
+ $block = $epilog;
+
+ # reduce tag to canonical 2-character form
+ $tag =~ s|^(.)(.)\2+$|$1$2|s;
+ if (defined($alias1d->{$tag})) {
+ $tag = $alias1d->{$tag};
+ }
+
+ # look on stack of still active tags
+ my $active = '';
+ if ($#active > -1) {
+ $active = pop(@active);
+ }
+
+ if ($active eq $tag) {
+ # close the currently active tag
+ $sxml_local .= $tags1d->{$tag}->[1];
+ }
+ else {
+ # open a new active tag
+ if ($block =~ m|^\s*$|s) {
+ # tag wraps around
+ $sxml_local =~ s|^(\s*)(.*?)(\s*)$|
+ $1 . $tags1d->{$tag}->[0] .
+ $2 . $tags1d->{$tag}->[1] . $3|se;
+ }
+ else {
+ $sxml_local .= $tags1d->{$tag}->[0];
+ push(@active, $active) if ($active ne '');
+ push(@active, $tag);
+ }
+ }
+ }
+
+ # post-processing for implicitly to-be-closed tags
+ my $trailer = '';
+ if ($block =~ m|^(.*)(\s+)$|s) {
+ ($block, $trailer) = ($1, $2);
+ }
+ $sxml_local .= $block;
+ while ($#active > -1) {
+ my $active = pop(@active);
+ $sxml_local .= $tags1d->{$active}->[1];
+ }
+ $sxml_local .= $trailer;
+
+ $sxml .= $sxml_local;
+ }
+ }
+ return $sxml;
+ }
+
+ my $srml = '';
+ $srml .= $_ while (<STDIN>);
+ $srml =~ s|\n$||s;
+ $srml .= "\n";
+ $srml = &expandtab(8, $srml);
+
+ my $sxml = '';
+ $sxml .= "<sugar>";
+ $sxml .= &parse_srml($srml);
+ $sxml .= "</sugar>\n";
+ print $sxml;
+
|