OSSP CVS Repository

ossp - ossp-pkg/sugar/srml2sxml
Not logged in
[Honeypot]  [Browse]  [Directory]  [Home]  [Login
[Reports]  [Search]  [Ticket]  [Timeline
  [Raw

ossp-pkg/sugar/srml2sxml
#!/usr/bin/perl
##
##  srml2sxml -- Sugar SRML to SXML Translation
##  Copyright (c) 1999-2002 Ralf S. Engelschall <rse@engelschall.com> 
##

require 5.005;
$|++;

my $tags1d = {
    '=='        => [ '<span class=h1>',   '</span>'       ],
    '--'        => [ '<span class=h2>',   '</span>'       ],
    '~~'        => [ '<span class=h3>',   '</span>'       ],
    '..'        => [ '<span class=h4>',   '</span>'       ],

    '**'        => [ '<b>',               '</b>'          ],
    '//'        => [ '<i>',               '</i>'          ],
    '__'        => [ '<ul>',              '</ul>'         ],
    '[['        => [ '<boxed>',           '</boxed>'      ],
    '||'        => [ '<tt>',              '</tt>'         ],
    '!!'        => [ '<verbatim>',        '</verbatim>'   ],

    '(+'        => [ '<anchor>',          '</anchor>'     ],
    '->'        => [ '<xref>',            '</xref>'       ],
};

my $alias1d = {
    '+)' => '(+',
    '<-' => '->',
    ']]' => '[[',
};

my $tags2d = {
    '=='        => [ '<h1>',              '</h1>'         ],
    '--'        => [ '<h2>',              '</h2>'         ],
    '~~'        => [ '<h3>',              '</h3>'         ],
    '..'        => [ '<h4>',              '</h4>'         ],
    '**'        => [ '<b>',               '</b>'          ],
    '//'        => [ '<i>',               '</i>'          ],
    '__'        => [ '<ul>',              '</ul>'         ],
    '[['        => [ '<boxed>',           '</boxed>'      ],
    '||'        => [ '<pre>',             '</pre>'        ],
    '!!'        => [ '<verbatim>',        '</verbatim>'   ],

    '>>'        => [ '<blockquote>',      '</blockquote>' ],
    '(('        => [ '<align to=left>',   '</align>'      ],
    '%%'        => [ '<align to=center>', '</align>'      ],
    '))'        => [ '<align to=right>',  '</align>'      ],

    '++'        => [ '<table>',           '</table>'      ],
    '##'        => [ '<comment>',         '</comment>'    ],

    '$$'        => [ '<list>',            '</list>'       ],
    'o '        => [ '<item type=ul>',    '</item>'       ],
    '- '        => [ '<item type=ul>',    '</item>'       ],
    'o.'        => [ '<item type=ol>',    '</item>'       ],
    '-.'        => [ '<item type=ol>',    '</item>'       ],
    '::'        => [ '<item type=dl>',    '</item>'       ],

    'Title:'    => [ '<title>',           '</title>'      ],
    'Author:'   => [ '<author>',          '</author>'     ],
    'Genesis:'  => [ '<genesis>',         '</genesis>'    ],
    'Date:'     => [ '<date>',            '</date>'       ],
    'Abstract:' => [ '<abstract>',        '</abstract>'   ],
};

my $alias2d = {
    '[]' => '[[',
};

my $escapes = {
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
};

#   assemble mega-regex which matches all 1d-tags
my $tag1d = &assemble_regex((keys(%{$tags1d}),keys(%{$alias1d})));
my $tag2d = &assemble_regex((keys(%{$tags2d}),keys(%{$alias2d})));
sub assemble_regex {
    my (@tags) = @_;
    my $regex = '(?:';
    foreach $tag (sort(@tags)) {
        $regex .= quotemeta($tag)."+|";
    }
    $regex =~ s/\+\|$//;
    $regex .= ')';
    return $regex;
}

my $escape = '(?:';
foreach $e (sort(%{$escapes})) {
    $escape .= quotemeta($e)."|";
}
$escape =~ s/\|$//;
$escape .= ')';

sub expandtab {
    my ($ts, $str) = @_;
    my $i = 0;
    my $k;
    $str =~ s|(.)
             |($1 eq "\t" 
               ? ($k = $i % $ts, 
                  $i += ($ts - $k), 
                  "*" x ($ts - $k) ) 
               : ($i++, $1)
              )
             |sgex;
    return $str;
}

#   <document>     ::= <blockgroup>* 
#   <blockgroup>   ::= <block>*
#   <block>        ::= <para> | <2d-block>
#   <2d-block>     ::= <2d-tag> <document>
#   <2d-tag>       ::= m/^\s*XY+\s+/
#   <para>         ::= <1d-block>*
#
#   <blockgroup> und <para> koennen degenerieren
#   auf genau einen, d.h. z.B.
#   <blockgroup> == 1 <para>  oder <para> == 1 <1d-block>
#   
#   1d-block:
#   foo bar
#   **baz 
#   quux
#
#   2d-block
#   ** baz
#   quux
#
#   -> blank lines zwischen 2d-blocks koennen entfallen!
#   -> blank lines wenn da sind markieren paragraphen
#      und schliessen 1d block ab

sub parse_srml {
    my ($srml) = @_;
    my $sxml = '';

    while ($srml ne '') {
        #   stop at end of input
        if ($srml =~ m|^(\s*)$|s) {
            $sxml .= $1; 
            $srml = '';
            last;
        }

        #   pass through blank lines
        elsif ($srml =~ m|^((?: *\n)+)(.*)$|s) {
            $sxml .= $1;
            $srml  = $2;
        }

        #   recognize 2-dimensional block start
        elsif ($srml =~ m|^( *)($tag2d)( +[^\n]*)?(.*)$|s) {
            my ($prefix, $tag, $block) = ($1, $2, $3);
            $block = $prefix . $block;
            $srml = $4;

            #   reduce tag to canonical 2-character form
            $tag =~ s|^(.)(.)\2+$|$1$2|s;
            if (defined($alias2d->{$tag})) {
                $tag = $alias2d->{$tag};
            }

            #   gather block lines
            #   (same or less indent indicates end)
            my $minindent = length($prefix)+1;
            while ($srml =~ m/^((?: *| {$minindent,}[^\n]+)\n)(.*)$/s) {
                $block .= $1;
                $srml   = $2;
            }
            $block = &parse_srml($block);
            $block =~ s|^(\s*)(.*?)(\s*)$|
                        $1 . $tags2d->{$tag}->[0] . 
                        $2 . $tags2d->{$tag}->[1] . $3|se;
            $sxml .= $block;
        }

        #   anything else we treat as a 1-dimensional block start
        else {
            #   gather block lines
            #   (a blank line indicates end)
            my $block = '';
            while ($srml =~ m/^( *[^ \n][^\n]*\n)(.*)$/s) {
                $block .= $1;
                $srml   = $2;
            }

            print STDERR "1D<<$block>>\n";

            #   escape special XML characters
            $block =~ s|($escape)|$escapes->{$1}|sge;

            #   sequencially walk through 1-dimensional block
            my $sxml_local = '';
            my @active = ();
            while ($block =~ m|^(.*?)($tag1d)(.*)$|s) {
                my ($prolog, $tag, $epilog) = ($1, $2, $3);
                $sxml_local .= $prolog;
                $block = $epilog;

                #   reduce tag to canonical 2-character form
                $tag =~ s|^(.)(.)\2+$|$1$2|s;
                if (defined($alias1d->{$tag})) {
                    $tag = $alias1d->{$tag};
                }

                #   look on stack of still active tags
                my $active = '';
                if ($#active > -1) {
                    $active = pop(@active); 
                }

                if ($active eq $tag) {
                    #   close the currently active tag
                    $sxml_local .= $tags1d->{$tag}->[1];
                }
                else {
                    #   open a new active tag
                    if ($block =~ m|^\s*$|s) {
                        #   tag wraps around
                        $sxml_local =~ s|^(\s*)(.*?)(\s*)$|
                                         $1 . $tags1d->{$tag}->[0] . 
                                         $2 . $tags1d->{$tag}->[1] . $3|se;
                    }
                    else {
                        $sxml_local .= $tags1d->{$tag}->[0];
                        push(@active, $active) if ($active ne '');
                        push(@active, $tag);
                    }
                }
            }
            
            #   post-processing for implicitly to-be-closed tags
            my $trailer = '';
            if ($block =~ m|^(.*)(\s+)$|s) {
                ($block, $trailer) = ($1, $2);
            }
            $sxml_local .= $block;
            while ($#active > -1) {
                my $active = pop(@active);
                $sxml_local .= $tags1d->{$active}->[1];
            }
            $sxml_local .= $trailer;
            
            $sxml .= $sxml_local;
        }
    }
    return $sxml;
}

my $srml = '';
$srml .= $_ while (<STDIN>);
$srml =~ s|\n$||s;
$srml .= "\n";
$srml = &expandtab(8, $srml);

my $sxml = '';
$sxml .= "<sugar>";
$sxml .= &parse_srml($srml);
$sxml .= "</sugar>\n";
print $sxml;


CVSTrac 2.0.1