OSSP CVS Repository

ossp - ossp-pkg/quos/openpkg-rdf2sql.pl
Not logged in
[Honeypot]  [Browse]  [Directory]  [Home]  [Login
[Reports]  [Search]  [Ticket]  [Timeline
  [Raw

ossp-pkg/quos/openpkg-rdf2sql.pl
#!/usr/lpkg/bin/perl
##
##  OSSP quos - Query On Steroids
##  Copyright (c) 2004 Ralf S. Engelschall <rse@engelschall.com>
##  Copyright (c) 2004 The OSSP Project <http://www.ossp.org/>
##
##  This file is part of OSSP quos, a Web user interface for querying
##  a database which can be found at http://www.ossp.org/pkg/tool/quos/.
##
##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 2 of the License, or
##  (at your option) any later version.
##
##  This program is distributed in the hope that it will be useful,
##  but WITHOUT ANY WARRANTY; without even the implied warranty of
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
##  General Public License for more details.
##
##  You should have received a copy of the GNU General Public License
##  along with this program; if not, write to the Free Software
##  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
##  USA, or contact Ralf S. Engelschall <rse@engelschall.com>.
##
##  openpkg-rdf2sql.pl: OpenPKG XML/RDF to SQL importing tool
##

require 5.008;
use strict;
use Getopt::Long;
use XML::Parser;
use DBI;
use DBD::SQLite;
use Data::Dumper;
use XML::Simple;
use LWP::UserAgent;
use IPC::Filter qw();

#   Data::Dumper output format (debugging aid only)
$Data::Dumper::Purity = 1;
$Data::Dumper::Indent = 1;
$Data::Dumper::Terse  = 1;

#   base URL of the FTP server and the index file the import starts with
my $ftpserv  = 'ftp://anonymous@ftp.openpkg.org/';
my $starturl = '00INDEX.rdf';

#   connect to database
my $db = DBI->connect(
    "dbi:SQLite:dbname=openpkg.db", "", "",
    { RaiseError => 1, AutoCommit => 1 }
);

#   SQL text of all INSERT commands as [ name, text ] pairs,
#   listed in the order in which they are prepared
my @inserts = (
    [ '-rdf', join('',
        "INSERT INTO quos_rdf (rd_url) VALUES (?);"
    ) ],
    [ '-package', join('',
        "INSERT INTO quos_package",
        " (pk_name, pk_version, pk_release, pk_distribution, pk_group,",
        "  pk_license, pk_packager, pk_summary, pk_url, pk_vendor, pk_description, pk_rdf)",
        " VALUES (?,?,?,?,?,?,?,?,?,?,?,(SELECT MAX(rd_id) FROM quos_rdf));\n"
    ) ],
    [ '-buildprereq', join('',
        "INSERT INTO quos_buildprereq",
        " (bp_id, bp_key, bp_op, bp_val)",
        " VALUES ((SELECT MAX(pk_id) FROM quos_package),?,?,?);"
    ) ],
    [ '-prereq', join('',
        "INSERT INTO quos_prereq",
        " (rp_id, rp_key, rp_op, rp_val)",
        " VALUES ((SELECT MAX(pk_id) FROM quos_package),?,?,?);"
    ) ],
    [ '-provide', join('',
        "INSERT INTO quos_provide",
        " (pr_id, pr_key, pr_op, pr_val)",
        " VALUES ((SELECT MAX(pk_id) FROM quos_package),?,?,?);"
    ) ],
    [ '-source', join('',
        "INSERT INTO quos_source",
        " (sr_id, sr_url)",
        " VALUES ((SELECT MAX(pk_id) FROM quos_package),?);"
    ) ],
);

#   prepare SQL commands; the handles are looked up by name in $sql
my $sql = {};
foreach my $insert (@inserts) {
    my ($name, $text) = @{$insert};
    $sql->{$name} = $db->prepare($text);
}

#   import everything reachable from the start index
fetchrdfsrecursendump2db($starturl);

#
#   fetchrdfsrecursendump2db($url)
#
#   Fetch the XML/RDF index file at $url (a path appended to the server
#   base in $ftpserv), store every repository and package it lists via
#   the prepared statements in $sql, then do the same for each
#   sub-repository index the file references.
#
#   Index files whose name ends in "bz2" are piped through "bzip2 -d"
#   before parsing. If an index cannot be fetched, its status line is
#   printed and that index is skipped. Returns nothing.
#
sub fetchrdfsrecursendump2db {
    my ($url) = @_;

    #   fetch XML/RDF index file
    my $ua = LWP::UserAgent->new;
    $ua->agent("openpkg-rdf2sql/1.0");
    my $res = $ua->request(HTTP::Request->new(GET => $ftpserv . $url));
    if (not $res->is_success) {
        #   there is no document to parse, so report the problem and skip
        #   this index instead of passing an empty string to the XML parser
        print $res->status_line . "\n";
        return;
    }
    my $rescont = $res->content;
    if ($url =~ m|.*bz2$|) {
        $rescont = IPC::Filter::filter($rescont, "bzip2 -d");
    }

    #   parse XML/RDF index file
    my $rdf = XML::Simple->new->XMLin(
        $rescont,
        KeepRoot     => 1,
        ForceContent => 0,
        ForceArray   => 1,
    );

    #   directory part of this index's URL, prepended to the href of each
    #   sub-repository below; empty when the URL contains no slash (taken
    #   from the match result directly, so a stale $1 can never leak in)
    my ($actpath) = $url =~ m|^(.*/)|;
    $actpath = '' unless defined($actpath);

    #   iterate over XML/RDF data structure
    foreach my $repo (@{$rdf->{'rdf:RDF'}->[0]->{'Repository'}}) {
        #   start SQL transaction
        $db->begin_work();

        #   store repository information
        $sql->{-rdf}->execute($repo->{'rdf:resource'});

        #   iterate over all packages in a repository
        foreach my $desc (@{$repo->{'rdf:Description'} || []}) {
            #   store simple (single-value) properties of a package;
            #   a missing property is passed on as undef
            $sql->{-package}->execute(
                map { $desc->{$_}->[0] } qw(
                    Name Version Release Distribution Group License
                    Packager Summary URL Vendor Description
                )
            );

            #   store complex (multi-value) properties of a package
            foreach my $attr (qw(BuildPreReq PreReq Provide)) {
                my $sth = $sql->{"-" . lc($attr)};
                foreach my $el (@{$desc->{$attr}->[0]->{'rdf:bag'}->[0]->{'resource'}}) {
                    #   a plain string is just a key; a hash carries the
                    #   key as 'content' plus one operator => value pair
                    my ($key, $op, $val) = ($el, '=', '*');
                    if (ref($el) eq 'HASH') {
                        $key = $el->{'content'};
                        $op  = (grep { $_ ne 'content' } keys(%{$el}))[0];
                        $val = $el->{$op};
                    }
                    $sth->execute($key, $op, $val);
                }
            }

            #   store source URLs of a package
            foreach my $srcurl (@{$desc->{'Source'}->[0]->{'rdf:bag'}->[0]->{'rdf:li'}}) {
                $sql->{-source}->execute($srcurl);
            }
        }

        #   end SQL transaction before descending: the recursive call
        #   starts its own transaction with begin_work(), which DBI
        #   rejects while this one is still open
        $db->commit();

        #   iterate over all sub-repositories in repository
        foreach my $repcont (@{$repo->{'Repository'} || []}) {
            fetchrdfsrecursendump2db($actpath . $repcont->{'href'});
            sleep(1);
        }
    }
    return;
}

#   disconnect from database (nothing is committed here: each repository
#   was already committed in its own transaction during the import)
$db->disconnect();

#   temporary function for showing actual XML
sub showactxml {
    my ($xmlinput) = @_;

    #   debugging aid: parse the XML input with the same XML::Simple
    #   options as the importer and dump the resulting data structure
    my %parseopts = (
        KeepRoot     => 1,
        ForceContent => 0,
        ForceArray   => 1,
    );
    my $rdf = XML::Simple->new->XMLin($xmlinput, %parseopts);
    print Dumper($rdf);
}

__END__

=pod

=head1 NAME

B<openpkg-rdf2sql> - OpenPKG XML/RDF to SQL importing tool

=head1 SYNOPSIS

B<openpkg-rdf2sql>
[B<-v>|B<--verbose>]
...FIXME...
I<url>

=head1 DESCRIPTION

=cut


CVSTrac 2.0.1