ossp-pkg/quos/openpkg-rdf2sql.pl
#!/usr/lpkg/bin/perl
##
## OSSP quos - Query On Steroids
## Copyright (c) 2004 Ralf S. Engelschall <rse@engelschall.com>
## Copyright (c) 2004 The OSSP Project <http://www.ossp.org/>
##
## This file is part of OSSP quos, a Web user interface for querying
## a database which can be found at http://www.ossp.org/pkg/tool/quos/.
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
## USA, or contact Ralf S. Engelschall <rse@engelschall.com>.
##
## openpkg-rdf2sql.pl: OpenPKG XML/RDF to SQL importing tool
##
require 5.008;
use strict;
use Getopt::Long;
use XML::Parser;
use DBI;
use DBD::SQLite;
use Data::Dumper;
use XML::Simple;
use LWP::UserAgent;
use IPC::Filter qw();
# configure optional debugging (output format of Data::Dumper)
$Data::Dumper::Purity = 1;
$Data::Dumper::Indent = 1;
$Data::Dumper::Terse  = 1;

# base URL of the server and the index file (relative to that base)
# at which the import starts
my $ftpserv  = 'ftp://anonymous@ftp.openpkg.org/';
my $starturl = '00INDEX.rdf';

# connect to database
my $db = DBI->connect(
    "dbi:SQLite:dbname=openpkg.db", "", "",
    { RaiseError => 1, AutoCommit => 1 }
);

# prepare SQL commands (keyed by "-<name>")
my $sql = {};

# one row per XML/RDF index
$sql->{'-rdf'} = $db->prepare("INSERT INTO quos_rdf (rd_url) VALUES (?);");

# one row per package, linked to the most recently inserted index row
$sql->{'-package'} = $db->prepare(
    join(' ',
        "INSERT INTO quos_package",
        "(pk_name, pk_version, pk_release, pk_distribution, pk_group,",
        "pk_license, pk_packager, pk_summary, pk_url, pk_vendor, pk_description, pk_rdf)",
        "VALUES (?,?,?,?,?,?,?,?,?,?,?,(SELECT MAX(rd_id) FROM quos_rdf));\n",
    )
);

# the three key/op/value tables differ only in the table name and the
# column prefix; each row is linked to the most recently inserted package
foreach my $spec ([ 'buildprereq', 'bp' ], [ 'prereq', 'rp' ], [ 'provide', 'pr' ]) {
    my ($table, $pfx) = @{$spec};
    $sql->{"-" . $table} = $db->prepare(
        "INSERT INTO quos_" . $table .
        " (" . join(', ', map { $pfx . "_" . $_ } qw(id key op val)) . ")" .
        " VALUES ((SELECT MAX(pk_id) FROM quos_package),?,?,?);"
    );
}

# one row per source URL of the most recently inserted package
$sql->{'-source'} = $db->prepare(
    join(' ',
        "INSERT INTO quos_source",
        "(sr_id, sr_url)",
        "VALUES ((SELECT MAX(pk_id) FROM quos_package),?);",
    )
);

# run the import, starting at the top-level index
fetchrdfsrecursendump2db($starturl);
# Fetch the XML/RDF index file at $url (relative to the file-level
# $ftpserv base), store every repository and package it describes
# through the prepared statements in the file-level $sql, and then
# recurse into every sub-repository index it references.
#
# An index whose URL ends in "bz2" is piped through "bzip2 -d" before
# it is parsed. If the fetch fails, the status line is printed and the
# index is skipped. Database errors are fatal (the handle is opened
# with RaiseError).
sub fetchrdfsrecursendump2db {
    my ($url) = @_;

    # fetch XML/RDF index file
    my $ua = LWP::UserAgent->new;
    $ua->agent("openpkg-rdf2sql/1.0");
    my $req = HTTP::Request->new(GET => $ftpserv . $url);
    my $res = $ua->request($req);
    if (not $res->is_success) {
        # nothing was retrieved: report and skip this index instead of
        # handing an empty document to the XML parser
        print $res->status_line . "\n";
        return;
    }
    my $rescont = $res->content;
    if ($url =~ m|.*bz2$|) {
        $rescont = IPC::Filter::filter($rescont, "bzip2 -d");
    }

    # parse XML/RDF index file
    my $rdf = XML::Simple->new->XMLin(
        $rescont,
        KeepRoot     => 1,
        ForceContent => 0,
        ForceArray   => 1,
    );

    # directory part of the current index URL; the href of each
    # sub-repository is appended to it (empty if $url has no slash)
    my ($actpath) = $url =~ m|^(.*/)|;
    $actpath = '' if not defined $actpath;

    # simple (single-value) properties, in the column order expected
    # by the prepared "-package" statement
    my @simple = qw(
        Name Version Release Distribution Group License
        Packager Summary URL Vendor Description
    );

    # iterate over XML/RDF data structure
    foreach my $repo (@{$rdf->{'rdf:RDF'}->[0]->{'Repository'}}) {
        my $rd_url = $repo->{'rdf:resource'};

        # start SQL transaction
        $db->begin_work();

        # store repository information
        $sql->{-rdf}->execute($rd_url);

        # iterate over all packages in a repository
        if (defined($repo->{'rdf:Description'})) {
            foreach my $desc (@{$repo->{'rdf:Description'}}) {
                # store simple (single-value) properties of a package
                $sql->{-package}->execute(map { $desc->{$_}->[0] } @simple);

                # store complex (multi-value) properties of a package
                foreach my $attr (qw(BuildPreReq PreReq Provide)) {
                    foreach my $el (@{$desc->{$attr}->[0]->{'rdf:bag'}->[0]->{'resource'}}) {
                        # a plain string is stored as "<name> = *"; a hash
                        # carries the name under 'content' and the
                        # operator as the name of its other key
                        my ($key, $op, $val) = ($el, '=', '*');
                        if (ref($el) eq 'HASH') {
                            $key = $el->{'content'};
                            $op  = (grep { $_ ne 'content' } keys(%{$el}))[0];
                            $val = $el->{$op};
                        }
                        $sql->{"-".lc($attr)}->execute($key, $op, $val);
                    }
                }

                # store source URLs of a package
                foreach my $srcurl (@{$desc->{'Source'}->[0]->{'rdf:bag'}->[0]->{'rdf:li'}}) {
                    $sql->{-source}->execute($srcurl);
                }
            }
        }

        # end SQL transaction before descending: the recursive call
        # starts its own transaction, and begin_work() fails while one
        # is still open
        $db->commit();

        # iterate over all sub-repositories in repository
        if (defined($repo->{'Repository'})) {
            foreach my $repcont (@{$repo->{'Repository'}}) {
                fetchrdfsrecursendump2db($actpath . $repcont->{'href'});
                # pause between successive index fetches
                sleep(1);
            }
        }
    }
    return;
}
# disconnect from database; the transaction of each repository has
# already been committed inside fetchrdfsrecursendump2db()
$db->disconnect();
# temporary debugging aid: parse $xmlinput with XML::Simple, using the
# same options as the importer, and print a Data::Dumper dump of the
# resulting data structure
sub showactxml {
    my ($xmlinput) = @_;
    my %parseopts = (
        KeepRoot     => 1,
        ForceContent => 0,
        ForceArray   => 1,
    );
    my $tree = XML::Simple->new->XMLin($xmlinput, %parseopts);
    print Dumper($tree);
}
__END__
=pod
=head1 NAME
B<openpkg-rdf2sql> - OpenPKG XML/RDF to SQL importing tool
=head1 SYNOPSIS
B<openpkg-rdf2sql>
[B<-v>|B<--verbose>]
...FIXME...
I<url>
=head1 DESCRIPTION
=cut