--- openpkg-rdf2sql.pl 2004/11/08 15:32:59 1.3
+++ openpkg-rdf2sql.pl 2004/11/16 14:05:32 1.4
@@ -1,4 +1,4 @@
-#!/usr/opkg/bin/perl
+#!/usr/lpkg/bin/perl
##
## OSSP quos - Query On Steroids
## Copyright (c) 2004 Ralf S. Engelschall <rse@engelschall.com>
@@ -26,28 +26,23 @@
##
require 5.008;
+use strict;
use Getopt::Long;
use XML::Parser;
use DBI;
use DBD::SQLite;
use Data::Dumper;
use XML::Simple;
+use LWP::UserAgent;
+use IPC::Filter qw();
# configure optional debugging
$Data::Dumper::Purity = 1;
$Data::Dumper::Indent = 1;
$Data::Dumper::Terse = 1;
-my $xml = new XML::Simple;
-my $rdf = $xml->XMLin(
- "openpkg.rdf",
- KeepRoot => 1,
- ForceContent => 0,
- ForceArray => 1,
-);
-undef $xml;
-#print Dumper($xml);
-#exit(0);
+my $ftpserv = 'ftp://anonymous:herb@ftp.openpkg.org/';
+my $starturl = '00INDEX.rdf';
# connect to database
my $db = DBI->connect("dbi:SQLite:dbname=openpkg.db", "", "",
@@ -85,56 +80,121 @@
" VALUES ((SELECT MAX(pk_id) FROM quos_package),?);"
);
-# iterate over XML/RDF data structure
-foreach my $repo (@{$rdf->{'rdf:RDF'}->[0]->{'Repository'}}) {
- my $rd_url = $repo->{'rdf:resource'};
+$db->begin_work();
- # store repository information
- $db->begin_work();
- $sql->{-rdf}->execute($rd_url);
-
- # interate over all packages in a repository
- foreach my $desc (@{$repo->{'rdf:Description'}}) {
- # store simple (single-value) properties of a package
- my $prop = {};
- foreach my $attr (qw(
- Name Version Release Distribution Group License
- Packager Summary URL Vendor Description
- )) {
- $prop->{$attr} = $desc->{$attr}->[0];
+fetchrdfsrecursendump2db($starturl);
+
+# Fetch the RDF index $url and store what it lists through the prepared
+# statements in $sql: package descriptions are inserted directly, and
+# references to further indexes are followed recursively.  Indexes whose
+# name ends in "bz2" are piped through "bzip2 -d" before parsing.  A
+# failed fetch is reported on stdout and that index is skipped.
+#
+#   $url - location of the index, relative to $ftpserv
+#
+# Relies on $ftpserv and $sql from the enclosing file scope.  Errors
+# raised by the bzip2 filter, the XML parser or DBI are not caught here.
+sub fetchrdfsrecursendump2db {
+    my ($url) = @_;
+
+    # the trailing space asks LWP to append its own default agent string
+    my $ua = LWP::UserAgent->new;
+    $ua->agent("rdfcrawl/1.0 ");
+    my $res = $ua->request(HTTP::Request->new(GET => $ftpserv . $url));
+    unless ($res->is_success) {
+        # nothing to parse, so report the failure and skip this index
+        print $res->status_line, "\n";
+        return;
+    }
- $sql->{-package}->execute(
- $prop->{'Name'}, $prop->{'Version'}, $prop->{'Release'}, $prop->{'Distribution'}, $prop->{'Group'},
- $prop->{'License'}, $prop->{'Packager'}, $prop->{'Summary'}, $prop->{'URL'}, $prop->{'Vendor'},
- $prop->{'Description'}
- );
-
- # store complex (multi-value) properties of a package
- foreach my $attr (qw(
- BuildPreReq
- PreReq
- Provide
- )) {
- foreach my $el (@{$desc->{$attr}->[0]->{'rdf:bag'}->[0]->{'resource'}}) {
- my ($key, $op, $val) = ($el, '=', '*');
- if (ref($key) eq 'HASH') {
- $key = $el->{'content'};
- $op = (grep { $_ ne 'content' } keys(%{$el}))[0];
- $val = $el->{$op};
+
+    # indexes named *bz2 arrive compressed; match against $url directly
+    # so the caller's $_ is left untouched
+    my $rescont = $res->content;
+    if ($url =~ m|bz2$|) {
+        $rescont = IPC::Filter::filter($rescont, "bzip2 -d");
+    }
+
+    # ForceArray makes every nested element an array ref, hence the ->[0]
+    my $rdf = XML::Simple->new->XMLin(
+        $rescont,
+        KeepRoot     => 1,
+        ForceContent => 0,
+        ForceArray   => 1,
+    );
+
+    # iterate over XML/RDF data structure
+    foreach my $repo (@{$rdf->{'rdf:RDF'}->[0]->{'Repository'}}) {
+        # store repository information
+        $sql->{-rdf}->execute($repo->{'rdf:resource'});
+
+        # a repository either describes packages or points at sub-indexes
+        if (defined($repo->{'rdf:Description'})) {
+            # from now on package descriptions
+            foreach my $desc (@{$repo->{'rdf:Description'}}) {
+                # store simple (single-value) properties of a package;
+                # the values are bound positionally in this order
+                $sql->{-package}->execute(
+                    map { $desc->{$_}->[0] } qw(
+                        Name Version Release Distribution Group License
+                        Packager Summary URL Vendor Description
+                    )
+                );
+
+                # store complex (multi-value) properties of a package
+                foreach my $attr (qw(BuildPreReq PreReq Provide)) {
+                    foreach my $el (@{$desc->{$attr}->[0]->{'rdf:bag'}->[0]->{'resource'}}) {
+                        # a plain string is recorded as "<name> = *"; a hash
+                        # holds the name under 'content' plus one attribute
+                        # whose name becomes $op and whose value $val
+                        my ($key, $op, $val) = ($el, '=', '*');
+                        if (ref($key) eq 'HASH') {
+                            $key = $el->{'content'};
+                            $op  = (grep { $_ ne 'content' } keys(%{$el}))[0];
+                            $val = $el->{$op};
+                        }
+                        $sql->{"-".lc($attr)}->execute($key, $op, $val);
+                    }
+                }
+
+                # store each entry of the package's Source bag
+                foreach my $src (@{$desc->{'Source'}->[0]->{'rdf:bag'}->[0]->{'rdf:li'}}) {
+                    $sql->{-source}->execute($src);
}
- $sql->{"-".lc($attr)}->execute($key, $op, $val);
}
}
- foreach my $url (@{$desc->{'Source'}->[0]->{'rdf:bag'}->[0]->{'rdf:li'}}) {
- $sql->{-source}->execute($url);
+        else {
+            # from now on rdf references, relative to the directory of $url
+            # (empty when $url has no directory part; never a stale $1)
+            my $actpath = ($url =~ m|^(.*/)|) ? $1 : '';
+            foreach my $repcont (@{$repo->{'Repository'}}) {
+                fetchrdfsrecursendump2db($actpath . $repcont->{'href'});
+                sleep(1);
+            }
}
+        # no commit here: the whole crawl runs in the caller's transaction
}
- $db->commit();
+}
-
-# disconnect from database
+# commit and disconnect from database
+$db->commit();
$db->disconnect();
+# Temporary debugging aid: parse the XML in $xmlinput with the same
+# XML::Simple options the crawler uses and print the resulting data
+# structure via Data::Dumper.
+sub showactxml {
+    my ($xmlinput) = @_;
+
+    my %parse_opts = (
+        KeepRoot     => 1,
+        ForceContent => 0,
+        ForceArray   => 1,
+    );
+    my $parser = XML::Simple->new;
+    my $tree   = $parser->XMLin($xmlinput, %parse_opts);
+    undef $parser;
+    print Dumper($tree);
+}
+
__END__
=pod
|