--- openpkg-rdf2sql.pl 2004/11/16 14:05:32 1.4
+++ openpkg-rdf2sql.pl 2004/11/16 16:20:27 1.5
@@ -41,7 +41,7 @@
$Data::Dumper::Indent = 1;
$Data::Dumper::Terse = 1;
-my $ftpserv = 'ftp://anonymous:herb@ftp.openpkg.org/';
+my $ftpserv = 'ftp://anonymous@ftp.openpkg.org/';
my $starturl = '00INDEX.rdf';
# connect to database
@@ -85,17 +85,16 @@
&fetchrdfsrecursendump2db($starturl);
sub fetchrdfsrecursendump2db {
-
my ($url) = @_;
+
+ # fetch XML/RDF index file
my $ua = new LWP::UserAgent;
- $ua->agent("rdfcrawl/1.0 ");
+ $ua->agent("openpkg-rdf2sql/1.0");
my $req = new HTTP::Request(GET => $ftpserv . $url);
my $rescont = '';
-
my $res = $ua->request($req);
if ($res->is_success) {
- $_ = $url;
- if (m|.*bz2$|) {
+ if ($url =~ m|.*bz2$|) {
$rescont = IPC::Filter::filter($res->content, "bzip2 -d");
}
else {
@@ -103,9 +102,10 @@
}
}
else {
- print $res->status_line, "\n";
+ print $res->status_line . "\n";
}
+ # parse XML/RDF index file
my $xml = new XML::Simple;
my $rdf = $xml->XMLin(
$rescont,
@@ -115,17 +115,17 @@
);
undef $xml;
-# iterate over XML/RDF data structure
+ # iterate over XML/RDF data structure
foreach my $repo (@{$rdf->{'rdf:RDF'}->[0]->{'Repository'}}) {
my $rd_url = $repo->{'rdf:resource'};
# store repository information
-# $db->begin_work();
+ #$db->begin_work();
$sql->{-rdf}->execute($rd_url);
- # interate over all packages in a repository
+ # iterate over all packages in a repository
if (defined($repo->{'rdf:Description'})) {
- # from now on package descriptions
+ # this repository entry carries package descriptions: process each one
foreach my $desc (@{$repo->{'rdf:Description'}}) {
# store simple (single-value) properties of a package
my $prop = {};
@@ -163,36 +163,35 @@
}
}
else {
- # from now on rdf references
+ # no package descriptions here: follow the references to further RDF index files
foreach my $repcont (@{$repo->{'Repository'}}) {
- $url =~ m|^(.*/)|;
- my $actpath = $1;
- &fetchrdfsrecursendump2db($actpath . $repcont->{'href'});
- sleep(1);
+ $url =~ m|^(.*/)|;
+ my $actpath = $1;
+ &fetchrdfsrecursendump2db($actpath . $repcont->{'href'});
+ sleep(1);
}
}
-# $db->commit();
+ #$db->commit();
}
}
-# commit and disconnect from database
+
+# commit and disconnect from database
$db->commit();
$db->disconnect();
+# temporary debugging function: parse the given XML and dump the resulting data structure
sub showactxml {
-# temporary function for showing actual xml
-
my ($xmlinput) = @_;
my $xml = new XML::Simple;
-
my $rdf = $xml->XMLin(
$xmlinput,
KeepRoot => 1,
ForceContent => 0,
ForceArray => 1,
);
-undef $xml;
-print Dumper($rdf);
+ undef $xml;
+ print Dumper($rdf);
}
__END__
|