Index: ossp-pkg/quos/openpkg-rdf2sql.pl RCS File: /v/ossp/cvs/ossp-pkg/quos/openpkg-rdf2sql.pl,v rcsdiff -q -kk '-r1.4' '-r1.5' -u '/v/ossp/cvs/ossp-pkg/quos/openpkg-rdf2sql.pl,v' 2>/dev/null --- openpkg-rdf2sql.pl 2004/11/16 14:05:32 1.4 +++ openpkg-rdf2sql.pl 2004/11/16 16:20:27 1.5 @@ -41,7 +41,7 @@ $Data::Dumper::Indent = 1; $Data::Dumper::Terse = 1; -my $ftpserv = 'ftp://anonymous:herb@ftp.openpkg.org/'; +my $ftpserv = 'ftp://anonymous@ftp.openpkg.org/'; my $starturl = '00INDEX.rdf'; # connect to database @@ -85,17 +85,16 @@ &fetchrdfsrecursendump2db($starturl); sub fetchrdfsrecursendump2db { - my ($url) = @_; + + # fetch XML/RDF index file my $ua = new LWP::UserAgent; - $ua->agent("rdfcrawl/1.0 "); + $ua->agent("openpkg-rdf2sql/1.0"); my $req = new HTTP::Request(GET => $ftpserv . $url); my $rescont = ''; - my $res = $ua->request($req); if ($res->is_success) { - $_ = $url; - if (m|.*bz2$|) { + if ($url =~ m|.*bz2$|) { $rescont = IPC::Filter::filter($res->content, "bzip2 -d"); } else { @@ -103,9 +102,10 @@ } } else { - print $res->status_line, "\n"; + print $res->status_line . "\n"; } + # parse XML/RDF index file my $xml = new XML::Simple; my $rdf = $xml->XMLin( $rescont, @@ -115,17 +115,17 @@ ); undef $xml; -# iterate over XML/RDF data structure + # iterate over XML/RDF data structure foreach my $repo (@{$rdf->{'rdf:RDF'}->[0]->{'Repository'}}) { my $rd_url = $repo->{'rdf:resource'}; # store repository information -# $db->begin_work(); + #$db->begin_work(); $sql->{-rdf}->execute($rd_url); - # interate over all packages in a repository + # iterate over all packages in a repository if (defined($repo->{'rdf:Description'})) { - # from now on package descriptions + # from now on package descriptions foreach my $desc (@{$repo->{'rdf:Description'}}) { # store simple (single-value) properties of a package my $prop = {}; @@ -163,36 +163,35 @@ } } else { - # from now on rdf references + # from now on RDF references foreach my $repcont (@{$repo->{'Repository'}}) { - $url =~ m|^(.*/)|; - my $actpath = $1; - &fetchrdfsrecursendump2db($actpath . $repcont->{'href'}); - sleep(1); + $url =~ m|^(.*/)|; + my $actpath = $1; + &fetchrdfsrecursendump2db($actpath . $repcont->{'href'}); + sleep(1); } } -# $db->commit(); + #$db->commit(); } } -# commit and disconnect from database + +# commit and disconnect from database $db->commit(); $db->disconnect(); +# temporary function for showing actual XML sub showactxml { -# temporary function for showing actual xml - my ($xmlinput) = @_; my $xml = new XML::Simple; - my $rdf = $xml->XMLin( $xmlinput, KeepRoot => 1, ForceContent => 0, ForceArray => 1, ); -undef $xml; -print Dumper($rdf); + undef $xml; + print Dumper($rdf); } __END__