OSSP CVS Repository

ossp - Check-in [4838]
Not logged in
[Honeypot]  [Browse]  [Home]  [Login]  [Reports]
[Search]  [Ticket]  [Timeline]
  [Patchset]  [Tagging/Branching]

Check-in Number: 4838
Date: 2004-Nov-16 15:05:32 (local)
2004-Nov-16 14:05:32 (UTC)
User: hms
Branch:
Comment: appended recursive search for rdfs to rdf2sql tool
Tickets:
Inspections:
Files:
ossp-pkg/quos/openpkg-rdf2sql.pl      1.3 -> 1.4     111 inserted, 51 deleted

ossp-pkg/quos/openpkg-rdf2sql.pl 1.3 -> 1.4

--- openpkg-rdf2sql.pl   2004/11/08 15:32:59     1.3
+++ openpkg-rdf2sql.pl   2004/11/16 14:05:32     1.4
@@ -1,4 +1,4 @@
-#!/usr/opkg/bin/perl
+#!/usr/lpkg/bin/perl
 ##
 ##  OSSP quos - Query On Steroids
 ##  Copyright (c) 2004 Ralf S. Engelschall <rse@engelschall.com>
@@ -26,28 +26,23 @@
 ##
 
 require 5.008;
+use strict;
 use Getopt::Long;
 use XML::Parser;
 use DBI;
 use DBD::SQLite;
 use Data::Dumper;
 use XML::Simple;
+use LWP::UserAgent;
+use IPC::Filter qw();
 
 #   configure optional debugging
 $Data::Dumper::Purity = 1;
 $Data::Dumper::Indent = 1;
 $Data::Dumper::Terse  = 1;
 
-my $xml = new XML::Simple;
-my $rdf = $xml->XMLin(
-    "openpkg.rdf",
-    KeepRoot     => 1,
-    ForceContent => 0,
-    ForceArray   => 1,
-);
-undef $xml;
-#print Dumper($xml);
-#exit(0);
+my $ftpserv = 'ftp://anonymous:herb@ftp.openpkg.org/';
+my $starturl = '00INDEX.rdf';
 
 #   connect to database
 my $db = DBI->connect("dbi:SQLite:dbname=openpkg.db", "", "",
@@ -85,56 +80,121 @@
     " VALUES ((SELECT MAX(pk_id) FROM quos_package),?);" 
 );
 
-#   iterate over XML/RDF data structure
-foreach my $repo (@{$rdf->{'rdf:RDF'}->[0]->{'Repository'}}) {
-    my $rd_url = $repo->{'rdf:resource'};
+$db->begin_work();
 
-    #   store repository information
-    $db->begin_work();
-    $sql->{-rdf}->execute($rd_url);
-
-    #   interate over all packages in a repository
-    foreach my $desc (@{$repo->{'rdf:Description'}}) {
-        #   store simple (single-value) properties of a package
-        my $prop = {};
-        foreach my $attr (qw(
-            Name Version Release Distribution Group License
-            Packager Summary URL Vendor Description
-        )) {
-            $prop->{$attr} = $desc->{$attr}->[0];
+&fetchrdfsrecursendump2db($starturl);
+
+sub fetchrdfsrecursendump2db {
+    
+    my ($url) = @_;
+    my $ua = new LWP::UserAgent;
+    $ua->agent("rdfcrawl/1.0 ");
+    my $req = new HTTP::Request(GET => $ftpserv . $url);
+    my $rescont = '';
+
+    my $res = $ua->request($req);
+    if ($res->is_success) {
+        $_ = $url;
+        if (m|.*bz2$|) {
+            $rescont = IPC::Filter::filter($res->content, "bzip2 -d");
+        }
+        else {
+            $rescont = $res->content;
         }
-        $sql->{-package}->execute(
-            $prop->{'Name'}, $prop->{'Version'}, $prop->{'Release'}, $prop->{'Distribution'}, $prop->{'Group'},
-            $prop->{'License'}, $prop->{'Packager'}, $prop->{'Summary'}, $prop->{'URL'}, $prop->{'Vendor'},
-            $prop->{'Description'}
-        );
-
-        #   store complex (multi-value) properties of a package
-        foreach my $attr (qw(
-            BuildPreReq
-            PreReq
-            Provide
-        )) {
-            foreach my $el (@{$desc->{$attr}->[0]->{'rdf:bag'}->[0]->{'resource'}}) {
-                my ($key, $op, $val) = ($el, '=', '*');
-                if (ref($key) eq 'HASH') {
-                    $key = $el->{'content'};
-                    $op  = (grep { $_ ne 'content' } keys(%{$el}))[0];
-                    $val = $el->{$op};
+    }
+    else {
+        print $res->status_line, "\n";
+    }
+    
+    my $xml = new XML::Simple;
+    my $rdf = $xml->XMLin(
+        $rescont,
+        KeepRoot     => 1,
+        ForceContent => 0,
+        ForceArray   => 1,
+    );          
+    undef $xml;
+
+#   iterate over XML/RDF data structure
+    foreach my $repo (@{$rdf->{'rdf:RDF'}->[0]->{'Repository'}}) {
+        my $rd_url = $repo->{'rdf:resource'};
+    
+        #   store repository information
+#        $db->begin_work();
+        $sql->{-rdf}->execute($rd_url);
+    
+        #   interate over all packages in a repository
+        if (defined($repo->{'rdf:Description'})) {
+            # from now on package descriptions
+            foreach my $desc (@{$repo->{'rdf:Description'}}) {
+                #   store simple (single-value) properties of a package
+                my $prop = {};
+                foreach my $attr (qw(
+                    Name Version Release Distribution Group License
+                    Packager Summary URL Vendor Description
+                )) {
+                    $prop->{$attr} = $desc->{$attr}->[0];
+                }
+                $sql->{-package}->execute(
+                    $prop->{'Name'}, $prop->{'Version'}, $prop->{'Release'}, $prop->{'Distribution'}, $prop->{'Group'},
+                    $prop->{'License'}, $prop->{'Packager'}, $prop->{'Summary'}, $prop->{'URL'}, $prop->{'Vendor'},
+                    $prop->{'Description'}
+                );
+    
+                #   store complex (multi-value) properties of a package
+                foreach my $attr (qw(
+                    BuildPreReq
+                    PreReq
+                    Provide
+                )) {
+                    foreach my $el (@{$desc->{$attr}->[0]->{'rdf:bag'}->[0]->{'resource'}}) {
+                        my ($key, $op, $val) = ($el, '=', '*');
+                        if (ref($key) eq 'HASH') {
+                            $key = $el->{'content'};
+                            $op  = (grep { $_ ne 'content' } keys(%{$el}))[0];
+                            $val = $el->{$op};
+                        }
+                        $sql->{"-".lc($attr)}->execute($key, $op, $val);
+                    }
+                }
+                foreach my $url (@{$desc->{'Source'}->[0]->{'rdf:bag'}->[0]->{'rdf:li'}}) {
+                    $sql->{-source}->execute($url);
                 }
-                $sql->{"-".lc($attr)}->execute($key, $op, $val);
             }
         }
-        foreach my $url (@{$desc->{'Source'}->[0]->{'rdf:bag'}->[0]->{'rdf:li'}}) {
-            $sql->{-source}->execute($url);
+        else {
+            # from now on rdf references
+            foreach my $repcont (@{$repo->{'Repository'}}) {
+                    $url =~ m|^(.*/)|;
+                    my $actpath = $1;
+                    &fetchrdfsrecursendump2db($actpath . $repcont->{'href'}); 
+                    sleep(1);
+            }
         }
+#            $db->commit();
     }
-    $db->commit();
 }
-
-#   disconnect from database
+# commit and disconnect from database
+$db->commit();
 $db->disconnect();
 
+sub showactxml {
+# temporary function for showing actual xml
+
+    my ($xmlinput) = @_;
+    
+    my $xml = new XML::Simple;
+
+    my $rdf = $xml->XMLin(
+        $xmlinput,
+        KeepRoot     => 1,
+        ForceContent => 0,
+        ForceArray   => 1,
+    );          
+undef $xml;
+print Dumper($rdf);
+}
+
 __END__
 
 =pod

CVSTrac 2.0.1