/[Search-Estraier]/trunk/scripts/dbi-indexer.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/scripts/dbi-indexer.pl

Parent Directory | Revision Log


Revision 195 - (hide annotations)
Tue Nov 14 16:39:08 2006 UTC (17 years, 5 months ago) by dpavlin
File MIME type: text/plain
File size: 2287 byte(s)
added new --dbi and --quiet command-line options, saner defaults

#!/usr/bin/perl -w

use strict;
use Search::Estraier 0.06;
use DBI;
use Data::Dumper;
use Encode qw/from_to/;
use Time::HiRes qw/time/;
use Getopt::Long;

=head1 NAME

dbi-indexer.pl - example indexer of DBI sources for Search::Estraier

=head1 SYNOPSIS

  dbi-indexer.pl [--node_url=URL] [--dbi=DSN] [--dbuser=USER] [--dbpasswd=PASS]
                 [--sql=QUERY] [--pk_col=COLUMN] [--db_encoding=ENCODING]
                 [--estuser=USER] [--estpasswd=PASS] [--debug ...] [--quiet]

=head1 DESCRIPTION

Runs one SQL query through DBI and stores every returned row as a document
in a Search::Estraier node. The value of the C<pk_col> column becomes the
document C<@uri>; every non-empty column is added both as an attribute and
as body text.

C<--debug> may be repeated: level 1 dumps the configuration and column names,
level 2 dumps each document draft, level 3 dumps each fetched row and level 4
turns on the debug flag of the node object.

=cut

# Defaults; every key can be overridden from the command line (see GetOptions below).
my $c = {
    node_url    => 'http://localhost:1978/node/dbi-template1',
    dbi         => 'Pg:dbname=template1',    # DSN without the leading "DBI:"
    dbuser      => 'postgres',
    sql         => qq{
        select * from pg_database
    },
    pk_col      => 'datname',
    db_encoding => 'iso-8859-2',             # encoding of values returned by the database
    debug       => 0,
    estuser     => 'admin',
    estpasswd   => 'admin',
    quiet       => 0,
};

# "eb_encoding" was a typo for "db_encoding" in the option list, which made
# the encoding impossible to override; the old spelling is kept as an alias
# so existing command lines keep parsing.
GetOptions($c, qw/node_url=s dbi=s sql=s pk_col=s db_encoding|eb_encoding=s debug+ quiet+ estuser=s estpasswd=s dbuser=s dbpasswd=s/)
    or die "usage: $0 [--node_url=URL] [--dbi=DSN] [--dbuser=USER] [--dbpasswd=PASS] [--sql=QUERY] [--pk_col=COLUMN] [--db_encoding=ENCODING] [--estuser=USER] [--estpasswd=PASS] [--debug] [--quiet]\n";

warn "# c: ", Dumper($c) if ($c->{debug});

# create and configure node
my $node = Search::Estraier::Node->new(
    url            => $c->{node_url},
    user           => $c->{estuser},
    passwd         => $c->{estpasswd},
    croak_on_error => 1,
    create         => 1,
    debug          => $c->{debug} >= 4 ? 1 : 0,
);

# create DBI connection
my $dbh = DBI->connect("DBI:$c->{dbi}", $c->{dbuser}, $c->{dbpasswd}) || die $DBI::errstr;

my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
$sth->execute() || die $sth->errstr();

warn "# columns: ", join(",", @{ $sth->{NAME} }), "\n" if ($c->{debug});

# DBI does not guarantee a row count for SELECT statements (drivers may
# report 0 or -1), so $total is only used for the percentage when positive.
my $total = $sth->rows;
my $i = 1;    # 1-based number of the row being processed

my $t = time();
my $pk_col = $c->{pk_col} || 'id';
my $db_encoding = $c->{db_encoding} || 'ISO-8859-1';

while (my $row = $sth->fetchrow_hashref()) {

    warn "# row: ", Dumper($row) if ($c->{debug} >= 3);

    # create document
    my $doc = Search::Estraier::Document->new;

    # A key of 0 is a legitimate value, so test for defined and non-empty
    # instead of plain truth.
    my $id = $row->{$pk_col};
    if (defined $id && length $id) {
        $doc->add_attr('@uri', $id);
    }
    else {
        die "can't find pk_col column '$pk_col' in results\n";
    }

    # Progress line: row number followed by one marker per column
    # ("R" = value indexed, "." = empty column skipped).
    my $out = '';
    $out .= sprintf("%4d ", $i);

    # Sorted keys keep the markers and the document text in a stable order.
    foreach my $col (sort keys %{$row}) {
        my $val = $row->{$col};

        if (defined $val && length $val) {
            # change encoding? (from_to converts $val in place)
            from_to($val, $db_encoding, 'UTF-8');

            # add attributes (make column usable from attribute search)
            $doc->add_attr($col, $val);

            # add body text to document (make it searchable using full-text index)
            $doc->add_text($val);

            $out .= "R";
        }
        else {
            $out .= ".";
        }
    }

    warn "# doc draft: ", $doc->dump_draft, "\n" if ($c->{debug} >= 2);

    # The node is created with croak_on_error, so a failed put_doc dies
    # inside the eval; keep its message instead of discarding it.
    my $stored = eval { $node->put_doc($doc) };
    unless ($stored) {
        my $reason = $@ || '';
        chomp $reason;
        die "error: ", $node->status, (length $reason ? ": $reason" : ''), "\n";
    }

    unless ($c->{quiet}) {
        my $elapsed = time() - $t;
        my $rate    = $elapsed > 0 ? $i / $elapsed : 0;
        my $percent = (defined $total && $total > 0)
            ? sprintf('%d%%', int(($i / $total) * 100))
            : '?%';
        printf("%s %s %.1f/s\n", $out, $percent, $rate);
    }

    $i++;
}

# fetchrow_hashref returns false on errors as well as at end of data.
die $sth->errstr() if ($sth->err);

$dbh->disconnect;

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26