/[webpac2]/trunk/run.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/run.pl

Parent Directory | Revision Log


Revision 209 - (hide annotations)
Mon Dec 5 17:46:57 2005 UTC (18 years, 4 months ago) by dpavlin
File MIME type: text/plain
File size: 2171 byte(s)
 r11518@llin:  dpavlin | 2005-12-04 19:43:29 +0100
 renamed WebPAC::DB to WebPAC::Store

#!/usr/bin/perl -w

# run.pl - read records from an ISIS input, normalize them and hand them
# to the WebPAC::Output::Estraier indexer.
#
# Usage: run.pl [limit]
#
#   limit   optional; when given it overrides input.limit from
#           conf/config.yml and is passed to WebPAC::Input::ISIS as
#           limit_mfn.
#
# All other settings (input path and encoding, lookup file, store path,
# normalization rules, template path, indexer options) come from
# conf/config.yml, which is resolved relative to the current directory,
# as is ./lib.

use strict;
use warnings;

use Cwd qw/abs_path/;
use File::Temp qw/tempdir/;
use Data::Dumper;
use lib './lib';

use WebPAC::Lookup;
use WebPAC::Input::ISIS;
use WebPAC::Store 0.03;
use WebPAC::Normalize::XML;
use WebPAC::Output::TT;
use WebPAC::Output::Estraier;
use YAML qw/LoadFile/;

# Optional record limit from the command line (undef when not supplied).
my $limit = shift @ARGV;

my $config = LoadFile('conf/config.yml');

print "config = ",Dumper($config);

# Guard against a missing input.type so we fail with the message below
# instead of an "uninitialized value" warning followed by the same die.
my $type = lc( $config->{input}->{type} || '' );

die "I know only how to handle input type isis, not '$type'!\n" unless ($type eq 'isis');

# A limit given on the command line wins over the configured one.
my $limit_mfn = defined $limit ? $limit : $config->{input}->{limit};

my $lookup = WebPAC::Lookup->new(
    lookup_file => $config->{input}->{lookup},
);

my $isis = WebPAC::Input::ISIS->new(
    code_page => $config->{webpac}->{webpac_encoding},
    limit_mfn => $limit_mfn,
);

# The return value is not used below; the name suggests it is the highest
# MFN in the database -- TODO confirm against WebPAC::Input::ISIS.
my $maxmfn = $isis->open(
    filename  => $config->{input}->{path},
    code_page => $config->{input}->{encoding},    # database encoding
);

my $db = WebPAC::Store->new(
    path => $config->{webpac}->{db_path},
);

my $n = WebPAC::Normalize::XML->new(
#   filter => { 'foo' => sub { shift } },
    db           => $db,
    lookup_regex => $lookup->regex,
    lookup       => $lookup,
);

$n->open(
    tag      => $config->{normalize}->{tag},
    xml_file => $config->{normalize}->{path},
);

# Only referenced by the disabled HTML test output inside the loop; still
# constructed so that start-up behaviour is unchanged.
my $out = WebPAC::Output::TT->new(
    include_path => $config->{webpac}->{template_path},
    filters      => { foo => sub { shift } },
);

my $est = WebPAC::Output::Estraier->new(
    %{ $config->{hyperestraier} }
);

my $total_rows = 0;

# NOTE(review): this makes size + 1 fetch attempts; iterations where
# fetch returns a false value are skipped, so the extra attempt looks
# harmless -- confirm against WebPAC::Input::ISIS::fetch.
for ( 0 .. $isis->size ) {

    my $row = $isis->fetch || next;

    # Field '000' is expected to hold the record's MFN; a missing (or
    # zero) value aborts the run.
    my $mfn = $row->{'000'}->[0] || die "can't find MFN";

    my $ds = $n->data_structure($row);

#   print STDERR Dumper($row, $ds);

#   my $html = $out->apply(
#       template => 'html_ffzg.tt',
#       data => $ds,
#   );
#
#   # create test output
#
#   my $file = sprintf('out/%02d.html', $mfn );
#   open(my $fh, '>', $file) or die "can't open $file: $!";
#   print $fh $html;
#   close($fh);
#
#   $html =~ s#\s*[\n\r]+\s*##gs;
#
#   print STDERR $html;

    $est->add(
        id   => $mfn,
        ds   => $ds,
        type => $config->{hyperestraier}->{type},
    );

    $total_rows++;

}

# NOTE(review): _get_logger is underscore-prefixed (private by
# convention) but is the only logger accessor visible from this script.
my $log = $lookup->_get_logger;

$log->info("$total_rows records indexed");

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26