/[pgestraier]/trunk/data/parse_trivia.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/data/parse_trivia.pm

Parent Directory | Revision Log


Revision 83 - (hide annotations)
Wed Jan 17 22:30:14 2007 UTC (17 years, 4 months ago) by dpavlin
File size: 1646 byte(s)
restructure IMDB trivia parser, added db target to create
trivia database in PostgreSQL
use strict;
use warnings;

# Parser state shared by the subs in this file.  Everything here is
# file-scoped, so it persists between successive parse_trivia() calls.

# True until parse_trivia() has seen the "=====" separator line; every
# line read while it is set is skipped.
my $header = 1;

# Title and year taken from the most recent "# ..." heading line.  $year
# stays undefined when the heading carries no "(NNNN)" year.
my $title = '';
my $year;

# True while a trivia entry is being collected.
my $in = 0;

# Text of the entry being collected, one "\n"-terminated line per input line.
my $trivia = '';

# Titles referenced with a "(qv)" marker inside the entry being collected.
my @qv;

# Occurrence counters (hash references, created on first use), keyed by
# year, title and "(qv)"-referenced title respectively.
my ($all_years, $all_titles, $all_quotes);
10    
# qv_print($name)
#
# Record one "(qv)"-referenced name: append it to @qv, bump its counter in
# $all_quotes and echo it to STDOUT as "+ name ".  Always returns the empty
# string, which is what qv() substitutes for the matched reference.  A false
# $name (undef, '' or '0') is ignored.
sub qv_print {
    my ($name) = @_;
    return '' unless $name;

    # Disabled experiment: move the part after the last comma to the front.
    # $name =~ s/(.*)\s*,\s*(.+)/$2 $1/g;
    push @qv, $name;
    $all_quotes->{$name}++;
    print "+ $name ";

    return '';
}

# qv($text)
#
# Scan $text for references of the form <quote>name<quote> (qv), where the
# quote character is one of _ ' " and must be the same on both sides, and
# pass each name to qv_print().  Only a private copy of $text is modified.
# Returns nothing for a false $text; otherwise returns the result of the
# substitution (number of references found, or false when there were none).
sub qv {
    my ($text) = @_;
    return unless $text;

    return $text =~ s#([_'"])([^_'"]+?)\1 \(qv\)#qv_print($2)#ge;
}
24    
# fix_title($title)
#
# Return a copy of $title with a trailing ", The" (comma, at least one
# whitespace character, "The", optional trailing whitespace) removed.
# Titles without that suffix are returned unchanged.
sub fix_title {
    my ($title) = @_;

    $title =~ s/,\s+The\s*$//;

    return $title;
}
30    
# Number of trivia entries handed to the callback so far.  File-scoped, so
# the count continues across successive parse_trivia() calls.
my $i = 0;

# parse_trivia($fh, $callback)
#
# Read a trivia list from the file handle $fh and call $callback once per
# trivia entry with the key/value list
#
#     title  => title from the most recent "# ..." heading,
#     year   => year from that heading, or undef if it had none,
#     trivia => text of the entry, one "\n"-terminated line per input line,
#     qv     => array reference of "(qv)"-referenced names in the entry.
#
# Input layout, as recognised below:
#   - everything up to and including the first line starting with "=====" is
#     skipped as header;
#   - "# Title (YYYY)" starts a new title;
#   - "- text" starts a trivia entry, lines indented by two whitespace
#     characters continue it, and an empty line ends it.
#
# Progress is printed to STDOUT.  An entry that is still open when the next
# entry or heading starts, or when the input ends, is delivered as well
# instead of being dropped.
sub parse_trivia {
    my ($t, $call) = @_;

    # Deliver the entry collected so far (if any) and reset per-entry state.
    my $flush = sub {
        return unless $in;

        $i++;
        print "[$i] ";

        $call->(
            title  => $title,
            year   => $year,
            trivia => $trivia,
            qv     => [ @qv ],
        );

        $trivia = '';
        @qv     = ();
        $in     = 0;
        return;
    };

    # A lexical line variable keeps the caller's $_ untouched.
    while (my $line = <$t>) {
        if ($header) {
            $header = 0 if $line =~ /^=====+/;
            next;
        }

        if ($line =~ /^#\s+(.*)\s*$/) {
            my $heading = $1;

            # Deliver a pending entry while $title/$year still describe it.
            $flush->();

            # Optional surrounding quotes, "(year)", then any number of
            # "(word)" suffixes.
            if ($heading =~ m#^("*)(.*)\1\s*\((\d+)\)(?:\s*\(\w+\))*$#) {
                my ($name, $released) = ($2, $3);
                $title = fix_title($name);
                $year  = $released;
                $all_titles->{$title}++;
                $all_years->{$year}++;
            } else {
                $title = $heading;
                $year  = undef;
            }

            print "# $title ", ( $year ? "[$year]" : "" ), "\n";
            next;

        } elsif ($line =~ /^-\s(.*)\s*$/) {
            my $text = $1;

            # A new entry starting without a separating empty line must not
            # overwrite the previous one or inherit its (qv) list.
            $flush->();

            $in     = 1;
            $trivia = "$text\n";
            qv($text);
        } elsif ($line =~ /^\s\s(.*)\s*$/) {
            my $text = $1;
            $trivia .= "$text\n";
            qv($text);
        } elsif ($line =~ /^$/ && $in) {
            $flush->();
        } else {
            # Unrecognised line: echo it for inspection.
            print "#$line\n";
        }

        # last if ($i > 1000); # XXX remove this!
    }

    # The input may end without a final empty line.
    $flush->();

    return;
}
84    
# dump_data($name, $hash)
#
# Write the hash referenced by $hash to the file $name, overwriting it, as
# one "key<TAB>value" line per entry with keys in sorted order.
#
# Dies if the file cannot be opened or if closing it fails (buffered write
# errors only surface at close time).  Returns 1 on success.
#
# The ($$) prototype is kept so that existing call sites parse as before.
sub dump_data($$) {
    my ($name, $hash) = @_;

    # Three-argument open: $name is used verbatim as a file name and is never
    # interpreted as a mode or stripped of surrounding whitespace.
    open(my $fh, '>', $name) || die "can't open $name: $!";

    foreach my $k (sort keys %{$hash}) {
        print $fh "$k\t", $hash->{$k}, "\n";
    }

    close($fh) || die "can't close $name: $!";

    return 1;
}
96    
97     #dump_data('titles.data', $all_titles);
98     #dump_data('quotes.data', $all_quotes);
99     #dump_data('years.data', $all_years);
100    
101     1;

  ViewVC Help
Powered by ViewVC 1.1.26