/[pgestraier]/trunk/data/convert.pl
This is a repository of my old source code, which isn't updated anymore. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/data/convert.pl

Parent Directory | Revision Log


Revision 21 - (hide annotations)
Thu May 26 19:43:56 2005 UTC (19 years ago) by dpavlin
File MIME type: text/plain
File size: 1845 byte(s)
Generate dumps of used years, titles and quotes. Some parsing fixes

#!/usr/bin/perl

# Reads trivia.list.gz and writes one trivia/<n>.est file per trivia entry,
# then dumps occurrence counts of titles, years and "(qv)" references.

use strict;
use warnings;

# score for words in title: how many times the title line is repeated in the
# tab-prefixed tail of every generated record
my $title_rank = 3;

# Stream the decompressed list straight from gzip. The list form of the pipe
# open runs gzip directly, so no shell ever parses the command line.
# Note: a failing gzip (e.g. missing input file) is only detectable when the
# pipe is closed, not here -- this open fails only if gzip cannot be started.
open(my $t, '-|', 'gzip', '-cd', 'trivia.list.gz')
    or die "can't open trivia.list.gz: $!";

my $header = 1;                     # true until the "=====" separator line is seen
my ($title, $year) = ('', undef);   # current title line and its parsed year, if any
my $in     = 0;                     # true while a trivia entry is being collected
my $trivia = '';                    # text of the entry being collected
my @qv;                             # "(qv)" references found in the current entry

# Occurrence counters, used as hash references (autovivified on first use)
# and written out at the end of the run.
my ($all_years, $all_titles, $all_quotes);
18    
# Record a single "(qv)" reference: append it to @qv (references of the
# entry currently being collected) and bump its counter in $all_quotes.
#
#   $v - the referenced name, without its quoting characters
#
# Always returns the empty string. Undefined or empty input is ignored;
# the literal string "0" is a valid reference and is recorded.
sub qv_print {
    my ($v) = @_;
    return '' unless defined $v && length $v;

    push @qv, $v;
    $all_quotes->{$v}++;
    return '';
}

# Scan one line of trivia text for references written as 'Name' (qv) or
# _Name_ (qv) and record each of them through qv_print().
#
#   $text - a line of trivia text; it is only read, never modified
#
# Returns the number of references found (0 when there are none), or
# nothing when called with undefined or empty text.
sub qv {
    my ($text) = @_;
    return unless defined $text && length $text;

    my $found = 0;

    # Same opening and closing delimiter (_ or '), neither of which may
    # occur inside the name, followed by the literal " (qv)" marker.
    while ($text =~ m#([_'])([^_']+?)\1 \(qv\)#g) {
        qv_print($2);
        $found++;
    }

    return $found;
}
31    
my $i = 0;    # number of trivia records written so far

# Write the entry collected in $trivia / @qv to trivia/<n>.est and reset the
# per-entry state so the next entry starts clean. Dies on any I/O error.
my $write_record = sub {
    $i++;
    print "$i ";    # progress indicator on STDOUT

    # Layout: attribute lines, a blank line, the trivia text, then
    # tab-prefixed lines repeating the title $title_rank times and listing
    # the "(qv)" references.
    # NOTE(review): this looks like a Hyper Estraier document draft, where
    # tab-prefixed lines are hidden text -- confirm against the indexer.
    my $out = '@title=' . $title . "\n" .
              '@size=' . length($trivia) . "\n";
    $out .= "year=$year\n" if ($year);
    $out .= 'quote=' . join(",", @qv) . "\n" if (@qv);
    $out .= "\n$trivia\n" .
            ("\t$title\n" x $title_rank) .
            "\t" . join("\n\t", @qv) . "\n";

    # Separate name for the output handle: $t is the input pipe.
    my $path = "trivia/$i.est";
    open(my $est, '>', $path) or die "can't open $path: $!";
    print $est $out;
    # Buffered write errors (e.g. disk full) only surface at close.
    close($est) or die "can't close $path: $!";

    $trivia = '';
    @qv     = ();
    $in     = 0;
};

while (<$t>) {
    # Skip the preamble: everything up to and including the first line
    # that starts with five or more '=' characters.
    if ($header && /^=====+/) {
        $header = 0;
        next;
    }
    next if $header;

    if (/^#\s+(.*)\s*$/) {
        # "# <title>" line: starts the section for a new title.
        $title = $1;

        # Optional double quotes around the name, a numeric year in
        # parentheses, then any number of "(word)" suffixes. The suffix
        # group is non-capturing, so $2 is the name and $3 the year.
        if ($title =~ m#^("*)(.*)\1\s*\((\d+)\)(?:\s*\(\w+\))*$#) {
            $year = $3;
            $all_titles->{$2}++;
            $all_years->{$3}++;
        } else {
            $year = undef;
        }

        print "# $title ", ( $year ? "[$year]" : "" ), "\n";
        next;

    } elsif (/^-\s(.*)\s*$/) {
        # "- text": first line of a trivia entry.
        $in = 1;
        $trivia = "$1\n";
        qv($1);
    } elsif (/^\s\s(.*)\s*$/) {
        # Two leading whitespace characters: continuation of the entry.
        $trivia .= "$1\n";
        qv($1);
    } elsif (/^$/ && $in) {
        # Blank line terminates the current entry.
        $write_record->();
    } else {
        # Anything unrecognised is echoed to STDOUT for inspection.
        print "#$_\n";
    }

    # NOTE: leaving the loop early makes the close() below fail, because
    # gzip may be killed by SIGPIPE when its reader goes away.
#	last if ($i > 1000);	# XXX remove this!
}

# The input may end without a trailing blank line; do not lose that entry.
$write_record->() if $in;

# Closing the pipe waits for gzip and is the only place where its failure
# (missing or corrupt trivia.list.gz) becomes visible.
close($t)
    or die( $! ? "error reading trivia.list.gz: $!\n"
               : "gzip -cd trivia.list.gz failed (wait status $?)\n" );
84    
# Write a table of counters to a file, one "key<TAB>count" line per key,
# with keys in string sort order.
#
#   $name - path of the file to create (truncated if it already exists)
#   $hash - hash reference mapping key => count; undef means "no entries"
#           and produces an empty file
#
# Returns true on success; dies if the file cannot be opened or closed.
sub dump_data {
    my ($name, $hash) = @_;
    $hash ||= {};

    # Three-argument open: $name is taken literally, so surrounding
    # whitespace or mode characters in it cannot change what gets opened.
    open(my $fh, '>', $name) or die "can't open $name: $!";

    foreach my $k (sort keys %{$hash}) {
        print $fh "$k\t", $hash->{$k}, "\n";
    }

    # Buffered write errors only surface at close, so check it.
    close($fh) or die "can't close $name: $!";

    return 1;
}
96    
# Write the three occurrence tables collected above, each to its own file.
foreach my $dump (
    [ 'titles.data', $all_titles ],
    [ 'quotes.data', $all_quotes ],
    [ 'years.data',  $all_years  ],
) {
    # Pass the two elements explicitly so the call always receives exactly
    # one file name and one hash reference.
    dump_data($dump->[0], $dump->[1]);
}

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26