/[pgestraier]/trunk/data/convert.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/data/convert.pl

Parent Directory | Revision Log


Revision 21 - (show annotations)
Thu May 26 19:43:56 2005 UTC (18 years, 11 months ago) by dpavlin
File MIME type: text/plain
File size: 1845 byte(s)
Generate dumps of used years, titles and quotes. Some parsing fixes

1 #!/usr/bin/perl -w
2
3 use strict;
4
# Weight for words in the title: the title is appended this many times
# to every generated document.
my $title_rank = 3;

# A piped open only reports a failed fork, so a missing archive would
# otherwise go unnoticed here and simply produce an empty run.
die "can't open trivia.list.gz: $!" unless -r 'trivia.list.gz';

# List-form pipe open: gzip is executed directly, without a shell.
open(my $t, '-|', 'gzip', '-cd', 'trivia.list.gz')
	|| die "can't open trivia.list.gz: $!";

# Parser state.
my $header = 1;                     # true until the "=====" separator line is seen
my ($title, $year) = ('', undef);   # current title line and the year parsed from it
my $in = 0;                         # true while a trivia entry is being collected
my $trivia = '';                    # text of the entry collected so far
my @qv;                             # "(qv)" references found in the current entry

# Counters written out at the end of the run: value => number of occurrences.
my ($all_years, $all_titles, $all_quotes);
18
# Collect every "(qv)" cross-reference from one line of trivia text.
#
# A reference is text wrapped in a pair of matching underscores or single
# quotes and immediately followed by " (qv)", e.g. _Some Title_ (qv) or
# 'Some Name' (qv).  The text between the delimiters is appended to @qv
# and counted in $all_quotes.
#
# Takes the line as its only argument and returns the number of references
# found (0 for a missing or empty argument).
sub qv {
	my $text = shift;
	return 0 unless defined $text && length $text;

	my $found = 0;

	# Scan with m//g rather than a substitution: only the captures are
	# needed, the text itself is never modified.
	while ($text =~ m#([_'])([^_']+?)\1 \(qv\)#g) {
		my $ref = $2;
		# Disabled in the original: swap the parts around a comma.
		# $ref =~ s/(.*)\s*,\s*(.+)/$2 $1/g;
		push @qv, $ref;
		$all_quotes->{$ref}++;
		$found++;
	}

	return $found;
}
31
# Sequence number of the last document written; also used as its file name.
my $i = 0;

# Write the entry collected in $trivia/@qv to trivia/<n>.est and reset the
# per-entry state.  Does nothing unless an entry is in progress.
my $flush_entry = sub {
	return unless $in;

	$i++;
	print "$i ";

	my $out = '@title=' . $title . "\n" .
		'@size=' . length($trivia) . "\n";
	$out .= "year=$year\n" if ($year);
	$out .= 'quote=' . join(",", @qv) . "\n" if (@qv);
	# Entry text, then the title repeated $title_rank times and the
	# references, each on a tab-prefixed line.
	$out .= "\n$trivia\n" .
		("\t$title\n" x $title_rank) .
		"\t" . join("\n\t", @qv) . "\n";

	my $file = "trivia/$i.est";
	open(my $est, '>', $file) || die "can't open $file: $!";
	print $est $out;
	# Buffered write errors only surface on close.
	close($est) || die "can't write $file: $!";

	$trivia = '';
	@qv = ();
	$in = 0;
};

while (my $line = <$t>) {
	# Skip everything up to and including the "=====" separator line.
	if ($header) {
		$header = 0 if $line =~ /^=====+/;
		next;
	}

	# The captures are non-greedy so that the trailing \s* really strips
	# trailing whitespace; otherwise it ends up in the title and defeats
	# the end-anchored title/year match below.
	if ($line =~ /^#\s+(.*?)\s*$/) {
		# "# Title (year) (suffix)..." starts a new title.
		$title = $1;
		if ($title =~ m#^("*)(.*)\1\s*\((\d+)\)(?:\s*\(\w+\))*$#) {
			$year = $3;
			$all_titles->{$2}++;
			$all_years->{$3}++;
		} else {
			$year = undef;
		}

		print "# $title ", ( $year ? "[$year]" : "" ), "\n";
		next;

	} elsif ($line =~ /^-\s(.*?)\s*$/) {
		# "- text" starts a new trivia entry.
		my $text = $1;
		$in = 1;
		$trivia = "$text\n";
		qv($text);
	} elsif ($line =~ /^\s\s(.*?)\s*$/) {
		# Indented line: continuation of the current entry.
		my $text = $1;
		$trivia .= "$text\n";
		qv($text);
	} elsif ($line =~ /^$/ && $in) {
		# A blank line ends the current entry.
		$flush_entry->();
	} else {
		# Anything unrecognized is echoed for inspection.
		print "#$line\n";
	}

	# last if ($i > 1000); # XXX remove this!
}

# The input may end without a blank line after the last entry.
$flush_entry->();

# Closing a pipe also reports a non-zero exit status of the command.
unless (close($t)) {
	warn "gzip -cd trivia.list.gz failed: ",
		($! ? "$!" : "exit status " . ($? >> 8)), "\n";
}
84
# Write the counters in $hash to the file $name, one "key<TAB>count" line
# per key, sorted by key.  $hash may be undef (nothing was counted), in
# which case an empty file is written.  Dies if the file cannot be opened
# or written.
sub dump_data {
	my ($name, $hash) = @_;

	# Three-argument open: $name is always treated as a plain file name.
	open(my $fh, '>', $name) || die "can't open $name: $!";

	my $counts = $hash || {};
	foreach my $k (sort keys %{$counts}) {
		print $fh "$k\t", $counts->{$k}, "\n";
	}

	# Buffered write errors only surface on close.
	close($fh) || die "can't write $name: $!";
}
96
# Dump the title, quote and year counters, in that order.
foreach my $dump (
	[ 'titles.data', $all_titles ],
	[ 'quotes.data', $all_quotes ],
	[ 'years.data',  $all_years  ],
) {
	my ($file, $counts) = @{$dump};
	dump_data($file, $counts);
}

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26