1 |
#!/usr/bin/perl -w |
#!/usr/bin/perl -w |
2 |
|
|
3 |
use strict; |
use strict; |
4 |
|
use parse_trivia; |
5 |
|
|
6 |
# NOTE(review): this region reads like a scraped side-by-side diff. The bare
# numbers and trailing "|" are table residue, and statements from two
# revisions of the script are interleaved (several lines appear twice).
# score for words in title |
# score for words in title |
7 |
# Number of times the title line is repeated in each generated document
# (used further down as the right-hand operand of the `x` repetition operator).
my $title_rank = 3; |
my $title_rank = 3; |
8 |
|
|
9 |
# Read trivia.list.gz through a `gzip -cd` pipe; die with $! if the pipe
# cannot be opened.
open(my $t, "gzip -cd trivia.list.gz |") || die "can't open trivia.list.gz: $!"; |
open(my $t, "gzip -cd trivia.list.gz |") || die "can't open trivia.list.gz: $!"; |
10 |
|
|
11 |
|
# Sequence number used to name output files (trivia/$nr.est); it is
# incremented after each document is written.
my $nr = 1; |
12 |
|
|
13 |
# True until the "=====" separator line has been seen; input lines are
# skipped while it is set.
my $header = 1; |
# Pass the input handle and a per-record callback to parse_trivia.
# NOTE(review): parse_trivia comes from the parse_trivia module, which is not
# visible here; the callback appears to receive key/value pairs -- confirm.
parse_trivia($t, sub { |
|
# Title line and release year of the movie currently being read.
my ($title, $year) = ('',undef); |
|
|
# True while inside a trivia item (set on a "- " line, cleared once the item
# has been written out).
my $in = 0; |
|
|
# Text of the trivia item being accumulated.
my $trivia = ''; |
|
|
# "(qv)" cross-references collected for the current trivia item.
my @qv; |
|
|
|
|
|
# Hash references counting how often each year, title and "(qv)" reference
# was seen; they are written to *.data files at the end of the script.
my ($all_years,$all_titles,$all_quotes); |
|
|
|
|
|
# qv_print($name)
#
# Record a single "(qv)" cross-reference: append $name to the file-level
# @qv list (references of the current trivia item) and increment its
# counter in the file-level $all_quotes hash reference.
#
# Declared at file level rather than inside qv(): a named sub is
# package-scoped wherever it is written, so nesting it only obscured that.
#
# Returns the empty string in every case. An undefined or empty $name is
# ignored; the string "0" is accepted (the former `shift || return ''`
# guard silently dropped it).
sub qv_print {
    my $v = shift;
    return '' unless defined $v && length $v;

    # Disabled in the original: swap "Last, First" into "First Last".
    # $v =~ s/(.*)\s*,\s*(.+)/$2 $1/g;

    push @qv, $v;
    $all_quotes->{$v}++;

    return '';
}

# qv($text)
#
# Scan one line of trivia text for cross-references of the form
# _Name_ (qv) or 'Name' (qv) and record each one through qv_print().
# The text itself is never modified, so a plain global match is used
# instead of a substitution whose result was thrown away.
#
# Returns the number of references recorded (false when there are none).
sub qv {
    my $text = shift;
    return unless defined $text && length $text;

    my $found = 0;
    # \1 requires the closing delimiter to match the opening one (_ or ').
    while ($text =~ m#([_'])([^_']+?)\1 \(qv\)#g) {
        qv_print($2);
        $found++;
    }

    return $found;
}
|
|
|
|
|
# NOTE(review): this region interleaves two revisions of the script (diff-view
# residue): an older inline line-by-line parser and, spliced into its last
# branch, the body of a newer parse_trivia() callback. Bare numbers and
# trailing "|" are table residue, not Perl.

# Running count of trivia items written by the older inline loop.
my $i = 0; |
|
|
|
|
|
# Older revision: read the decompressed list one line at a time.
while(<$t>) { |
|
|
# Everything up to and including the first "=====" line is header; skip it.
if ($header && /^=====+/) { |
|
|
$header = 0; |
|
|
next; |
|
|
} |
|
|
next if $header; |
|
|
|
|
|
# "# <title>" starts the section for a new movie.
if (/^#\s+(.*)\s*$/) { |
|
|
$title = $1; |
|
|
# Split the title into an optionally quoted name ($2) and a "(digits)" year
# ($3), allowing trailing parenthesised words.
# NOTE(review): "(:?" looks like a typo for the non-capturing "(?:" -- as
# written it is a capturing group starting with an optional colon; confirm.
if ($title =~ m#^("*)(.*)\1\s*\((\d+)\)(:?\s*\(\w+\))*$#) { |
|
|
$year = $3; |
|
|
$all_titles->{$2}++; |
|
|
$all_years->{$3}++; |
|
|
} else { |
|
|
$year = undef; |
|
|
} |
|
|
|
|
|
# Progress output: echo the title, with the year in brackets when known.
print "# $title ", ( $year ? "[$year]" : "" ), "\n"; |
|
|
next; |
|
|
|
|
|
# "- text" starts a new trivia item; restart the buffer and collect (qv) refs.
} elsif (/^-\s(.*)\s*$/) { |
|
|
$in = 1; |
|
|
$trivia = "$1\n"; |
|
|
qv($1); |
|
|
# Lines indented by two whitespace characters continue the current item.
} elsif (/^\s\s(.*)\s*$/) { |
|
|
$trivia .= "$1\n"; |
|
|
qv($1); |
|
|
# An empty line while inside an item ends it: build and write the document.
} elsif (/^$/ && $in) { |
|
|
$i++; |
|
|
print "$i "; |
|
|
# Document layout: "@title="/"@size=" lines, optional "year=" and "quote="
# lines, a blank line, the trivia text, then tab-indented lines holding the
# title (repeated $title_rank times) and each (qv) reference.
my $out = '@title=' . $title . "\n" . |
|
|
'@size=' . length($trivia) . "\n"; |
|
|
$out .= "year=$year\n" if ($year); |
|
|
$out .= 'quote=' . join(",", @qv) . "\n" if (@qv); |
|
|
$out .= "\n$trivia\n" . |
|
|
("\t$title\n" x $title_rank) . |
|
|
"\t" . join("\n\t", @qv) . "\n"; |
|
14 |
|
|
15 |
# Older revision: one output file per item, named after $i.
open(my $t, "> trivia/$i.est") || die "can't open $i.est: $!"; |
# Newer revision (callback body): collect the callback arguments into a hash
# reference; the keys read below are title, trivia, year and qv.
# NOTE(review): a lexical named $a shadows the $a used by sort blocks in the
# same scope -- consider another name.
my $a = {@_}; |
16 |
|
|
17 |
|
my $out = '@title=' . $a->{title} . "\n" . |
18 |
|
'@size=' . length($a->{trivia}) . "\n"; |
19 |
|
$out .= "year=$a->{year}\n" if ($a->{year}); |
20 |
|
# NOTE(review): $a->{qv} is dereferenced as an array three statements below,
# so join() here receives the reference itself and would emit "ARRAY(0x...)";
# the condition is also true for an empty array ref. Probably meant
# join(",", @{ $a->{qv} }) guarded by @{ $a->{qv} } -- confirm.
$out .= 'quote=' . join(",", $a->{qv}) . "\n" if ($a->{qv}); |
21 |
|
$out .= "\n$a->{trivia}\n" . |
22 |
|
("\t$a->{title}\n" x $title_rank) . |
23 |
|
"\t" . join("\n\t", @{ $a->{qv} }) . "\n"; |
24 |
|
|
25 |
|
# Newer revision: one output file per record, named after $nr.
open(my $t, "> trivia/$nr.est") || die "can't open $nr.est: $!"; |
26 |
print $t $out; |
print $t $out; |
27 |
close($t); |
close($t); |
28 |
# Older revision resets the item buffer; newer revision advances the file number.
$trivia = ''; |
$nr++; |
29 |
@qv = (); |
# End of the anonymous callback and of the parse_trivia(...) call (newer revision).
}); |
|
$in = 0; |
|
|
# Any other line is echoed to STDOUT prefixed with "#".
} else { |
|
|
print "#$_\n"; |
|
|
} |
|
|
|
|
|
# last if ($i > 1000); # XXX remove this! |
|
|
} |
|
|
|
|
|
# dump_data($name, $hash)
#
# Write the contents of the hash reference $hash to the file $name, one
# "key<TAB>value" line per entry, with keys in sorted (string) order.
# An existing file is truncated. An undefined $hash produces an empty file.
#
# Parameters:
#   $name - path of the file to create
#   $hash - hash reference mapping a key to its count
#
# Dies if the file cannot be opened or if closing it fails (buffered write
# errors only surface at close). Returns 1 on success.
#
# The former ($$) prototype was dropped: prototypes change how calls are
# parsed and do not validate arguments. Existing two-argument calls behave
# the same.
sub dump_data {
    my ($name, $hash) = @_;

    # Nothing collected for this category: still create the (empty) file.
    $hash ||= {};

    # Three-argument open keeps $name from being interpreted as part of the
    # mode string and preserves leading/trailing whitespace in the name.
    open(my $fh, '>', $name) or die "can't open $name: $!";

    foreach my $k (sort keys %{$hash}) {
        print {$fh} "$k\t", $hash->{$k}, "\n";
    }

    close($fh) or die "can't close $name: $!";

    return 1;
}
|
|
|
|
|
# Write the three occurrence tables (titles, "(qv)" references, years) to
# their own files, one "key<TAB>count" line per entry.
dump_data('titles.data', $all_titles); |
|
|
dump_data('quotes.data', $all_quotes); |
|
|
dump_data('years.data', $all_years); |
|