/[safari]/filter.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /filter.pl

Parent Directory | Revision Log


Revision 1.6 - (hide annotations)
Tue Feb 1 14:34:55 2005 UTC (19 years ago) by dpavlin
Branch: MAIN
CVS Tags: HEAD
Changes since 1.5: +3 -3 lines
File MIME type: text/plain
fixed header/footer

#!/usr/bin/perl -w

# Filter one saved HTML page named on the command line; the cleaned-up
# copy is written under a file name derived from the input name.
use strict;

# Path of the page to filter (required first argument).  Checked with
# defined/length rather than truthiness so a file literally named "0"
# is still accepted.
my $infile = shift @ARGV;
die "$0 [filename]" unless defined $infile && length $infile;

# Output path; assigned once the name has been derived from $infile.
my $outfile;
# Map a URL (or a file name saved from one) carrying an "xmlid" query
# parameter to the local file name used for the filtered page.
#
#   $href - URL or file name whose query string is inspected
#   $pre  - optional text placed in front of the result (default '')
#   $post - optional text placed after the result (default '')
#
# Returns "$pre<mode>_<xmlid>[_<view>].html[#anchor]$post" where
#   - '/' and '%2f' inside the xmlid are replaced by '_',
#   - <mode> is the "mode" parameter, or "section" when absent,
#   - <view> is appended only when the xmlid ends in "_index",
#   - a '#anchor' found anywhere in $href is re-attached after ".html".
# When $href has no xmlid parameter a "skipping" note is printed to
# STDERR and $href is returned unchanged between $pre and $post.
sub xmlid2file {
	my ($href, $pre, $post) = @_;
	$pre  = '' unless defined $pre;
	$post = '' unless defined $post;

	# [^&]+ is greedy, so one pattern covers xmlid in the middle of the
	# query string as well as at its very end
	my ($isbn) = $href =~ m/xmlid=([^&]+)/i;
	unless (defined $isbn) {
		print STDERR "skipping $href\n";
		return $pre.$href.$post;
	}

	# path separators (literal or URL-encoded) are not usable in a file name
	$isbn =~ s!%2f|/!_!gi;

	# remove anchor from absolute URLs; done before the _index test so
	# that an anchored link resolves to the same file as a plain one
	$isbn =~ s/#.+$//;

	# parameters are accepted at the end of the query too, not only
	# when followed by '&'
	my $mode = $href =~ m/mode=([^&#]+)/ ? $1 : "section";
	$mode .= "_";

	if ($isbn =~ m/_index$/ && $href =~ m/view=([^&#]+)/) {
		$isbn .= "_".$1;
	}

	$isbn .= ".html";

	# anchor
	if ($href =~ m/(#[^&]+)/) {
		$isbn .= $1;
	}

	return $pre.$mode.$isbn.$post;
}
49    
# Derive the output file name from the input file name.
$outfile = xmlid2file($infile);

# Read the whole page into $html; every line is re-terminated with a
# single "\n", so a final line without a newline gains one.
my $html = '';

# 3-arg open: the file name is taken literally, whatever characters
# (e.g. from a saved URL) it contains
open(my $in, '<', $infile) || die "$infile: $!";
while (my $line = <$in>) {
	chomp $line;
	$html .= $line."\n";
}
close($in);

# drop the site name prefix from the page title
$html =~ s!(<title>)O'Reilly Network Safari Bookshelf\s+-\s+!$1!gsi || die "$infile: title";

# give the body a 10 pixel margin instead of none
$html =~ s!<body leftmargin="0" topmargin="0" marginwidth="0" marginheight="0">!<body leftmargin="10" topmargin="10" marginwidth="10" marginheight="10">!s || die "$infile: margins";
# cut everything from the "toppage" anchor up to and including the last
# <!--Copyright ...--> comment (the first .* is greedy)
$html =~ s;<a name="toppage">.*<!--Copyright.*?-->;<a name="toppage"></a>;s || die "$infile: surround layout";

# remove the button cells that precede the right-aligned cell; a page
# without them only produces a warning
$html =~ s!<td valign="top" class="v2">.*?(<td valign="top" class="v2" align="right">)!$1!si || warn "$infile: top buttons";
$html =~ s!<td valign="top" class="v2"><a target="_new".*?(<td valign="top" class="v2" align="right">)!$1!si || warn "bottom buttons";

# replace everything from the "URL" paragraph to the end of the page
# with the closing tags
$html =~ s!<p><b>URL</b>.*$!</body></html>!si || die "$infile: footer";

# strip remaining HTML comments
$html =~ s;<!--.+?-->;;gs;

# rewrite every link target through xmlid2file(); a page without any
# link is treated as an error
$html =~ s!(<a\s+[^>]*href=")([^"]+)("[^>]*>)!xmlid2file("$2","$1","$3")!iegs || die "$infile: links";

# unwrap links that open an http:// URL in a new window, keeping their text
$html =~ s!<a target="_new"[^>]*href="http://[^>]+>(.+?)</a>!$1!gs;

# remove the "Buy Print Version" image and the PDF download link
$html =~ s!<img[^>]+Buy Print Version[^>]+>!!gs;
$html =~ s!<a[^>]+onclick="OpenWin[^>]+mode=downloadPDF[^>]+>\s*<img[^>]+Download this chapter[^>]+>\s*</a>!!gs;

open(my $out, '>', $outfile) || die "$outfile: $!";
print "$outfile\n";
print {$out} $html or die "$outfile: $!";
# buffered write errors only surface at close
close($out) || die "$outfile: $!";


# fix timestamp: copy access and modification time from input to output
# stat fields: atime = 8, mtime = 9
# "or" (not "||") keeps stat in list context; with "||" it would be
# evaluated in scalar context and @s would hold just a true value
my @s = stat($infile) or die "stat $infile: $!";
utime($s[8], $s[9], $outfile) || die "touch $outfile: $!";

  ViewVC Help
Powered by ViewVC 1.1.26