--- filter.pl 2003/12/14 19:11:30 1.1
+++ filter.pl 2003/12/14 22:27:18 1.2
@@ -11,7 +11,7 @@
     my $isbn;
     if ($href =~ m/xmlid=([^&]+)&/) {
         $isbn = $1;
-    } elsif ($href =~ m/xmlid=([^&]+)$/) {
+    } elsif ($href =~ m/xmlid=([^&]+)$/i) {
         $isbn = $1;
     } else {
         print STDERR "skipping $href\n";
@@ -58,17 +58,17 @@
 }
 close(IN);
 
-$html =~ s,()O'Reilly Network Safari Bookshelf\s+-\s+,$1,gsi || die "$infile: title";
+$html =~ s!(<title>)O'Reilly Network Safari Bookshelf\s+-\s+!$1!gsi || die "$infile: title";
 
-$html =~ s,<body leftmargin="0" topmargin="0" marginwidth="0" marginheight="0">,<body leftmargin="10" topmargin="10" marginwidth="10" marginheight="10">,s || die "$infile: margins";
-$html =~ s,<a name="toppage">.*<!--Copyright.*?-->,,s || die "$infile: surround layout";
+$html =~ s!<body leftmargin="0" topmargin="0" marginwidth="0" marginheight="0">!<body leftmargin="10" topmargin="10" marginwidth="10" marginheight="10">!s || die "$infile: margins";
+$html =~ s;<a name="toppage">.*<!--Copyright.*?-->;;s || die "$infile: surround layout";
 
-$html =~ s,<td valign="top" class="v2">.*?(<td valign="top" class="v2" align="right">),$1,s || die "$infile: top buttons";
-$html =~ s,<td valign="top" class="v2"><a target="_new".*?(<td valign="top" class="v2" align="right">),$1,s || warn "bottom buttons";
+$html =~ s!<td valign="top" class="v2">.*?(<td valign="top" class="v2" align="right">)!$1!s || die "$infile: top buttons";
+$html =~ s!<td valign="top" class="v2"><a target="_new".*?(<td valign="top" class="v2" align="right">)!$1!s || warn "bottom buttons";
 
-$html =~ s,<p><b>URL</b>.*$,</body></html>,s || die "$infile: footer";
+$html =~ s!<p><b>URL</b>.*$!</body></html>!s || die "$infile: footer";
 
-$html =~ s,<!--.+?-->,,gs;
+$html =~ s;<!--.+?-->;;gs;
 
 $html =~ s!(<a\s+[^>]*href=")([^"]+)("[^>]*>)!xmlid2file("$2","$1","$3")!iegs || die "$infile: links";
 
@@ -78,3 +78,11 @@
 print "$outfile\n";
 print OUT $html;
 close(OUT);
+
+
+# Carry the input file's timestamps over to the output file.
+# stat() list indices: 8 = atime, 9 = mtime (10 would be ctime).
+# Low-precedence "or" keeps stat() in list context and applies the
+# error check to the call itself rather than to its last argument.
+my @s = stat($infile) or die "stat $infile: $!";
+utime($s[8], $s[9], $outfile) or die "touch $outfile: $!";