/[safari]/get_book.sh
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /get_book.sh

Parent Directory | Revision Log | View Patch

revision 1.1 by dpavlin, Sun Dec 14 19:11:30 2003 UTC revision 1.2 by dpavlin, Sun Dec 14 22:27:18 2003 UTC
# Line 1  Line 1 
1  #!/bin/sh  #!/bin/sh
2    
3  #export http_proxy=http://proxy.pliva.hr:8080  #export http_proxy=http://proxy:8080
4    
5  #isbn="0-201-41975-0"  if [ -z "$1" ] ; then
6  isbn="0-672-32240-4"          echo "Usage: $0 ISBN"
7            exit 1
8    fi
9    
10    isbn=$1;
11    
12  wait=10  wait=10
13    
# Line 21  function mirror() { Line 25  function mirror() {
25  }  }
26    
27  function geturl() {  function geturl() {
28          hindent -s $1 | grep $2 | grep -i href | grep mode=[st][eo]c | \          hindent -s $1 | grep -i href | grep mode=[st][eo]c | \
29          sed -e 's/^.*<a.*href="//i' \          sed -e 's/^.*<a.*href="//i' \
30                  -e 's/".*//' -e 's/amp;//g' \                  -e 's/".*//' -e 's/amp;//g' \
31                  -e 's,^[^\?]*\?,http://safari.oreilly.com/,' \                  -e 's,^[^\?]*\?,http://safari.oreilly.com/,' \
32                  -e 's/#$//' \                  -e 's/#$//' \
33                  -e 's/srchText=//' | \                  -e 's/\&srchText=//' \
34                  grep -v open=false | \                  -e 's/open=false/open=true/' | \
                 grep -v 'view=[A-Z].*%2F[^i]' | \  
                 grep -v 'view=[A-Z].*/[^i]' | \  
35                  grep '&s=1&b=1&f=1&t=1&c=1&u=1&r=&o=1' | \                  grep '&s=1&b=1&f=1&t=1&c=1&u=1&r=&o=1' | \
36                    grep $2 | \
37                  sort -u >> in                  sort -u >> in
38  }  }
39    
40    function uniqurl() {
41            mv in in.tmp
42            grep -v 'view=[A-Z]' in.tmp | sort -u > in
43            grep 'view=[A-Z].*/index' in.tmp | sort -u >> in
44    }
45    
46  echo > in  echo > in
47  #mirror "http://safari.oreilly.com/?XmlId=$isbn"  mirror "http://safari.oreilly.com/?XmlId=$isbn"
48    
49  echo "extract URLs from first page..."  echo "extract URLs from first page..."
50  geturl "index.html?XmlId=$isbn" $isbn  geturl "index.html?XmlId=$isbn" $isbn
51    uniqurl
52    
53  mirror "-i in"  mirror "-i in"
54    
# Line 49  ls index.html* | while read file ; do Line 59  ls index.html* | while read file ; do
59  done  done
60  echo  echo
61    
62  sort -u in > in2  uniqurl
63    
64  mirror "-i in2"  mirror "-i in"
65    
66  echo > in  echo > in
67  echo -n "extracting URLs [2]"  echo -n "extracting URLs [2]"
# Line 60  ls index.html* | while read file ; do Line 70  ls index.html* | while read file ; do
70          geturl $file $isbn          geturl $file $isbn
71  done  done
72    
73  sort -u in > in2  uniqurl
74    
75    mirror "-i in"
76    
77    # convert links in html
78    bn=`basename $0`
79    dir=`echo $0 | sed "s/$bn$//"`
80    ls index.html* | xargs -i $dir/filter.pl {}
81    mkdir orig
82    mv index.html* orig/
83    
 mirror "-i in2"  

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.2

  ViewVC Help
Powered by ViewVC 1.1.26