--- get_book.sh 2004/02/15 11:40:43 1.7 +++ get_book.sh 2004/02/17 19:06:10 1.8 @@ -38,6 +38,7 @@ file=`echo $url | sed -e s,http://[^?]*?,index.html?, -e s,#.*$,, -e s,/,%2F,g` if [ -e "$file" ] ; then # echo "skip $url" + echo -n "." return fi @@ -68,7 +69,7 @@ sed -e 's/^.* in grep 'view=[A-Z].*/index' in.tmp | sort -u >> in + links=`wc -l in | cut -d" " -f1` + echo "found $links unique links" } function mirror_in() { @@ -108,10 +111,12 @@ exit 1 fi done + echo } echo -n > in mirror "http://safari.oreilly.com/?XmlId=$isbn" +echo echo "extract URLs from first page..." geturl "index.html?XmlId=$isbn" $isbn