--- get_book.sh	2003/12/14 19:11:30	1.1
+++ get_book.sh	2003/12/14 22:27:18	1.2
@@ -1,9 +1,13 @@
 #!/bin/sh
 
-#export http_proxy=http://proxy.pliva.hr:8080
+#export http_proxy=http://proxy:8080
 
-#isbn="0-201-41975-0"
-isbn="0-672-32240-4"
+if [ -z "$1" ] ; then	# the ISBN argument is mandatory
+	echo "Usage: $0 ISBN" >&2	# diagnostics belong on stderr
+	exit 1
+fi
+
+isbn="$1"	# ISBN of the book to fetch, taken from the command line
 
 wait=10
 
@@ -21,24 +25,48 @@
 }
 
+# geturl FILE ISBN -- harvest follow-up URLs from one downloaded page.
+#   $1  HTML file to scan (it is fed to "hindent -s")
+#   $2  ISBN; only URLs that contain it are kept (matched literally)
+# Lines carrying an href and a mode=[st][eo]c parameter are cut down to
+# the link target, "amp;" is dropped, the part in front of the query is
+# swapped for http://safari.oreilly.com/, a trailing "#" and "&srchText="
+# are removed and open=false becomes open=true.  URLs that also contain
+# the fixed "&s=1&...&o=1" option string and the ISBN are appended to the
+# file "in", sorted and de-duplicated within this call.
+# $1 and the mode= pattern are quoted because page names contain "?" and
+# the pattern contains "[...]", both of which the shell would otherwise
+# treat as globs.
 function geturl() {
-	hindent -s $1 | grep $2 | grep -i href | grep mode=[st][eo]c | \
+	hindent -s "$1" | grep -i href | grep 'mode=[st][eo]c' | \
 	sed -e 's/^.*<a.*href="//i' \
 		-e 's/".*//' -e 's/amp;//g' \
 		-e 's,^[^\?]*\?,http://safari.oreilly.com/,' \
 		-e 's/#$//' \
-		-e 's/srchText=//' | \
-		grep -v open=false | \
-		grep -v 'view=[A-Z].*%2F[^i]' | \
-		grep -v 'view=[A-Z].*/[^i]' | \
+		-e 's/\&srchText=//' \
+		-e 's/open=false/open=true/' | \
 		grep '&s=1&b=1&f=1&t=1&c=1&u=1&r=&o=1' | \
+		grep -F -- "$2" | \
 		sort -u >> in
 }
 
+# uniqurl -- prune and de-duplicate the URL list in "in".
+# The list is moved to in.tmp (which is left behind) and "in" is rebuilt
+# from it: first every URL without "view=" followed by an uppercase letter,
+# then, of the remaining view= URLs, only those matching ".../index".
+# NOTE(review): only a literal "/index" is recognised, so a view= URL whose
+# slash is encoded as %2F is dropped -- confirm that is intended.
+function uniqurl() {
+	mv in in.tmp
+	grep -v 'view=[A-Z]' in.tmp | sort -u > in
+	grep 'view=[A-Z].*/index' in.tmp | sort -u >> in
+}
+
 echo > in
-#mirror "http://safari.oreilly.com/?XmlId=$isbn"
+mirror "http://safari.oreilly.com/?XmlId=$isbn"
 
 echo "extract URLs from first page..."
 geturl "index.html?XmlId=$isbn" $isbn
+uniqurl
 
 mirror "-i in"
 
@@ -49,9 +59,9 @@
 done
 echo
 
-sort -u in > in2
+uniqurl	# prune and de-duplicate the URLs collected in "in"
 
-mirror "-i in2"
+mirror "-i in"	# mirror is defined outside this hunk; presumably it fetches the URLs listed in "in" -- confirm
 
 echo > in
 echo -n "extracting URLs [2]"
@@ -60,6 +70,20 @@
 	geturl $file $isbn
 done
 
-sort -u in > in2
+uniqurl
+
+mirror "-i in"
+
+# convert links in html
+# filter.pl is expected next to this script.  dirname is used rather than
+# stripping the basename off $0, which leaves an empty prefix (and thus
+# "/filter.pl") whenever $0 has no directory part.
+dir=$(dirname -- "$0")
+for page in index.html*; do
+	# an unmatched glob stays literal; skip it instead of passing it on
+	[ -e "$page" ] || continue
+	"$dir/filter.pl" "$page"
+done
+mkdir -p orig	# -p: a directory left by an earlier run is not an error
+mv -- index.html* orig/
 
-mirror "-i in2"