File tree Expand file tree Collapse file tree 1 file changed +37
-0
lines changed Expand file tree Collapse file tree 1 file changed +37
-0
lines changed Original file line number Diff line number Diff line change
1
+ import mechanize
2
+ import re
3
+ import urllib2
4
+ from random import *
5
# Scrape one page of search results for PDF links and download each one
# into the current directory as 'psu2<last-url-segment>.pdf'.

# Browser session with a desktop Chrome User-Agent; robots.txt handling is
# switched off so the search page can be fetched.
br = mechanize.Browser()
br.addheaders = [('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36')]
br.set_handle_robots(False)

# For page exploration: the typed value (surrounding whitespace removed) is
# appended as the `start=` query parameter of the search URL.
page = raw_input('Enter Page No:').strip()
search_request = urllib2.Request('https://www.google.co.in/search?q=gate+psu+2017+ext:pdf&start=' + page)
search_response = br.open(search_request)
html = search_response.read()

# Capture the text inside every <cite class="_Rm"> element; the script treats
# each captured string as a result URL. Raw string so `\s` reaches the regex
# engine without relying on Python passing unknown escapes through.
cite_pattern = re.compile(r'<cite\sclass="_Rm">(.+?)</cite>')

# Drop the <b>...</b> highlighting tags embedded in the captured text.
urls = [
    found.replace("<b>", "").replace("</b>", "")
    for found in cite_pattern.findall(html)
]
print(urls)

for url in urls:
    # The last path segment becomes part of the local file name.
    # NOTE: a URL that already ends in '.pdf' yields a doubled extension;
    # the naming scheme is kept as-is so existing file names do not change.
    last_segment = url.split("/")[-1]

    # Only prepend a scheme when the URL does not already start with one.
    # A plain substring test for "http" would wrongly skip this for
    # scheme-less URLs such as 'site.com/http_notes.pdf'.
    if url.startswith(("http://", "https://")):
        target = url
    else:
        target = "http://" + url

    try:
        remote = urllib2.urlopen(target)
        try:
            # Read the whole body before creating the file so a failed
            # transfer does not leave an empty file behind.
            payload = remote.read()
        finally:
            remote.close()
        with open('psu2' + last_segment + '.pdf', 'wb') as out:
            out.write(payload)
        print("Done")
    except urllib2.URLError:
        print("Sorry there exists a problem with this URL Please Download this Manually " + str(url))
37
+
You can’t perform that action at this time.
0 commit comments