1+ from  urllib .request  import  urlretrieve 
2+ from  urllib .request  import  urlopen 
3+ from  bs4  import  BeautifulSoup 
4+ import  subprocess 
5+ import  requests 
6+ from  PIL  import  Image 
7+ from  PIL  import  ImageOps 
8+ 
def cleanImage(imagePath, threshold=143, border=20):
    """Binarize an image and pad it with a white border, in place.

    Every pixel value below ``threshold`` is mapped to 0 and every other
    value to 255, then a white frame ``border`` pixels wide is added on all
    sides. The result overwrites the file at ``imagePath``.

    Args:
        imagePath: Path of the image file to clean; it is read and then
            overwritten with the cleaned image.
        threshold: Cut-off applied to each pixel value (default 143).
        border: Width in pixels of the white frame (default 20).
    """
    # point() returns a new, fully loaded image, so the source file handle
    # can be released before the same path is written to below.
    with Image.open(imagePath) as image:
        binarized = image.point(lambda x: 0 if x < threshold else 255)
    bordered = ImageOps.expand(binarized, border=border, fill='white')
    bordered.save(imagePath)
14+ 
# Fetch the comment form page; the context manager closes the HTTP response
# once the document has been parsed.
with urlopen("http://www.pythonscraping.com/humans-only") as html:
    bsObj = BeautifulSoup(html, "html.parser")

# Gather prepopulated form values
imageLocation = bsObj.find("img", {"title": "Image CAPTCHA"})["src"]
formBuildId = bsObj.find("input", {"name": "form_build_id"})["value"]
captchaSid = bsObj.find("input", {"name": "captcha_sid"})["value"]
captchaToken = bsObj.find("input", {"name": "captcha_token"})["value"]

# Download the CAPTCHA image and clean it up before running OCR on it.
captchaUrl = "http://pythonscraping.com" + imageLocation
urlretrieve(captchaUrl, "captcha.jpg")
cleanImage("captcha.jpg")

# Run Tesseract on the cleaned image; the recognized text is read back from
# captcha.txt below. communicate() drains both pipes while waiting, which
# avoids the deadlock that wait() can hit when a pipe buffer fills up.
p = subprocess.Popen(["tesseract", "captcha.jpg", "captcha"],
                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p.communicate()

# Clean any whitespace characters (spaces, newlines, form feeds, ...) so the
# length check below sees only the recognized characters.
with open("captcha.txt", "r") as f:
    captchaResponse = "".join(f.read().split())
print("Captcha solution attempt: " + captchaResponse)

# Only submit the form when the OCR result has the expected five characters.
if len(captchaResponse) == 5:
    params = {
        "captcha_token": captchaToken,
        "captcha_sid": captchaSid,
        "form_id": "comment_node_page_form",
        "form_build_id": formBuildId,
        "captcha_response": captchaResponse,
        "name": "Ryan Mitchell",
        "subject": "I come to seek the Grail",
        "comment_body[und][0][value]": "...and I am definitely not a bot",
    }
    r = requests.post("http://www.pythonscraping.com/comment/reply/10",
                      data=params)
    # Name the parser explicitly, matching the first parse above.
    responseObj = BeautifulSoup(r.text, "html.parser")
    messages = responseObj.find("div", {"class": "messages"})
    if messages is not None:
        print(messages.get_text())
else:
    print("There was a problem reading the CAPTCHA correctly!")
0 commit comments