1+ from urllib .request import urlretrieve
2+ from urllib .request import urlopen
3+ from bs4 import BeautifulSoup
4+ import subprocess
5+ import requests
6+ from PIL import Image
7+ from PIL import ImageOps
8+
def cleanImage(imagePath, threshold=143, border=20):
    """Binarize the image at ``imagePath`` and pad it with a white border.

    The file is overwritten in place with the processed result.

    Args:
        imagePath: Path of the image file to read and then overwrite.
        threshold: Pixel values below this become 0; all others become 255.
            Defaults to 143.
        border: Width, in pixels, of the white border added on every side.
            Defaults to 20.
    """
    # Open via a context manager so the source file handle is released
    # before the processed image is written back to the same path.
    with Image.open(imagePath) as source:
        binarized = source.point(
            lambda value: 0 if value < threshold else 255
        )
    bordered = ImageOps.expand(binarized, border=border, fill='white')
    bordered.save(imagePath)
14+
# Fetch the CAPTCHA-protected form page; the response is closed as soon as
# it has been parsed.
with urlopen("http://www.pythonscraping.com/humans-only") as html:
    bsObj = BeautifulSoup(html, "html.parser")

# Gather prepopulated form values
imageLocation = bsObj.find("img", {"title": "Image CAPTCHA"})["src"]
formBuildId = bsObj.find("input", {"name": "form_build_id"})["value"]
captchaSid = bsObj.find("input", {"name": "captcha_sid"})["value"]
captchaToken = bsObj.find("input", {"name": "captcha_token"})["value"]

# Download the CAPTCHA image and clean it up before running OCR on it.
captchaUrl = "http://pythonscraping.com" + imageLocation
urlretrieve(captchaUrl, "captcha.jpg")
cleanImage("captcha.jpg")

# tesseract is given the output base name "captcha"; the result is read back
# from captcha.txt below.
p = subprocess.Popen(
    ["tesseract", "captcha.jpg", "captcha"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
# communicate() drains both pipes while waiting. A bare wait() with
# stdout/stderr set to PIPE can deadlock if the child fills a pipe buffer.
p.communicate()

with open("captcha.txt", "r") as f:
    # Clean any whitespace characters (spaces, newlines, tabs, form feeds)
    captchaResponse = "".join(f.read().split())
print("Captcha solution attempt: " + captchaResponse)

# Only submit the form when the OCR result is exactly 5 characters long.
if len(captchaResponse) == 5:
    params = {
        "captcha_token": captchaToken,
        "captcha_sid": captchaSid,
        "form_id": "comment_node_page_form",
        "form_build_id": formBuildId,
        "captcha_response": captchaResponse,
        "name": "Ryan Mitchell",
        "subject": "I come to seek the Grail",
        "comment_body[und][0][value]": "...and I am definitely not a bot",
    }
    r = requests.post(
        "http://www.pythonscraping.com/comment/reply/10",
        data=params,
    )
    # Name the parser explicitly, matching the first BeautifulSoup call.
    responseObj = BeautifulSoup(r.text, "html.parser")
    messages = responseObj.find("div", {"class": "messages"})
    if messages is not None:
        print(messages.get_text())
else:
    print("There was a problem reading the CAPTCHA correctly!")
0 commit comments