
Commit 2ef1be6

Ryan Mitchell authored and committed
2 parents: e73888c + 6c3cdcc

8 files changed: +18 −19 lines


chapter1/2-beautifulSoup.py
Lines changed: 2 additions & 2 deletions

@@ -2,5 +2,5 @@
 from bs4 import BeautifulSoup
 
 html = urlopen("http://www.pythonscraping.com/exercises/exercise1.html")
-bsObj = BeautifulSoup(html.read());
-print(bsObj.h1)
+bsObj = BeautifulSoup(html.read())
+print(bsObj.h1)
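
A side note on the first changed line: current versions of BeautifulSoup emit a "no parser was explicitly specified" warning when called as above. A minimal sketch with an explicit parser (the parser choice is my assumption, not part of this commit):

    from urllib.request import urlopen
    from bs4 import BeautifulSoup

    html = urlopen("http://www.pythonscraping.com/exercises/exercise1.html")
    # Naming the parser explicitly avoids bs4's "no parser was
    # explicitly specified" warning
    bsObj = BeautifulSoup(html.read(), "html.parser")
    print(bsObj.h1)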

chapter11/3-readWebImages.py
Lines changed: 2 additions & 2 deletions

@@ -8,7 +8,7 @@
 driver.get("http://www.amazon.com/War-Peace-Leo-Nikolayevich-Tolstoy/dp/1427030200")
 time.sleep(2)
 
-driver.find_element_by_id("sitbLogoImg").click()
+driver.find_element_by_id("img-canvas").click()
 #The easiest way to get exactly one of every page
 imageList = set()
 
@@ -33,4 +33,4 @@
 p = subprocess.Popen(["tesseract", "page.jpg", "page"], stdout=subprocess.PIPE,stderr=subprocess.PIPE)
 p.wait()
 f = open("page.txt", "r")
-print(f.read())
+print(f.read())
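
The change above only swaps the element id being clicked. Unrelated to this commit, note that Selenium 4 later removed the find_element_by_* helper methods; a hedged sketch of the equivalent call under current Selenium (my adaptation, not what this commit uses):

    from selenium.webdriver.common.by import By

    # Selenium 4 replaced driver.find_element_by_id(...)
    # with driver.find_element(By.ID, ...)
    driver.find_element(By.ID, "img-canvas").click()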

chapter4/6-wikiHistories.py
Lines changed: 4 additions & 5 deletions

@@ -1,4 +1,5 @@
 from urllib.request import urlopen
+from urllib.request import HTTPError
 from bs4 import BeautifulSoup
 import datetime
 import json
@@ -9,8 +10,7 @@
 def getLinks(articleUrl):
     html = urlopen("http://en.wikipedia.org"+articleUrl)
     bsObj = BeautifulSoup(html)
-    return bsObj.find("div", {"id":"bodyContent"}).findAll("a",
-                      href=re.compile("^(/wiki/)((?!:).)*$"))
+    return bsObj.find("div", {"id":"bodyContent"}).findAll("a", href=re.compile("^(/wiki/)((?!:).)*$"))
 
 def getHistoryIPs(pageUrl):
     #Format of revision history pages is:
@@ -31,8 +31,7 @@ def getHistoryIPs(pageUrl):
 
 def getCountry(ipAddress):
     try:
-        response = urlopen("http://freegeoip.net/json/"
-                           +ipAddress).read().decode('utf-8')
+        response = urlopen("http://freegeoip.net/json/"+ipAddress).read().decode('utf-8')
     except HTTPError:
         return None
     responseJson = json.loads(response)
@@ -51,4 +50,4 @@ def getCountry(ipAddress):
                 print(historyIP+" is from "+country)
 
     newLink = links[random.randint(0, len(links)-1)].attrs["href"]
-    links = getLinks(newLink)
+    links = getLinks(newLink)
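
One caveat on the added import: it works only because urllib.request happens to re-export HTTPError; the class is actually defined in urllib.error. The more conventional import (my suggestion, not what this commit uses) would be:

    # HTTPError lives in urllib.error; urllib.request merely re-exports it
    from urllib.error import HTTPError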

chapter5/6-6DegreesCrawlWiki.py
Lines changed: 2 additions & 2 deletions

@@ -36,7 +36,7 @@ def insertLink(fromPageId, toPageId):
 def getLinks(pageUrl, recursionLevel):
     global pages
     if recursionLevel > 4:
-        return;
+        return
     pageId = insertPageIfNotExists(pageUrl)
     html = urlopen("http://en.wikipedia.org"+pageUrl)
     bsObj = BeautifulSoup(html)
@@ -51,4 +51,4 @@ def getLinks(pageUrl, recursionLevel):
             print("Skipping: "+str(link.attrs['href'])+" found on "+pageUrl)
 getLinks("/wiki/Kevin_Bacon", 0)
 cur.close()
-conn.close()
+conn.close()
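
A small robustness note on the closing lines: if getLinks raises partway through the crawl, cur.close() and conn.close() never run. A hedged variant using try/finally (my adaptation; conn and cur are the script's existing database handles):

    try:
        getLinks("/wiki/Kevin_Bacon", 0)
    finally:
        # Release the cursor and connection even if the crawl fails
        cur.close()
        conn.close()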

chapter6/5-readPdf.py
Lines changed: 2 additions & 2 deletions

@@ -18,7 +18,7 @@ def readPDF(pdfFile):
     retstr.close()
     return content
 
-pdfFile = urlopen("http://pythonscraping.com/pages/warandpeace/chapter1.pdf");
+pdfFile = urlopen("http://pythonscraping.com/pages/warandpeace/chapter1.pdf")
 outputString = readPDF(pdfFile)
 print(outputString)
-pdfFile.close()
+pdfFile.close()

chapter6/readPdf.py
Lines changed: 2 additions & 2 deletions

@@ -18,7 +18,7 @@ def readPDF(pdfFile):
     retstr.close()
     return content
 
-pdfFile = urlopen("http://pythonscraping.com/pages/warandpeace/chapter1.pdf");
+pdfFile = urlopen("http://pythonscraping.com/pages/warandpeace/chapter1.pdf")
 outputString = readPDF(pdfFile)
 print(outputString)
-pdfFile.close()
+pdfFile.close()
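
Both copies of readPdf.py receive the identical fix. An alternative that makes the trailing close() unnecessary is to treat the response as a context manager (a sketch reusing the readPDF helper these files define):

    # The object urlopen returns supports the context-manager protocol,
    # so the connection closes automatically at the end of the block
    with urlopen("http://pythonscraping.com/pages/warandpeace/chapter1.pdf") as pdfFile:
        print(readPDF(pdfFile))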

chapter8/3-markovGenerator.py
Lines changed: 3 additions & 3 deletions

@@ -17,14 +17,14 @@ def retrieveRandomWord(wordList):
 
 def buildWordDict(text):
     #Remove newlines and quotes
-    text = text.replace("\n", " ");
-    text = text.replace("\"", "");
+    text = text.replace("\n", " ")
+    text = text.replace("\"", "")
 
     #Make sure puncuation are treated as their own "word," so they will be included
     #in the Markov chain
     punctuation = [',','.',';',':']
     for symbol in punctuation:
-        text = text.replace(symbol, " "+symbol+" ");
+        text = text.replace(symbol, " "+symbol+" ")
 
     words = text.split(" ")
     #Filter out empty words
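
The padding added around each punctuation mark is what lets the later text.split(" ") treat punctuation as standalone tokens. A quick illustration with hypothetical input (not from the file):

    text = "He fell, hard."
    for symbol in [',', '.', ';', ':']:
        text = text.replace(symbol, " "+symbol+" ")
    # split(" ") now yields 'fell', ',', 'hard', '.' as separate tokens,
    # plus some empty strings that the script filters out afterward
    print(text.split(" "))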

chapter8/4-6DegreesFinder.py
Lines changed: 1 addition & 1 deletion

@@ -42,7 +42,7 @@ def searchBreadth(targetPageId, currentPageId, depth, nodes):
         print(found)
         for node in found:
             print(getUrl(node))
-        break;
+        break
 else:
     print("No path found")
 
4848
