Commit 8889dea

Author: Mario Burgos (committed)

Adds html.parser to BeautifulSoup([markup], html.parser) calls.

1 parent: 2ef1be6

21 files changed: +24 -24 lines changed
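
The change in each file is essentially the same: pass an explicit parser name to the BeautifulSoup constructor instead of letting bs4 guess one. A minimal sketch of the pattern (not taken from the commit itself), using one of the book's sample pages that already appears in these files; "html.parser" ships with the Python standard library, so no extra install is needed:

    from urllib.request import urlopen
    from bs4 import BeautifulSoup

    # Fetch a sample page already used in the chapter 2 examples
    html = urlopen("http://www.pythonscraping.com/pages/page3.html")

    # Naming the parser explicitly avoids bs4's "no parser was explicitly
    # specified" warning and keeps parsing behavior consistent across
    # machines that may have different parsers (lxml, html5lib) installed.
    bsObj = BeautifulSoup(html, "html.parser")
    print(bsObj.title.get_text())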

chapter11/4-solveCaptcha.py (1 addition, 1 deletion)

@@ -13,7 +13,7 @@ def cleanImage(imagePath):
     borderImage.save(imagePath)
 
 html = urlopen("http://www.pythonscraping.com/humans-only")
-bsObj = BeautifulSoup(html)
+bsObj = BeautifulSoup(html, "html.parser")
 #Gather prepopulated form values
 imageLocation = bsObj.find("img", {"title": "Image CAPTCHA"})["src"]
 formBuildId = bsObj.find("input", {"name":"form_build_id"})["value"]

chapter2/1-selectByClass.py (1 addition, 1 deletion)

@@ -2,7 +2,7 @@
 from bs4 import BeautifulSoup
 
 html = urlopen("http://www.pythonscraping.com/pages/warandpeace.html")
-bsObj = BeautifulSoup(html)
+bsObj = BeautifulSoup(html, "html.parser")
 nameList = bsObj.findAll("span", {"class":"green"})
 for name in nameList:
     print(name.get_text())

chapter2/2-selectByAttribute.py (1 addition, 1 deletion)

@@ -2,6 +2,6 @@
 from bs4 import BeautifulSoup
 
 html = urlopen("http://www.pythonscraping.com/pages/warandpeace.html")
-bsObj = BeautifulSoup(html)
+bsObj = BeautifulSoup(html, "html.parser")
 allText = bsObj.findAll(id="text")
 print(allText[0].get_text())

chapter2/3-findDescendants.py (1 addition, 1 deletion)

@@ -2,7 +2,7 @@
 from bs4 import BeautifulSoup
 
 html = urlopen("http://www.pythonscraping.com/pages/page3.html")
-bsObj = BeautifulSoup(html)
+bsObj = BeautifulSoup(html, "html.parser")
 
 for child in bsObj.find("table",{"id":"giftList"}).children:
     print(child)

chapter2/4-findSiblings.py (1 addition, 1 deletion)

@@ -1,7 +1,7 @@
 from urllib.request import urlopen
 from bs4 import BeautifulSoup
 html = urlopen("http://www.pythonscraping.com/pages/page3.html")
-bsObj = BeautifulSoup(html)
+bsObj = BeautifulSoup(html, "html.parser")
 
 for sibling in bsObj.find("table",{"id":"giftList"}).tr.next_siblings:
     print(sibling)

chapter2/5-findParents.py (2 additions, 2 deletions)

@@ -2,5 +2,5 @@
 from bs4 import BeautifulSoup
 
 html = urlopen("http://www.pythonscraping.com/pages/page3.html")
-bsObj = BeautifulSoup(html)
-print(bsObj.find("img",{"src":"../img/gifts/img1.jpg"}).parent.previous_sibling.get_text())
+bsObj = BeautifulSoup(html, "html.parser")
+print(bsObj.find("img",{"src":"../img/gifts/img1.jpg"}).parent.previous_sibling.get_text())

chapter2/6-regularExpressions.py (1 addition, 1 deletion)

@@ -3,7 +3,7 @@
 import re
 
 html = urlopen("http://www.pythonscraping.com/pages/page3.html")
-bsObj = BeautifulSoup(html)
+bsObj = BeautifulSoup(html, "html.parser")
 images = bsObj.findAll("img", {"src":re.compile("\.\.\/img\/gifts/img.*\.jpg")})
 for image in images:
     print(image["src"])

chapter2/7-lambdaExpressions.py (1 addition, 1 deletion)

@@ -1,7 +1,7 @@
 from urllib.request import urlopen
 from bs4 import BeautifulSoup
 html = urlopen("http://www.pythonscraping.com/pages/page2.html")
-bsObj = BeautifulSoup(html)
+bsObj = BeautifulSoup(html, "html.parser")
 tags = bsObj.findAll(lambda tag: len(tag.attrs) == 2)
 for tag in tags:
     print(tag)

chapter3/1-getWikiLinks.py (1 addition, 1 deletion)

@@ -7,7 +7,7 @@
 random.seed(datetime.datetime.now())
 def getLinks(articleUrl):
     html = urlopen("http://en.wikipedia.org"+articleUrl)
-    bsObj = BeautifulSoup(html)
+    bsObj = BeautifulSoup(html, "html.parser")
     return bsObj.find("div", {"id":"bodyContent"}).findAll("a", href=re.compile("^(/wiki/)((?!:).)*$"))
 links = getLinks("/wiki/Kevin_Bacon")
 while len(links) > 0:

chapter3/2-crawlWikipedia.py (1 addition, 1 deletion)

@@ -6,7 +6,7 @@
 def getLinks(pageUrl):
     global pages
     html = urlopen("http://en.wikipedia.org"+pageUrl)
-    bsObj = BeautifulSoup(html)
+    bsObj = BeautifulSoup(html, "html.parser")
     try:
         print(bsObj.h1.get_text())
         print(bsObj.find(id ="mw-content-text").findAll("p")[0])
