Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
chap2-4 finished
  • Loading branch information
masamichiIto committed Aug 22, 2023
commit c4ec9a8b7b2dc08f95c8ceb23d2222897971e9c7
8 changes: 7 additions & 1 deletion chap2_work.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re

# Download the page and parse it with the built-in HTML parser.
# The `with` block closes the HTTP response as soon as the body has been read,
# instead of leaving the connection open until garbage collection.
with urlopen('http://www.pythonscraping.com/pages/warandpeace.html') as html:
    bs = BeautifulSoup(html.read(), 'html.parser')
Expand All @@ -24,4 +25,9 @@
print(sibling)

## 2-2-3-3
# Starting from the <img> tag whose src is '../img/gifts/img1.jpg', step up to
# its parent tag, move to that parent's previous sibling, and print the
# sibling's text content.
# NOTE(review): the same statement appears twice in a row — this looks like the
# removed/added pair of a diff hunk rather than an intentional double print;
# confirm whether one copy should be dropped.
print(bs.find('img', {'src':'../img/gifts/img1.jpg'}).parent.previous_sibling.get_text())
print(bs.find('img', {'src':'../img/gifts/img1.jpg'}).parent.previous_sibling.get_text())

## chap2-4: regular expression
# Select only the <img> tags whose src looks like ../img/gifts/img<anything>.jpg
# and print each matching src value.
# The pattern is a raw string so no invalid "\/" escape reaches the parser
# (a SyntaxWarning on recent Python versions), and the dots that are meant
# literally ("..", ".jpg") are escaped so they no longer match any character.
gift_image_pattern = re.compile(r'\.\./img/gifts/img.*\.jpg')
images = bs.find_all('img', {'src': gift_image_pattern})
for image in images:
    print(image['src'])