chap3 started

REMitchell · masamichiIto · Aug 16, 2023 · Aug 16, 2023 · Aug 16, 2023 · Aug 21, 2023
commit 90bfc7aa1d1110bab61f738132da153bada546e6
diff --git a/chap2_work.py b/chap2_work.py
@@ -30,4 +30,5 @@
 ## chap2-4: regular expression
 images = bs.find_all('img', {'src':re.compile('..\/img\/gifts\/img.*.jpg')})
 for image in images:
-    print(image['src'])
+    print(image['src'])
+
diff --git a/chap3_work.py b/chap3_work.py
@@ -0,0 +1,10 @@
+from urllib.request import urlopen
+from bs4 import BeautifulSoup
+import re
+
+html = urlopen('http://en.wikipedia.org/wiki/Kevin_Bacon')
+bs = BeautifulSoup(html, 'html.parser')
+for link in bs.find('div', {'id':'bodyContent'}).find_all('a', href=re.compile('^(/wiki/)((?!:).)*$')):
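+    # (?!...) is a negative lookahead: wrapping a pattern that starts with ?! in parentheses means "not followed by that pattern". So (?!:). matches one character that is not a colon, and ((?!:).)* matches a run of zero or more non-colon characters.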
+    if 'href' in link.attrs:
+        print(link.attrs['href'])