Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
chap3 updated
  • Loading branch information
masamichiIto committed Aug 23, 2023
commit d86e4b3caf9205dd62ed9733de1db1df24b4a7ce
18 changes: 17 additions & 1 deletion chap3_work.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,20 @@
for link in bs.find('div', {'id':'bodyContent'}).find_all('a', href=re.compile('^(/wiki/)((?!:).)*$')):
# Wrapping a pattern that starts with ?! in parentheses expresses "not followed by that pattern": (?!:). matches a single character that is not a colon, so ((?!:).)* matches a run of zero or more non-colon characters.
if 'href' in link.attrs:
print(link.attrs['href'])
# NOTE(review): the print below repeats the line above; it looks like the old/new pair of a diff rendering rather than an intentional second print -- confirm against the real file.
print(link.attrs['href'])

## Around p.35
import datetime
import random

# Seed the PRNG from the current wall-clock time. random.seed() only accepts
# None, int, float, str, bytes or bytearray; passing a datetime object raises
# TypeError on Python 3.11+, so pass the POSIX timestamp (a float) instead.
random.seed(datetime.datetime.now().timestamp())
def getLinks(articleUrl):
    """Fetch a Wikipedia article and return its internal article links.

    Args:
        articleUrl: Site-relative path of the article, e.g.
            '/wiki/Kevin_Bacon'. It is appended to
            'http://en.wikipedia.org'.

    Returns:
        The <a> tags found inside the div with id 'bodyContent' whose href
        starts with '/wiki/' and contains no colon. An empty list is
        returned when the page has no such div.

    Raises:
        Any error raised by urlopen propagates to the caller unchanged.
    """
    # Use the response as a context manager so the connection is closed
    # as soon as the document has been parsed.
    with urlopen('http://en.wikipedia.org{}'.format(articleUrl)) as html:
        bs = BeautifulSoup(html, 'html.parser')
    body = bs.find('div', {'id': 'bodyContent'})
    if body is None:
        # No article body on this page: report "no links" instead of
        # failing with AttributeError on None.
        return []
    # ((?!:).)* matches only non-colon characters, which filters out
    # hrefs such as /wiki/Category:... or /wiki/File:...
    return body.find_all('a', href=re.compile('^(/wiki/)((?!:).)*$'))

# Random walk over Wikipedia: start at the Kevin Bacon article and keep
# following one randomly chosen internal article link, printing each path,
# until a page yields no links.
links = getLinks('/wiki/Kevin_Bacon')
while links:
    # random.choice picks a uniformly random element; this replaces manual
    # indexing with random.randint(0, len(links)-1).
    newArticle = random.choice(links).attrs['href']
    print(newArticle)
    links = getLinks(newArticle)