From 6fd2f7e7c6043ded91537bc3cd1c2299036949d9 Mon Sep 17 00:00:00 2001
From: masamichiIto
Date: Wed, 16 Aug 2023 22:55:54 +0900
Subject: [PATCH 01/17] scrapetest.py created

---
 scrapetest.py | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 scrapetest.py

diff --git a/scrapetest.py b/scrapetest.py
new file mode 100644
index 0000000..66fdd21
--- /dev/null
+++ b/scrapetest.py
@@ -0,0 +1,4 @@
+from urllib.request import urlopen
+
+html = urlopen('http://pythonscraping.com/pages/page1.html')
+print(html.read())
\ No newline at end of file

From bd0f127a5fd17e88039e374e6a98672324d6ead7 Mon Sep 17 00:00:00 2001
From: masamichiIto
Date: Wed, 16 Aug 2023 22:57:51 +0900
Subject: [PATCH 02/17] scrapetest.py created

---
 scrapetest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scrapetest.py b/scrapetest.py
index 66fdd21..28ea3ea 100644
--- a/scrapetest.py
+++ b/scrapetest.py
@@ -1,4 +1,4 @@
 from urllib.request import urlopen
 
 html = urlopen('http://pythonscraping.com/pages/page1.html')
-print(html.read())
\ No newline at end of file
+print(html.read())

From 5255b7a8e40d38a962e0bdab95f3fad32ccab18e Mon Sep 17 00:00:00 2001
From: masamichiIto
Date: Wed, 16 Aug 2023 23:06:57 +0900
Subject: [PATCH 03/17] scrapetest.py updated

---
 scrapetest.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scrapetest.py b/scrapetest.py
index 28ea3ea..7076109 100644
--- a/scrapetest.py
+++ b/scrapetest.py
@@ -1,4 +1,7 @@
 from urllib.request import urlopen
+from bs4 import BeautifulSoup
 
 html = urlopen('http://pythonscraping.com/pages/page1.html')
-print(html.read())
+#print(html.read())
+bs = BeautifulSoup(html.read(), 'html.parser')
+print(bs.h1)
\ No newline at end of file

From 79daf92fb161659ec9bfee5ca59f4f15db653435 Mon Sep 17 00:00:00 2001
From: masamichiIto
Date: Mon, 21 Aug 2023 23:08:08 +0900
Subject: [PATCH 04/17] scrapetest.py updated

---
 scrapetest.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/scrapetest.py b/scrapetest.py
index 7076109..6916f2c 100644
--- a/scrapetest.py
+++ b/scrapetest.py
@@ -1,7 +1,18 @@
 from urllib.request import urlopen
+from urllib.error import HTTPError, URLError
 from bs4 import BeautifulSoup
 
-html = urlopen('http://pythonscraping.com/pages/page1.html')
+try:
+    html = urlopen('http://pythonscraping.com/pages/page1.html')
+except HTTPError as e:
+    print(e)
+    # return None, break, or run some other handling here
 #print(html.read())
-bs = BeautifulSoup(html.read(), 'html.parser')
-print(bs.h1)
\ No newline at end of file
+except URLError as e:
+    print("The server could not be found!")
+else:
+    # the program continues.
+    # NOTE: if the except clause returns or breaks, this else block is never executed, so it is not strictly needed.
+    print("It worked!")
+#bs = BeautifulSoup(html.read(), 'html.parser')
+#print(bs.h1) # fetches the first h1 tag from the top of the document; note that when there are multiple h1 tags, only the first one is returned.
\ No newline at end of file
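Note on PATCH 04: the order of the two except clauses matters here. HTTPError is a subclass of URLError, so the more specific handler has to come first; if URLError were caught first, it would swallow HTTP errors as well. A minimal sketch of the same pattern, assuming the pythonscraping.com example page is still reachable:

    from urllib.request import urlopen
    from urllib.error import HTTPError, URLError

    try:
        html = urlopen('http://pythonscraping.com/pages/page1.html')
    except HTTPError as e:
        # the server was reached but returned a 4xx/5xx status; e.code holds it
        print('HTTP error:', e.code)
    except URLError as e:
        # the server could not be reached at all; e.reason explains why
        print('Server unreachable:', e.reason)
    else:
        print(html.read()[:100])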
From b9ae315a2bebf4648ac3ae9271ea20bc6e5a7acd Mon Sep 17 00:00:00 2001
From: masamichiIto
Date: Mon, 21 Aug 2023 23:17:20 +0900
Subject: [PATCH 05/17] scrapetest2.py, which includes a try-except procedure,
 was created

---
 scrapetest2.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 scrapetest2.py

diff --git a/scrapetest2.py b/scrapetest2.py
new file mode 100644
index 0000000..5f6baee
--- /dev/null
+++ b/scrapetest2.py
@@ -0,0 +1,21 @@
+from urllib.request import urlopen
+from urllib.error import HTTPError, URLError
+from bs4 import BeautifulSoup
+
+def getTitle(url):
+    try:
+        html = urlopen(url)
+    except HTTPError as e:
+        return None
+    try:
+        bs = BeautifulSoup(html.read(), 'html.parser')
+        title = bs.body.h1
+    except AttributeError as e:
+        return None
+    return title
+
+title = getTitle('http://pythonscraping.com/pages/page1.html')
+if title == None:
+    print('Title could not be found')
+else:
+    print(title)

From d149571686f6b3d99c44b0063d9e81edbb05ed3e Mon Sep 17 00:00:00 2001
From: masamichiIto
Date: Tue, 22 Aug 2023 22:11:36 +0900
Subject: [PATCH 06/17] chap2-2 finished

---
 chap2_work.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 chap2_work.py

diff --git a/chap2_work.py b/chap2_work.py
new file mode 100644
index 0000000..fb5f504
--- /dev/null
+++ b/chap2_work.py
@@ -0,0 +1,27 @@
+from urllib.request import urlopen
+from bs4 import BeautifulSoup
+
+html = urlopen('http://www.pythonscraping.com/pages/warandpeace.html')
+bs = BeautifulSoup(html.read(), 'html.parser')
+
+nameList = bs.find_all('span', {'class':'green'})
+for name in nameList:
+    print(name.get_text())
+
+# counting the number of times 'the prince' appears
+nameList = bs.find_all(string='the prince')
+print(len(nameList))
+
+## 2-2-3-1
+html = urlopen('http://www.pythonscraping.com/pages/page3.html')
+bs = BeautifulSoup(html.read(), 'html.parser')
+
+for child in bs.find('table', {'id': 'giftList'}).children:
+    print(child)
+
+## 2-2-3-2
+for sibling in bs.find('table', {'id':'giftList'}).tr.next_siblings:
+    print(sibling)
+
+## 2-2-3-3
+print(bs.find('img', {'src':'../img/gifts/img1.jpg'}).parent.previous_sibling.get_text())
\ No newline at end of file

From c4ec9a8b7b2dc08f95c8ceb23d2222897971e9c7 Mon Sep 17 00:00:00 2001
From: masamichiIto
Date: Tue, 22 Aug 2023 22:27:24 +0900
Subject: [PATCH 07/17] chap2-4 finished

---
 chap2_work.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/chap2_work.py b/chap2_work.py
index fb5f504..bba3996 100644
--- a/chap2_work.py
+++ b/chap2_work.py
@@ -1,5 +1,6 @@
 from urllib.request import urlopen
 from bs4 import BeautifulSoup
+import re
 
 html = urlopen('http://www.pythonscraping.com/pages/warandpeace.html')
 bs = BeautifulSoup(html.read(), 'html.parser')
@@ -24,4 +25,9 @@
     print(sibling)
 
 ## 2-2-3-3
-print(bs.find('img', {'src':'../img/gifts/img1.jpg'}).parent.previous_sibling.get_text())
\ No newline at end of file
+print(bs.find('img', {'src':'../img/gifts/img1.jpg'}).parent.previous_sibling.get_text())
+
+## chap2-4: regular expression
+images = bs.find_all('img', {'src':re.compile('..\/img\/gifts\/img.*.jpg')})
+for image in images:
+    print(image['src'])
\ No newline at end of file
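Note on PATCH 07: the pattern '..\/img\/gifts\/img.*.jpg' works, but only loosely: '\/' is an unneeded escape that newer Python versions warn about in plain string literals, and the unescaped dots match any character rather than a literal period. A tighter raw-string equivalent, checked offline against invented sample src values:

    import re

    # raw-string version of the pattern used in the patch
    pattern = re.compile(r'\.\./img/gifts/img.*\.jpg')

    for src in ['../img/gifts/img1.jpg', '../img/gifts/logo.png']:
        print(src, '->', bool(pattern.search(src)))
    # ../img/gifts/img1.jpg -> True
    # ../img/gifts/logo.png -> False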
From 90bfc7aa1d1110bab61f738132da153bada546e6 Mon Sep 17 00:00:00 2001
From: masamichiIto
Date: Wed, 23 Aug 2023 22:55:14 +0900
Subject: [PATCH 08/17] chap3 started

---
 chap2_work.py |  3 ++-
 chap3_work.py | 10 ++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)
 create mode 100644 chap3_work.py

diff --git a/chap2_work.py b/chap2_work.py
index bba3996..c276dae 100644
--- a/chap2_work.py
+++ b/chap2_work.py
@@ -30,4 +30,5 @@
 ## chap2-4: regular expression
 images = bs.find_all('img', {'src':re.compile('..\/img\/gifts\/img.*.jpg')})
 for image in images:
-    print(image['src'])
\ No newline at end of file
+    print(image['src'])
+
diff --git a/chap3_work.py b/chap3_work.py
new file mode 100644
index 0000000..7e619bf
--- /dev/null
+++ b/chap3_work.py
@@ -0,0 +1,10 @@
+from urllib.request import urlopen
+from bs4 import BeautifulSoup
+import re
+
+html = urlopen('http://en.wikipedia.org/wiki/Kevin_Bacon')
+bs = BeautifulSoup(html, 'html.parser')
+for link in bs.find('div', {'id':'bodyContent'}).find_all('a', href=re.compile('^(/wiki/)((?!:).)*$')):
+    # Wrapping a pattern that starts with ?! in parentheses expresses "does not contain that pattern": (?!:). matches a single character that is not a colon, and ((?!:).)* matches a string of zero or more non-colon characters
+    if 'href' in link.attrs:
+        print(link.attrs['href'])
\ No newline at end of file

From d86e4b3caf9205dd62ed9733de1db1df24b4a7ce Mon Sep 17 00:00:00 2001
From: masamichiIto
Date: Wed, 23 Aug 2023 23:01:57 +0900
Subject: [PATCH 09/17] chap3 updated

---
 chap3_work.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/chap3_work.py b/chap3_work.py
index 7e619bf..6ec9d4d 100644
--- a/chap3_work.py
+++ b/chap3_work.py
@@ -7,4 +7,20 @@
 for link in bs.find('div', {'id':'bodyContent'}).find_all('a', href=re.compile('^(/wiki/)((?!:).)*$')):
     # Wrapping a pattern that starts with ?! in parentheses expresses "does not contain that pattern": (?!:). matches a single character that is not a colon, and ((?!:).)* matches a string of zero or more non-colon characters
     if 'href' in link.attrs:
-        print(link.attrs['href'])
\ No newline at end of file
+        print(link.attrs['href'])
+
+## around p. 35
+import datetime
+import random
+
+random.seed(datetime.datetime.now())
+def getLinks(articleUrl):
+    html = urlopen('http://en.wikipedia.org{}'.format(articleUrl))
+    bs = BeautifulSoup(html, 'html.parser')
+    return bs.find('div', {'id':'bodyContent'}).find_all('a', href=re.compile('^(/wiki/)((?!:).)*$'))
+
+links = getLinks('/wiki/Kevin_Bacon')
+while len(links) > 0:
+    newArticle = links[random.randint(0, len(links)-1)].attrs['href']
+    print(newArticle)
+    links = getLinks(newArticle)
\ No newline at end of file

From 7c01c06a193583130bcd97ae5ac66575d7bc6663 Mon Sep 17 00:00:00 2001
From: masamichiIto
Date: Wed, 23 Aug 2023 23:14:03 +0900
Subject: [PATCH 10/17] chap3 updated

---
 chap3_work.py | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/chap3_work.py b/chap3_work.py
index 6ec9d4d..5a6cab8 100644
--- a/chap3_work.py
+++ b/chap3_work.py
@@ -10,6 +10,8 @@
         print(link.attrs['href'])
 
 ## around p. 35
+"""
+# The stopping condition is rarely satisfied, so this has to be interrupted with Ctrl+C at some point
 import datetime
 import random
 
@@ -23,4 +25,23 @@ def getLinks(articleUrl):
 while len(links) > 0:
     newArticle = links[random.randint(0, len(links)-1)].attrs['href']
     print(newArticle)
-    links = getLinks(newArticle)
\ No newline at end of file
+    links = getLinks(newArticle)
+"""
+
+## 3-2
+# The stopping condition is rarely satisfied, so this has to be interrupted with Ctrl+C at some point
+pages = set()
+def getLinks(pageUrl):
+    global pages
+    html = urlopen('http://en.wikipedia.org{}'.format(pageUrl))
+    bs = BeautifulSoup(html, 'html.parser')
+    for link in bs.find_all('a', href=re.compile('^(/wiki/)')):
+        if 'href' in link.attrs:
+            if link.attrs['href'] not in pages:
+                # we have encountered a new page
+                newPage = link.attrs['href']
+                print(newPage)
+                pages.add(newPage)
+                getLinks(newPage)
+
+getLinks('')
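Note on PATCHES 08-10: the comment in PATCH 08 describes the negative-lookahead idiom; here is a small offline demonstration of what ^(/wiki/)((?!:).)*$ accepts and rejects (the sample hrefs are invented):

    import re

    pattern = re.compile('^(/wiki/)((?!:).)*$')
    for href in ['/wiki/Kevin_Bacon', '/wiki/Category:Actors', '/wiki/File:Kevin_Bacon.jpg']:
        # article links contain no colon; namespaced pages like Category: or File: do
        print(href, '->', bool(pattern.match(href)))
    # /wiki/Kevin_Bacon -> True
    # /wiki/Category:Actors -> False
    # /wiki/File:Kevin_Bacon.jpg -> False

Separately, random.seed(datetime.datetime.now()) in PATCH 09 seeds from a datetime object, which recent Python versions (3.11+) reject with a TypeError; passing datetime.datetime.now().timestamp() instead, or simply omitting the seed call, keeps it working.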
From 7cbed41405a278b03266488a04dbb1c94eb76a9a Mon Sep 17 00:00:00 2001
From: masamichiIto
Date: Wed, 23 Aug 2023 23:16:46 +0900
Subject: [PATCH 11/17] prepare for 3-2-1

---
 chap3_work.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/chap3_work.py b/chap3_work.py
index 5a6cab8..340e766 100644
--- a/chap3_work.py
+++ b/chap3_work.py
@@ -29,6 +29,7 @@ def getLinks(articleUrl):
 """
 
 ## 3-2
+"""
 # The stopping condition is rarely satisfied, so this has to be interrupted with Ctrl+C at some point
 pages = set()
 def getLinks(pageUrl):
@@ -45,3 +46,6 @@ def getLinks(pageUrl):
                 getLinks(newPage)
 
 getLinks('')
+"""
+
+## 3-2-1
\ No newline at end of file

From 9a835fde92507e0fde2968a07f0f5701840358b8 Mon Sep 17 00:00:00 2001
From: masamichiIto
Date: Thu, 5 Oct 2023 23:59:25 +0900
Subject: [PATCH 12/17] finished 3-2-1

---
 chap3_work.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/chap3_work.py b/chap3_work.py
index 340e766..e393566 100644
--- a/chap3_work.py
+++ b/chap3_work.py
@@ -48,4 +48,26 @@ def getLinks(pageUrl):
 getLinks('')
 """
 
-## 3-2-1
\ No newline at end of file
+## 3-2-1
+pages = set()
+def getLinks(pageUrl):
+    global pages
+    html = urlopen('http://en.wikipedia.org{}'.format(pageUrl))
+    bs = BeautifulSoup(html, 'html.parser')
+    try:
+        print(bs.h1.get_text())
+        print(bs.find(id='mw-content-text').find_all('p')[0])
+        print(bs.find(id='ca-edit').find('span').find('a').attrs['href'])
+    except AttributeError:
+        print('This page is missing something! Continuing.')
+
+    for link in bs.find_all('a', href=re.compile('^(/wiki/)')):
+        if 'href' in link.attrs:
+            if link.attrs['href'] not in pages:
+                # we have encountered a new page
+                newPage = link.attrs['href']
+                print('-'*20)
+                print(newPage)
+                pages.add(newPage)
+                getLinks(newPage)
+getLinks('')
\ No newline at end of file
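Note on PATCH 12 (and the 3-2 crawler in PATCH 10): getLinks() calls itself once for every newly discovered page, so the recursion depth grows with the size of the crawl and will hit Python's default recursion limit of 1000 frames long before English Wikipedia runs out of links, ending in a RecursionError. An iterative, breadth-first sketch of the same crawl that avoids the limit:

    import re
    from collections import deque
    from urllib.request import urlopen
    from bs4 import BeautifulSoup

    pages = set()
    queue = deque([''])  # start from the front page, as getLinks('') does
    while queue:
        pageUrl = queue.popleft()
        html = urlopen('http://en.wikipedia.org{}'.format(pageUrl))
        bs = BeautifulSoup(html, 'html.parser')
        for link in bs.find_all('a', href=re.compile('^(/wiki/)')):
            if 'href' in link.attrs and link.attrs['href'] not in pages:
                newPage = link.attrs['href']
                print(newPage)
                pages.add(newPage)
                queue.append(newPage)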
From a45bd5ca702d321727d5adc294c00c0a6ea86ea2 Mon Sep 17 00:00:00 2001
From: masamichiIto
Date: Fri, 6 Oct 2023 00:36:21 +0900
Subject: [PATCH 13/17] working chap 3-3

---
 chap3_work.py | 61 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 59 insertions(+), 2 deletions(-)

diff --git a/chap3_work.py b/chap3_work.py
index e393566..5353dc7 100644
--- a/chap3_work.py
+++ b/chap3_work.py
@@ -2,13 +2,15 @@
 from bs4 import BeautifulSoup
 import re
 
+"""
 html = urlopen('http://en.wikipedia.org/wiki/Kevin_Bacon')
 bs = BeautifulSoup(html, 'html.parser')
 for link in bs.find('div', {'id':'bodyContent'}).find_all('a', href=re.compile('^(/wiki/)((?!:).)*$')):
     # Wrapping a pattern that starts with ?! in parentheses expresses "does not contain that pattern": (?!:). matches a single character that is not a colon, and ((?!:).)* matches a string of zero or more non-colon characters
     if 'href' in link.attrs:
         print(link.attrs['href'])
-
+"""
+
 ## around p. 35
 """
 # The stopping condition is rarely satisfied, so this has to be interrupted with Ctrl+C at some point
@@ -70,4 +73,58 @@ def getLinks(pageUrl):
                 print(newPage)
                 pages.add(newPage)
                 getLinks(newPage)
-getLinks('')
\ No newline at end of file
+getLinks('')
+
+# 3-3
+from urllib.parse import urlparse
+import datetime
+import random
+
+pages = set()
+random.seed(datetime.datetime.now())
+
+# Retrieves a list of all internal links found on a page
+def getInternalLinks(bs, includeUrl):
+    includeUrl = '{}://{}'.format(urlparse(includeUrl).scheme, urlparse(includeUrl).netloc)
+    internalLinks = []
+    # Find all links that begin with "/"
+    for link in bs.find_all('a', href=re.compile('^(/|.*'+includeUrl+')')):
+        if link.attrs['href'] is not None:
+            if link.attrs['href'] not in internalLinks:
+                if (link.attrs['href'].startswith('/')):
+                    internalLinks.append(includeUrl+link.attrs['href'])
+                else:
+                    internalLinks.append(link.attrs['href'])
+    return internalLinks
+
+# Retrieves a list of all external links found on a page
+def getExternalLinks(bs, excludeUrl):
+    externalLinks = []
+    # Find all links that start with 'http' or 'www' and do not contain the current URL
+    for link in bs.find_all('a', href=re.compile('^(http|www)((?!'+excludeUrl+').)*$')):
+        if link.attrs['href'] is not None:
+            if link.attrs['href'] not in externalLinks:
+                externalLinks.append(link.attrs['href'])
+    return externalLinks
+
+def getRandomExternalLink(startingPage):
+    html = urlopen(startingPage)
+    bs = BeautifulSoup(html, 'html.parser')
+    externalLinks = getExternalLinks(bs, urlparse(startingPage).netloc)
+    if len(externalLinks) == 0:
+        print('No external links, looking around the site for one')
+        domain = '{}://{}'.format(urlparse(startingPage).scheme, urlparse(startingPage).netloc)
+        internalLinks = getInternalLinks(bs, domain)
+        print("internal links: \n", internalLinks, "\n ==========")
+        return getRandomExternalLink(internalLinks[random.randint(0, len(internalLinks)-1)])
+    else:
+        return externalLinks[random.randint(0, len(externalLinks)-1)]
+
+def followExternalOnly(startingSite):
+    externalLink = getRandomExternalLink(startingSite)
+    print('Random external link is: {}'.format(externalLink))
+    followExternalOnly(externalLink)
+
+followExternalOnly('http://oreilly.com')
\ No newline at end of file

From 72b35c9b0dbdf47508c2185d1ea9e7c5282bf903 Mon Sep 17 00:00:00 2001
From: masamichiIto
Date: Fri, 6 Oct 2023 00:42:31 +0900
Subject: [PATCH 14/17] finished chap 3-3

---
 chap3_work.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/chap3_work.py b/chap3_work.py
index 5353dc7..c24e245 100644
--- a/chap3_work.py
+++ b/chap3_work.py
@@ -127,4 +127,27 @@ def followExternalOnly(startingSite):
     print('Random external link is: {}'.format(externalLink))
     followExternalOnly(externalLink)
 
-followExternalOnly('http://oreilly.com')
\ No newline at end of file
+#followExternalOnly('http://oreilly.com')
+## Collect a list of all external URLs found on the site
+allExtLinks = set()
+allIntLinks = set()
+
+def getAllExternalLinks(siteUrl):
+    html = urlopen(siteUrl)
+    domain = '{}://{}'.format(urlparse(siteUrl).scheme, urlparse(siteUrl).netloc)
+    bs = BeautifulSoup(html, 'html.parser')
+    internalLinks = getInternalLinks(bs, domain)
+    externalLinks = getExternalLinks(bs, domain)
+
+    for link in externalLinks:
+        if link not in allExtLinks:
+            allExtLinks.add(link)
+            print(link)
+
+    for link in internalLinks:
+        if link not in allIntLinks:
+            allIntLinks.add(link)
+            getAllExternalLinks(link)
+
+allIntLinks.add('http://oreilly.com')
+getAllExternalLinks('http://oreilly.com')
\ No newline at end of file
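Note on PATCH 14: getAllExternalLinks() stops with an uncaught exception on the first dead, malformed, or non-HTTP link it meets. A more tolerant sketch, reusing getInternalLinks(), getExternalLinks(), and the allExtLinks/allIntLinks sets from the patch (the exception list is an assumption about what urlopen can raise here):

    from urllib.error import HTTPError, URLError

    def getAllExternalLinksSafe(siteUrl):
        try:
            html = urlopen(siteUrl)
        except (HTTPError, URLError, ValueError):
            return  # skip unreachable or malformed URLs and keep crawling
        domain = '{}://{}'.format(urlparse(siteUrl).scheme, urlparse(siteUrl).netloc)
        bs = BeautifulSoup(html, 'html.parser')
        for link in getExternalLinks(bs, domain):
            if link not in allExtLinks:
                allExtLinks.add(link)
                print(link)
        for link in getInternalLinks(bs, domain):
            if link not in allIntLinks:
                allIntLinks.add(link)
                getAllExternalLinksSafe(link)

It still recurses once per internal page, so the recursion-depth caveat noted after PATCH 12 applies here as well.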
From a10028d90aeb739eccf2d28d81e07d75c4ab47a6 Mon Sep 17 00:00:00 2001
From: masamichiIto
Date: Sat, 7 Oct 2023 13:40:55 +0900
Subject: [PATCH 15/17] finished chap 3-3

---
 chap3_work.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chap3_work.py b/chap3_work.py
index c24e245..5591cc2 100644
--- a/chap3_work.py
+++ b/chap3_work.py
@@ -150,4 +150,4 @@ def getAllExternalLinks(siteUrl):
             getAllExternalLinks(link)
 
 allIntLinks.add('http://oreilly.com')
-getAllExternalLinks('http://oreilly.com')
\ No newline at end of file
+getAllExternalLinks('http://oreilly.com')

From 963bdd0dceb9044fa0ada001da83f006bb20d789 Mon Sep 17 00:00:00 2001
From: masamichiIto
Date: Sat, 7 Oct 2023 13:43:15 +0900
Subject: [PATCH 16/17] chap4_work.py created

---
 chap          | 0
 chap4_work.py | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 chap
 create mode 100644 chap4_work.py

diff --git a/chap b/chap
new file mode 100644
index 0000000..e69de29
diff --git a/chap4_work.py b/chap4_work.py
new file mode 100644
index 0000000..e69de29

From d35df292a111360c125b8b08d0ceb4981c9fa647 Mon Sep 17 00:00:00 2001
From: masamichiIto
Date: Sun, 8 Oct 2023 23:24:42 +0900
Subject: [PATCH 17/17] chap4_work.py was modified for the 4.2 exercise.

---
 chap4_work.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/chap4_work.py b/chap4_work.py
index e69de29..d8fbb40 100644
--- a/chap4_work.py
+++ b/chap4_work.py
@@ -0,0 +1,32 @@
+import requests
+from bs4 import BeautifulSoup
+
+class Content:
+    def __init__(self, url, title, body) -> None:
+        self.url = url
+        self.title = title
+        self.body = body
+
+def getPage(url):
+    req = requests.get(url)
+    return BeautifulSoup(req.text, 'html.parser')
+
+def scrapeNYTimes(url):
+    bs = getPage(url)
+    title = bs.find('h1').text
+    lines = bs.select('div.StoryBodyCompanionColumn div p')
+    body = '\n'.join([line.text for line in lines])
+    return Content(url, title, body)
+
+def scrapeBrookings(url):
+    bs = getPage(url)
+    title = bs.find('h1').text
+    body = bs.find('div', {'class','post-body'}).text
+    return Content(url, title, body)
+
+url = ('https://www.brookings.edu/blog/future-development/2018/01/26/delivering-inclusive-urban-access-3-uncomfortable-truths/')
+
+content = scrapeBrookings(url)
+print('title: {}'.format(content.title))
+print('URL: {}'.format(content.url))
+print(content.body)
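Note on PATCH 17: two details worth flagging. First, {'class','post-body'} in scrapeBrookings() is a set literal, not a dict; BeautifulSoup happens to tolerate it (a non-dict value is matched against the class attribute), but the intended spelling is the dict {'class': 'post-body'}. Second, getPage() assumes every request succeeds. A more defensive sketch using the requests API, reusing the Content class from the patch:

    import requests
    from bs4 import BeautifulSoup

    def getPage(url):
        try:
            req = requests.get(url, timeout=10)
            req.raise_for_status()  # raises requests.HTTPError on 4xx/5xx responses
        except requests.RequestException as e:
            print('Request failed:', e)
            return None
        return BeautifulSoup(req.text, 'html.parser')

    def scrapeBrookings(url):
        bs = getPage(url)
        if bs is None:
            return None
        title = bs.find('h1').text
        body = bs.find('div', {'class': 'post-body'}).text
        return Content(url, title, body)

Callers then need to handle a None return, e.g. check content = scrapeBrookings(url) against None before printing its fields.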