Skip to content
Closed
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
cc1eb44
Move notebook to chapter1 folder
will-i-amv Jun 30, 2021
ba2dd87
Add 1st example to chapter1
will-i-amv Jun 30, 2021
874d3bb
Add 2nd example to chapter1
will-i-amv Jun 30, 2021
4121156
Add 3rd example to chapter1
will-i-amv Jun 30, 2021
09a9365
Add 4th example to chapter1
will-i-amv Jun 30, 2021
71ced46
Move notebook to chapter2 folder
will-i-amv Jun 30, 2021
47fbd43
Add 1st example to chapter2
will-i-amv Jun 30, 2021
0642548
Add 2nd example to chapter2
will-i-amv Jun 30, 2021
d1a874b
Add 3rd example to chapter2
will-i-amv Jun 30, 2021
5339a19
Add 4th example to chapter2
will-i-amv Jun 30, 2021
4d44423
Add 5th example to chapter2
will-i-amv Jun 30, 2021
e14730f
Add 6th example to chapter2
will-i-amv Jun 30, 2021
1e8db88
Add 7th example to chapter2
will-i-amv Jun 30, 2021
aec4ac7
Delete chapter2's notebook
will-i-amv Jun 30, 2021
5948047
Delete chapter1's notebook
will-i-amv Jun 30, 2021
54d62e6
Merge pull request #1 from will-i-amv-books/test
will-i-amv Jun 30, 2021
1360c6a
Move notebook to v2/chapter3
will-i-amv Jul 1, 2021
f683f24
Add 1st example to chapter3
will-i-amv Jul 1, 2021
1c133a6
Add 2nd example to chapter3
will-i-amv Jul 1, 2021
0261402
Add 3rd example to chapter3
will-i-amv Jul 1, 2021
108b604
Add 4th example to chapter3
will-i-amv Jul 1, 2021
9389220
Add 5th example to chapter3
will-i-amv Jul 1, 2021
c01d688
Add 6th example to chapter3
will-i-amv Jul 1, 2021
384171c
Move from v1 to v2 folder
will-i-amv Jul 1, 2021
2812eea
Add 7th example to chapter3
will-i-amv Jul 1, 2021
00715d8
Move notebook to v2/chapter4
will-i-amv Jul 2, 2021
6e7e293
Add back chapter1's notebook
will-i-amv Jul 2, 2021
3738c31
Add back chapter2's notebook
will-i-amv Jul 2, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add back chapter1's notebook
  • Loading branch information
will-i-amv authored Jul 2, 2021
commit 6e7e293d3702f58e97bcf140f1ebeede29f0f6fa
143 changes: 143 additions & 0 deletions v2/chapter1/Chapter01_BeginningToScrape.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"b'<html>\\n<head>\\n<title>A Useful Page</title>\\n</head>\\n<body>\\n<h1>An Interesting Title</h1>\\n<div>\\nLorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.\\n</div>\\n</body>\\n</html>\\n'\n"
]
}
],
"source": [
"from urllib.request import urlopen\n",
"\n",
"# Download the sample page and print its raw bytes. The context manager\n",
"# closes the HTTP response once the body has been read.\n",
"with urlopen('http://pythonscraping.com/pages/page1.html') as html:\n",
"    print(html.read())"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<h1>An Interesting Title</h1>\n"
]
}
],
"source": [
"from urllib.request import urlopen\n",
"from bs4 import BeautifulSoup\n",
"\n",
"# Read the sample page and parse it with the built-in 'html.parser' backend.\n",
"# The response is closed as soon as its body has been handed to BeautifulSoup.\n",
"with urlopen('http://www.pythonscraping.com/pages/page1.html') as html:\n",
"    bs = BeautifulSoup(html.read(), 'html.parser')\n",
"\n",
"# Tag-name attribute access yields the first matching tag in the document.\n",
"print(bs.h1)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The server could not be found!\n"
]
}
],
"source": [
"from urllib.request import urlopen\n",
"from urllib.error import HTTPError, URLError\n",
"\n",
"# HTTPError is a subclass of URLError, so it must be handled first;\n",
"# otherwise the URLError branch would also catch HTTP status errors.\n",
"try:\n",
"    html = urlopen(\"https://pythonscrapingthisurldoesnotexist.com\")\n",
"except HTTPError:\n",
"    print(\"The server returned an HTTP error\")\n",
"except URLError:\n",
"    print(\"The server could not be found!\")\n",
"else:\n",
"    # Only reached when the request succeeded; close the response after reading.\n",
"    with html:\n",
"        print(html.read())"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<h1>An Interesting Title</h1>\n"
]
}
],
"source": [
"from urllib.request import urlopen\n",
"from urllib.error import HTTPError, URLError\n",
"from bs4 import BeautifulSoup\n",
"\n",
"\n",
"def getTitle(url):\n",
"    \"\"\"Return the first <h1> tag inside <body> of the page at `url`, or None.\n",
"\n",
"    None is returned when the page cannot be fetched (the server answers with\n",
"    an HTTP error status or cannot be reached at all), when the parsed\n",
"    document has no <body>, or when the body contains no <h1>.\n",
"    \"\"\"\n",
"    try:\n",
"        response = urlopen(url)\n",
"    except (HTTPError, URLError):\n",
"        # HTTPError: the server replied with an error status.\n",
"        # URLError: the server could not be reached (bad host, no network, ...).\n",
"        return None\n",
"    # Close the connection as soon as the markup has been read.\n",
"    with response:\n",
"        markup = response.read()\n",
"    try:\n",
"        title = BeautifulSoup(markup, \"lxml\").body.h1\n",
"    except AttributeError:\n",
"        # .body is None when the document has no <body>, so .h1 raises.\n",
"        return None\n",
"    return title\n",
"\n",
"\n",
"title = getTitle(\"http://www.pythonscraping.com/pages/page1.html\")\n",
"# Identity comparison is the reliable way to test for None.\n",
"if title is None:\n",
"    print(\"Title could not be found\")\n",
"else:\n",
"    print(title)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}