Skip to content

Commit 44c964f

Browse files
committed
lesson 8 commit
1 parent 4c46de9 commit 44c964f

File tree

4 files changed

+259203
-0
lines changed

4 files changed

+259203
-0
lines changed
Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 4,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import requests\n",
10+
"import csv"
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": null,
16+
"metadata": {},
17+
"outputs": [],
18+
"source": []
19+
},
20+
{
21+
"cell_type": "code",
22+
"execution_count": null,
23+
"metadata": {},
24+
"outputs": [],
25+
"source": []
26+
},
27+
{
28+
"cell_type": "code",
29+
"execution_count": 92,
30+
"metadata": {},
31+
"outputs": [],
32+
"source": [
33+
"def get_html(url):\n",
34+
" r = requests.get(url)\n",
35+
" return r.text"
36+
]
37+
},
38+
{
39+
"cell_type": "code",
40+
"execution_count": 99,
41+
"metadata": {},
42+
"outputs": [],
43+
"source": [
44+
"def write_csv(data):\n",
45+
" with open('websites.csv', 'a', newline = '\\n', encoding = 'utf-8') as f:\n",
46+
" order = ['name', 'url', 'description', 'traffic', 'percent']\n",
47+
" writer = csv.DictWriter(f, fieldnames = order)\n",
48+
" writer.writerow(data)"
49+
]
50+
},
51+
{
52+
"cell_type": "code",
53+
"execution_count": null,
54+
"metadata": {},
55+
"outputs": [],
56+
"source": []
57+
},
58+
{
59+
"cell_type": "code",
60+
"execution_count": null,
61+
"metadata": {},
62+
"outputs": [],
63+
"source": []
64+
},
65+
{
66+
"cell_type": "code",
67+
"execution_count": null,
68+
"metadata": {},
69+
"outputs": [],
70+
"source": []
71+
},
72+
{
73+
"cell_type": "code",
74+
"execution_count": 93,
75+
"metadata": {},
76+
"outputs": [],
77+
"source": [
78+
"# Single-page test run for reproducing the character-encoding error (write_csv must open the file with encoding='utf-8')\n",
79+
"\n",
80+
"# def main():\n",
81+
" \n",
82+
"# # &quote;\n",
83+
"# url = 'https://www.liveinternet.ru/rating/ru//today.tsv?page={}'.format(str(4))\n",
84+
"# response = get_html(url)\n",
85+
"# data = response.strip().split('\\n')[1:]\n",
86+
"# for row in data:\n",
87+
"# columns = row.strip().split('\\t')\n",
88+
"# name = columns[0]\n",
89+
"# url = columns[1]\n",
90+
"# description = columns[2]\n",
91+
"# traffic = columns[3]\n",
92+
"# percent = columns[4]\n",
93+
"\n",
94+
"# data = {'name':name,\n",
95+
"# 'url': url,\n",
96+
"# 'description': description,\n",
97+
"# 'traffic': traffic,\n",
98+
"# 'percent': percent}\n",
99+
"# write_csv(data)"
100+
]
101+
},
102+
{
103+
"cell_type": "code",
104+
"execution_count": 100,
105+
"metadata": {},
106+
"outputs": [],
107+
"source": [
108+
"def main():\n",
109+
" \n",
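110+
"    # NOTE(review): '&quote;' looks like a reminder about &quot; HTML entities in the response text - confirm or remove\n",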
111+
" for i in range(1, 8624):\n",
112+
" url = 'https://www.liveinternet.ru/rating/ru//today.tsv?page={}'.format(str(i))\n",
113+
" response = get_html(url)\n",
114+
" data = response.strip().split('\\n')[1:]\n",
115+
" for row in data:\n",
116+
" columns = row.strip().split('\\t')\n",
117+
" name = columns[0]\n",
118+
" url = columns[1]\n",
119+
" description = columns[2]\n",
120+
" traffic = columns[3]\n",
121+
" percent = columns[4]\n",
122+
"\n",
123+
" data = {'name':name,\n",
124+
" 'url': url,\n",
125+
" 'description': description,\n",
126+
" 'traffic': traffic,\n",
127+
" 'percent': percent}\n",
128+
" write_csv(data)"
129+
]
130+
},
131+
{
132+
"cell_type": "code",
133+
"execution_count": null,
134+
"metadata": {},
135+
"outputs": [],
136+
"source": []
137+
},
138+
{
139+
"cell_type": "code",
140+
"execution_count": null,
141+
"metadata": {},
142+
"outputs": [],
143+
"source": []
144+
},
145+
{
146+
"cell_type": "code",
147+
"execution_count": 101,
148+
"metadata": {},
149+
"outputs": [],
150+
"source": [
151+
"# NOTE: the full run takes more than 20 minutes\n",
152+
"if __name__ == '__main__':\n",
153+
" main()"
154+
]
155+
},
156+
{
157+
"cell_type": "code",
158+
"execution_count": null,
159+
"metadata": {},
160+
"outputs": [],
161+
"source": []
162+
},
163+
{
164+
"cell_type": "code",
165+
"execution_count": null,
166+
"metadata": {},
167+
"outputs": [],
168+
"source": []
169+
},
170+
{
171+
"cell_type": "code",
172+
"execution_count": null,
173+
"metadata": {},
174+
"outputs": [],
175+
"source": []
176+
},
177+
{
178+
"cell_type": "code",
179+
"execution_count": null,
180+
"metadata": {},
181+
"outputs": [],
182+
"source": []
183+
},
184+
{
185+
"cell_type": "code",
186+
"execution_count": null,
187+
"metadata": {},
188+
"outputs": [],
189+
"source": []
190+
},
191+
{
192+
"cell_type": "code",
193+
"execution_count": null,
194+
"metadata": {},
195+
"outputs": [],
196+
"source": []
197+
},
198+
{
199+
"cell_type": "code",
200+
"execution_count": null,
201+
"metadata": {},
202+
"outputs": [],
203+
"source": []
204+
},
205+
{
206+
"cell_type": "code",
207+
"execution_count": null,
208+
"metadata": {},
209+
"outputs": [],
210+
"source": []
211+
}
212+
],
213+
"metadata": {
214+
"kernelspec": {
215+
"display_name": "Python 3",
216+
"language": "python",
217+
"name": "python3"
218+
},
219+
"language_info": {
220+
"codemirror_mode": {
221+
"name": "ipython",
222+
"version": 3
223+
},
224+
"file_extension": ".py",
225+
"mimetype": "text/x-python",
226+
"name": "python",
227+
"nbconvert_exporter": "python",
228+
"pygments_lexer": "ipython3",
229+
"version": "3.6.5"
230+
}
231+
},
232+
"nbformat": 4,
233+
"nbformat_minor": 2
234+
}

0 commit comments

Comments
 (0)