Skip to content

Commit c0757c8

Browse files
committed
lesson 9 commit multiprocessing
1 parent b607523 commit c0757c8

File tree

7 files changed

+13609
-0
lines changed

7 files changed

+13609
-0
lines changed
Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 8,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import requests\n",
10+
"import csv\n",
11+
"from multiprocessing import Pool\n",
12+
"from multiprocessing.pool import ThreadPool\n",
13+
"from multiprocessing.dummy import Pool as Th_Pool\n",
14+
"from time import sleep"
15+
]
16+
},
17+
{
18+
"cell_type": "code",
19+
"execution_count": null,
20+
"metadata": {},
21+
"outputs": [],
22+
"source": []
23+
},
24+
{
25+
"cell_type": "code",
26+
"execution_count": null,
27+
"metadata": {},
28+
"outputs": [],
29+
"source": []
30+
},
31+
{
32+
"cell_type": "code",
33+
"execution_count": 2,
34+
"metadata": {},
35+
"outputs": [],
36+
"source": [
37+
"def get_html(url):\n",
38+
"# sleep(1)\n",
39+
" r = requests.get(url)\n",
40+
" return r.text"
41+
]
42+
},
43+
{
44+
"cell_type": "code",
45+
"execution_count": 3,
46+
"metadata": {},
47+
"outputs": [],
48+
"source": [
49+
"def write_csv(data):\n",
50+
" with open('websites_test.csv', 'a', newline = '\\n', encoding = 'utf-8') as file:\n",
51+
" order = ['name', 'url', 'description', 'traffic', 'percent']\n",
52+
" writer = csv.DictWriter(file, fieldnames = order)\n",
53+
" writer.writerow(data)"
54+
]
55+
},
56+
{
57+
"cell_type": "code",
58+
"execution_count": 4,
59+
"metadata": {},
60+
"outputs": [],
61+
"source": [
62+
"def get_page_data(text):\n",
63+
"\n",
64+
" data = text.strip().split('\\n')[1:]\n",
65+
" \n",
66+
" for row in data:\n",
67+
" columns = row.strip().split('\\t')\n",
68+
" name = columns[0]\n",
69+
" url = columns[1]\n",
70+
" description = columns[2]\n",
71+
" traffic = columns[3]\n",
72+
" percent = columns[4]\n",
73+
"\n",
74+
" data = {'name':name,\n",
75+
" 'url': url,\n",
76+
" 'description': description,\n",
77+
" 'traffic': traffic,\n",
78+
" 'percent': percent}\n",
79+
" write_csv(data)\n"
80+
]
81+
},
82+
{
83+
"cell_type": "code",
84+
"execution_count": 5,
85+
"metadata": {},
86+
"outputs": [],
87+
"source": [
88+
"def make_all(url):\n",
89+
" text = get_html(url)\n",
90+
" get_page_data(text)"
91+
]
92+
},
93+
{
94+
"cell_type": "code",
95+
"execution_count": null,
96+
"metadata": {},
97+
"outputs": [],
98+
"source": []
99+
},
100+
{
101+
"cell_type": "code",
102+
"execution_count": null,
103+
"metadata": {},
104+
"outputs": [],
105+
"source": []
106+
},
107+
{
108+
"cell_type": "code",
109+
"execution_count": 26,
110+
"metadata": {},
111+
"outputs": [],
112+
"source": [
113+
"def main():\n",
114+
" #8500\n",
115+
"# # &quote;\n",
116+
" url = 'https://www.liveinternet.ru/rating/ru//today.tsv?page={}'\n",
117+
" urls = [url.format(str(i)) for i in range(1,101)]\n",
118+
" \n",
119+
"#     multiprocessing.Pool doesn't work on Windows 10 in Jupyter: spawned worker processes cannot look up functions defined in the notebook\n",
120+
"# with Pool(2) as p:\n",
121+
"# p.map(make_all, urls)\n",
122+
"\n",
123+
"#     built-in map() is lazy in Python 3, so nothing runs until the result is iterated\n",
124+
"# map(make_all, urls)\n",
125+
"\n",
126+
"# just loop - works\n",
127+
"# for url in urls:\n",
128+
"# make_all(url)\n",
129+
"\n",
130+
"#     works - ThreadPool runs the worker function in threads instead of processes\n",
131+
"# wall time 5.74s for 100 pages\n",
132+
" with ThreadPool(5) as p:\n",
133+
" p.map(make_all, urls)\n",
134+
"\n",
135+
"# another ThreadPool (from multiprocessing.dummy import Pool)\n",
136+
"# wall time 5.94s for 100 pages\n",
137+
"# pool = Th_Pool(5)\n",
138+
"# pool.starmap(make_all, zip(urls))\n",
139+
"# pool.close()\n",
140+
" \n",
141+
"        # Wait for the worker threads to finish\n",
142+
"# pool.join()\n",
143+
"\n",
144+
"\n"
145+
]
146+
},
147+
{
148+
"cell_type": "code",
149+
"execution_count": 17,
150+
"metadata": {},
151+
"outputs": [],
152+
"source": [
153+
"# possible way to import function as module from another file\n",
154+
"# http://qaru.site/questions/6788872/python-multiprocessing-attributeerror-cant-get-attribute-abc"
155+
]
156+
},
157+
{
158+
"cell_type": "code",
159+
"execution_count": null,
160+
"metadata": {},
161+
"outputs": [],
162+
"source": []
163+
},
164+
{
165+
"cell_type": "code",
166+
"execution_count": 27,
167+
"metadata": {},
168+
"outputs": [
169+
{
170+
"name": "stdout",
171+
"output_type": "stream",
172+
"text": [
173+
"Wall time: 10.6 s\n"
174+
]
175+
}
176+
],
177+
"source": [
178+
"%%time\n",
179+
"# works over 20 minutes\n",
180+
"\n",
181+
"if __name__ == '__main__':\n",
182+
" main()"
183+
]
184+
},
185+
{
186+
"cell_type": "code",
187+
"execution_count": null,
188+
"metadata": {},
189+
"outputs": [],
190+
"source": []
191+
},
192+
{
193+
"cell_type": "code",
194+
"execution_count": null,
195+
"metadata": {},
196+
"outputs": [],
197+
"source": []
198+
},
199+
{
200+
"cell_type": "code",
201+
"execution_count": null,
202+
"metadata": {},
203+
"outputs": [],
204+
"source": []
205+
},
206+
{
207+
"cell_type": "code",
208+
"execution_count": null,
209+
"metadata": {},
210+
"outputs": [],
211+
"source": []
212+
},
213+
{
214+
"cell_type": "code",
215+
"execution_count": null,
216+
"metadata": {},
217+
"outputs": [],
218+
"source": []
219+
},
220+
{
221+
"cell_type": "code",
222+
"execution_count": null,
223+
"metadata": {},
224+
"outputs": [],
225+
"source": []
226+
},
227+
{
228+
"cell_type": "code",
229+
"execution_count": null,
230+
"metadata": {},
231+
"outputs": [],
232+
"source": []
233+
},
234+
{
235+
"cell_type": "code",
236+
"execution_count": null,
237+
"metadata": {},
238+
"outputs": [],
239+
"source": []
240+
}
241+
],
242+
"metadata": {
243+
"kernelspec": {
244+
"display_name": "Python 3",
245+
"language": "python",
246+
"name": "python3"
247+
},
248+
"language_info": {
249+
"codemirror_mode": {
250+
"name": "ipython",
251+
"version": 3
252+
},
253+
"file_extension": ".py",
254+
"mimetype": "text/x-python",
255+
"name": "python",
256+
"nbconvert_exporter": "python",
257+
"pygments_lexer": "ipython3",
258+
"version": "3.6.5"
259+
}
260+
},
261+
"nbformat": 4,
262+
"nbformat_minor": 2
263+
}

0 commit comments

Comments
 (0)