|
1 | 1 | # Web Scraping COVID-19 Data for Top 10 Countries Affected (Issue #31)
|
2 | 2 | # https://github.com/Python-World/Python_and_the_Web/issues/31
|
3 | 3 | # Contributed by @tauseefmohammed2 : https://github.com/tauseefmohammed2
|
| 4 | +# Updated by @officialpm : https://github.com/officialpm |
4 | 5 |
|
5 |
| -# Requirements : |
| 6 | +# Requirements : |
6 | 7 | # Selenium (Web Scraping Python Library. Install : pip install selenium)
|
7 |
| -# ChromeDriver (Used for Automated Navigation to URLs, which are Provided by Selenium as Input. Download : https://chromedriver.chromium.org/downloads) |
8 | 8 | # Pandas (Data Manipulation Library. Install : pip install pandas)
|
9 | 9 |
|
10 |
| -from selenium import webdriver |
11 |
| -import pandas |
12 | 10 | import datetime
|
13 | 11 |
|
| 12 | +import pandas |
| 13 | +from selenium import webdriver |
| 14 | +from webdriver_manager.chrome import ChromeDriverManager |
| 15 | + |
14 | 16 | td = datetime.date.today()
|
15 | 17 | wait_imp = 10
|
16 | 18 | CO = webdriver.ChromeOptions()
|
17 |
| -CO.add_experimental_option('useAutomationExtension', False) |
18 |
| -CO.add_argument('--ignore-certificate-errors') |
19 |
| -CO.add_argument('--headless') |
| 19 | +CO.add_experimental_option("useAutomationExtension", False) |
| 20 | +CO.add_argument("--ignore-certificate-errors") |
| 21 | +CO.add_argument("--headless") |
20 | 22 |
|
21 | 23 | # Creating WebDriver Object
|
22 |
| -wd = webdriver.Chrome(r'C:\\Users\\TEMP\\Downloads\\chromedriver_win32 (1)\\chromedriver.exe',options=CO) |
23 |
| -# Replace the Above Location of chromedriver.exe with your Saved Location |
| 24 | +wd = webdriver.Chrome(ChromeDriverManager().install(), options=CO) |
| 25 | +# ChromeDriverManager Downloads a Matching chromedriver Automatically, so No Local Path to chromedriver.exe Needs to be Set |
24 | 26 |
|
25 |
| -print ("Date:",td.strftime("%b-%d-%Y")) |
26 |
| -print ("--------------------------------------------------------------------------------------------") |
27 |
| -print (" COVID-19 Statistics From Around the World (Top 10 Countries) ") |
28 |
| -print ("--------------------------------------------------------------------------------------------") |
| 27 | +print("Date:", td.strftime("%b-%d-%Y")) |
| 28 | +print( |
| 29 | + "--------------------------------------------------------------------------------------------" |
| 30 | +) |
| 31 | +print( |
| 32 | + " COVID-19 Statistics From Around the World (Top 10 Countries) " |
| 33 | +) |
| 34 | +print( |
| 35 | + "--------------------------------------------------------------------------------------------" |
| 36 | +) |
29 | 37 |
|
30 | 38 | # Using get() method to Open a URL (WHO)
|
31 | 39 | wd.get("https://www.who.int/emergencies/diseases/novel-coronavirus-2019")
|
|
39 | 47 | print("-------------------------------------------------------")
|
40 | 48 |
|
41 | 49 | # Using get() method to Open a URL (Worldometers)
|
42 |
| -wd.get("https://www.worldometers.info/coronavirus/countries-where-coronavirus-has-spread/") |
| 50 | +wd.get( |
| 51 | + "https://www.worldometers.info/coronavirus/countries-where-coronavirus-has-spread/" |
| 52 | +) |
43 | 53 |
|
44 | 54 | # Creating Empty Lists to Store Information which will be Retrieved
|
45 | 55 | country_list = []
|
46 | 56 | cases_list = []
|
47 | 57 | deaths_list = []
|
48 | 58 | continent_list = []
|
49 | 59 |
|
50 |
| -table = wd.find_element_by_id("table3") |
| 60 | +table = wd.find_element_by_id("table3") |
51 | 61 | count = 0
|
52 | 62 | for row in table.find_elements_by_xpath(".//tr"):
|
53 | 63 | if count == 0:
|
|
58 | 68 | cases_list.append(lst[1])
|
59 | 69 | deaths_list.append(lst[2])
|
60 | 70 | continent_list.append(lst[3])
|
61 |
| - if count < 11 : |
| 71 | + if count < 11: |
62 | 72 | print("Country : ", lst[0])
|
63 | 73 | print("Total Cases : ", lst[1])
|
64 | 74 | print("Total Deaths : ", lst[2])
|
|
69 | 79 | wd.quit()
|
70 | 80 |
|
71 | 81 | # Creating a DataFrame (2D-Tabular Data Structure) using the Information Collected
|
72 |
| -df = pandas.DataFrame(data={"Country": country_list, "Total Cases": cases_list, "Total Deaths": deaths_list, "Continent": continent_list}) |
| 82 | +df = pandas.DataFrame( |
| 83 | + data={ |
| 84 | + "Country": country_list, |
| 85 | + "Total Cases": cases_list, |
| 86 | + "Total Deaths": deaths_list, |
| 87 | + "Continent": continent_list, |
| 88 | + } |
| 89 | +) |
73 | 90 | # Using to_csv() Function which Dumps the Data from the DataFrame to a CSV File
|
74 |
| -df.to_csv("./data.csv", sep=',',index=False) |
| 91 | +df.to_csv("./data.csv", sep=",", index=False) |
0 commit comments