Skip to content
This repository was archived by the owner on Dec 22, 2023. It is now read-only.

Commit d7718b3

Browse files
authored
COVIDWebScraper - Added Functions
1 parent 8afe60c commit d7718b3

File tree

1 file changed

+83
-72
lines changed

1 file changed

+83
-72
lines changed
Lines changed: 83 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Web Scraping COVID-19 Data for Top 10 Countries Affected (Issue #31)
22
# https://github.com/Python-World/Python_and_the_Web/issues/31
33
# Contributed by @tauseefmohammed2 : https://github.com/tauseefmohammed2
4-
# Updated by @officialpm : https://github.com/officialpm
4+
# 🔥 Updated by @officialpm : https://github.com/officialpm
55

66
# Requirements :
77
# Selenium (Web Scraping Python Library. Install : pip install selenium)
@@ -13,79 +13,90 @@
1313
from selenium import webdriver
1414
from webdriver_manager.chrome import ChromeDriverManager
1515

# Implicit wait (seconds) applied to every element lookup on the driver.
wait_imp = 10

# Configure Chrome: disable the automation extension, tolerate bad
# certificates, and run without a visible browser window.
options = webdriver.ChromeOptions()
options.add_experimental_option("useAutomationExtension", False)
for flag in ("--ignore-certificate-errors", "--headless"):
    options.add_argument(flag)

# Creating WebDriver Object
# (webdriver-manager downloads/locates a matching chromedriver binary).
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
def scrapeCovidStats():
    """Scrape COVID-19 statistics and return them as a pandas DataFrame.

    Prints today's date, the WHO worldwide totals, and details for the
    first 10 countries listed on Worldometers to stdout, then returns a
    DataFrame with one row per listed country and the columns
    ``Country``, ``Total Cases``, ``Total Deaths`` and ``Continent``.

    Uses the module-level ``driver`` and quits it when finished, so the
    function can only be called once per process.
    """
    td = datetime.date.today()
    print("Date:", td.strftime("%b-%d-%Y"))
    print(
        "------------------------------------------------------------------------------"
    )
    print(
        "        COVID-19 Statistics From Around the World (Top 10 Countries)        "
    )
    print(
        "------------------------------------------------------------------------------"
    )

    # Using get() method to Open a URL (WHO) for the worldwide totals
    driver.get("https://www.who.int/emergencies/diseases/novel-coronavirus-2019")
    driver.implicitly_wait(wait_imp)
    # NOTE(review): the original also looked up "involvedCountries" into an
    # unused variable; that dead DOM query has been removed.
    w_total = driver.find_element_by_id("confirmedCases")
    w_death = driver.find_element_by_id("confirmedDeaths")
    print("WorldWide")
    print("Total Cases : ", w_total.text)
    print("Total Deaths : ", w_death.text)
    print("-------------------------------------------------------")

    # Using get() method to Open a URL (Worldometers) for per-country data
    driver.get(
        "https://www.worldometers.info/coronavirus/countries-where-coronavirus-has-spread/"
    )

    # Creating Empty Lists to Store Information which will be Retrieved
    country_list = []
    cases_list = []
    deaths_list = []
    continent_list = []

    table = driver.find_element_by_id("table3")
    # Slice off the header row instead of tracking a manual counter; print
    # details only for the first 10 data rows.
    for i, row in enumerate(table.find_elements_by_xpath(".//tr")[1:]):
        # "cell" (not "td") avoids shadowing the date variable above.
        cells = [cell.text for cell in row.find_elements_by_xpath(".//td")]
        if len(cells) < 4:
            # Defensive: skip malformed/short rows rather than raising IndexError.
            continue
        country_list.append(cells[0])
        cases_list.append(cells[1])
        deaths_list.append(cells[2])
        continent_list.append(cells[3])
        if i < 10:
            print("Country : ", cells[0])
            print("Total Cases : ", cells[1])
            print("Total Deaths : ", cells[2])
            print("-------------------------------------------------------")

    # Closing Chrome After Extraction of Data
    driver.quit()

    # Creating a DataFrame (2D-Tabular Data Structure) using the Information Collected
    df = pandas.DataFrame(
        data={
            "Country": country_list,
            "Total Cases": cases_list,
            "Total Deaths": deaths_list,
            "Continent": continent_list,
        }
    )

    return df
93+
94+
9095
# Using to_csv() Function which Dumps the Data from the DataFrame to a CSV File
91-
df.to_csv("./data.csv", sep=",", index=False)
96+
if __name__ == "__main__":
97+
try:
98+
scrapedData = scrapeCovidStats()
99+
scrapedData.to_csv("./data.csv", sep=",", index=False)
100+
except Exception:
101+
print("Please check your internet!")
102+
exit(0)

0 commit comments

Comments
 (0)