COVIDWebScraper - Added Functions

officialpm · web-flow · commit d7718b38e80f · 2020-10-05T01:46:18.000+05:30
diff --git a/Scripts/Web_Scrappers/COVID-19_Scraper/COVIDWebScraper.py b/Scripts/Web_Scrappers/COVID-19_Scraper/COVIDWebScraper.py
@@ -1,7 +1,7 @@
 # Web Scraping COVID-19 Data for Top 10 Countries Affected (Issue #31)
 # https://github.com/Python-World/Python_and_the_Web/issues/31
 # Contributed by @tauseefmohammed2 : https://github.com/tauseefmohammed2
-# Updated by @officialpm : https://github.com/officialpm
+# 🔥 Updated by @officialpm : https://github.com/officialpm
 
 # Requirements :
 # Selenium (Web Scrapping Python Library. Install : pip install selenium)
@@ -13,79 +13,109 @@
 from selenium import webdriver
 from webdriver_manager.chrome import ChromeDriverManager
 
-td = datetime.date.today()
 wait_imp = 10
-CO = webdriver.ChromeOptions()
-CO.add_experimental_option("useAutomationExtension", False)
-CO.add_argument("--ignore-certificate-errors")
-CO.add_argument("--headless")
+options = webdriver.ChromeOptions()
+options.add_experimental_option("useAutomationExtension", False)
+options.add_argument("--ignore-certificate-errors")
+options.add_argument("--headless")
 
 # Creating WebDriver Object
-wd = webdriver.Chrome(ChromeDriverManager().install(), options=CO)
-# Replace the Above Location of chromedriver.exe with your Saved Location
-
-print("Date:", td.strftime("%b-%d-%Y"))
-print(
-    "--------------------------------------------------------------------------------------------"
-)
-print(
-    "               COVID-19 Statistics From Around the World (Top 10 Countries)                 "
-)
-print(
-    "--------------------------------------------------------------------------------------------"
-)
-
-# Using get() method to Open a URL (WHO)
-wd.get("https://www.who.int/emergencies/diseases/novel-coronavirus-2019")
-wd.implicitly_wait(wait_imp)
-w_total = wd.find_element_by_id("confirmedCases")
-w_death = wd.find_element_by_id("confirmedDeaths")
-total_c = wd.find_element_by_id("involvedCountries")
-print("WorldWide")
-print("Total Cases : ", w_total.text)
-print("Total Deaths : ", w_death.text)
-print("-------------------------------------------------------")
-
-# Using get() method to Open a URL (Worldometers)
-wd.get(
-    "https://www.worldometers.info/coronavirus/countries-where-coronavirus-has-spread/"
-)
-
-# Creating Empty Lists to Store Information which will be Retrieved
-country_list = []
-cases_list = []
-deaths_list = []
-continent_list = []
-
-table = wd.find_element_by_id("table3")
-count = 0
-for row in table.find_elements_by_xpath(".//tr"):
-    if count == 0:
+driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
+
+
+def _scrapeCovidTables():
+    """Print the WHO/Worldometers figures and return every country row.
+
+    Returns a pandas.DataFrame with the columns "Country", "Total Cases",
+    "Total Deaths" and "Continent". The caller owns closing the driver.
+    """
+    today = datetime.date.today()
+    print("Date:", today.strftime("%b-%d-%Y"))
+    print(
+        "------------------------------------------------------------------------------"
+    )
+    print(
+        "         COVID-19 Statistics From Around the World (Top 10 Countries)         "
+    )
+    print(
+        "------------------------------------------------------------------------------"
+    )
+
+    # Using get() method to Open a URL (WHO)
+    driver.get("https://www.who.int/emergencies/diseases/novel-coronavirus-2019")
+    driver.implicitly_wait(wait_imp)
+    w_total = driver.find_element_by_id("confirmedCases")
+    w_death = driver.find_element_by_id("confirmedDeaths")
+    print("WorldWide")
+    print("Total Cases : ", w_total.text)
+    print("Total Deaths : ", w_death.text)
+    print("-------------------------------------------------------")
+
+    # Using get() method to Open a URL (Worldometers)
+    driver.get(
+        "https://www.worldometers.info/coronavirus/countries-where-coronavirus-has-spread/"
+    )
+
+    # Creating Empty Lists to Store Information which will be Retrieved
+    country_list = []
+    cases_list = []
+    deaths_list = []
+    continent_list = []
+
+    table = driver.find_element_by_id("table3")
+    count = 0
+    for row in table.find_elements_by_xpath(".//tr"):
+        if count == 0:
+            # The first row is skipped (presumably the table header).
+            count += 1
+            continue
+        cells = [cell.text for cell in row.find_elements_by_xpath(".//td")]
+        if len(cells) < 4:
+            # Rows with fewer than four cells would raise IndexError below.
+            continue
+        country_list.append(cells[0])
+        cases_list.append(cells[1])
+        deaths_list.append(cells[2])
+        continent_list.append(cells[3])
+        if count < 11:
+            # Only the first ten data rows are echoed to the console.
+            print("Country : ", cells[0])
+            print("Total Cases : ", cells[1])
+            print("Total Deaths : ", cells[2])
+            print("-------------------------------------------------------")
         count += 1
-        continue
-    lst = [td.text for td in row.find_elements_by_xpath(".//td")]
-    country_list.append(lst[0])
-    cases_list.append(lst[1])
-    deaths_list.append(lst[2])
-    continent_list.append(lst[3])
-    if count < 11:
-        print("Country : ", lst[0])
-        print("Total Cases : ", lst[1])
-        print("Total Deaths : ", lst[2])
-        print("-------------------------------------------------------")
-    count += 1
-
-# Closing Chrome After Extraction of Data
-wd.quit()
-
-# Creating a DataFrame (2D-Tabular Data Structure) using the Information Collected
-df = pandas.DataFrame(
-    data={
-        "Country": country_list,
-        "Total Cases": cases_list,
-        "Total Deaths": deaths_list,
-        "Continent": continent_list,
-    }
-)
+
+    # Creating a DataFrame (2D-Tabular Data Structure) using the Information Collected
+    return pandas.DataFrame(
+        data={
+            "Country": country_list,
+            "Total Cases": cases_list,
+            "Total Deaths": deaths_list,
+            "Continent": continent_list,
+        }
+    )
+
+
+def scrapeCovidStats():
+    """Scrape the COVID-19 statistics and return them as a DataFrame.
+
+    Chrome is closed even when scraping fails, so a network error or a
+    changed page layout does not leave a headless browser running.
+    """
+    try:
+        return _scrapeCovidTables()
+    finally:
+        # Closing Chrome After Extraction of Data
+        driver.quit()
+
+
 # Using to_csv() Function which Dumps the Data from the DataFrame to a CSV File
-df.to_csv("./data.csv", sep=",", index=False)
+if __name__ == "__main__":
+    try:
+        scrapedData = scrapeCovidStats()
+        scrapedData.to_csv("./data.csv", sep=",", index=False)
+    except Exception as err:
+        print("Please check your internet!")
+        print("Error:", err)
+        # Non-zero status so shells/CI can tell the scrape failed.
+        raise SystemExit(1)