Merge pull request #598 from killerpanda101/master

mergify[bot] · web-flow · commit f989864845ca · 2020-10-27T04:06:09.000Z
adding micheline resturent scraper
diff --git a/Scripts/Web_Scrappers/Michelin_Restaurants_Scraper/README.md b/Scripts/Web_Scrappers/Michelin_Restaurants_Scraper/README.md
@@ -0,0 +1,35 @@
+## Michelin Restaurant Data Scraper
+Python web scraper that extracts information related to different Michelin restaurants.
+
+## Data scraped
+ - Restaurant name
+ - Address
+ - Description
+ - Tags (e.g. the number of stars the restaurant has)
+ - Facilities of each restaurant (e.g. lift, car parking)
+ - Google Maps link
+ - Price category of the restaurant
+ - Type/category the restaurant falls in (not all restaurants have this field)
+
+## Pre-Requisites
+ - Unidecode
+ - beautifulsoup4
+ - requests
+
+## Instructions To Run
+Run The Command
+`pip install -r requirements.txt`
+`python micheline_scraper.py "{restaurant name}"`
+
+Example
+`python micheline_scraper.py "Oxygen Dining Room"`
+
+Image 
+![Screenshot](./screenshot.png?raw=true "Screenshot")
+
+
+
+## *Author Name*
+@killerpanda101
+https://github.com/killerpanda101
+
diff --git a/Scripts/Web_Scrappers/Michelin_Restaurants_Scraper/micheline_scraper.py b/Scripts/Web_Scrappers/Michelin_Restaurants_Scraper/micheline_scraper.py
@@ -0,0 +1,54 @@
+from bs4 import BeautifulSoup
+import requests, string, re, sys
+from unidecode import unidecode
+
+base_url='https://guide.michelin.com/en/restaurant/'  # prefix to which the normalized restaurant name is appended to build the page URL
+
+def normalize_input(resturant_name):
+    # converting to lower case and replacing white spaces
+    resturant_name = resturant_name.lower().strip()
+    # removing punctuations
+    resturant_name = resturant_name.translate(str.maketrans("", "", string.punctuation))
+    # converting all charecters to unicode (ie:- é->e) and replacing spaces with -
+    return unidecode(resturant_name.replace(" ", "-"))
+
+def get_resturent_details(resturant_name):
+    url = base_url+resturant_name
+
+    # making the request to the url
+    req = requests.get(url)
+    soup = BeautifulSoup(req.content, 'html.parser')
+
+    data = {}
+    
+    # getting the name, address and description
+    data['name'] = soup.h2.text
+
+    data['address'] = soup.find(class_='restaurant-details__heading--list').li.text
+
+    data['description'] = soup.find('p').text
+
+    # each resturent has tags (ie:- the number of stars they have etc...)
+    data['tags'] = [re.sub(r'[^a-zA-Z0-9]','',tag.text) for tag in soup.select('.restaurant-details__classification--list li')]
+
+    # facilities of each resturent is listed (ie:-lift, car-parking etc...)
+    data['facilities'] = [re.sub(r'[^a-zA-Z0-9]', '', facility.text) for facility in soup.select('.restaurant-details__services--list li')]
+
+    data['gmaps_link'] = soup.select('.google-map__static iframe')[0]['src']
+
+    price_and_type_string = soup.find(class_='restaurant-details__heading-price').text.split('•')
+
+    data['price'] = re.sub(r'[^a-zA-Z0-9-]', '', price_and_type_string[0])
+
+    #some resturents so not have the "type" listed
+    if len(price_and_type_string) == 2:
+        data['type'] = re.sub(r'[^a-zA-Z0-9-]', '', price_and_type_string[1])
+    
+    return data
+
+def main():
+    resturent = normalize_input(str(sys.argv[1]))
+    print(get_resturent_details(resturent))
+
+if __name__ == "__main__":
+    main()
diff --git a/Scripts/Web_Scrappers/Michelin_Restaurants_Scraper/requirements.txt b/Scripts/Web_Scrappers/Michelin_Restaurants_Scraper/requirements.txt
@@ -0,0 +1,3 @@
+Unidecode == 1.1.1
+beautifulsoup4 == 4.8.2
+requests == 2.22.0
diff --git a/Scripts/Web_Scrappers/Michelin_Restaurants_Scraper/screenshot.png b/Scripts/Web_Scrappers/Michelin_Restaurants_Scraper/screenshot.png

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+Unidecode == 1.1.1`
	`2`	`+beautifulsoup4 == 4.8.2`
	`3`	`+requests == 2.22.0`