|
| 1 | +from bs4 import BeautifulSoup |
| 2 | +import requests, string, re, sys |
| 3 | +from unidecode import unidecode |
| 4 | + |
# Root of every restaurant page on the Michelin Guide site; the restaurant's
# URL slug is appended directly to this prefix to form the page address.
base_url = 'https://guide.michelin.com/en/restaurant/'
| 6 | + |
def normalize_input(resturant_name):
    """Convert a free-form restaurant name into a URL slug.

    The name is transliterated to ASCII, lower-cased, stripped of ASCII
    punctuation, and its whitespace-separated words are joined with "-".

    Args:
        resturant_name: Restaurant name as typed by the user.

    Returns:
        The slug string, e.g. "Le Bernardin" -> "le-bernardin".
    """
    # Transliterate first (ie:- é->e). Doing this after the punctuation pass
    # lets non-ASCII punctuation such as a curly apostrophe slip through and
    # come back as ASCII punctuation in the slug. Lower-casing afterwards also
    # covers any upper-case letters the transliteration itself produces.
    ascii_name = unidecode(resturant_name).lower()
    # removing punctuations
    ascii_name = ascii_name.translate(str.maketrans("", "", string.punctuation))
    # split() trims the ends and collapses runs of any whitespace, so
    # "le   bernardin" yields "le-bernardin" rather than "le---bernardin".
    return "-".join(ascii_name.split())
| 14 | + |
def get_resturent_details(resturant_name, timeout=10):
    """Download a restaurant page from the Michelin Guide and scrape its details.

    Args:
        resturant_name: URL slug of the restaurant; it is appended to
            ``base_url`` unchanged.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict with the keys ``name``, ``address``, ``description``, ``tags``,
        ``facilities``, ``gmaps_link`` (``None`` when the page has no map
        iframe), ``price`` and, only when the page lists it, ``type``.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or a non-success HTTP status (e.g. 404 for an unknown slug).
        AttributeError: If the page lacks one of the expected elements
            (heading, address list, first paragraph, or price line).
    """
    url = base_url + resturant_name

    # making the request to the url; without a timeout a stalled connection
    # would block the script forever
    req = requests.get(url, timeout=timeout)
    # Fail early on 4xx/5xx instead of scraping an error page and crashing
    # later with an unrelated AttributeError.
    req.raise_for_status()
    soup = BeautifulSoup(req.content, 'html.parser')

    data = {}

    # getting the name, address and description
    data['name'] = soup.h2.text
    data['address'] = soup.find(class_='restaurant-details__heading--list').li.text
    data['description'] = soup.find('p').text

    # each restaurant has tags (ie:- the number of stars they have etc...)
    data['tags'] = [
        re.sub(r'[^a-zA-Z0-9]', '', tag.text)
        for tag in soup.select('.restaurant-details__classification--list li')
    ]

    # facilities of each restaurant are listed (ie:- lift, car-parking etc...)
    data['facilities'] = [
        re.sub(r'[^a-zA-Z0-9]', '', facility.text)
        for facility in soup.select('.restaurant-details__services--list li')
    ]

    # The map is auxiliary information: report None rather than aborting the
    # whole scrape when the iframe (or its src attribute) is absent.
    map_frame = soup.select_one('.google-map__static iframe')
    data['gmaps_link'] = map_frame.get('src') if map_frame is not None else None

    # The price line reads "<price> • <type>"; split it on the bullet.
    price_and_type_string = soup.find(class_='restaurant-details__heading-price').text.split('•')

    data['price'] = re.sub(r'[^a-zA-Z0-9-]', '', price_and_type_string[0])

    # some restaurants do not have the "type" listed
    if len(price_and_type_string) == 2:
        data['type'] = re.sub(r'[^a-zA-Z0-9-]', '', price_and_type_string[1])

    return data
| 48 | + |
def main():
    """Command-line entry point.

    Reads the restaurant name from the command-line arguments, converts it to
    a slug with ``normalize_input`` and prints the dict returned by
    ``get_resturent_details``. Exits with a usage message when no name is
    given.
    """
    if len(sys.argv) < 2:
        # sys.exit with a string prints it to stderr and exits with status 1,
        # which is friendlier than the bare IndexError from sys.argv[1].
        sys.exit("usage: " + sys.argv[0] + " <restaurant name>")

    # Join all arguments so an unquoted multi-word name ("le bernardin")
    # is treated as one restaurant name instead of dropping the later words.
    resturent = normalize_input(" ".join(sys.argv[1:]))
    print(get_resturent_details(resturent))


if __name__ == "__main__":
    main()
0 commit comments