1
+ import csv
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+
5
+ # Function for scraping the Rotten Tomatoes site and storing the list of the top 100 movies of a particular genre in a CSV file
6
+
7
def search_with_genre(genre):
    """Scrape the Rotten Tomatoes "top 100" page for *genre* into a CSV file.

    The genre is turned into a URL slug (ampersands dropped, lower-cased,
    spaces replaced by underscores) and the matching
    ``top_100_<slug>_movies`` page is fetched. If the page contains an
    element with class ``table``, up to 100 of its data rows are written to
    ``<genre>.csv`` (named after the input exactly as given) with the columns
    Rank, Rating, Title and No of Reviews; an existing file is overwritten.
    Otherwise an error message and a list of suggested genres are printed.

    Args:
        genre: Genre name as typed by the user, e.g. "Action & Adventure".

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (including when the request times out).
    """
    # The CSV file is named after the input exactly as the user typed it.
    filename = genre

    # Build the URL slug, e.g. "Action & Adventure" -> "action__adventure".
    slug = genre.replace("&", "").lower().replace(" ", "_")
    url = "https://www.rottentomatoes.com/top/bestofrt/top_100_" + slug + "_movies"

    # A timeout keeps the script from hanging forever on a stalled connection.
    result = requests.get(url, timeout=30)
    soup = BeautifulSoup(result.text, "html.parser")

    # The movie list is expected inside the first element with class "table".
    find_contents = soup.find(attrs={"class": "table"})

    if find_contents is None:
        print("\n Error - Can't find the movies of genre you are looking for!")

        # Offer the user a list of genres to try instead.
        print("Try using these GENRES:")
        genre_list = [
            "Action & Adventure", "Animation", "Art House & International", "Classics",
            "Comedy", "Documentary", "Drama", "Horror", "Kids & Family",
            "Musical & Performing Arts", "Mystery & Suspense", "Romance", "Science Fiction & Fantasy",
            "Special Interest", "Sports & Fitness", "Television", "Western",
        ]
        for name in genre_list:
            print(name)
        return

    rows = find_contents.find_all("tr")

    # newline="" is required by the csv module to avoid blank lines between
    # rows on Windows; an explicit encoding keeps non-ASCII titles from
    # failing on platforms whose default encoding cannot represent them.
    with open(filename + ".csv", "w", newline="", encoding="utf-8") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["Rank", "Rating", "Title", "No of Reviews"])

        # Skip the first row (presumably the table header) and take at most
        # 100 rows; slicing avoids an IndexError when fewer rows are present.
        for row in rows[1:101]:
            dataset = row.find_all("td")
            if len(dataset) < 4:
                # Not a movie row (too few cells); nothing to record.
                continue

            rank = dataset[0].text.replace(".", "")
            # Drop the first character of the score text (presumably a
            # leading non-breaking space -- confirm against the page markup).
            rating = dataset[1].find(attrs={"class": "tMeterScore"}).text[1:]
            title_link = dataset[2].find("a", attrs={"class": "unstyled articleLink"})
            # Trim and collapse surrounding/indentation whitespace while
            # keeping the single spaces between the words of the title.
            title = " ".join(title_link.text.split())
            reviews = dataset[3].text
            writer.writerow([rank, rating, title, reviews])
61
+
62
+
63
+
64
# Take the genre from the user and search for movies of that genre.
# Guarded so that importing this module does not prompt for input or
# trigger a network request.
if __name__ == "__main__":
    print("Enter the genre of the movies you are looking for: ")
    genre = input()
    search_with_genre(genre)
0 commit comments