Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
added something
  • Loading branch information
zehraozturkk committed Feb 2, 2025
commit 35c5ea6d45484b1555af06221f5b960f0ee39119
107 changes: 107 additions & 0 deletions you_do_1/open_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
from pathlib import Path
from dataclasses import dataclass
from typing import List, Dict, Tuple
from collections import defaultdict


@dataclass
class Movies:
    """Title record for one movie, as built by ``read_file``.

    The movie id is not stored on the record; ``read_file`` uses it as the
    key of the dictionary that holds these objects.
    """

    # Annotated as int, but read_file passes the raw text field through
    # without converting it, so in practice this holds a str.
    published_year: int
    # Title text; read_file glues the pieces back together when the title
    # itself contained the field separator.
    movie_name: str

@dataclass
class Ratings:
    """Record for a single rating (user, movie, creation date, score).

    NOTE(review): nothing in the visible code instantiates this class; the
    rating reader stores plain tuples instead.
    """

    user_id: int
    movie_id: int
    # NOTE(review): annotated int, while read_txt keeps the date field as an
    # unconverted str — confirm the intended representation.
    created_date: int
    # NOTE(review): annotated int, while read_txt parses ratings with float().
    rate: int

def read_txt(file_name: List[str], delimiter: str = ","):
    """Unfinished placeholder: declares one local annotation and returns None.

    NOTE(review): the name ``read_txt`` is bound again by a zero-argument
    definition further down in this module, so this version is not reachable
    by name once the module has finished loading.
    """
    # Bare annotation only — no value is assigned and nothing is evaluated.
    # NOTE(review): ``ratings`` is not defined in the visible module;
    # ``Ratings`` was presumably intended.
    ratings_dict: Dict[int, ratings]



def read_file(file_name: str | Path, header: bool, delimiter: str = ","):
    """Load the movie-title file into a dictionary keyed by movie id.

    Every non-blank line is parsed as
    ``movie_id<delimiter>published_year<delimiter>movie_name``. The title is
    the last field and may itself contain the delimiter, so everything after
    the second field is glued back together with the same delimiter.

    Args:
        file_name: Path of the title file.
        header: True when the first line is a header row that must be
            skipped; False when the data starts on the first line.
        delimiter: Field separator used in the file.

    Returns:
        A dict mapping the integer movie id to its ``Movies`` record.
        ``published_year`` is stored exactly as read (a string), without
        conversion.

    Raises:
        ValueError: If a non-blank line has fewer than two fields or its
            movie id is not an integer.
        OSError: If the file cannot be opened.
    """
    movie_dict: Dict[int, Movies] = {}

    with Path(file_name).open() as rf:
        for line_number, raw_line in enumerate(rf):
            # The header row carries no movie data.
            if header and line_number == 0:
                continue

            line = raw_line.strip()
            # A blank (often trailing) line would otherwise fail to unpack.
            if not line:
                continue

            movie_id, published_year, *name_parts = line.split(delimiter)
            movie_dict[int(movie_id)] = Movies(
                published_year=published_year,
                # The starred target yields a list; restore the original title.
                movie_name=delimiter.join(name_parts),
            )

    return movie_dict
# Rating files consumed by read_txt, processed in the order listed.
file_paths = [
    "D:/indirilenler/binge/rating_1.txt",
    "D:/indirilenler/binge/rating_2.txt",
    "D:/indirilenler/binge/rating_3.txt",
    "D:/indirilenler/binge/rating_4.txt",
]

def read_txt():
film_ratings: Dict[int, List[Tuple[int, str, int]]] = defaultdict(list)
user_ids: Dict[int, List[Tuple[int,int]] ] = defaultdict(list)
for file_path in file_paths:
with open(file_path, "r") as file:

for i ,line in enumerate(file):

if i>20000:
break

movie_id, user_id, date, rating = line.strip().split(",")

movie_id =int(movie_id)
user_id= int(user_id)
rating= float(rating)

film_ratings[movie_id].append((user_id, date, rating))
user_ids[user_id].append((movie_id, rating))

return dict(film_ratings), dict(user_ids)

def calculate_avg_ratings(ratings):
    """Summarise each movie's ratings as a view count and an average score.

    Args:
        ratings: Mapping of movie id to a list of
            ``(user_id, date, rating)`` tuples.

    Returns:
        A dict mapping movie id to ``{"izlenme": <number of ratings>,
        "rate": <mean rating>}``. Movies whose rating list is empty are left
        out, because an average is undefined for them.
    """
    watcher_count = {}
    for movie_id, movie_ratings in ratings.items():
        # Avoid dividing by zero for a movie that has no ratings.
        if not movie_ratings:
            continue

        scores = [rating for (_, _, rating) in movie_ratings]
        watcher_count[movie_id] = {
            "izlenme": len(scores),
            "rate": sum(scores) / len(scores),
        }

    return watcher_count

if __name__ == "__main__":
    # Manual smoke run: load the headerless title file, read the rating
    # files, then show the container type and the per-movie summary.
    movies = read_file("movie_titles.csv", header=False)
    ratings_by_movie, _ratings_by_user = read_txt()
    summary = calculate_avg_ratings(ratings_by_movie)
    print(type(ratings_by_movie))
    print(summary)


# print(movies[17764].movie_name)
206 changes: 206 additions & 0 deletions you_do_1/read_file copy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
from typing import Dict, List, Tuple
from collections import defaultdict
from datetime import datetime, time
from dataclasses import dataclass
import csv
from pathlib import Path


@dataclass
class Movies:
    """Catalogue entry for one movie, as built by ``Recommendation_movie.read_file``."""

    movie_id: int
    # Annotated as int, but read_file stores the raw text field without
    # converting it, so in practice this holds a str.
    published_year: int
    # Title text; read_file re-joins the pieces with "," when the title was
    # split on the field separator.
    movie_name: str



class Recommendation_movie:
def __init__(self):
self.file_paths =["D:/indirilenler/binge/rating_1.txt","D:/indirilenler/binge/rating_2.txt","D:/indirilenler/binge/rating_3.txt","D:/indirilenler/binge/rating_4.txt"]
self.csv_path = "D:/indirilenler/binge/movie_titles.csv"

self.film_ratings_cache = None
self.user_ids_cache = None

self.film_ratings_cache, self.user_ids_cache = self.read_txt()
self.movie_dict = self.read_file("movie_titles.csv", header=False)
self.avg_ratings = self.calculate_avg_ratings(self.film_ratings_cache)
self.avg_rating_by_year = self.calculate_avg_ratings_by_year()
self.top_movies = self.sort_by_ratings(self.avg_ratings)
self.user_list = list(self.user_ids_cache.keys())

# start_time = datetime.now()
def read_txt(self):
film_ratings: Dict[int, List[Tuple[int, str, int]]] = defaultdict(list)
user_ids: Dict[int, List[Tuple[int,int]] ] = defaultdict(list)
for file_path in self.file_paths:
with open(file_path, "r") as file:

for i ,line in enumerate(file):

movie_id, user_id, date, rating = line.strip().split(",")

movie_id =int(movie_id)
user_id= int(user_id)
rating= float(rating)

film_ratings[movie_id].append((user_id, date, rating))
user_ids[user_id].append((movie_id, rating))

return dict(film_ratings), dict(user_ids)



def read_file(file_name: str | Path,header:bool, delimiter: str = ","):
movies: List[Movies] = []


with Path(file_name).open() as rf:
if header:
for i,line in enumerate(rf):
print(line)
if i == 0:
headers = [h.lower() for h in line.strip().split(delimiter)]
else:
movie_id,published_year, *movie_name = line.strip().split(delimiter)

movie_name = ",".join(movie_name)

movie = Movies(
movie_id = int(movie_id),
published_year=published_year,
movie_name=str(movie_name)
)

movies.append(movie)
else:
for i, line in enumerate(rf):
movie_id, published_year, *movie_name = line.strip().split(delimiter)
movie_name = ",".join(movie_name) # *name de liste olarka geldi biz de bunun ,le birleştierek strgine çeviridk.

movie = Movies(
movie_id = int(movie_id),
published_year=published_year,
movie_name=movie_name
)

movies.append(movie),

return movies

def calculate_avg_ratings(self, ratings: Dict[int, List[Tuple[int, str, float]]]) -> Dict[int,float]:
movie_avg_Rate= {}
watcher_count = 0
for movie_id, movie_ratings in self.film_ratings_cache.items():

ratings_list = [rating for (_,_,rating) in movie_ratings]
avg_rate = sum(ratings_list) / len(ratings_list)
watcher_count[movie_id] = {"izlenme": len(ratings_list), "rate": avg_rate}

movie_avg_Rate[movie_id] = avg_rate

print(watcher_count)

return movie_avg_Rate

def calculate_avg_ratings_by_year(self) -> Dict[str, List[Dict]]:
movie_rate_by_year = {}

for movie_info in self.movie_dict:
year = movie_info.published_year
movie_name = movie_info.movie_name
movie_id = movie_info.movie_id

# Check if the movie has ratings
if movie_id in self.film_ratings_cache:
movie_ratings = self.film_ratings_cache[movie_id]
ratings_list = [rating for (_, _, rating) in movie_ratings]
avg_rate = sum(ratings_list) / len(ratings_list)

# Group movies by year
if year not in movie_rate_by_year:
movie_rate_by_year[year] = []
movie_rate_by_year[year].append({
"movie_name": movie_name,
"rate": avg_rate,
"movie_id": movie_id
})

return movie_rate_by_year

def sort_by_ratings(self, ratings) -> List[Tuple[int,float]]:
return sorted(self.avg_ratings.items(), key=lambda item: item[1], reverse=True)

def cold_start(self, user_id):
if user_id not in self.user_list:
print(f"Welcome {user_id}! Here are the top 5 movies for your fresh start:")
for i,(movie_id, rating) in enumerate(self.top_movies[:5]):
movie_name = self.movie_dict[movie_id].movie_name
print(f"{i+1}-) {movie_name}, Rate: {rating:.2f}")
else:
print(f"User {user_id} is already registered. No cold start recommendations needed.")

# #count of review ekle her bir film için

def our_customer(self, user_id):
rated_movies = {}
published_years = []

if user_id in self.user_ids_cache:
print(f"hey you are.. it is good to see you again. what do you want to watch? these your movies that watched and rated. you can chose like them")
for movie_id, rating in self.user_ids_cache[user_id]:
movie_name = self.movie_dict[movie_id].movie_name
published_year = self.movie_dict[movie_id].published_year
movie_info = self.movie_dict[movie_id]

rated_movies[movie_id] = { "name": movie_name, "rating": rating, "published_year": published_year}
# rated_movies.append((movie_name, rating))
if str(published_year).isdigit():
published_years.append(int(published_year))

top_rated_movies = sorted(
rated_movies.items(),
key=lambda x: x[1]["rating"],
reverse=True
)[:5]

print("\nYour top rated movies:")
for movie_id, info in top_rated_movies:
print(f"- {info['name']} ({info['published_year']}) - Your rating: {info['rating']}")


recommendations = []
for year in published_years:
if str(year) in self.avg_rating_by_year: # Yıl string olarak tutuluyor
for movie in self.avg_rating_by_year[str(year)]:
recommendations.append(movie["movie_name"])

print(f"\nHere are your recommendations: {recommendations[:5]}")


def compare_movies(self,movie_id, movie_id2):
now = datetime.now().year

movie_1 = self.movie_dict[movie_id]
movie_2 = self.movie_dict[movie_id2]
print(f"Compare {movie_1} and {movie_2} ")

if(self.avg_ratings[movie_id] < self.avg_ratings[movie_id2]):
print(f"{movie_1["movie_name"]} rate is less than {movie_2["movie_name"]}")
else:
print(f"{self.movie_dict[movie_id2].movie_name} rate is less than {self.movie_dict[movie_id].movie_name}")

age1 = now - int(movie_1["published_year"])
age2 = now - int(movie_2["published_year"])

print(f"lets look the age of movies. \n{movie_1["movie_name"]}: {age1} \n{movie_2["movie_name"]} is published: {age2}")


# Demo run: build the recommender from the default data files and exercise
# each entry point once.
recommender = Recommendation_movie()

recommendations = recommender.cold_start(2)

# avg_ratings is a plain dict attribute, so it is read rather than called.
avggg = recommender.avg_ratings
compare = recommender.compare_movies(1, 2)

our_customer = recommender.our_customer(387418)