Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
deneme
  • Loading branch information
Kutay4 committed Feb 2, 2025
commit bfcd197e851c2ddd7c39041dba8f3d596f345886
11 changes: 11 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"sqltools.useNodeRuntime": true,
"sqltools.connections": [
{
"previewLimit": 50,
"driver": "SQLite",
"database": "${workspaceFolder:tt-bootcamp}/you_do_1/data/new_db.db",
"name": "connection 1"
}
]
}
2 changes: 1 addition & 1 deletion week2/sunday/create_netflix.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-- Build the rating table from every file matching the rating_*.txt glob,
-- using an explicit column/type specification instead of auto-detection.
CREATE TABLE rating AS
SELECT *
FROM read_csv(
    '../../you_do_1/data/rating_*.txt',
    columns = {
        'MovieId': 'int',
        'UserId': 'int',
        'Date': 'date',
        'Rate': 'int'
    }
);

-- This causes an error because some fields (movie titles) contain commas.
-- create table movie_title as select * from read_csv('../../you_do_1/data/movie_titles.csv', columns = {'MovieId':'int','PublishedYear':'int', 'Title':'varchar'}, header=false, delim=',', auto_detect=false);
-- create table movie_title as select * from read_csv('../../you_do_1/data/movie_titles.csv', columns = {'MovieId':'int','PublishedYear':'int', 'Title':'varchar'}, header=false, delim=',', auto_detect=false)
5 changes: 5 additions & 0 deletions week2/sunday/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import duckdb

# Connect to the DuckDB database file; the path is relative, so it is resolved
# against the directory the script is run from.
conn = duckdb.connect("you_do_1/data/my_database.db")


56 changes: 56 additions & 0 deletions you_do_1/SQL_Queries.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
-- Load every ratings file matching the glob into the rating table.
-- The statement is a no-op when the table already exists.
CREATE TABLE IF NOT EXISTS rating AS
SELECT
    MovieId,
    UserId,
    Date,
    Rate
FROM read_csv(
    'rating_*.txt',
    columns = {
        'MovieId': 'int',
        'UserId': 'int',
        'Date': 'date',
        'Rate': 'int'
    }
);

-- Load the movie titles file into the titles table (no-op when the table
-- already exists).
-- Column names and type names are written as single-quoted string literals
-- throughout: in SQL, double quotes denote identifiers, so relying on them for
-- a type name such as "int" depends on the engine being lenient.
-- NOTE(review): 'index' is presumably a row index exported with the CSV, and
-- 'Year' is presumably kept as varchar because of non-numeric values — confirm.
CREATE TABLE IF NOT EXISTS titles AS
SELECT *
FROM read_csv(
    'movie_titles_new.csv',
    columns = {
        'index': 'int',
        'MovieId': 'int',
        'Year': 'varchar',
        'Title': 'text'
    },
    delim = ',',
    -- Titles containing the delimiter are expected to be wrapped in double quotes.
    quote = '"',
    -- Rows with fewer fields than declared are padded with NULLs.
    null_padding = TRUE,
    header = TRUE
);


-- Per-user standardisation of ratings: each rating is expressed as its
-- distance from the user's mean rating, in units of that user's standard
-- deviation.
CREATE TABLE IF NOT EXISTS normalized_rates AS
WITH user_stats AS (
    -- One row per user. A zero standard deviation is mapped to NULL so the
    -- division below yields NULL for that user's ratings.
    SELECT
        UserId,
        AVG(Rate) AS RateAVG,
        NULLIF(STDDEV(Rate), 0) AS RateSTD
    FROM rating
    GROUP BY UserId
)
SELECT
    r.MovieId,
    r.UserId,
    r.Date,
    (r.Rate - s.RateAVG) / s.RateSTD AS NormalizedRate
FROM rating AS r
LEFT JOIN user_stats AS s
    USING (UserId);

-- Cold-start recommendation: rank every title by the median of its
-- normalised ratings.
-- TODO: titles with a low watch count are not filtered out yet; this is shared
-- as work in progress. Once that is solved, development continues on the
-- requests in the other two items.
SELECT
    t.Title,
    t.MovieId,
    MEDIAN(n.NormalizedRate) AS NormalizedRateMEDIAN,
    -- Counts a column from the joined side, so a title with no ratings gets 0.
    COUNT(n.UserId) AS WatchCount
FROM titles AS t
LEFT JOIN normalized_rates AS n
    USING (MovieId)
GROUP BY
    t.Title,
    t.MovieId
-- NULLS LAST keeps titles without a median at the bottom regardless of the
-- engine's default NULL ordering; WatchCount and MovieId break ties so the
-- output order is reproducible between runs.
ORDER BY
    NormalizedRateMEDIAN DESC NULLS LAST,
    WatchCount DESC,
    t.MovieId;





Loading