deneme

husnusensoy · Kutay4 · Feb 2, 2025 · Feb 3, 2025 · Feb 3, 2025 · Feb 5, 2025
commit bfcd197e851c2ddd7c39041dba8f3d596f345886
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,11 @@
+{
+    "sqltools.useNodeRuntime": true,
+    "sqltools.connections": [
+        {
+            "previewLimit": 50,
+            "driver": "SQLite",
+            "database": "${workspaceFolder:tt-bootcamp}/you_do_1/data/new_db.db",
+            "name": "connection 1"
+        }
+    ]
+}
diff --git a/week2/sunday/create_netflix.sql b/week2/sunday/create_netflix.sql
@@ -1,4 +1,4 @@
 -- Build the rating table from every file matching rating_*.txt, parsed with
 -- a fixed four-column schema (movie id, user id, rating date, rate).
 create table rating as
 select
     MovieId,
     UserId,
     Date,
     Rate
 from read_csv(
     '../../you_do_1/data/rating_*.txt',
     columns = {
         'MovieId': 'int',
         'UserId': 'int',
         'Date': 'date',
         'Rate': 'int'
     }
 );
 
 -- The statement below raises an error because some title fields themselves contain commas.
--- create table movie_title as select * from read_csv('../../you_do_1/data/movie_titles.csv', columns = {'MovieId':'int','PublishedYear':'int', 'Title':'varchar'}, header=false, delim=',', auto_detect=false);
+-- create table movie_title as select * from read_csv('../../you_do_1/data/movie_titles.csv', columns = {'MovieId':'int','PublishedYear':'int', 'Title':'varchar'}, header=false, delim=',', auto_detect=false)
diff --git a/week2/sunday/test.py b/week2/sunday/test.py
@@ -0,0 +1,5 @@
+import duckdb
+
+conn = duckdb.connect(database="you_do_1/data/my_database.db")  # open (or create, if missing) the on-disk DuckDB database at this relative path; conn is not used or closed in the lines shown
+
+
diff --git a/you_do_1/SQL_Queries.sql b/you_do_1/SQL_Queries.sql
@@ -0,0 +1,56 @@
+-- Load the raw ratings: every file matching rating_*.txt is parsed as CSV
+-- with an explicit schema and materialised as the `rating` table.
+-- IF NOT EXISTS makes this step a no-op when the table is already present.
+CREATE TABLE IF NOT EXISTS rating AS
+SELECT
+    MovieId,
+    UserId,
+    Date,
+    Rate
+FROM read_csv(
+    'rating_*.txt',
+    columns = {
+        'MovieId': 'int',
+        'UserId': 'int',
+        'Date': 'date',
+        'Rate': 'int'
+    }
+);
+
+-- Load the movie catalogue from movie_titles_new.csv into `titles`.
+-- Every struct key and type name is a single-quoted string literal: in SQL,
+-- double quotes delimit identifiers, so "int" would be read as a column
+-- reference rather than as the name of a type.
+CREATE TABLE IF NOT EXISTS titles AS
+SELECT *  -- keeps every declared CSV column, including the leading index column
+FROM read_csv(
+    'movie_titles_new.csv',
+    columns = {
+        'index': 'int',
+        'MovieId': 'int',
+        'Year': 'varchar',  -- kept as text; presumably some year values are not numeric (TODO confirm)
+        'Title': 'text'
+    },
+    delim = ',',
+    quote = '"',  -- fields wrapped in double quotes may contain the delimiter
+    null_padding = TRUE,  -- rows with too few fields are padded with NULLs on the right
+    header = TRUE
+);
+
+
+-- Per-user z-score of every rating:
+--     NormalizedRate = (Rate - user's mean rate) / user's standard deviation.
+-- NormalizedRate is NULL whenever the user's deviation is 0 or NULL, because
+-- NULLIF turns a zero deviation into NULL before the division.
+CREATE TABLE IF NOT EXISTS normalized_rates AS
+WITH user_stats AS (
+    -- one row per user: mean rate and zero-guarded standard deviation
+    SELECT
+        UserId,
+        AVG(Rate) AS rate_avg,
+        NULLIF(STDDEV(Rate), 0) AS rate_std
+    FROM rating
+    GROUP BY UserId
+)
+SELECT
+    r.MovieId,
+    r.UserId,
+    r.Date,
+    (r.Rate - s.rate_avg) / s.rate_std AS NormalizedRate
+FROM rating AS r
+-- LEFT JOIN keeps every rating row even if no statistics row matches
+LEFT JOIN user_stats AS s
+    USING (UserId);
+
+-- Cold-start recommendation: rank movies by the median of their per-user
+-- normalized ratings.
+-- Movies with a low watch count are excluded by the HAVING clause, since a
+-- median over a handful of ratings is not reliable. The threshold of 100 is
+-- a starting point and should be tuned against the data.
+-- An INNER JOIN is sufficient: a movie without ratings can never reach the
+-- threshold.
+-- The other two requested items are not covered by this query yet.
+SELECT
+    t.Title,
+    t.MovieId,
+    MEDIAN(n.NormalizedRate) AS NormalizedRateMEDIAN,
+    COUNT(n.UserId) AS WatchCount
+FROM titles AS t
+INNER JOIN normalized_rates AS n
+    USING (MovieId)
+GROUP BY
+    t.Title,
+    t.MovieId
+HAVING COUNT(n.UserId) >= 100
+-- explicit NULLS LAST plus tie-breakers make the ranking deterministic
+ORDER BY
+    NormalizedRateMEDIAN DESC NULLS LAST,
+    WatchCount DESC,
+    t.MovieId;
+
+
+
+
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,5 @@
		import duckdb

		conn = duckdb.connect(database="you_do_1/data/my_database.db")