refactored DAG

dbt-labs · dataders · Sep 16, 2022 · Sep 16, 2022 · Sep 16, 2022 · Sep 16, 2022
commit a88a980569a28317e85106169f4d38970fbbdea8
diff --git a/dbt_project.yml b/dbt_project.yml
@@ -34,13 +34,10 @@ clean-targets:         # directories to be removed by `dbt clean`
 models:
   python_wrench:
     # Config indicated by + and applies to all models under the path it is nested in
-    example:
-      +materialized: view
-
-quoting:
-  database: false
-  schema: false
-  identifier: false
+    stage:
+      stg_fruit_user_input:
+        +materialized: table
+
 
 seeds:
     # to allow for lowercase

diff --git a/models/fruit_join.sql b/models/fruit_join.sql
@@ -0,0 +1,12 @@
+-- Price every user order line; orders with no matching price keep NULL values.
+WITH user_orders AS (SELECT * FROM {{ ref('stg_fruit_user_input') }}),
+
+fruit_prices AS (SELECT * FROM {{ ref('stg_fruit_prices_fact') }})
+
+SELECT
+    fruit_prices."fruit_name",
+    user_orders."user_name",
+    user_orders."quantity" * fruit_prices."cost" AS "total"
+FROM user_orders
+LEFT JOIN fruit_prices  -- keep order rows that have no price match
+    ON user_orders."fruit_name" = fruit_prices."fruit_name"
diff --git a/models/fruit_summary.sql b/models/fruit_summary.sql
@@ -0,0 +1,13 @@
+-- Total owed per user, largest total first; orders without a price are left out.
+WITH fruit_join AS (
+    SELECT * FROM {{ ref('fruit_join') }}
+)
+
+SELECT
+    "user_name",
+    SUM("total") AS "total_final"
+FROM fruit_join
+-- A NULL total (e.g. no price matched upstream) is dropped so total_final is never NULL.
+WHERE "user_name" IS NOT NULL AND "total" IS NOT NULL
+GROUP BY "user_name"
+ORDER BY SUM("total") DESC
diff --git a/models/stage/scehma.yml b/models/stage/scehma.yml
@@ -0,0 +1,43 @@
+version: 2
+
+models:
+  - name: stg_fruit_user_input
+    description: user input with each entry fuzzy-matched to a fruit name
+    columns:
+      - name: fruit_user_input
+        quote: true
+        description: what the user manually typed in the app
+        tests:
+          - not_null
+      - name: quantity
+        quote: true
+        description: how many user wants to buy
+        tests:
+          - not_null
+      - name: user_name
+        quote: true
+        description: the internal ID of the app user
+        tests:
+          - not_null
+      - name: fruit_name
+        quote: true
+        description: best possible fuzzy match b/w user input and fact table
+        tests:
+          - not_null:
+              config:
+                severity: error  # with severity warn dbt never evaluates error_if
+                error_if: ">5"
+                warn_if: ">2"
+  - name: fruit_summary
+    description: total each customer definitely owes minus mismatches
+    columns:
+      - name: user_name
+        quote: true
+        description: the internal ID of the app user
+        tests:
+          - not_null
+      - name: total_final
+        quote: true
+        description: total amount each user owes
+        tests:
+          - not_null
diff --git a/models/stage/stg_fruit_prices_fact.sql b/models/stage/stg_fruit_prices_fact.sql
@@ -0,0 +1,3 @@
+-- Staging pass-through: exposes every column of the fruit_prices_fact ref unchanged.
+SELECT *
+FROM {{ ref('fruit_prices_fact') }}
diff --git a/models/fruit_join.py → models/stage/stg_fruit_user_input.py b/models/fruit_join.py → models/stage/stg_fruit_user_input.py
@@ -7,9 +7,7 @@ def model(dbt, session):
         packages=["fuzzywuzzy"]
     )
 
-    df_input = dbt.ref("fruit_user_input").to_pandas()
-
-    df_price = dbt.ref("fruit_prices_fact").to_pandas()
+    df_price = dbt.ref("stg_fruit_prices_fact").to_pandas()
 
     def custom_scorer(string):
         '''
@@ -24,17 +22,7 @@ def custom_scorer(string):
         else:
             return None
 
-    df_final = (df_input
+    return  (dbt.ref("fruit_user_input").to_pandas()
                 # make new col, `fruit_name`, with best match against actual table
                 .assign(fruit_name=lambda df: df["fruit_user_input"].apply(custom_scorer))
-                # join the actual fruit price table
-                .merge(df_price, on="fruit_name")
-                # calculate subtotal
-                .assign(total=lambda df: df.quantity * df.cost)
-                # find total for each user
-                .groupby("user_name")["total"].sum()
-                .reset_index()
-                .sort_values("total", ascending=False)
                 )
-
-    return df_final