Skip to content
This repository was archived by the owner on Dec 4, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
refactored DAG
  • Loading branch information
dataders committed Sep 16, 2022
commit a88a980569a28317e85106169f4d38970fbbdea8
11 changes: 4 additions & 7 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,10 @@ clean-targets: # directories to be removed by `dbt clean`
models:
python_wrench:
# Config indicated by + and applies to all files under models/example/
example:
+materialized: view

quoting:
database: false
schema: false
identifier: false
stage:
stg_fruit_user_input:
+materialized: table


seeds:
# to allow for lowercase
Expand Down
12 changes: 12 additions & 0 deletions models/fruit_join.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- Prices each user order line by joining the staged user input to the
-- staged price table on the matched fruit name.
-- LEFT JOIN keeps every input row; when no price row matches, the
-- price-side columns are NULL, so "fruit_name" and "total" come out NULL.
WITH

user_orders AS (
    SELECT *
    FROM {{ ref('stg_fruit_user_input') }}
),

fruit_prices AS (
    SELECT *
    FROM {{ ref('stg_fruit_prices_fact') }}
)

SELECT
    fruit_prices."fruit_name",
    user_orders."user_name",
    user_orders."quantity" * fruit_prices."cost" AS "total"
FROM user_orders
LEFT JOIN fruit_prices
    ON user_orders."fruit_name" = fruit_prices."fruit_name"
13 changes: 13 additions & 0 deletions models/fruit_summary.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- Sums the per-line "total" from fruit_join into one row per user,
-- largest total first. Rows without a "user_name" are excluded before
-- grouping; SUM skips NULL "total" values.
WITH priced_orders AS (
    SELECT *
    FROM {{ ref('fruit_join') }}
)

SELECT
    priced_orders."user_name",
    SUM(priced_orders."total") AS "total_final"
FROM priced_orders
WHERE priced_orders."user_name" IS NOT NULL
GROUP BY priced_orders."user_name"
-- Ordering by the output alias is equivalent to repeating the aggregate.
ORDER BY "total_final" DESC
43 changes: 43 additions & 0 deletions models/stage/scehma.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
version: 2

# Documentation and data tests for the staged user input and the final
# per-user summary model.
models:
  - name: stg_fruit_user_input
    description: prepare to fuzzymatch
    columns:
      - name: fruit_user_input
        quote: true
        description: what the user manually typed in the app
        tests:
          - not_null
      - name: quantity
        quote: true
        description: how many user wants to buy
        tests:
          - not_null
      - name: user_name
        quote: true
        description: the internal ID of the app user
        tests:
          - not_null
      - name: fruit_name
        quote: true
        description: best possible fuzzy match b/w user input and fact table
        tests:
          # A few unmatched inputs are tolerated: warn above 2 failing rows,
          # fail above 5. severity must be `error` for this to work -- with
          # `severity: warn` dbt skips error_if entirely and only evaluates
          # warn_if, so the error threshold would never fire.
          - not_null:
              config:
                severity: error
                error_if: ">5"
                warn_if: ">2"
  - name: fruit_summary
    description: total each customer definitely owes minus mismatches
    columns:
      - name: user_name
        quote: true
        description: the internal ID of the app user
        tests:
          - not_null
      - name: total_final
        quote: true
        description: total amount each user owes
        tests:
          - not_null
3 changes: 3 additions & 0 deletions models/stage/stg_fruit_prices_fact.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Staging pass-through: exposes every column of the fruit_prices_fact
-- node unchanged so downstream models reference the staged relation.
WITH source AS (
    SELECT *
    FROM {{ ref('fruit_prices_fact') }}
)

SELECT *
FROM source
16 changes: 2 additions & 14 deletions models/fruit_join.py → models/stage/stg_fruit_user_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@ def model(dbt, session):
packages=["fuzzywuzzy"]
)

df_input = dbt.ref("fruit_user_input").to_pandas()

df_price = dbt.ref("fruit_prices_fact").to_pandas()
df_price = dbt.ref("stg_fruit_prices_fact").to_pandas()

def custom_scorer(string):
'''
Expand All @@ -24,17 +22,7 @@ def custom_scorer(string):
else:
return None

df_final = (df_input
return (dbt.ref("fruit_user_input").to_pandas()
# make new col, `fruit_name`, with best match against actual table
.assign(fruit_name=lambda df: df["fruit_user_input"].apply(custom_scorer))
# join the actual fruit price table
.merge(df_price, on="fruit_name")
# calculate subtotal
.assign(total=lambda df: df.quantity * df.cost)
# find total for each user
.groupby("user_name")["total"].sum()
.reset_index()
.sort_values("total", ascending=False)
)

return df_final