Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

week one project #53

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
# Analytics engineering with dbt

Template repository for the projects and environment of the course: Analytics engineering with dbt
Week 1 project questions and answers:
Q: How many users do we have?
A: 130

Q: On average, how many orders do we receive per hour?
A: 7.52

Q: On average, how long does an order take from being placed to being delivered?
A: 3.89 days

Q: How many users have only made one purchase? Two purchases? Three+ purchases?
A: 1 order - 25, 2 orders - 28, 3+ orders - 71

Q: On average, how many unique sessions do we have per hour?
A: 16.33

> Please note that this sets some environment variables so if you create some new terminals please load them again.

Expand Down
4 changes: 4 additions & 0 deletions greenery/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

target/
dbt_packages/
logs/
Empty file added greenery/analyses/.gitkeep
Empty file.
42 changes: 42 additions & 0 deletions greenery/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@

# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'greenery'
version: '1.0.0'
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: 'greenery'

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

target-path: "target" # directory which will store compiled SQL files
clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_packages"


# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models

# In this example config, we tell dbt to build all models in the example/ directory
# as views. These settings can be overridden in the individual model files
# using the `{{ config(...) }}` macro.
models:
greenery:
# Config indicated by + and applies to all files under models/example/
example:
+materialized: view

post-hook:
- "{{ grant(role='reporting') }}"

Empty file added greenery/macros/.gitkeep
Empty file.
10 changes: 10 additions & 0 deletions greenery/macros/grant_usage.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{% macro grant(role) %}
{#
    Grants `role` USAGE on the current schema and SELECT on the current
    relation (`this`) by sending both statements through run_query.
    Arguments:
        role: name of the role that receives the grants.
#}

{% set grant_statements %}
GRANT USAGE ON SCHEMA {{ schema }} TO ROLE {{ role }};
GRANT SELECT ON {{ this }} TO ROLE {{ role }};
{% endset %}

{# The query result is not needed, so it is executed without being captured. #}
{% do run_query(grant_statements) %}

{% endmacro %}
18 changes: 18 additions & 0 deletions greenery/macros/row_to_columns.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{% macro row_to_columns(table_name,column_name,count_column) %}
{#
    Emits one `count(case ...)` select-list expression per distinct value of
    `column_name` found in ref(table_name), each aliased `<value>_count`.
    Arguments:
        table_name:   model name passed to ref() to look up the distinct values.
        column_name:  column whose values become the new columns.
        count_column: expression counted when column_name matches the value.
#}

{% set pivot_values = dbt_utils.get_column_values(
    table = ref( table_name ),
    column = column_name
) %}
{% for pivot_value in pivot_values %}
count( case when {{ column_name }} = '{{ pivot_value }}'
            then {{ count_column }}
            else NULL
       end ) as {{ pivot_value }}_count
{{ "," if not loop.last else "" }}
{% endfor %}

{% endmacro %}

29 changes: 29 additions & 0 deletions greenery/models/example/marts/Core/core_models.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
version: 2

models:
- name: dim_products
description: "Products we sell"
columns:
- name: product_guid
description: "The primary key for this table"
tests:
- unique
- not_null

- name: dim_users
description: "Prospective and purchasing customers "
columns:
- name: user_guid
description: "The primary key for this table"
tests:
- unique
- not_null
- name: fact_orders
description: "Orders that have been placed "
columns:
- name: order_guid
description: "The primary key for this table"
tests:
- unique
- not_null

16 changes: 16 additions & 0 deletions greenery/models/example/marts/Core/dim_products.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{{ config(materialized='table') }}

-- Product dimension: selects the product key and name from src_products and
-- exposes price / inventory under product_-prefixed names.

with source_products as (
    select *
    from {{ ref('src_products') }}
)

select
    product_guid,
    product_name,
    price as product_price,
    inventory as product_inventory
from source_products
52 changes: 52 additions & 0 deletions greenery/models/example/marts/Core/dim_users.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{{ config(materialized='table') }}

-- User dimension: profile attributes from src_users, enriched with the
-- timestamps of each user's first placed and first delivered order as
-- ranked in fact_orders.

with

source_users as (
    select *
    from {{ ref('src_users') }}
),

-- Creation timestamp of the order(s) ranked first per user.
first_placed as (
    select distinct
        user_guid,
        created_at_tstamp_est as first_order_created_tstamp_est
    from {{ ref('fact_orders') }}
    where user_order_rank = 1
),

-- Delivery timestamp of the order(s) ranked first by delivery per user.
first_delivered as (
    select distinct
        user_guid,
        delivered_at_tstamp_est as first_order_delivered_tstamp_est
    from {{ ref('fact_orders') }}
    where user_delivered_rank = 1
)

-- Left joins keep users that have no orders (or no delivered orders) yet.
select
    su.user_guid,
    su.first_name,
    su.last_name,
    su.full_name,
    su.email,
    su.phone_number,
    su.created_at_tstamp_est as system_created_tstamp_est,
    su.updated_at_tstamp_est as system_updated_tstamp_est,
    fp.first_order_created_tstamp_est,
    fd.first_order_delivered_tstamp_est
from source_users as su
left join first_placed as fp
    on su.user_guid = fp.user_guid
left join first_delivered as fd
    on su.user_guid = fd.user_guid
58 changes: 58 additions & 0 deletions greenery/models/example/marts/Core/fact_orders.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{{ config(materialized='table') }}

-- Order fact table: passes through the order columns of int_fact_orders and
-- adds a delivery-compliance label plus per-user ordering helpers
-- (user_order_rank, user_most_recent_order_flag, user_delivered_rank).

with int_order_table as (
    select
        *
    from {{ ref('int_fact_orders') }}
)

,additions as (
    select

        --ids
        o.order_guid
        ,o.user_guid
        ,o.promo_guid
        ,o.address_guid
        ,o.tracking_guid

        --timestamps
        ,o.created_at_tstamp_est
        ,o.delivered_at_tstamp_est
        ,o.estimated_delivery_tstamp_est

        --user
        ,o.customer_full_name
        ,o.ship_address
        ,o.ship_zipcode
        ,o.ship_state
        ,o.ship_country

        --order details
        ,o.shipping_service
        ,o.status

        -- Branch order matters: the first two branches only match when both
        -- timestamps are non-null; the later branches cover the null cases.
        -- NOTE(review): current_timestamp() is compared with *_est columns;
        -- confirm the session time zone matches how those columns were built.
        ,case
            when o.estimated_delivery_tstamp_est < o.delivered_at_tstamp_est then 'Delivered Late'
            when o.estimated_delivery_tstamp_est >= o.delivered_at_tstamp_est then 'Delivered On Time'
            when o.delivered_at_tstamp_est is null and o.estimated_delivery_tstamp_est >= current_timestamp() then 'Pending delivery - On Time'
            when o.delivered_at_tstamp_est is null and o.estimated_delivery_tstamp_est < current_timestamp() then 'Pending delivery - Late'
            when o.estimated_delivery_tstamp_est is null then 'No estimated delivery given'
            else 'NA'
         end as delivery_timeframe_compliance

        -- Position of the order in the user's history by creation time
        -- (rank(): orders created at the same instant share a position).
        ,rank() over (partition by o.user_guid order by o.created_at_tstamp_est) as user_order_rank

        -- Exactly one order per user is flagged as most recent. row_number()
        -- with order_guid as tie-breaker is used instead of rank() so that two
        -- orders sharing a creation timestamp cannot both be flagged, which
        -- would duplicate users in models that join on this flag.
        ,case
            when row_number() over (
                    partition by o.user_guid
                    order by o.created_at_tstamp_est desc, o.order_guid desc
                 ) = 1
            then TRUE
            else FALSE
         end as user_most_recent_order_flag

        -- Delivery-order position; left null for orders not yet delivered.
        ,case
            when o.delivered_at_tstamp_est is not null
            then rank() over (partition by o.user_guid order by o.delivered_at_tstamp_est)
         end as user_delivered_rank

        --numbers
        ,o.order_cost
        ,o.order_discount_amt
        ,o.pre_discount_total
        ,o.customer_shipping_cost
        ,o.customer_order_total

    from int_order_table o

)

select
    *
from additions
12 changes: 12 additions & 0 deletions greenery/models/example/marts/Marketing/marketing_models.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@

version: 2

models:
- name: user_order_fact
description: "High level information about our users such as most recent order, how many total orders, and customer attributes "
columns:
- name: user_guid
description: "The primary key for this table"
tests:
- unique
- not_null
55 changes: 55 additions & 0 deletions greenery/models/example/marts/Marketing/user_order_fact.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{{ config(materialized='table') }}

-- User-level order summary: user attributes from dim_users, the shipping
-- details of the order flagged as most recent in fact_orders, and order
-- counts per user. Users without orders are kept with counts of 0.

with user_most_recent_orders as (
    select
        user_guid,
        ship_address,
        ship_state,
        ship_zipcode,
        ship_country,
        created_at_tstamp_est as last_order_tstamp_est
    from {{ ref('fact_orders') }} fo
    where user_most_recent_order_flag = TRUE
)

,user_info as (
    select
        *
    from {{ ref('dim_users') }}
)

-- Total and delivered order counts per user.
,user_order_history as (
    select
        user_guid,
        count(distinct order_guid) as order_count,
        count(distinct case when delivered_at_tstamp_est is not null then order_guid end) as delivered_order_count
    from {{ ref('fact_orders') }}
    -- Grouped by name rather than position so reordering the select list
    -- cannot silently change the grouping key.
    group by user_guid
)

,combined as (
    select
        ui.user_guid,
        ui.full_name,
        ui.email,
        ui.phone_number,
        ui.first_order_created_tstamp_est,
        ui.first_order_delivered_tstamp_est,
        uo.ship_address as last_ship_address,
        uo.ship_state as last_ship_state,
        uo.ship_zipcode as last_ship_zipcode,
        uo.ship_country as last_ship_country,
        uo.last_order_tstamp_est,
        -- Users with no rows in fact_orders get 0 instead of null.
        coalesce(oh.order_count, 0) as order_count,
        coalesce(oh.delivered_order_count, 0) as delivered_order_count
    from user_info ui
    left join user_order_history oh
        on ui.user_guid = oh.user_guid
    left join user_most_recent_orders uo
        on ui.user_guid = uo.user_guid
)

select
    *
from combined
49 changes: 49 additions & 0 deletions greenery/models/example/marts/Product/fact_page_views.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{{ config(materialized='table') }}

-- Page-view fact table: events from src_events whose event_type is
-- 'page_view' (case-insensitive), enriched with the product name from
-- dim_products and user attributes from dim_users.

with website_events as (
    select
        *
    from {{ ref('src_events') }}
    where lower(event_type) = 'page_view'
)

,products as (
    select
        *
    from {{ ref('dim_products') }}
)

,users as (
    select
        *
    from {{ ref('dim_users') }}
)

,combined as (
    select
        --about event
        e.event_guid
        ,e.session_guid
        ,e.created_at_tstamp_est as event_tstamp_est
        ,e.page_url

        --about user
        ,e.user_guid
        ,u.first_order_created_tstamp_est as user_first_order_tstamp_est
        ,u.email as user_email

        --about order
        -- Qualified with the events alias like every other column here, so
        -- adding an order_guid column to a joined dimension later cannot
        -- make this reference ambiguous.
        ,e.order_guid

        --about product
        ,e.product_guid
        ,p.product_name
    from website_events e
    -- Left joins keep events whose product or user has no dimension row.
    left join products p
        on e.product_guid = p.product_guid
    left join users u
        on e.user_guid = u.user_guid
)

select
    *
from combined
Loading