Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Data modelling - week 2 #66

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
4 changes: 4 additions & 0 deletions greenery/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

# dbt output and package directories (both are listed under clean-targets in dbt_project.yml)
target/
dbt_packages/
# presumably dbt's run-log directory — confirm
logs/
15 changes: 15 additions & 0 deletions greenery/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Welcome to your new dbt project!

### Using the starter project

Try running the following commands:
- dbt run
- dbt test


### Resources:
- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
- Find [dbt events](https://events.getdbt.com) near you
- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices
Empty file added greenery/analyses/.gitkeep
Empty file.
38 changes: 38 additions & 0 deletions greenery/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@

# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'greenery'
version: '1.0.0'
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: 'greenery'

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

target-path: "target"  # directory which will store compiled SQL files
clean-targets:         # directories to be removed by `dbt clean`
  - "target"
  - "dbt_packages"


# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models

# Staging models are built as views. Individual model files can override this
# with the `{{ config(...) }}` macro (the intermediate and mart models do so).
# The starter project's `example` entry was replaced because this project has
# no models/example/ directory, so that config applied to nothing.
models:
  greenery:
    # Config indicated by + and applies to all files under models/staging/
    staging:
      +materialized: view
Empty file added greenery/macros/.gitkeep
Empty file.
24 changes: 24 additions & 0 deletions greenery/models/Intermediate /int_session_events_agg.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{{
  config(
    materialized = 'table'
  )
}}

-- Event-type counters plus first/last event timestamps from the staged events.
-- NOTE(review): grouping by event_id as well as session_id yields one row per
-- (event_id, session_id) pair rather than one row per session. event_id is kept
-- because downstream marts select it; confirm the intended grain.

with events as (
    select * from {{ ref('stg_postgres_events') }}
),

final as (
    select
        event_id,
        session_id,
        sum(case when event_type = 'add_to_cart' then 1 else 0 end) as add_to_carts,
        sum(case when event_type = 'checkout' then 1 else 0 end) as checkouts,
        sum(case when event_type = 'package_shipped' then 1 else 0 end) as package_shippeds,
        sum(case when event_type = 'page_view' then 1 else 0 end) as page_views,
        min(event_created_at_utc) as first_session_event_at_utc,
        max(event_created_at_utc) as last_session_event_at_utc
    -- read from the CTE declared above instead of a second, separate ref() call
    from events
    group by
        event_id,
        session_id
)

select * from final
23 changes: 23 additions & 0 deletions greenery/models/Intermediate /int_session_events_macro_agg.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{{
  config(
    materialized = 'table'
  )
}}

{#- Distinct event_type values drive one counter column per type below.
    Uses dbt_utils.get_column_values; assumes the dbt_utils package is installed
    for this project (no packages.yml is visible here, so confirm). -#}
{%-
  set event_types = dbt_utils.get_column_values(
    table = ref('stg_postgres_events'),
    column = 'event_type',
    order_by = 'event_type asc'
  )
-%}

-- One row per (user_id, session_id) with a count of each event type.
-- The key columns use the names stg_postgres_events exposes (user_id, session_id).
select
    user_id,
    session_id
    {%- for event_type in event_types %},
    sum(case when event_type = '{{ event_type }}' then 1 else 0 end) as {{ event_type }}s
    {%- endfor %}
from {{ ref('stg_postgres_events') }}
group by
    user_id,
    session_id
27 changes: 27 additions & 0 deletions greenery/models/marts/fct_users_sessions.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{{
  config(
    materialized = 'table'
  )
}}

-- Event counters from int_session_events_agg enriched with the acting user's details.

with session_events_agg as (
    select * from {{ ref('int_session_events_agg') }}
),

-- event_id -> user_id lookup. An event_id is not a user_id, so the user has to be
-- resolved through the event row before joining to users.
-- Assumes event_id is unique in stg_postgres_events (otherwise this join fans out) — confirm.
event_users as (
    select
        event_id,
        user_id
    from {{ ref('stg_postgres_events') }}
),

users as (
    select * from {{ ref('stg_postgres_users') }}
)

select
    session_events_agg.session_id,
    session_events_agg.event_id,
    users.user_first_name,
    users.user_last_name,
    users.user_email,
    session_events_agg.page_views,
    session_events_agg.add_to_carts,
    session_events_agg.checkouts,
    session_events_agg.package_shippeds
from session_events_agg
left join event_users
    on session_events_agg.event_id = event_users.event_id
left join users
    on event_users.user_id = users.user_id
40 changes: 40 additions & 0 deletions greenery/models/marts/product/fct_user_sessions.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{{
  config(
    materialized = 'table'
  )
}}

-- Event counters enriched with user details and the length of the owning session.

-- First and last event timestamp per session. stg_postgres_events already exposes
-- the timestamp as event_created_at_utc, so no cast or alias is needed inside the aggregate.
with session_length as (
    select
        session_id,
        min(event_created_at_utc) as first_event,
        max(event_created_at_utc) as last_event
    from {{ ref('stg_postgres_events') }}
    group by session_id
),

session_events_agg as (
    select * from {{ ref('int_session_events_agg') }}
),

-- event_id -> user_id lookup; an event_id cannot be compared with a user_id directly.
-- Assumes event_id is unique in stg_postgres_events (otherwise this join fans out) — confirm.
event_users as (
    select
        event_id,
        user_id
    from {{ ref('stg_postgres_events') }}
),

-- ref() so dbt resolves the relation name and records the dependency
users as (
    select * from {{ ref('stg_postgres_users') }}
)

select
    session_events_agg.session_id,
    session_events_agg.event_id,
    users.user_first_name,
    users.user_last_name,
    users.user_email,
    session_events_agg.page_views,
    session_events_agg.add_to_carts,
    session_events_agg.checkouts,
    session_events_agg.package_shippeds,
    session_length.first_event as first_session_event,
    session_length.last_event as last_session_event,
    datediff('minute', session_length.first_event, session_length.last_event) as session_length_minutes
from session_events_agg
left join event_users
    on session_events_agg.event_id = event_users.event_id
left join users
    on event_users.user_id = users.user_id
-- session metrics are keyed by session_id, not event_id
left join session_length
    on session_events_agg.session_id = session_length.session_id
11 changes: 11 additions & 0 deletions greenery/models/staging/postgres/postgres_model.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
version: 2

# Staging models built from the postgres source.
# stg_postgres_orders is not listed here: it has its own properties file
# (stg_postgres_orders.yml), and dbt allows only one properties entry per model.
models:
  - name: stg_postgres_addresses
  - name: stg_postgres_events
  - name: stg_postgres_order_items
  - name: stg_postgres_products
  - name: stg_postgres_promos
  - name: stg_postgres_users

17 changes: 17 additions & 0 deletions greenery/models/staging/postgres/postgres_source.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
version: 2

# Raw tables referenced by the staging models through source('postgres', ...).
sources:
  - name: postgres
    database: raw
    schema: public
    tables:
      - name: orders
      - name: order_items
      - name: addresses
      - name: products
      - name: events
      - name: promos
      - name: users



14 changes: 14 additions & 0 deletions greenery/models/staging/postgres/stg_postgres_addresses.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- Staging model for the postgres addresses source: prefixes the descriptive
-- columns with address_ and leaves address_id untouched.
with source_rows as (
    select * from {{ source('postgres', 'addresses') }}
),

staged as (
    select
        address_id,
        address as address_line_1,
        zipcode as address_zipcode,
        state   as address_state,
        country as address_country
    from source_rows
)

select * from staged
18 changes: 18 additions & 0 deletions greenery/models/staging/postgres/stg_postgres_events.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Staging model for the postgres events source: renames page_url and exposes
-- created_at as a timestamp_ntz column named event_created_at_utc.
with source_rows as (
    select * from {{ source('postgres', 'events') }}
),

staged as (
    select
        event_id,
        session_id,
        user_id,
        page_url as event_page_url,
        cast(created_at as timestamp_ntz) as event_created_at_utc,
        event_type,
        order_id,
        product_id
    from source_rows
)

select * from staged
14 changes: 14 additions & 0 deletions greenery/models/staging/postgres/stg_postgres_order_items.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- Staging model for the postgres order_items source. Adds order_items_id, an
-- md5 hash of order_id concatenated with product_id.
with source_rows as (
    select * from {{ source('postgres', 'order_items') }}
),

staged as (
    select
        order_id,
        product_id,
        quantity as order_item_quantity,
        md5(concat(order_id, product_id)) as order_items_id
    from source_rows
)

select * from staged
22 changes: 22 additions & 0 deletions greenery/models/staging/postgres/stg_postgres_orders.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
-- Staging model for the postgres orders source. promo_id is md5-hashed, and
-- the three timestamp columns are cast to timestamp_ntz and given a _utc suffix.
with source_rows as (
    select * from {{ source('postgres', 'orders') }}
),

staged as (
    select
        order_id,
        user_id,
        md5(promo_id) as promo_id,
        address_id,
        cast(created_at as timestamp_ntz) as created_at_utc,
        order_cost,
        shipping_cost,
        order_total,
        tracking_id,
        shipping_service,
        cast(estimated_delivery_at as timestamp_ntz) as estimated_delivery_at_utc,
        cast(delivered_at as timestamp_ntz) as delivery_at_utc,
        status
    from source_rows
)

select * from staged
31 changes: 31 additions & 0 deletions greenery/models/staging/postgres/stg_postgres_orders.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
version: 2

# Properties for the staged orders model.
# dbt allows a model to be described in only one properties file, so
# stg_postgres_orders must not also be listed in another file
# (for example postgres_model.yml).
models:
  - name: stg_postgres_orders
    description: all orders, all the time!
    columns:
      - name: order_id
        description: unique identifier for an order
        tests:
          - not_null
      - name: user_id
        description: user identifier related to who placed the order
      - name: order_cost
        description: cost of the order less shipping
      # column names below match what stg_postgres_orders.sql selects
      - name: shipping_cost
        description: shipping cost of the order
      - name: order_total
        description: total order cost



14 changes: 14 additions & 0 deletions greenery/models/staging/postgres/stg_postgres_products.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- Staging model for the postgres products source: prefixes name, price and
-- inventory with product_.
with source_rows as (
    select * from {{ source('postgres', 'products') }}
),

staged as (
    select
        product_id,
        name      as product_name,
        price     as product_price,
        inventory as product_inventory
    from source_rows
)

select * from staged
13 changes: 13 additions & 0 deletions greenery/models/staging/postgres/stg_postgres_promos.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- Staging model for the postgres promos source. The raw promo_id is kept as
-- promo_name, and its md5 hash is exposed as promo_id.
with source_rows as (
    select * from {{ source('postgres', 'promos') }}
),

staged as (
    select
        md5(promo_id) as promo_id,
        promo_id      as promo_name,
        discount      as promo_discount,
        status        as promo_status
    from source_rows
)

select * from staged
17 changes: 17 additions & 0 deletions greenery/models/staging/postgres/stg_postgres_users.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- Staging model for the postgres users source: prefixes the profile columns
-- with user_ and casts created_at / updated_at to timestamp_ntz.
with source_rows as (
    select * from {{ source('postgres', 'users') }}
),

staged as (
    select
        user_id,
        first_name   as user_first_name,
        last_name    as user_last_name,
        email        as user_email,
        phone_number as user_phone_number,
        cast(created_at as timestamp_ntz) as user_created_at_utc,
        cast(updated_at as timestamp_ntz) as user_updated_at_utc,
        address_id
    from source_rows
)

select * from staged
Empty file added greenery/seeds/.gitkeep
Empty file.
Empty file added greenery/snapshots/.gitkeep
Empty file.
Loading