Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wk 4 #58

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open

Wk 4 #58

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions greenery/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

target/
dbt_packages/
logs/
16 changes: 16 additions & 0 deletions greenery/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Welcome to your new dbt project!

### Using the starter project

Try running the following commands:
- dbt run
- dbt test


### Resources:
- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
- Find [dbt events](https://events.getdbt.com) near you
- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices

Empty file added greenery/analyses/.gitkeep
Empty file.
46 changes: 46 additions & 0 deletions greenery/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@

# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'greenery'
version: '1.0.0'
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: 'greenery'

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

target-path: "target"  # directory which will store compiled SQL files
clean-targets:         # directories to be removed by `dbt clean`
  - "target"
  - "dbt_packages"


# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models

# Folder-level defaults for this project's models. Any of these settings can be
# overridden in an individual model file using the `{{ config(...) }}` macro.
models:
  greenery:
    # Config indicated by + applies to every model under the named folder.
    staging:
      # Must be a materialization name dbt knows (e.g. view, table, incremental,
      # ephemeral); the previous value `tables` was not one of them.
      +materialized: table
    marts:
      # Runs after each marts model is built; `{{ this }}` resolves to that model.
      +post-hook:
        - "GRANT SELECT ON {{ this }} TO ROLE reporting;"

snapshots:
  greenery:
    target_schema: dbt_ple
    target_database: dev_db
Empty file added greenery/macros/.gitkeep
Empty file.
21 changes: 21 additions & 0 deletions greenery/macros/get_event_types.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{#
    get_event_types()

    Returns the distinct `event_type` values found in the `stg_events` model,
    sorted ascending.

    The model is addressed through `ref()` so the query follows the active
    target's database/schema instead of whatever relation named `stg_events`
    happens to be on the session's default search path.

    When `execute` is false (dbt's parse phase) no query results exist, so an
    empty list is returned.
#}
{% macro get_event_types() %}

    {% set event_types_query %}
        select distinct
            event_type
        from {{ ref('stg_events') }}
        order by event_type
    {% endset %}

    {% set results = run_query(event_types_query) %}

    {% if execute %}
        {# Return the first (and only) column of the result table #}
        {% set results_list = results.columns[0].values() %}
    {% else %}
        {% set results_list = [] %}
    {% endif %}

    {{ return(results_list) }}

{% endmacro %}
9 changes: 9 additions & 0 deletions greenery/macros/grant_select.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{#
    grant_select(role)

    Issues a GRANT SELECT on the relation held in `this` to the role named by
    `role`. The statement is run through `run_query`; its result is not used.
#}
{% macro grant_select(role) %}

    {% set grant_statement %}
GRANT SELECT ON {{ this }} TO ROLE {{ role }};
    {% endset %}

    {% do run_query(grant_statement) %}

{% endmacro %}
10 changes: 10 additions & 0 deletions greenery/macros/positive_values.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

{#
    Generic test: selects every row of `model` whose `column_name` is below
    zero, so any returned row is a failure. Zero itself is not selected.
#}
{% test positive_values(model, column_name) %}

    select *
    from {{ model }}
    where 0 > {{ column_name }}

{% endtest %}
27 changes: 27 additions & 0 deletions greenery/models/example/my_first_dbt_model.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@

/*
    Starter model from the dbt project template.

    The config() call below is set per model and takes precedence over the
    folder-level settings in dbt_project.yml. Swap 'table' for 'view' to try
    a different materialization.
*/

{{ config(materialized='table') }}

with seed_rows as (

    select 1 as id

    union all

    select null as id

)

select *
from seed_rows

/*
    To drop the row whose `id` is null, uncomment the filter below.
*/

-- where id is not null
6 changes: 6 additions & 0 deletions greenery/models/example/my_second_dbt_model.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

-- `ref` resolves another model's relation and records the dependency on it.

with first_model as (

    select *
    from {{ ref('my_first_dbt_model') }}

)

select *
from first_model
where id = 1
21 changes: 21 additions & 0 deletions greenery/models/example/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@

version: 2

# Properties for the two starter models in models/example/.
models:
  - name: my_first_dbt_model
    description: "A starter dbt model"
    columns:
      - name: id
        description: "The primary key for this table"
        tests: [unique, not_null]

  - name: my_second_dbt_model
    description: "A starter dbt model"
    columns:
      - name: id
        description: "The primary key for this table"
        tests: [unique, not_null]
134 changes: 134 additions & 0 deletions greenery/models/intermediate/_intermediate.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
version: 2

models:
# User attributes joined to their address attributes.
- name: int_user_address
  description: "All user information"
  columns:
    - name: user_id
      description: "unique user id"
      tests:
        - unique
        - not_null

    - name: first_name
      description: "user first name"

    - name: last_name
      description: "user last name"

    - name: email
      description: "user email"

    - name: phone_number
      description: "user phone number"

    - name: created_at
      description: "user created at date and time"
      tests:
        - not_null

    - name: updated_at
      description: "user information update date and time"
      tests:
        - not_null

    - name: address_id
      description: "user address id"
      tests:
        - not_null

    - name: address
      description: "user address"

    - name: zipcode
      description: "user zipcode"

    - name: state
      description: "user state"

    - name: country
      description: "user country"

# The model groups events by user_id, so every column below is a per-user value.
- name: int_user_events
  description: "user events data, one row per user"
  columns:
    - name: user_id
      description: "unique user id"
      tests:
        - unique
        - not_null

    - name: num_sessions
      description: "total number of distinct sessions per user id"

    - name: first_session_date
      description: "timestamp of the user's earliest event"
      tests:
        - not_null

    - name: last_session_date
      description: "timestamp of the user's latest event"
      tests:
        - not_null

    - name: total_session_duration
      description: "hours between the user's earliest and latest event"

    - name: checkout
      description: "number of checkout events for the user"

    - name: package_shipped
      description: "number of package_shipped events for the user"

    - name: page_view
      description: "number of page_view events for the user"

    - name: add_to_cart
      description: "number of add_to_cart events for the user"

    - name: total_orders
      description: "order count derived from the order_id on the user's events"

    - name: total_products
      description: "product count derived from the product_id on the user's events"

- name: int_user_orders
  description: "total order information by user"
  columns:
    - name: user_id
      description: "unique user_id"

    - name: total_orders
      description: "total orders each user purchased"
      tests:
        - positive_values

    - name: total_promos_used
      description: "total promo code used for all orders"

    - name: recent_order_date
      description: "most recent order date"

    - name: total_spent
      description: "total spent on all orders by user"
      # The key must be `tests` (plural); under `test` the check below was not
      # attached to the column.
      tests:
        - positive_values

- name: int_product_conv_rate
  description: "product conversion rate"
  columns:
    - name: product_id
      description: "product id"

    - name: product_name
      description: "product name"

    - name: total_orders
      description: "number of order item rows recorded for the product"
      tests:
        - positive_values

    - name: total_sessions
      description: "total sessions by product"

    - name: conversion_rate
      description: "total orders divide by total sessions by product"


37 changes: 37 additions & 0 deletions greenery/models/intermediate/int_product_conv_rate.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@

{{
    config(
        materialized='table'
    )
}}

-- Conversion rate per product: order-item rows divided by distinct sessions
-- that had at least one event tied to the product.

-- Distinct sessions per product. The inner join keeps only events whose
-- product_id matches a known product, and that same key is selected so the
-- final join to order items compares like with like (no URL parsing needed).
with product_sessions as (

    select
        events.product_id,
        products.name as product_name,
        count(distinct events.session_id) as total_sessions
    from {{ ref('stg_events') }} as events
    inner join {{ ref('stg_products') }} as products
        on events.product_id = products.product_id
    group by
        events.product_id,
        products.name

),

-- Order-item rows per product (rows with a null order_id are not counted).
product_orders as (

    select
        order_items.product_id,
        count(order_items.order_id) as total_orders
    from {{ ref('stg_order_items') }} as order_items
    group by order_items.product_id

)

select
    product_sessions.product_id,
    product_sessions.product_name,
    -- Products that were viewed but never ordered report 0 instead of NULL.
    coalesce(product_orders.total_orders, 0) as total_orders,
    product_sessions.total_sessions,
    -- nullif guards the case where every matching event has a null session_id.
    coalesce(product_orders.total_orders, 0)
        / nullif(product_sessions.total_sessions, 0) as conversion_rate
from product_sessions
left join product_orders
    on product_sessions.product_id = product_orders.product_id
order by conversion_rate desc
23 changes: 23 additions & 0 deletions greenery/models/intermediate/int_user_address.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{{ config(materialized='table') }}

-- User attributes enriched with address attributes. The left join keeps users
-- that have no matching address row; their address columns come back null.

with users as (

    select *
    from {{ ref('stg_users') }}

),

addresses as (

    select *
    from {{ ref('stg_addresses') }}

)

select
    users.user_id,
    users.first_name,
    users.last_name,
    users.email,
    users.phone_number,
    users.created_at,
    users.updated_at,
    users.address_id,
    addresses.address,
    -- Left-pad with zeros to five characters after casting to text.
    lpad(cast(addresses.zipcode as varchar), 5, '0') as zipcode,
    addresses.state,
    addresses.country
from users
left join addresses
    on users.address_id = addresses.address_id
22 changes: 22 additions & 0 deletions greenery/models/intermediate/int_user_events.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

{{
    config(
        materialized='table'
    )
}}

{#- The list of event types comes from the get_event_types macro; one count column is generated per type. -#}
{%- set event_types = get_event_types() -%}

-- One row per user, aggregated over that user's events.
select
    e.user_id,
    count(distinct e.session_id) as num_sessions,
    min(e.created_at) as first_session_date,
    max(e.created_at) as last_session_date,
    -- Hours between the user's earliest and latest event. The aggregates are
    -- repeated here rather than referring to the aliases defined above.
    datediff(hour, min(e.created_at), max(e.created_at)) as total_session_duration,
    -- distinct: several events can carry the same order_id / product_id, and
    -- these columns are meant to count orders and products, not events.
    count(distinct e.order_id) as total_orders,
    count(distinct e.product_id) as total_products
    {%- for event_type in event_types %}
    , sum(case when e.event_type = '{{ event_type }}' then 1 else 0 end) as {{ event_type }}
    {%- endfor %}
from {{ ref('stg_events') }} as e
group by e.user_id
Loading