This repository has been archived by the owner on Jun 1, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7
/
5_data_munge.yml
132 lines (111 loc) · 4.54 KB
/
5_data_munge.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# remake/scipiper pipeline for phase 5 (data munge): limits the pulled data to
# stream sites, applies a coarse QA, reduces to daily values, combines the
# sources, and writes summary files.
#
# Conventions used in the commands below:
#   - `target_name` is replaced by remake with the name of the target being
#     built.
#   - I(...) marks a literal R value rather than a reference to another target.
#   - Quoted paths are file targets; bare names (e.g. min_value) are object
#     targets.
#   - Paths ending in .ind follow scipiper's indicator-file convention.
#   - Multi-line commands use a folded scalar (>-) with every line at the same
#     indent, so each command is read as a single line of R.

# Target built when no target is named explicitly.
target_default: 5_data_munge

# R packages attached before any command runs.
packages:
  - scipiper
  - dplyr
  - tidyverse
  - maps
  - sf

# R scripts sourced to provide the functions called by the commands below.
sources:
  - 5_data_munge/src/munge_data_files.R
  - 5_data_munge/src/munge_wqp_files.R
  - 5_data_munge/src/qaqc_daily_data.R
  - 5_data_munge/src/qaqc_groups_summaries.R

targets:
  # Group target: requesting it builds the final outputs listed here (and,
  # transitively, everything those outputs depend on).
  5_data_munge:
    depends:
      - 5_data_munge/out/daily_temperatures_summary.csv
      - 5_data_munge/out/all_sites.rds.ind
      - 5_data_munge/out/daily_temperatures_qaqc.rds.ind
      - 5_data_munge/out/flagged_temperature_summary.csv

  # -- Coarse QA thresholds shared by the munge commands below -- #
  # For WQP and NWIS uv: first do a coarse QA, then reduce to daily.
  # For NWIS dv: do the coarse QA only.
  # NOTE(review): units are not stated in this file; presumably degrees
  # Celsius -- confirm against the munge functions.
  # The c(...) wrappers are kept exactly as originally written.
  min_value:
    command: c(I(-0.5))

  max_value:
    command: I(35)

  # Only passed to the two WQP munge commands in this file.
  max_daily_range:
    command: I(30)

  # Site types kept by the stream filter below.
  stream_types:
    command: c(I(c('Stream', 'Spring')))

  # -- WQP data -- #
  # first, limit to stream data
  5_data_munge/out/wqp_data_streams.rds.ind:
    command: >-
      filter_site_types(
      in_ind = '1_wqp_pull/out/wqp_data.rds.ind',
      sites_ind = '1_wqp_pull/inout/wqp_inventory.feather.ind',
      keep_types = stream_types,
      out_ind = target_name)

  # Resolve a few known issues regarding start/end dates:
  # drop records where start/end differ and there is more than one
  # observation per site-date. Try to recover observations whose collection
  # date is given in the comments field.
  5_data_munge/out/wqp_data_streams_datesres.rds.ind:
    command: >-
      resolve_statcodes(
      in_ind = '5_data_munge/out/wqp_data_streams.rds.ind',
      out_ind = target_name)

  # NOTE(review): no other target in this file references this output, so
  # building the default target will not build it -- confirm that is intended.
  5_data_munge/out/wqp_daily_depths.rds.ind:
    command: >-
      munge_wqp_withdepths(
      in_ind = '5_data_munge/out/wqp_data_streams_datesres.rds.ind',
      min_value, max_value, max_daily_range,
      out_ind = target_name)

  # This is the WQP output that feeds the combined daily data set below.
  5_data_munge/out/wqp_daily_nodepths.rds.ind:
    command: >-
      munge_wqp_withoutdepths(
      in_ind = '5_data_munge/out/wqp_data_streams_datesres.rds.ind',
      min_value, max_value, max_daily_range,
      out_ind = target_name)

  # -- NWIS dv and uv data -- #
  5_data_munge/out/nwis_daily.rds.ind:
    command: >-
      munge_nwis(
      dv_ind = '2_nwis_pull/out/nwis_dv_data.rds.ind',
      uv_ind = '2_nwis_pull/out/nwis_uv_data.rds.ind',
      min_value, max_value,
      out_ind = target_name)

  # -- EcoSHEDS data -- #
  5_data_munge/out/ecosheds_munged.rds.ind:
    command: >-
      munge_ecosheds(
      in_ind = '4_other_sources/out/ecosheds_data.rds.ind',
      sites_ind = '4_other_sources/out/ecosheds_sites.rds.ind',
      min_value, max_value,
      out_ind = target_name)

  # -- NorWeST data -- #
  5_data_munge/out/norwest_munged.rds.ind:
    command: >-
      munge_norwest(
      sites_ind = '4_other_sources/out/norwest_raw_site_data.rds.ind',
      dat_ind = '4_other_sources/out/norwest_raw_temp_data.feather.ind',
      min_value, max_value, out_ind = target_name)

  # -- bring it all together -- #
  # NOTE(review): wqp_pull_date, nwis_dv_pull_date and nwis_uv_pull_date are
  # bare names, so remake treats them as targets, but they are not defined in
  # this file and this file has no `include:` entry -- verify where they are
  # defined and that the file defining them is loaded with this one.
  5_data_munge/out/daily_temperatures.rds.ind:
    command: >-
      combine_all_dat(
      nwis_ind = '5_data_munge/out/nwis_daily.rds.ind',
      wqp_ind = '5_data_munge/out/wqp_daily_nodepths.rds.ind',
      ecosheds_ind = '5_data_munge/out/ecosheds_munged.rds.ind',
      norwest_ind = '5_data_munge/out/norwest_munged.rds.ind',
      out_ind = target_name,
      wqp_pull_date, nwis_dv_pull_date, nwis_uv_pull_date)

  # Combined site inventory from all sources (NWIS dv/uv, EcoSHEDS, WQP,
  # NorWeST).
  5_data_munge/out/all_sites.rds.ind:
    command: >-
      combine_all_sites(
      out_ind = target_name,
      nwis_dv_sites_ind = '2_nwis_pull/inout/nwis_dv_inventory.feather.ind',
      nwis_uv_sites_ind = '2_nwis_pull/inout/nwis_uv_inventory_reduced.feather.ind',
      ecosheds_sites_ind = '4_other_sources/out/ecosheds_sites.rds.ind',
      wqp_sites_ind = '1_wqp_pull/inout/wqp_inventory.feather.ind',
      norwest_sites_ind = '4_other_sources/out/norwest_raw_site_data.rds.ind')

  # clean up sites:
  #   - get rid of non-stream sites
  #   - get rid of sites outside of US states/territories
  5_data_munge/out/stream_sites_us.rds.ind:
    command: >-
      clean_sites(in_ind = '5_data_munge/out/all_sites.rds.ind',
      out_ind = target_name)

  # Summary of the combined daily data; built from the data before the
  # site filter and QAQC step below.
  5_data_munge/out/daily_temperatures_summary.csv:
    command: >-
      summarize_all_dat(
      in_ind = '5_data_munge/out/daily_temperatures.rds.ind',
      out_file = target_name)

  # filter to sites left in the stream_sites_us file, then
  # qaqc the daily temperature data.
  # NOTE(review): doy_lim, long_deg and lat_deg are literal tuning values whose
  # meaning is defined by qaqc_daily_temp_site_data -- see its source.
  5_data_munge/out/daily_temperatures_qaqc.rds.ind:
    command: >-
      qaqc_daily_temp_site_data(
      out_ind = target_name,
      temp_in_ind = '5_data_munge/out/daily_temperatures.rds.ind',
      site_in_ind = '5_data_munge/out/stream_sites_us.rds.ind',
      doy_lim = I(6), long_deg = I(1), lat_deg = I(1))

  # Summary file derived from the QAQC output above.
  5_data_munge/out/flagged_temperature_summary.csv:
    command: >-
      outliers_summary(
      in_ind = '5_data_munge/out/daily_temperatures_qaqc.rds.ind',
      out_file = target_name)