Skip to content

Commit

Permalink
Merge branch 'gh-pages' of https://github.com/OHI-Science/ohiprep_v2024
Browse files Browse the repository at this point in the history
… into gh-pages
  • Loading branch information
sophialecuona committed Aug 19, 2024
2 parents 4c1a112 + 7a0d66c commit 108b7d1
Show file tree
Hide file tree
Showing 2 changed files with 923 additions and 16 deletions.
278 changes: 262 additions & 16 deletions globalprep/np/v2024/STEP2_np_weighting_prep.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ weighting scheme will be applied to the scores to determine how much of
each natural products layer affects the overall NP score when updating
in ohi global.


## Updates from previous year

- updated syntax to follow tidyverse style when reasonable; updated functions `read.csv()` and `write.csv()` to `read_csv()` and `write_csv()`

- updated file paths to be more reproducible, use of `here()` package and updated syntax to reduce system-dependent file paths

------------------------------------------------------------------------

# Data Source
Expand Down Expand Up @@ -279,7 +286,9 @@ orn_tonnes <- np_tonnes %>%
sw_fill_df <- region_product %>%
filter(product == "seaweeds")
sw_tonnes <- read_csv(here(current_np_dir, "int", "np_seaweeds_tonnes_weighting.csv")) %>%
sw_tonnes_raw <- read_csv(here(current_np_dir, "int", "np_seaweeds_tonnes_weighting.csv"))
sw_tonnes <- sw_tonnes_raw %>%
mutate(product = "seaweeds") %>%
group_by(rgn_id, year, product) %>%
summarise(tonnes = sum(tonnes, na.rm = TRUE)) %>%
Expand All @@ -298,7 +307,7 @@ sw_tonnes <- read_csv(here(current_np_dir, "int", "np_seaweeds_tonnes_weighting.
Now we will calculate the weights per each product. To do this we need
to multiply our average \$ value for each product \* tonnes of each
product, and then divide by the total per each region. We will also
assign year = 2022 so that this can be read into OHI-global (2022
assign year = (YYYY, e.g., 2022) so that this can be read into OHI-global (YYYY
corresponds to the year that these weights were calculated).

```{r}
Expand All @@ -321,12 +330,19 @@ prod_weights <- orn_tonnes %>%
filter(rgn_id != 213)
#write.csv(prod_weights, file.path(prep, "output/np_product_weights.csv"), row.names = FALSE)
write_csv(prod_weights, here(current_np_dir, "output", "np_product_weights.csv"))
readr::write_csv(prod_weights, here(current_np_dir, "output", "np_product_weights.csv"))
```

```{r}
##datacheck

### Datacheck

Note: we're not comparing data for a specific year, e.g., 2019, as we do in many data checks. The data processing that creates `np_product_weights.csv` fills the `year` column with the scenario year (e.g., 2024), which means that the previous year's `np_product_weights.csv` and the current year's weights cannot be compared by filtering to a specific year.


```{r fig.height=5, fig.width=8}
# Read in previous and current years' np_product_weights to compare
#old_prod_weights <- read_csv(file.path(prep, paste0("../v", prep_year-3, "/output/np_product_weights.csv")))
#old_prod_weights <- read_csv(file.path(prep, paste0("../v", prep_year-2, "/output/np_product_weights.csv")))
old_prod_weights <- read_csv(here(previous_np_dir, "output", "np_product_weights.csv"))
Expand All @@ -338,21 +354,45 @@ check <- prod_weights %>%
mutate(diff = new_weight - weight) %>%
left_join(rgns_eez)
plot(check$new_weight, check$weight)
abline(0,1, col="red")
# note -- these are across all years...
plot(check$new_weight, check$weight, main = "All Products")
abline(0,1, col = "red")
check_sw <- check %>%
filter(product == "seaweeds")
plot(check_sw$new_weight, check_sw$weight)
abline(0,1, col="red")
plot(check_sw$new_weight, check_sw$weight, main = "Seaweeds")
abline(0,1, col = "red")
check_orn <- check %>%
filter(product == "ornamentals") %>%
mutate(difference = new_weight - weight)
plot(check_orn$new_weight, check_orn$weight)
abline(0,1, col="red")
plot(check_orn$new_weight, check_orn$weight, main = "Ornamentals")
abline(0,1, col = "red")
check_fofm <- check %>%
filter(product == "fish_oil")
plot(check_fofm$new_weight, check_fofm$weight, main = "Fish Oil / Fish Meal (FOFM)")
abline(0, 1, col = "red") ## these change because of new SAUP fisheries catch data...
# max(check$diff, na.rm = TRUE)
# min(check$diff, na.rm = TRUE)
top_10_diffs <- check %>% arrange(desc(abs(diff))) %>% head(n = 10)
top_10_diffs %>% relocate(diff, .after = "rgn_id")
# rgn 164 = Belize
# ~ had a "seaweeds" weight of 0.00 in 2023, now has a weight of 0.98 ≈ 0.9845 diff (v2024)
# ~ had a "fish_oil" weight of 1.00 in 2023, now has a weight of 0.015 ≈ -0.9845 diff (v2024)
# 155 = Tonga
# ~ had an "ornamentals" weight of 0.97 in 2023, now has a weight of 0.00 ≈ -9.77 (v2024)
# 153 = Cook Islands
# 11 = Marshall Islands
# for Cook Islands and Marshall Islands, almost feels like values for `ornamentals` and `fish_oil` got flipped?
## check saint martin, Niue, Bonaire, Sint Eustasius - should decrease in production
## check djibouti - should increase in production
Expand All @@ -367,13 +407,219 @@ harvest_tonnes_usd_old <- #read_csv(file.path(prep, paste0("../v", prep_year-1,
test2 <- harvest_tonnes_usd_old %>%
filter(rgn_id == 46)
# test
# test2
# v2024: Djibouti (rgn 46) does increase in production
gf_orn <- read_csv(here(current_np_dir, "output", "np_ornamentals_harvest_tonnes_gf.csv"))
#gf_orn <- read_csv(here(current_np_dir, "output", "np_ornamentals_harvest_tonnes_gf.csv"))
check_fofm <- check %>%
filter(product == "fish_oil")
plot(check_fofm$new_weight, check_fofm$weight)
abline(0, 1, col = "red") ## these change because of new SAUP fisheries catch data...
```




Old old check:

```{r}
# Read in previous and current years' np_product_weights to compare
#old_prod_weights <- read_csv(file.path(prep, paste0("../v", prep_year-3, "/output/np_product_weights.csv")))
#old_prod_weights <- read_csv(file.path(prep, paste0("../v", prep_year-2, "/output/np_product_weights.csv")))
old_prod_weights <- read_csv(here(previous_np_dir, "output", "np_product_weights.csv"))
older_prod_weights <- read_csv(here("globalprep", "np", "v2022", "output", "np_product_weights.csv"))
check_older <- older_prod_weights %>%
rename("older_weight" = "weight") %>%
left_join(old_prod_weights, by = c("rgn_id", "product")) %>%
mutate(diff = weight - older_weight) %>%
left_join(rgns_eez)
plot(check_older$weight, check_older$older_weight, main = "v2023 vs v2022"); abline(0,1, col="red")
plot(check$new_weight, check$weight, main = "v2024 vs v2023"); abline(0,1, col="red")
```



```{r}
library(plotly)
library(RColorBrewer)
test_comp <- check_older %>% plotly::plot_ly(x = ~older_weight, y = ~weight,
type = "scatter", mode = "markers") %>%
add_trace(
# text = ~paste("Region ID:", rgn_id #, "<br>Product:", product
# ),
color = ~rgn_id,
# marker = list(
# color = ~product,
# ),
hoverinfo = "text+x+y",
hovertemplate = paste(
'<b>Region ID</b>: %{color}',
'<br><b>Old Weight</b>: %{x:.2f}',
'<br><b>New Weight</b>: %{y:.2f}<br>'
),
# showlegend = F
) %>%
# add AB line
add_trace(
x = c(0,1),
y = c(0,1),
mode = "lines",
line = list(color = "red"),
showlegend = FALSE,
hoverinfo = "none"
) %>%
layout(xaxis = list(hoverformat = '.3f'),
yaxis = list(hoverformat = '.3f'))
test_comp
```


- RAM lost some stocks, so some stocks get removed in v4.65 (2024 data download); also got some new stocks


### New datacheck: interactive plot

```{r}
# ======== Setup ==========================================================
# (mostly copied from earlier setup with addition of library(plotly))
# Read in packages necessary for visualization (in case you cleared your environment)
library(ggplot2)
library(plotly)
library(dplyr)
library(tidyr)
library(here)
# Set scenario year, reproducible file paths
scen_year_number <- 2024 # update this!!
scenario_year <- as.character(scen_year_number)
v_scen_year <- paste0("v", scenario_year)
current_np_dir <- here::here("globalprep", "np", v_scen_year)
previous_np_dir <- here::here("globalprep", "np", paste0("v", scen_year_number - 1))
# ----- Read in NP weights data -----
# Read in current, previous, and 2 years previous data
old_prod_weights <- read_csv(here(previous_np_dir, "output", "np_product_weights.csv"))
prod_weights <- read_csv(here(current_np_dir, "output", "np_product_weights.csv"))
check <- prod_weights %>%
rename("new_weight" = "weight") %>%
left_join(old_prod_weights, by = c("rgn_id", "product")) %>%
mutate(diff = new_weight - weight) %>%
left_join(rgns_eez)
#old_prod_weights <- read_csv(here(previous_np_dir, "output", "np_product_weights.csv"))
older_prod_weights <- read_csv(here("globalprep", "np", paste0("v", scen_year_number - 2), "output", "np_product_weights.csv"))
check_older <- older_prod_weights %>%
rename("older_weight" = "weight") %>%
left_join(old_prod_weights, by = c("rgn_id", "product")) %>%
mutate(diff = weight - older_weight) %>%
left_join(rgns_eez)
# ======== Plot new NP weights data vs. previous year's data ==================
# ---- Create ggplot object with `text` aes argument to prep for plotly ----- #
g_new <- ggplot(check, aes(x = weight, y = new_weight, color = product,
text = paste("<b>Region ID:</b>", rgn_id, "<br><b>Product:</b>", product #,
#"<br>Old weight:", older_weight, "<br>New weight:", weight
))) +
geom_point(alpha = 0.8) +
geom_abline(intercept = 0, slope = 1, alpha = 0.7,
color = "red", linetype = "dashed") +
scale_color_manual(values = c("fish_oil" = "#edae49", "seaweeds" = "#386641", "ornamentals" = "#83c5be")) +
labs(title = "v2024 vs v2023 Natural Product Weights by Product",
x = "v2023 Weight",
y = "v2024 Weight",
color = "Product") +
theme_minimal()
# ---- Convert to plotly ------------------------------ #
p_new <- ggplotly(g_new, tooltip = "text") %>%
layout(
hovermode = "closest",
xaxis = list(hoverformat = ".4f"), # customize number formatting
yaxis = list(hoverformat = ".4f") # (number of places after decimal)
)
# ------- Create custom hover template ---------------- #
p_new$x$data <- lapply(p_new$x$data, function(trace) {
if (trace$mode == "markers") {
trace$hovertemplate <- paste(
"%{text}<br>",
"<b>Old Weight</b>: %{x:.4f}<br>",
"<b>New Weight</b>: %{y:.4f}<br>"
)
}
return(trace)
})
# Display customized plot
p_new
# ========= Plot old vs. older NP weights data ================================
# Plot previous year's NP weights data vs. year before previous year's data
#color_scale <- c("fish_oils" = "#edae49", "seaweeds" = "#00798c", "ornamentals" = "#d1495b")
# ---- Create ggplot object with added `text` field to prep for plotly ------ #
g_old <- ggplot(check_older, aes(
x = older_weight, y = weight, color = product,
text = paste("<b>Region ID:</b>", rgn_id, "<br><b>Product:</b>", product #,
#"<br>Old weight:", older_weight, "<br>New weight:", weight
)
)) +
# add scatterplot points
geom_point(alpha = 0.8) +
# add ab line
geom_abline(intercept = 0, slope = 1, alpha = 0.7,
color = "red", linetype = "solid") +
# customize colors
scale_color_manual(values = c("fish_oil" = "#edae49", "seaweeds" = "#386641", "ornamentals" = "#83c5be")) +
# update labels
labs(title = "v2023 vs v2022 Natural Products Weights by Product",
x = "v2022 Weights",
y = "v2023 Weights",
color = "Product") +
# set base theme
theme_minimal()
# ---- Convert to plotly ------------------------------ #
p_old <- ggplotly(g_old, tooltip = "text") %>%
layout(
hovermode = "closest",
xaxis = list(hoverformat = ".4f"), # customize number formatting
yaxis = list(hoverformat = ".4f")
)
# ------- Custom hover template ----------------------- #
p_old$x$data <- lapply(p_old$x$data, function(trace) {
if (trace$mode == "markers") {
trace$hovertemplate <- paste(
"%{text}<br>",
"<b>Old Weight</b>: %{x:.4f}<br>",
"<b>New Weight</b>: %{y:.4f}<br>"
)
}
return(trace)
})
# Display customized plot!
p_old
```


Loading

0 comments on commit 108b7d1

Please sign in to comment.