diff --git a/globalprep/le/v2024/Dustin_wb_service_dataprep.Rmd b/globalprep/le/v2024/Dustin_wb_service_dataprep.Rmd
index fb4e1e9b..d11606bd 100644
--- a/globalprep/le/v2024/Dustin_wb_service_dataprep.Rmd
+++ b/globalprep/le/v2024/Dustin_wb_service_dataprep.Rmd
@@ -186,6 +186,16 @@ line_plot <- plotly::plot_ly(proportional_employement_nona, x = ~year, y = ~prop
          yaxis = list(title = "Number of people employed by the tourism sector"))
 
 line_plot
+
+
+penona_pivot <- proportional_employement_nona %>% 
+  pivot_longer(cols = c(proportional_tourism_employment, yearly_wages_ppp_adjusted_by_year), names_to = "metric", values_to = "value") 
+# %>% 
+  filter(admin_country_name.x %in% c("Indonesia", "Brazil", "Cyprus", "Argentina", "India"))
+
+ggplot(penona_pivot, aes(x = as.numeric(year), y = value, color = admin_country_name.x)) +
+  geom_line() + 
+  facet_wrap(~metric, scales = "free", ncol = 1)
 ```
 
 
diff --git a/globalprep/le/v2024/jobs_dataprep_anna.qmd b/globalprep/le/v2024/jobs_dataprep_anna.qmd
index d1c652af..e7128f64 100644
--- a/globalprep/le/v2024/jobs_dataprep_anna.qmd
+++ b/globalprep/le/v2024/jobs_dataprep_anna.qmd
@@ -52,6 +52,7 @@ un_pivot_clean <- un_jobs_pivot %>%
   mutate(year = str_remove_all(year, "x"))
 
 # using countrycode::countrycode
+# and following this example on Rpubs: https://rpubs.com/Teal_Emery/cleaning_intl_data_tips_and_tricks 
 
 library(countrycode)
 
@@ -125,3 +126,51 @@ ggplot(jobs_plot_df) +
 
 ```
 
+```{r}
+plotly::plot_ly(jobs_plot_df, x = ~year, y = ~thousand_jobs, color = ~rgn_name, type = "scatter", mode = "lines") %>% 
+  layout(title = "All Regions: Number of Tourism Jobs per Country over Time", 
+         xaxis = list(title = "Year"),
+         yaxis = list(title = "Number of people working in the tourism sector"))
+```
+
+
+India is seriously skewing the scales of this plot -- this, among general curiosity and conversation about how we should weight the number of jobs by the size of countries led us to create a proportional employment variable.
+
+
+
+
+```{r}
+
+jobs_wages_join <- left_join(jobs_rgn_2012, pop_ppp_converted, by = c("rgn_id", "year"))
+
+jobs_wages_proportional_employment <- jobs_wages_join %>% 
+  mutate(total_tourism_employees = thousand_employees*1000) %>% 
+  dplyr::relocate(total_tourism_employees, .before = total_population) %>% 
+  mutate(proportional_tourism_employment = total_tourism_employees/total_population) %>% 
+  select(-c(notes.x, notes.y))
+  
+proportional_employement_nona <- jobs_wages_proportional_employment %>% 
+  na.omit() %>% 
+  mutate(year = as.numeric(year))
+
+ggplot(proportional_employement_nona, aes(x = year, y = proportional_tourism_employment, color = rgn_id)) + 
+  geom_line()
+
+line_plot <- plotly::plot_ly(proportional_employement_nona, x = ~year, y = ~proportional_tourism_employment, color = ~admin_country_name.x, type = "scatter", mode = "lines")
+  layout(title = "All Regions: Proportional Employment Line Chart", 
+         xaxis = list(title = "Year"),
+         yaxis = list(title = "Number of people employed by the tourism sector"))
+
+line_plot
+
+
+penona_pivot <- proportional_employement_nona %>% 
+  pivot_longer(cols = c(proportional_tourism_employment, yearly_wages_ppp_adjusted_by_year), names_to = "metric", values_to = "value") 
+# %>% 
+  filter(admin_country_name.x %in% c("Indonesia", "Brazil", "Cyprus", "Argentina", "India"))
+
+ggplot(penona_pivot, aes(x = as.numeric(year), y = value, color = admin_country_name.x)) +
+  geom_line() + 
+  facet_wrap(~metric, scales = "free", ncol = 1)
+```
+
diff --git a/globalprep/le/v2024/livelihood_dataprep.qmd b/globalprep/le/v2024/livelihood_dataprep.qmd
new file mode 100644
index 00000000..4c703bdf
--- /dev/null
+++ b/globalprep/le/v2024/livelihood_dataprep.qmd
@@ -0,0 +1,438 @@
+---
+title: "Livelihood Dataprep: Tourism Jobs & Service Sector Wages"
+format: html
+---
+
+---
+title: "World Bank Service Earnings Data Prep"
+
+---
+
+
+#### Setup
+```{r}
+# load packages ----
+library(tidyverse) # <3 
+library(readxl) # for reading in excel data
+library(here) # for reproducible file paths
+library(janitor) # for cleaning column names
+library(countrycode) # for cleaning country codes
+
+
+# source common.R
+source(here("workflow/R/common.R"))
+
+# set file paths to data in Mazu ----
+raw_data_path <- file.path(here(dir_M, #"/home/shares/ohi"
+                                "git-annex", 
+                                "globalprep",
+                                "_raw_data"))
+
+un_wto_path <- file.path(here(raw_data_path, "UNWTO", "d2024"))
+
+service_data <- file.path(un_wto_path, "join_database_w_definitions.xlsx")
+
+un_job_data <- file.path(dir_M, "git-annex/globalprep/_raw_data/UNWTO/d2024/unwto-all-data-download_0.xlsx")
+
+```
+
+
+# Wage Data
+
+### Read in wage data from World Bank
+
+[](https://www.unwto.org/tourism-statistics/key-tourism-statistics)
+
+Downloaded June 26, 2024. Last updated (according to the website): January 31, 2024
+
+
+```{r}
+# read in .xlsx wage data from Mazu
+service_data_raw <- read_xlsx(service_data, skip = 3, col_names = TRUE)
+
+# quick view:
+head(service_data_raw)
+```
+
+The income column we're interested in is:
+Median Earnings for wage workers per month in service, local nominal currency
+
+
+First, let's tidy the data and select the columns we're interested in. Then we'll join with OHI regions to see what we're working with.
+
+```{r}
+# ---------- Tidy Data ------------------------
+service_data_clean <- service_data_raw %>% 
+  select("Country Name", "Country Code", "Region Code", "Year of survey", "Subsample",
+         "Total population", "Median Earnings for wage workers per month in service, local nominal currency") %>% 
+  janitor::clean_names() %>% 
+  mutate(year_of_survey = as.integer(year_of_survey),
+         country_name = as.factor(country_name)) %>% 
+  filter(subsample %in% c("All")) %>% # filtering to All because data also contains "Urban", "Young", etc.
+  select(-subsample)
+
+# quick look
+head(service_data_clean)
+
+# ----------- Join with OHI Regions -----------
+
+# read in OHI regions for joining
+region_names <- read_csv("https://raw.githubusercontent.com/OHI-Science/ohi-global/draft/eez/spatial/regions_list.csv") %>% 
+  janitor::clean_names()
+
+
+# prep column names for joining
+service_clean <- service_data_clean %>% 
+  rename(eez_iso3 = country_code,
+         admin_country_name = country_name)
+
+
+# join by country_code and eez_iso3
+service_join <- inner_join(x = service_clean, region_names, by = c("eez_iso3", "admin_country_name"))
+
+# note: many-to-many relationship bc. admin country name applies to multiple regions from region_names -- would need to note this in analysis etc.
+
+
+service_join
+
+
+
+# Filter out NAs for service wages
+
+service_no_na <- service_join %>% 
+  drop_na(median_earnings_for_wage_workers_per_month_in_service_local_nominal_currency)
+
+service_no_na
+```
+
+Preliminary plot of wage data
+
+```{r}
+# find most recent year of data
+service_newest <- service_join %>% 
+  group_by(admin_country_name) %>% 
+  summarize(max_year = max(year_of_survey))
+
+# filter to years >= 2013
+
+post_2013_data <- service_join %>% 
+  filter((year_of_survey) >= 2013)
+
+
+
+ggplot(data = post_2013_data) +
+  geom_line(aes(x = year_of_survey, y = median_earnings_for_wage_workers_per_month_in_service_local_nominal_currency)) +
+#  facet_wrap(~admin_country_name) +
+  theme_bw()
+
+```
+
+
+
+Adjust service wage data to be PPP USD using OECD data
+```{r}
+# read in ppp data
+oecd_data <- read_csv(file.path(here(un_wto_path, "SNA_TABLE4_27062024190625956.csv"))) %>% 
+  janitor::clean_names()
+
+```
+
+Join oecd and joined service data
+```{r}
+
+service_yearly <- service_no_na %>% 
+  mutate(median_annual_service_local_wages = median_earnings_for_wage_workers_per_month_in_service_local_nominal_currency 
+* 12) %>% 
+  select(-notes)
+  
+ppp_service_join <- left_join(service_yearly, oecd_data, by = c("year_of_survey" = "year", "eez_iso3" = "location"))
+
+
+# filter to ones we have "value" (conversion) for
+
+ppp_service <- ppp_service_join %>% 
+  drop_na(value) %>% 
+  rename(year = year_of_survey)
+
+# selecting relevant columns
+ppp_service_clean <- ppp_service %>% 
+  select(
+    # country info
+    eez_iso3, admin_country_name, rgn_name, rgn_id, admin_rgn_id,
+    # currency info
+    unit_code, unit,
+    # year
+    year,
+    # wages info
+    median_earnings_for_wage_workers_per_month_in_service_local_nominal_currency,
+    median_annual_service_local_wages,
+    # conversion per country
+    value,
+    
+    # type of PPP operation
+    transact,
+    transaction,
+    
+    # socio-demographic info
+    total_population
+  )
+
+ppp_service_clean
+
+
+
+# --------- Calculating Adjusted Wages -------------
+
+# --------- PPP GDP -------------
+# make PPP subset
+# using PPP GDP: covers both final consumption expenditure (household and government) and gross capital formation
+ppp_gdp <- ppp_service_clean %>% 
+  filter(transact %in% c("PPPGDP"))
+
+# check: number of unique countries, regions:
+unique(ppp_gdp$admin_country_name)
+cat("Number of unique countries in this subset", length(unique(ppp_gdp$admin_country_name)))
+# 37
+
+unique(ppp_gdp$rgn_name)
+cat("Number of unique regions in this subset", length(unique(ppp_gdp$rgn_name)))
+# 43
+
+
+# make new column of adjusted wages (by year by country)!
+
+# divide annual wage in local currency by PPP (local / USD, adjusted)
+
+adjusted_gdp <- ppp_gdp %>% 
+  mutate(adjusted_wages = median_annual_service_local_wages / value)
+
+
+# subset for viewing
+
+adjusted_gdp %>% select(admin_country_name, rgn_name, year, value, median_annual_service_local_wages, adjusted_wages)
+
+
+max(adjusted_gdp$adjusted_wages)
+mean(adjusted_gdp$adjusted_wages)
+
+```
+
+
+Super basic exploratory plots
+
+```{r}
+
+gdp_plot <- adjusted_gdp %>% 
+  ggplot() +
+  geom_line(aes(x = year, y = adjusted_wages,
+                color = admin_country_name)) +
+  theme_bw() +
+  labs(title = "Adjusted Service Wages using PPP GDP",
+       x = "Year",
+       y = "Adjusted Wages")
+
+gdp_plot
+```
+
+
+There is a clear, significant drop off in data points following 2016. We are still investigating as to why this is the case.
+
+
+
+```{r}
+
+gdp_year_subset <- adjusted_gdp %>% 
+  filter(year >=2013)
+
+
+gdp_subset_plot <- gdp_year_subset %>% 
+  ggplot() +
+  geom_line(aes(x = year, y = adjusted_wages,
+                color = admin_country_name)) +
+  theme_bw() +
+  labs(title = "Adjusted Service Wages using PPP GDP (2013-)",
+       x = "Year",
+       y = "Adjusted Wages")
+
+gdp_subset_plot
+```
+
+
+```{r}
+library(plotly)
+
+line_plot <- plot_ly(gdp_year_subset, x = ~as.numeric(year),
+                     y = ~adjusted_wages, color = ~admin_country_name,
+                     type = "scatter", mode = "lines") %>%
+  layout(title = "All Regions: Median Wages Line Chart",
+         xaxis = list(title = "Year"),
+         yaxis = list(title = "PPP GDP Adjusted Median Annual Wages (USD)"))
+
+line_plot
+```
+
+
+
+
+
+# Employment Data
+
+```{r}
+
+
+un_jobs_data_raw <- read_xlsx(un_job_data, col_names = TRUE, sheet = "Employment", skip = 2, na = c("", "..")) %>%
+  janitor::clean_names() 
+
+un_jobs_clean <- un_jobs_data_raw %>%
+  select(-c(1:3)) %>%
+  fill(1, .direction = "down") %>%
+  select(-c(x5)) %>% 
+  select(-c(3:6)) %>% 
+  filter(x6 %in% c("Total")) %>% 
+  select(-c(x6, x38))
+
+# Now we can pivot our data from wide to long -- get yeras in 1 column, values in another!
+un_jobs_pivot <- un_jobs_clean %>% 
+  pivot_longer(cols = 2:28,
+               names_to = "year", 
+               values_to = "thousand_jobs"
+               )
+
+
+# We still have a lot of tidying to do....
+
+# -------- Tidy ----------
+
+un_pivot_clean <- un_jobs_pivot %>% 
+  # clean country names
+  rename(country = basic_data_and_indicators) %>% 
+  mutate(country = str_to_title(country)) %>% 
+  # clean up years (still in the format of x2020)
+  mutate(year = str_remove_all(year, "x"))
+
+# using countrycode::countrycode
+# and following this example on Rpubs: https://rpubs.com/Teal_Emery/cleaning_intl_data_tips_and_tricks 
+
+country_regex_to_iso3c <- function(country_string) {
+  country_string %>%
+    countrycode::countrycode(origin = "country.name", destination = "iso3c", origin_regex = TRUE)
+}
+
+
+# adding iso3 codes
+un_pivot_clean_codes <- un_pivot_clean %>% 
+  mutate(iso3c = country_regex_to_iso3c(country))
+
+
+# read in OHI regions for joining
+region_names <- read_csv("https://raw.githubusercontent.com/OHI-Science/ohi-global/draft/eez/spatial/regions_list.csv") %>% 
+  janitor::clean_names()
+
+
+jobs_rgn_join <- left_join(region_names, un_pivot_clean_codes, by = c("eez_iso3" = "iso3c")) %>% 
+  select(-c(notes))
+
+
+# test: drop rows where thousand_jobs is na
+
+jobs_no_na <- jobs_rgn_join %>% 
+  drop_na(thousand_jobs)
+
+
+length(unique(jobs_no_na$country))
+length(unique(jobs_no_na$rgn_name))
+
+
+# drop country column
+jobs_rgn_join_ohi <- jobs_no_na %>% 
+  select(-country)
+```
+
+
+Preliminary plot
+
+```{r}
+jobs_data_final <- jobs_rgn_join_ohi %>% 
+  mutate(year = as.numeric(year))
+
+
+ggplot(jobs_data_final) +
+  geom_line(aes(x = year, y = thousand_jobs,
+                 color = rgn_name)) +
+  theme_bw() +
+  theme(legend.position = "none")
+  
+
+jobs_plot_df <- jobs_data_final %>% 
+  filter(year >= 2013) %>% 
+  mutate(year = as.integer(year))
+
+length(unique(jobs_plot_df$rgn_id))
+
+ggplot(jobs_plot_df) +
+  geom_line(aes(x = year, y = thousand_jobs,
+                 color = rgn_name)) +
+  theme_bw() +
+  labs(
+    title = "Preliminary plot of jobs (thousands) over time (2013-2021)",
+    x = "Year",
+    y = "Jobs (Thousands)"
+  ) +
+  theme(legend.position = "none")
+
+```
+
+
+Interactive `plotly` version:
+```{r}
+plotly::plot_ly(jobs_plot_df, x = ~year, y = ~thousand_jobs, color = ~admin_country_name, type = "scatter", mode = "lines") %>% 
+  layout(title = "All Regions: Number of Tourism Jobs per Country over Time", 
+         xaxis = list(title = "Year"),
+         yaxis = list(title = "Number of people working in the tourism sector"))
+```
+
+
+
+India is seriously skewing the scales of this plot -- this, among general curiosity and conversation about how we should weight the number of jobs by the size of countries led us to create a proportional employment variable.
+
+
+
+# COME BACK 
+(update names -- copied from other doc w diff variable names so this chunk won't work)
+
+
+```{r}
+
+jobs_wages_join <- left_join(jobs_rgn_2012, pop_ppp_converted, by = c("rgn_id", "year"))
+
+jobs_wages_proportional_employment <- jobs_wages_join %>% 
+  mutate(total_tourism_employees = thousand_employees*1000) %>% 
+  dplyr::relocate(total_tourism_employees, .before = total_population) %>% 
+  mutate(proportional_tourism_employment = total_tourism_employees/total_population) %>% 
+  select(-c(notes.x, notes.y))
+  
+proportional_employement_nona <- jobs_wages_proportional_employment %>% 
+  na.omit() %>% 
+  mutate(year = as.numeric(year))
+
+ggplot(proportional_employement_nona, aes(x = year, y = proportional_tourism_employment, color = rgn_id)) + 
+  geom_line()
+
+line_plot <- plotly::plot_ly(proportional_employement_nona, x = ~year, y = ~proportional_tourism_employment, color = ~admin_country_name.x, type = "scatter", mode = "lines")
+  layout(title = "All Regions: Proportional Employment Line Chart", 
+         xaxis = list(title = "Year"),
+         yaxis = list(title = "Number of people employed by the tourism sector"))
+
+line_plot
+
+
+penona_pivot <- proportional_employement_nona %>% 
+  pivot_longer(cols = c(proportional_tourism_employment, yearly_wages_ppp_adjusted_by_year), names_to = "metric", values_to = "value") 
+# %>% 
+  filter(admin_country_name.x %in% c("Indonesia", "Brazil", "Cyprus", "Argentina", "India"))
+
+ggplot(penona_pivot, aes(x = as.numeric(year), y = value, color = admin_country_name.x)) +
+  geom_line() + 
+  facet_wrap(~metric, scales = "free", ncol = 1)
+```
+
diff --git a/globalprep/le/v2024/wb_service_dataprep.Rmd b/globalprep/le/v2024/wb_service_dataprep.Rmd
index 317ce88e..0064e59b 100644
--- a/globalprep/le/v2024/wb_service_dataprep.Rmd
+++ b/globalprep/le/v2024/wb_service_dataprep.Rmd
@@ -284,21 +284,28 @@ prc_plot <- adjusted_prc %>%
   geom_line(aes(x = year, y = adjusted_wages,
                 color = admin_country_name)) +
   theme_bw() +
-  labs(title = "Adjusted Service Wages using PPP PRC")
+  labs(title = "Adjusted Service Wages using PPP PRC",
+       x = "Year",
+       y = "Adjusted Wages"
+       )
 
 gdp_plot <- adjusted_gdp %>% 
   ggplot() +
   geom_line(aes(x = year, y = adjusted_wages,
                 color = admin_country_name)) +
   theme_bw() +
-  labs(title = "Adjusted Service Wages using PPP GDP")
+  labs(title = "Adjusted Service Wages using PPP GDP",
+       x = "Year",
+       y = "Adjusted Wages")
 
 p41_plot <- adjusted_p41 %>% 
   ggplot() +
   geom_line(aes(x = year, y = adjusted_wages,
                 color = admin_country_name)) +
   theme_bw() +
-  labs(title = "Adjusted Service Wages using PPP P41")
+  labs(title = "Adjusted Service Wages using PPP P41",
+       x = "Year",
+       y = "Adjusted Wages")
 
 
 
@@ -307,3 +314,40 @@ gdp_plot
 p41_plot
 ```
 
+
+There is a clear, significant drop off in data points following 2016. We are still investigating as to why this is the case.
+
+
+
+```{r}
+
+gdp_year_subset <- adjusted_gdp %>% 
+  filter(year >=2013)
+
+
+gdp_subset_plot <- gdp_year_subset %>% 
+  ggplot() +
+  geom_line(aes(x = year, y = adjusted_wages,
+                color = admin_country_name)) +
+  theme_bw() +
+  labs(title = "Adjusted Service Wages using PPP GDP (2013-)",
+       x = "Year",
+       y = "Adjusted Wages")
+
+gdp_subset_plot
+```
+
+
+```{r}
+library(plotly)
+
+line_plot <- plot_ly(gdp_year_subset, x = ~as.numeric(year),
+                     y = ~adjusted_wages, color = ~admin_country_name,
+                     type = "scatter", mode = "lines") %>%
+  layout(title = "All Regions: Median Wages Line Chart",
+         xaxis = list(title = "Year"),
+         yaxis = list(title = "PPP GDP Adjusted Median Annual Wages (USD)"))
+
+line_plot
+```
+