forked from jbloomlab/SARS-CoV-2-RBD_DMS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
homolog_validations.Rmd
229 lines (199 loc) · 6.84 KB
/
homolog_validations.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
---
title: "Isogenic validation experiments"
output:
github_document:
html_preview: false
editor_options:
chunk_output_type: inline
---
```{r setup, message=FALSE, warning=FALSE, error=FALSE}
require("knitr")
knitr::opts_chunk$set(echo = T)
knitr::opts_chunk$set(dev.args = list(png = list(type = "cairo")))
#list of packages to install/load
packages = c("ggplot2", "data.table", "tidyverse", "dplyr", "broom", "gridExtra")
#install any packages not already installed
installed_packages <- packages %in% rownames(installed.packages())
if(any(installed_packages == F)){
install.packages(packages[!installed_packages])
}
#load packages
invisible(lapply(packages, library, character.only=T))
#make results directory
if(!file.exists("results")){
dir.create(file.path("results"))
}
```
## Experiment: isogenic validation of yeast-display RBD mutants and homologs
**2020-05-01**
WT SARS-CoV-2 Spike RBD was validated in triplicate.
**2020-05-08**
WT SARS-CoV-2 Spike RBD and 6 homologs were validated in isogenic experiments.
### Read in data table with mean bin at each concentration
```{r read_input}
dt <- read.csv(file="homolog_validations.csv", stringsAsFactors=F)
```
### Duplicate rows that correspond to May01 wild type triplicate experiment so we can fit models to the pooled replicates
```{r prep_triplicate_WT}
wildtype <- dt %>%
filter(expt=="200501") %>%
mutate(replicate = "pooled",
titration = "wt_pooled")
dt <- dt %>% rbind(wildtype)
print(nrow(dt))
```
### Calculate log-mean `geomean_FITC` and `FITC+` for each titration
```{r calculate_expression_values}
dt <- dt %>%
group_by(titration) %>%
mutate(mean_FITCpos = mean(FITCpos),
stderr_FITCpos = sd(FITCpos)/sqrt(length(FITCpos)),
log_geomean_FITC = log(geomean_FITC),
mean_logMFI_FITC = mean(log(geomean_FITC)),
stderr_logMFI_FITC = sd(log(geomean_FITC))/sqrt(length(geomean_FITC))
) %>%
ungroup()
head(dt, n=5)
```
### Use `broom` to get the results from fitting `nls` model by group
```{r fit_titrations}
nls_broom <- dt %>%
group_by(titration) %>%
do(tidy(nls(mean_bin ~ a*(conc_M/(conc_M+Kd))+b,
data=.,
start=list(a=3,b=1,Kd=1e-10),
lower=list(a=2,b=1,Kd=1e-15),
upper=list(a=3,b=1.5,Kd=1e-5),
algorithm="port"
)
)
)
dt <- dt %>%
merge(nls_broom %>%
filter(term=="Kd") %>%
select(estimate, std.error) %>%
rename(Kd="estimate",
Kd_SE="std.error"), by="titration")
head(dt, n=5)
```
### Write summary table to CSV file
```{r write_output_table}
isogenic_titrations_summary <- dt %>%
select(expt, titration, genotype, replicate, Kd, Kd_SE, mean_FITCpos, stderr_FITCpos, mean_logMFI_FITC, stderr_logMFI_FITC) %>%
unique()
isogenic_titrations_summary
write.csv(isogenic_titrations_summary,"./results/isogenic_titrations_summary.csv", row.names = FALSE)
```
### Now predict `mean_bin` using the models
```{r predict_y}
conc_M = c(1:20 %o% 10^(-13:-7)) # this should only generate 120 estimates per titration (faster!)
nls_predictions <- dt %>%
select(titration, expt, genotype, replicate) %>%
merge(nls_broom %>%
select(-statistic, -p.value, -std.error) %>%
spread(term, estimate),
by="titration") %>%
unique() %>%
merge(dt %>% select(titration, Kd_SE) %>% unique(), by="titration") %>%
merge(as.data.frame(conc_M), all=TRUE) %>%
mutate(mean_bin = a*(conc_M/(conc_M+Kd))+b)
head(nls_predictions, n=5)
```
### Make plots for titration curves for May08 homolog experiment
```{r titrations_plot_homolog_panel, fig.width=8,fig.height=8,fig.align="center", dpi=500,dev="png",message=FALSE,error=FALSE,warning=FALSE}
annotations <- dt %>%
filter(expt != "200501") %>%
select(titration, genotype, expt, replicate, Kd, Kd_SE) %>%
unique() %>%
remove_rownames()
ggplot(dt %>% filter(expt != "200501"), aes(conc_M, mean_bin)) +
geom_point() +
geom_line(data = nls_predictions %>% filter(expt != "200501"),
aes(conc_M, mean_bin),
color="red") +
scale_x_log10(lim=c(2e-14,2e-07)) +
xlab("ACE2 (M)") +
ylab("mean bin") +
facet_wrap(~ genotype) +
geom_text(
data = annotations,
mapping = aes(x = 2.5e-12,
y = 3.75,
label = c(paste(
"Kd=", format(Kd, digits=2),
"+/-", format(Kd_SE, digits=1), "M"))),
size=3) +
theme_bw()
ggsave(
"./results/homolog_titration.pdf",
scale = 1,
width = NA,
height = NA,
useDingbats=F
)
```
### Make plots just for May01 wildtype triplicate titrations
```{r wildtype_triplicate_titrations_plot, fig.width=5,fig.height=5,fig.align="center", dpi=500,dev="png",message=FALSE,error=FALSE,warning=FALSE}
annotations <- dt %>%
filter(expt == "200501") %>%
select(titration, genotype, expt, replicate, Kd, Kd_SE) %>%
unique() %>%
remove_rownames()
ggplot(dt %>% filter(expt == "200501"), aes(conc_M, mean_bin)) +
geom_point() +
geom_line(data = nls_predictions %>% filter(expt == "200501"),
aes(conc_M, mean_bin),
color="red") +
scale_x_log10(lim=c(2e-14,2e-07)) +
xlab("ACE2 (M)") +
ylab("mean bin") +
facet_wrap(~ replicate) + # this time facet on replicate
geom_text(
data = annotations,
mapping = aes(x = 2.5e-12,
y = 3.75,
label = c(paste(
"Kd=", format(Kd, digits=2),
"+/-", format(Kd_SE, digits=1), "M"))),
size=3) +
ggtitle("Wild Type SARS-CoV-2") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
ggsave(
"./results/wildtype_triplicate_titration.pdf",
scale = 1,
width = NA,
height = NA,
useDingbats = F
)
```
### Plot FITC+ and geomean_FITC by genotype
We will ignore the original WT isogenic titration experiment (May01) because it is not really possible to compare across experiments due to variability with induction, etc.
```{r expression_plots, fig.width=8,fig.height=5,fig.align="center", dpi=500,dev="png",message=FALSE,error=FALSE,warning=FALSE}
p1 <- ggplot(dt %>% filter(expt!="200501"), aes(genotype, FITCpos)) +
geom_boxplot() +
geom_point() +
scale_y_continuous(lim=c(0, 100)) +
ylab("% FITC+") +
facet_wrap(~ expt, scales="free_x") +
theme_bw() +
theme(text = element_text(size=12),
axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
p2 <- ggplot(dt %>% filter(expt!="200501"), aes(genotype, log_geomean_FITC)) +
geom_boxplot() +
geom_point() +
ylab("mean FITC+ log-intensity") +
facet_wrap(~ expt, scales="free_x") +
theme_bw() +
theme(text = element_text(size=12),
axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
grid.arrange(p1, p2, ncol=2, widths=c(8,8), heights=c(6))
g <- arrangeGrob(p1, p2, ncol=2, widths=c(8,8), heights=c(6))
ggsave(
"./results/homolog_FITC_expression.pdf",
g,
scale = 1,
width = NA,
height = NA
)
```