forked from jbloomlab/SARS-CoV-2-RBD_DMS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
antibody_epitopes.Rmd
359 lines (274 loc) · 26.8 KB
/
antibody_epitopes.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
---
title: "Mutational tolerance in antibody epitopes"
author: "Tyler Starr"
date: "5/12/2020"
output:
github_document:
toc: true
html_preview: false
editor_options:
chunk_output_type: inline
---
This notebook analyzes the mutational tolerance of residues within epitopes of different monoclonal antibodies. I suggest opening the PyMol session in the repo at `data/structures/surface_constraint_features.pse` when browsing this notebook, as I think the structures loaded in this session communicate a lot about the variation in these epitopes and their structural relationship. (And a few times, I'll allude to points where I feel like the plots are capturing what I can *see* in this structural representation...)
## Setup
```{r setup, message=FALSE, warning=FALSE, error=FALSE}
require("knitr")
knitr::opts_chunk$set(echo = T)
knitr::opts_chunk$set(dev.args = list(png = list(type = "cairo")))
#list of packages to install/load
packages = c("yaml","data.table","tidyverse","gridExtra","bio3d")
#install any packages not already installed
installed_packages <- packages %in% rownames(installed.packages())
if(any(installed_packages == F)){
install.packages(packages[!installed_packages])
}
#load packages
invisible(lapply(packages, library, character.only=T))
#read in config file
config <- read_yaml("config.yaml")
#read in file giving concordance between RBD numbering and SARS-CoV-2 Spike numbering
RBD_sites <- data.table(read.csv(file="data/RBD_sites.csv",stringsAsFactors=F))
#make output directory
if(!file.exists(config$antibody_epitopes_dir)){
dir.create(file.path(config$antibody_epitopes_dir))
}
```
Session info for reproducing environment:
```{r print_sessionInfo}
sessionInfo()
```
Read in tables of variant effects on binding and expression for single mutations to the SARS-CoV-2 RBD and for homolog RBDs.
```{r read_data}
homologs <- data.table(read.csv(file=config$homolog_effects_file,stringsAsFactors = F))
mutants <- data.table(read.csv(file=config$single_mut_effects_file,stringsAsFactors = F))
#rename mutants site indices to prevent shared names with RBD_sites, simplifying some downstream calculations that cross-index these tables
setnames(mutants, "site_RBD", "RBD_site");setnames(mutants, "site_SARS2", "SARS2_site")
#add mean, max, min mut effects per site annotations
RBD_sites[,mean_bind := mean(mutants[SARS2_site==site_SARS2 & wildtype != mutant & mutant != "*",bind_avg],na.rm=T),by=site_SARS2]
RBD_sites[,max_bind := max(mutants[SARS2_site==site_SARS2 & wildtype != mutant & mutant != "*",bind_avg],na.rm=T),by=site_SARS2]
RBD_sites[,min_bind := min(mutants[SARS2_site==site_SARS2 & wildtype != mutant & mutant != "*",bind_avg],na.rm=T),by=site_SARS2]
RBD_sites[,mean_expr := mean(mutants[SARS2_site==site_SARS2 & wildtype != mutant & mutant != "*",expr_avg],na.rm=T),by=site_SARS2]
RBD_sites[,max_expr := max(mutants[SARS2_site==site_SARS2 & wildtype != mutant & mutant != "*",expr_avg],na.rm=T),by=site_SARS2]
RBD_sites[,min_expr := min(mutants[SARS2_site==site_SARS2 & wildtype != mutant & mutant != "*",expr_avg],na.rm=T),by=site_SARS2]
```
## Compare mutational tolerance within antibody epitopes
We have annotated antibody epitope residues for eight mAbs with published structures -- seven of the eight mAbs were raised against SARS-CoV-1, but at least three of them cross-react with SARS-CoV-2 (CR3022, VHH72, S309), and more generally, they highlight the types of epitopes that SARS-related CoV RBDs can induce. The seventh, B38, was isolated from a SARS-CoV-2 convalescent patient.
Let's compare patterns of mutational sensitivity within each of the mAb epitopes, and compare to ACE2 contact residues for reference. Below, we output jitter plots that illustrate the range of individual mutational effects within each epitope (black points). The red diamond indicates the median effect of mutations within each epitope. We also output jiitter plots illustrating the *maximum* effect of any of the 19 mutations within each antibody epitope -- this reflects the most extreme form of constraint, where we see if a position has *any* mutations that are tolerated.
```{r violin_plot_epitope_mut_effects, fig.width=9, fig.height=6, fig.align="center",dpi=300,dev="png",echo=F}
#output long data table of mutants with a new column of "epitope" to facet by in violin plots (needed to do this way because some sites are duplicated among multiple epitopes)
mutants_ACE2 <- mutants[SARS2_site %in% RBD_sites[SARS2_ACE2_contact==T,site_SARS2],];mutants_ACE2$epitope <- "ACE2"
mutants_B38 <- mutants[SARS2_site %in% RBD_sites[epitope_B38==T,site_SARS2],];mutants_B38$epitope <- "B38"
mutants_80R <- mutants[SARS2_site %in% RBD_sites[epitope_80R==T,site_SARS2],];mutants_80R$epitope <- "80R"
mutants_m396 <- mutants[SARS2_site %in% RBD_sites[epitope_m396==T,site_SARS2],];mutants_m396$epitope <- "m396"
mutants_F26G19 <- mutants[SARS2_site %in% RBD_sites[epitope_F26G19==T,site_SARS2],];mutants_F26G19$epitope <- "F26G19"
mutants_S230 <- mutants[SARS2_site %in% RBD_sites[epitope_S230==T,site_SARS2],];mutants_S230$epitope <- "S230"
mutants_VHH72 <- mutants[SARS2_site %in% RBD_sites[epitope_VHH72==T,site_SARS2],];mutants_VHH72$epitope <- "VHH72"
mutants_CR3022 <- mutants[SARS2_site %in% RBD_sites[epitope_CR3022==T,site_SARS2],];mutants_CR3022$epitope <- "CR3022"
mutants_S309 <- mutants[SARS2_site %in% RBD_sites[epitope_S309==T,site_SARS2],];mutants_S309$epitope <- "S309"
mutants_epitope <- rbind(mutants_ACE2,mutants_B38,mutants_80R,mutants_m396,mutants_F26G19,mutants_S230,mutants_VHH72,mutants_CR3022, mutants_S309);mutants_epitope$epitope <- factor(mutants_epitope$epitope,levels=c("ACE2","B38","80R","m396","F26G19","S230","VHH72","CR3022","S309"))
#output long data table of sites with a new column of "epitope" to facet by in violin plots for max stat
sites_ACE2 <- RBD_sites[SARS2_ACE2_contact==T,]; sites_ACE2$epitope <- "ACE2"
sites_B38 <- RBD_sites[epitope_B38==T,]; sites_B38$epitope <- "B38"
sites_80R <- RBD_sites[epitope_80R==T,]; sites_80R$epitope <- "80R"
sites_m396 <- RBD_sites[epitope_m396==T,]; sites_m396$epitope <- "m396"
sites_F26G19 <- RBD_sites[epitope_F26G19==T,]; sites_F26G19$epitope <- "F26G19"
sites_S230 <- RBD_sites[epitope_S230==T,]; sites_S230$epitope <- "S230"
sites_VHH72 <- RBD_sites[epitope_VHH72==T,]; sites_VHH72$epitope <- "VHH72"
sites_CR3022 <- RBD_sites[epitope_CR3022==T,]; sites_CR3022$epitope <- "CR3022"
sites_S309 <- RBD_sites[epitope_S309==T,]; sites_S309$epitope <- "S309"
sites_epitope <- rbind(sites_ACE2,sites_B38,sites_80R,sites_m396,sites_F26G19,sites_S230,sites_VHH72,sites_CR3022,sites_S309);sites_epitope$epitope <- factor(sites_epitope$epitope,levels=c("ACE2","B38","80R","m396","F26G19","S230","VHH72","CR3022","S309"))
set.seed(198)
p1 <- ggplot(mutants_epitope[!is.na(bind_avg),],aes(x=epitope,y=bind_avg))+
geom_jitter(width=0.2, alpha=0.2)+stat_summary(fun.y=median,geom="point",size=3,color="red",shape=18)+
xlab("epitope")+ylab("delta_log10Ka,app (binding)")+
theme_classic()
p2 <- ggplot(mutants_epitope[!is.na(expr_avg),],aes(x=epitope,y=expr_avg))+
geom_jitter(width=0.2, alpha=0.2)+stat_summary(fun.y=median,geom="point",size=3,color="red",shape=18)+
xlab("epitope")+ylab("delta_meanF (expression)")+
theme_classic()
p3 <- ggplot(sites_epitope[!is.na(max_bind),],aes(x=epitope,y=max_bind))+
geom_jitter(width=0.2, alpha=0.5)+stat_summary(fun.y=median,geom="point",size=3,color="red",shape=18)+
xlab("epitope")+ylab("maximum per-site delta_log10Ka,app (binding)")+
theme_classic()
p4 <- ggplot(sites_epitope[!is.na(max_expr),],aes(x=epitope,y=max_expr))+
geom_jitter(width=0.2, alpha=0.5)+stat_summary(fun.y=median,geom="point",size=3,color="red",shape=18)+
xlab("epitope")+ylab("maximum per-site delta_meanF (expression)")+
theme_classic()
grid.arrange(p1,p2,p3,p4,ncol=2)
invisible(dev.print(pdf, paste(config$antibody_epitopes_dir,"/distribution-mut-effect_per_epitope.pdf",sep="")))
```
Integrate these plots onto a single scatter plot, showing the median mutational effect on binding versus expression for residues in each antibody epitope
```{r scatterplot_epitope_mut_effects, fig.width=4.5, fig.height=4.5, fig.align="center",dpi=300,dev="png",echo=F}
x <- c(); for(i in unique(mutants_epitope$epitope)){x <- c(x, median(mutants_epitope[epitope==i,expr_avg],na.rm=T))}
y <- c(); for(i in unique(mutants_epitope$epitope)){y <- c(y, median(mutants_epitope[epitope==i,bind_avg],na.rm=T))}
x.sd <- c(); for(i in unique(mutants_epitope$epitope)){x.sd <- c(x.sd, sd(mutants_epitope[epitope==i,expr_avg],na.rm=T))}
y.sd <- c(); for(i in unique(mutants_epitope$epitope)){y.sd <- c(y.sd, sd(mutants_epitope[epitope==i,bind_avg],na.rm=T))}
plot(x,y,xlab="mean mutational effect on expression",ylab="mean mutational effect on binding",pch=16,cex=1.3,col="#00000090",ylim=c(-1,0),xlim=c(-0.6,0))
# #error bars for SD
# arrows(x0=x-x.sd, y0=y,x1=x+x.sd, y1=y,code=3,angle=90,length=0.02)
# arrows(x0=x, y0=y-y.sd,x1=x,y1=y+y.sd,code=3,angle=90,length=0.02)
invisible(dev.print(pdf, paste(config$antibody_epitopes_dir,"/scatter_epitopes_mean-bind-v-mean-expr.pdf",sep="")))
```
We can see for the first 5 antibodies, all of which bind epitopes mostly or fully within the RBM, that epitope sites exhibit mutational constraint with regards to binding (though not as much constraint as on the ACE2-contact residues themselves). There is *perhaps* some visual variation among antibody epitopes in the severity of the average mutational effect to epitope contact positions. (However, statistically, there is not variation in the median effect of mutations on binding in these five epitopes, Kruskal-Wallis ANOVA P-value `r round(kruskal.test(mutants_epitope[epitope %in% c("B38", "80R", "m396", "F26G19", "S230"),bind_avg] ~ mutants_epitope[epitope %in% c("B38", "80R", "m396", "F26G19", "S230"),epitope])$p.value,digits=3)`). The average mutation in these RBM motif epitope sites incurs a ~0.5-0.6 reduction in log<sub>10</sub>(*K*<sub>A,app</sub>) ACE2-binding affinity, which is likely meaningful (more extreme than SARS-CoV-1 reduction in affinity (0.25 log10Ka units), on par with LYRa11 (0.5 log10Ka units) which can still promote huACE2-mediated cellular entry, but with reduced quantitative in vitro infectivity according to Letko et al. 2020).
As we can see in the structural alignment of mAbs bound to the RBD in the PyMol session file I point to at the beginning of this notebook, these mAbs all clue into a couple "patches" of constrained residues in the RBM. 80R and B38 both bind to two patches at the direct ACE2 contact interface that are mutationally constrained, centered around residues ~Y489 and ~Y505. (These two mAbs also seem to have the most contact residues, suggesting they achieve the most "engagement" compared to other mAbs, apparently without sacrificing by engaging lots of mutationally tolerant sites.) S230 engages just the ~Y489 patch. F26G19 and m396, on the other hand, focus on this ~Y505 lobe, and continue to follow two channels of mutational constraint down the "side" of the RBD, toward ~N437 and ~G404, respectively. These two "modes" of RBM recognition do seem to occupy the major patches of mutational constraint in this region, suggesting refining the approaches of these two types of RBM-directed mAbs might be fruitful.
The other three antibodies, VHH72, CR3022, and S309, bind epitopes within the "core RBD", meaning mutational constraint on expression might be more relevant -- and we do see that mutations in these epitopes suffer expression defects, with an average mutation defect of ~0.5 log-MFI units, which is substantial relative to the tight range of expression seen among our panel of RBD homologs. We can see that much of the core RBD surface is constrained with respect to mutational effects on expression, along a lateral "belt" around the middle of the core RBD, though many of the more mutationally sensitive positions are buried in little 'crevices' on the RBD surface, compared to the more mutationally tolerant knobs that jut out. CR3022 does hone in one of these more obvious expression-constrained 'patches', centered around residue ~Y380. VHH72 binds to a seemingly less "red" patch just to the side of CR3022, centered around ~S375. S309 engages a partially constrained patch near the N343 glycan, on the opposite "face" of the RBD compared to the other 7 mAbs. This face faces outward in the "down" RBD conformation in the full spike trimer structure -- whereas the main face is actually buried in this structure, but becomes exposed when the RBD samples the "up" conformation.. Our structural analysis points to another patch of mutational constraint on this S309-side "exposed" surface, for which we have *not* seen any mAbs described (though perhaps we should look?), centered around E465. This region contacts the NTD/S<sup>A</sup> domain in the closed full spike quaternary context (but is exposed in the RBD-up conformation), which could enforce additional mutational constraint on this hypothetical epitope. Could be intersting to look more into!
For a more high-resolution look at the effects of particular mutations within each antibody epitope, we can visualize heatmaps of the sites that constitute each antibody's epitope. (Not sure there's much interesting here, haven't looked in detail but keeping it here for now anyway.) We visualize mutational effects on *binding* for the RBM-directed mAbs, and *expression* for the core RBD mAbs.
```{r heatmaps_epitope_mut_effects_RBM_mAbs, fig.width=16, fig.height=12, fig.align="center",dpi=300,dev="png",echo=F}
mutants_epitope$mutant <- factor(mutants_epitope$mutant, levels=c("*","C","P","G","V","M","L","I","A","F","W","Y","T","S","N","Q","E","D","H","K","R"))
#add character vector indicating wildtype to use as plotting symbols for wt
mutants_epitope[,wildtype_indicator := ""]
mutants_epitope[mutant==wildtype,wildtype_indicator := "x"]
mutants_epitope$SARS2_site <- as.factor(mutants_epitope$SARS2_site)
p1 <- ggplot(mutants_epitope[mutant!="*" & epitope=="ACE2",],aes(SARS2_site,mutant))+geom_tile(aes(fill=bind_avg))+
scale_fill_gradientn(colours=c("#A94E35","#F48365","#FFFFFF","#7378B9","#383C6C"),limits=c(-5,0.5),values=c(0,2.5/5.5,5/5.5,5.25/5.5,5.5/5.5),na.value="gray")+
scale_x_discrete(expand=c(0,0))+
labs(x="ACE2 contact site",y="mutant")+theme_classic(base_size=9)+
coord_equal()+theme(axis.text.x = element_text(angle=45,hjust=1))+
geom_text(aes(label=wildtype_indicator),size=2,color="gray10")
p2 <- ggplot(mutants_epitope[mutant!="*" & epitope=="B38",],aes(SARS2_site,mutant))+geom_tile(aes(fill=bind_avg))+
scale_fill_gradientn(colours=c("#A94E35","#F48365","#FFFFFF","#7378B9","#383C6C"),limits=c(-5,0.5),values=c(0,2.5/5.5,5/5.5,5.25/5.5,5.5/5.5),na.value="gray")+
scale_x_discrete(expand=c(0,0))+
labs(x="B38 epitope site",y="mutant")+theme_classic(base_size=9)+
coord_equal()+theme(axis.text.x = element_text(angle=45,hjust=1))+
geom_text(aes(label=wildtype_indicator),size=2,color="gray10")
p3 <- ggplot(mutants_epitope[mutant!="*" & epitope=="80R",],aes(SARS2_site,mutant))+geom_tile(aes(fill=bind_avg))+
scale_fill_gradientn(colours=c("#A94E35","#F48365","#FFFFFF","#7378B9","#383C6C"),limits=c(-5,0.5),values=c(0,2.5/5.5,5/5.5,5.25/5.5,5.5/5.5),na.value="gray")+
scale_x_discrete(expand=c(0,0))+
labs(x="80R epitope site",y="mutant")+theme_classic(base_size=9)+
coord_equal()+theme(axis.text.x = element_text(angle=45,hjust=1))+
geom_text(aes(label=wildtype_indicator),size=2,color="gray10")
p4 <- ggplot(mutants_epitope[mutant!="*" & epitope=="m396",],aes(SARS2_site,mutant))+geom_tile(aes(fill=bind_avg))+
scale_fill_gradientn(colours=c("#A94E35","#F48365","#FFFFFF","#7378B9","#383C6C"),limits=c(-5,0.5),values=c(0,2.5/5.5,5/5.5,5.25/5.5,5.5/5.5),na.value="gray")+
scale_x_discrete(expand=c(0,0))+
labs(x="m396 epitope site",y="mutant")+theme_classic(base_size=9)+
coord_equal()+theme(axis.text.x = element_text(angle=45,hjust=1))+
geom_text(aes(label=wildtype_indicator),size=2,color="gray10")
p5 <- ggplot(mutants_epitope[mutant!="*" & epitope=="F26G19",],aes(SARS2_site,mutant))+geom_tile(aes(fill=bind_avg))+
scale_fill_gradientn(colours=c("#A94E35","#F48365","#FFFFFF","#7378B9","#383C6C"),limits=c(-5,0.5),values=c(0,2.5/5.5,5/5.5,5.25/5.5,5.5/5.5),na.value="gray")+
scale_x_discrete(expand=c(0,0))+
labs(x="F26G19 epitope site",y="mutant")+theme_classic(base_size=9)+
coord_equal()+theme(axis.text.x = element_text(angle=45,hjust=1))+
geom_text(aes(label=wildtype_indicator),size=2,color="gray10")
p6 <- ggplot(mutants_epitope[mutant!="*" & epitope=="S230",],aes(SARS2_site,mutant))+geom_tile(aes(fill=bind_avg))+
scale_fill_gradientn(colours=c("#A94E35","#F48365","#FFFFFF","#7378B9","#383C6C"),limits=c(-5,0.5),values=c(0,2.5/5.5,5/5.5,5.25/5.5,5.5/5.5),na.value="gray")+
scale_x_discrete(expand=c(0,0))+
labs(x="S230 epitope site",y="mutant")+theme_classic(base_size=9)+
coord_equal()+theme(axis.text.x = element_text(angle=45,hjust=1))+
geom_text(aes(label=wildtype_indicator),size=2,color="gray10")
grid.arrange(p1,p2,p3,p4,p5,p6,nrow=2,widths=c(1.2,1.7,1.3),heights=c(1,1))
```
```{r heatmaps_epitope_mut_effects_core_mAbs, fig.width=9, fig.height=4, fig.align="center",dpi=300,dev="png",echo=F}
p1 <- ggplot(mutants_epitope[mutant!="*" & epitope=="CR3022",],aes(SARS2_site,mutant))+geom_tile(aes(fill=expr_avg))+
scale_fill_gradientn(colours=c("#A94E35","#F48365","#FFFFFF","#7378B9","#383C6C"),limits=c(-5,1),values=c(0,2.5/6,5/6,5.5/6,6/6),na.value="gray")+
scale_x_discrete(expand=c(0,0))+
labs(x="CR3022 epitope site",y="mutant")+theme_classic(base_size=9)+
coord_equal()+theme(axis.text.x = element_text(angle=45,hjust=1))+
geom_text(aes(label=wildtype_indicator),size=2,color="gray10")
p2 <- ggplot(mutants_epitope[mutant!="*" & epitope=="VHH72",],aes(SARS2_site,mutant))+geom_tile(aes(fill=expr_avg))+
scale_fill_gradientn(colours=c("#A94E35","#F48365","#FFFFFF","#7378B9","#383C6C"),limits=c(-5,1),values=c(0,2.5/6,5/6,5.5/6,6/6),na.value="gray")+
scale_x_discrete(expand=c(0,0))+
labs(x="VHH72 epitope site",y="mutant")+theme_classic(base_size=9)+
coord_equal()+theme(axis.text.x = element_text(angle=45,hjust=1))+
geom_text(aes(label=wildtype_indicator),size=2,color="gray10")
p3 <- ggplot(mutants_epitope[mutant!="*" & epitope=="S309",],aes(SARS2_site,mutant))+geom_tile(aes(fill=expr_avg))+
scale_fill_gradientn(colours=c("#A94E35","#F48365","#FFFFFF","#7378B9","#383C6C"),limits=c(-5,1),values=c(0,2.5/6,5/6,5.5/6,6/6),na.value="gray")+
scale_x_discrete(expand=c(0,0))+
labs(x="S309 epitope site",y="mutant")+theme_classic(base_size=9)+
coord_equal()+theme(axis.text.x = element_text(angle=45,hjust=1))+
geom_text(aes(label=wildtype_indicator),size=2,color="gray10")
grid.arrange(p1,p2,p3,nrow=1)
```
To put constraint on epitopes in context, let's look at the fraction of mutations within each epitope that are compatible with two levels of affinity -- that of SARS-CoV-1, the lowest known affinity capable of mediating human infectivity, and LYRa11, which can promote huACE2-mediated cellular infection in cell culture, though at reduced infectivity compared to e.g. SARS-CoV-1 RBD. Finally, we output the fraction of mutations within each epitope that have an expression effect of >-0.2, a somewhat arbitrary value (the range of expression phenotypes was ~0.18, but all higher than our SARS-CoV-2 wildtype).
```{r table affinity cutoffs, echo=F}
epitopes_table <- data.frame(epitope=unique(mutants_epitope$epitope))
for(i in 1:nrow(epitopes_table)){
epitopes_table$median_bind[i] <- median(mutants_epitope[epitope==epitopes_table$epitope[i] & mutant!=wildtype & mutant!="*",bind_avg],na.rm=T)
epitopes_table$median_expr[i] <- median(mutants_epitope[epitope==epitopes_table$epitope[i] & mutant!=wildtype & mutant!="*",expr_avg],na.rm=T)
epitopes_table$frac_SARS_CoV_1[i] <- nrow(mutants_epitope[epitope==epitopes_table$epitope[i] & mutant!=wildtype & mutant!="*" & bind_avg > homologs[homolog=="SARS-CoV-1",bind_avg],])/nrow(mutants_epitope[epitope==epitopes_table$epitope[i] & mutant!=wildtype & mutant!="*",])
epitopes_table$frac_LYRa11[i] <- nrow(mutants_epitope[epitope==epitopes_table$epitope[i] & mutant!=wildtype & mutant!="*" & bind_avg > homologs[homolog=="LYRa11",bind_avg],])/nrow(mutants_epitope[epitope==epitopes_table$epitope[i] & mutant!=wildtype & mutant!="*",])
epitopes_table$frac_expr_0.2[i] <- nrow(mutants_epitope[epitope==epitopes_table$epitope[i] & mutant!=wildtype & mutant!="*" & expr_avg > -0.2,])/nrow(mutants_epitope[epitope==epitopes_table$epitope[i] & mutant!=wildtype & mutant!="*",])
}
kable(epitopes_table, digits=2, col.names=c("epitope","median delta_log<sub>10</sub>(*K*<sub>A,app</sub>)","median delta_log-fluorescence","fraction muts > SARS-CoV-1 affinity","fraction muts > LYRa11 affinity", "fraction muts > -0.2 expression effect"))
```
## Comparison to natural sequence diversity
Let's compare our mutational constraint on antibody epitopes to natural diversity in different antibody epitopes from an alignment of sarbecovirus RBDs. We previously calculated the effective number of amino acids (Neff) at each site in an alignment of RBD amino acid sequences from across the sarbecovirus clade (noting that many of these sequences are so-called "Clade 2" sequences, which have not been shown to bind human or any other ACE2 -- so whether they evolve under constraint for ACE2-binding, at this point, is unclear. Bat ACE2 is also under elevated positive selection, so these Clade 2 sequences could be adapted to "odd" ACE2s within bat hosts, or who knows what...). We then compute the entropy of each alignment position, and compare the site-wise entropy/number of effective amino acids (N<sub>eff</sub>) of sites in each antibody epitope.
We see that epitopes exhibit the least natural sequence variation for the core-RBD mAbs, VHH72 and CR3022. Among the RBM-directed mAbs, 80R seems to bind sites that are more variable across the sarbecovirus clade.
```{r natural_diversity_epitopes, fig.width=4.5, fig.height=3, fig.align="center",dpi=300,dev="png",echo=F}
#remake sites_epitope with new column
sites_ACE2 <- RBD_sites[SARS2_ACE2_contact==T,]; sites_ACE2$epitope <- "ACE2"
sites_B38 <- RBD_sites[epitope_B38==T,]; sites_B38$epitope <- "B38"
sites_80R <- RBD_sites[epitope_80R==T,]; sites_80R$epitope <- "80R"
sites_m396 <- RBD_sites[epitope_m396==T,]; sites_m396$epitope <- "m396"
sites_F26G19 <- RBD_sites[epitope_F26G19==T,]; sites_F26G19$epitope <- "F26G19"
sites_S230 <- RBD_sites[epitope_S230==T,]; sites_S230$epitope <- "S230"
sites_VHH72 <- RBD_sites[epitope_VHH72==T,]; sites_VHH72$epitope <- "VHH72"
sites_CR3022 <- RBD_sites[epitope_CR3022==T,]; sites_CR3022$epitope <- "CR3022"
sites_S309 <- RBD_sites[epitope_S309==T,]; sites_S309$epitope <- "S309"
sites_hypothetical_epitope <- RBD_sites[site_SARS2 %in% c(353,355,426,457,462,463,464,465,466,467),]; sites_hypothetical_epitope$epitope <- "candidate"
sites_epitope <- rbind(sites_ACE2,sites_B38,sites_80R,sites_m396,sites_F26G19,sites_S230,sites_VHH72,sites_CR3022,sites_S309,sites_hypothetical_epitope);sites_epitope$epitope <- factor(sites_epitope$epitope,levels=c("ACE2","B38","80R","m396","F26G19","S230","VHH72","CR3022","S309","candidate"))
ggplot(sites_epitope,aes(x=epitope,y=Neff))+
geom_jitter(width=0.2, alpha=0.5)+stat_summary(fun.y=median,geom="point",size=3,color="red",shape=18)+theme_classic()+
xlab("epitope")+ylab("N_eff per epitope site")+theme(axis.text.x=element_text(angle=45,hjust=1))
invisible(dev.print(pdf, paste(config$antibody_epitopes_dir,"/distribution-Neff_per_epitope.pdf",sep="")))
```
## in vitro escape mutations
One recent SARS-CoV-2 antibody paper, by [Baum et al.](https://science.sciencemag.org/content/early/2020/06/15/science.abd0831), identified amino-acid mutations that enable escape of monoclonal antibodies (or Ab cocktails if epitopes are overlapping). A previous study in SARS-CoV-1 by [Rockx et al.](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2826557/) also identified escape mutations.
Baum et al. identified the following escape mutations in SARS-CoV-2 across their different selections:
```
K417E
K444Q
V445A
N450D
Y453F
L455F
E484K
G485D
F486V
F490P
Q493K
```
Rockx et al. identified the following escape mutations in SARS-CoV-1 (amino acids as in the original study, but I have changed the residue numberings to be the residue number within the SARS-CoV-2 RBD):
```
P475A
P475H
D476G
L456R
D494Y
F473C
T345I
K403Q
K403E
```
What is the mutational tolerance of these positions?
```{r escape_mut_tol_per_mut, fig.width=4.5, fig.height=4, fig.align="center",dpi=300,dev="png",echo=F}
Baum_muts <- c("K417E","K444Q","V445A","N450D","Y453F","L455F","E484K","G485D","F486V","F490P","Q493K")
Baum_sites <- c(417,444,445,450,453,455,484,485,486,490,493)
#prep long form table giving mean effect per site for all RBD residues, RBM residues, ACE2 contact residues, and sites of Ab escape
RBD_sites_all <- RBD_sites; RBD_sites_all$factor <- "all sites"
RBD_sites_RBM <- RBD_sites[RBM==T,]; RBD_sites_RBM$factor <- "all RBM sites"
RBD_sites_ACE2contact <- RBD_sites[SARS2_ACE2_contact==T,]; RBD_sites_ACE2contact$factor <- "ACE2 contacts"
RBD_sites_escape <- RBD_sites[site_SARS2 %in% Baum_sites,]; RBD_sites_escape$factor <- "site of Ab escape"
dt <- rbind(RBD_sites_all,RBD_sites_RBM,RBD_sites_ACE2contact,RBD_sites_escape);dt$factor <- factor(dt$factor,levels=c("all sites","all RBM sites","ACE2 contacts","site of Ab escape"))
set.seed(1990)
ggplot(dt,aes(x=factor,y=mean_bind))+
geom_jitter(width=0.15, alpha=0.5)+stat_summary(fun.y=median,geom="point",size=3,color="red",shape=18)+theme_classic()+
xlab("sites")+ylab("mean binding effect per site")+theme(axis.text.x=element_text(angle=90,hjust=1))
invisible(dev.print(pdf, paste(config$antibody_epitopes_dir,"/jitterplots_escape-muts_sites.pdf",sep="")))
```
What is the mutational tolerance of these mutations in particular?
```{r escape_mut_tol_per_site, fig.width=3.5, fig.height=4, fig.align="center",dpi=300,dev="png",echo=F}
Baum_muts <- c("K417E","K444Q","V445A","N450D","Y453F","L455F","E484K","G485D","F486V","F490P","Q493K")
#prep long form table giving mean effect per site for all RBD residues, RBM residues, ACE2 contact residues, and sites of Ab escape
RBD_muts_all <- mutants; RBD_muts_all$factor <- "all mutations"
RBD_muts_RBM <- mutants[SARS2_site %in% RBD_sites[RBM==T,site_SARS2],];RBD_muts_RBM$factor <- "all RBM mutations"
RBD_muts_ACE2contact <- mutants[SARS2_site %in% RBD_sites[SARS2_ACE2_contact==T,site_SARS2],];RBD_muts_ACE2contact$factor <- "ACE2 contact mutations"
RBD_muts_escape <- mutants[mutation %in% Baum_muts,]; RBD_muts_escape$factor <- "Ab escape mutations"
dt <- rbind(RBD_muts_RBM,RBD_muts_ACE2contact,RBD_muts_escape);dt$factor <- factor(dt$factor,levels=c("all RBM mutations","ACE2 contact mutations","Ab escape mutations"))
set.seed(1990)
ggplot(dt[mutant != wildtype,],aes(x=factor,y=bind_avg))+
#geom_jitter(width=0.15, alpha=0.25)+stat_summary(fun.y=median,geom="point",size=3,color="red",shape=18)+
geom_boxplot(outlier.shape=16, width=0.3, outlier.alpha=0.25)+
theme_classic()+
xlab("mutation class")+ylab("mutation effect on binding")+theme(axis.text.x=element_text(angle=90,hjust=1))
invisible(dev.print(pdf, paste(config$antibody_epitopes_dir,"/boxplot_escape-muts.pdf",sep="")))
```