Skip to content

Commit e22d8b9

Browse files
author
Liam Shaw
committed
updating code including dplyr changes
1 parent 6d528f2 commit e22d8b9

15 files changed

Lines changed: 213 additions & 150 deletions

Pathogen-host-range.Rmd

Lines changed: 94 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ It is useful to have data frames with the summary statistics for each pathogen s
189189
### Viruses
190190

191191
```{r Virus_data_frame, results=FALSE, cache=cachedata}
192+
options(warn=-1) # Turn off warnings
192193
virus.names <- names(sort(table(virus$Species), decreasing=TRUE))
193194
n.hosts <- as.numeric(sort(table(virus$Species), decreasing=TRUE))
194195
virus.phbs <- sapply(virus.names, function(x) PHB(x))
@@ -271,6 +272,7 @@ This is the schematic overview figure, and was made manually in Inkscape using i
271272
We get a dataset of just the unique pathogens and then calculate the mean phylogenetic host breadth (PHB) of each pathogen. We then use this to plot a histogram.
272273

273274
```{r Figure_PHB_histogram, results='show', cache=cachedata}
275+
options(warn=-1) # Turn off warnings
274276
# Get unique pathogens
275277
unique_pathogens <- data.frame(Species=pathogen_vs_host_db[!duplicated(pathogen_vs_host_db$Species),"Species"],
276278
Type=pathogen_vs_host_db[!duplicated(pathogen_vs_host_db$Species),"Type"])
@@ -336,14 +338,91 @@ p.histogram.figure.2
336338
dev.off()
337339
```
338340

341+
```{r Figure_PHB_histogram_subsample_function, cache=cachedata}
342+
options(warn=-1) # Turn off warnings
343+
# Function to plot PHB histogram for a given dataset
344+
plotHistogram <- function(association_dataset, cophenetic_distances){
345+
# Get unique pathogens
346+
unique_pathogens.subsampled <- data.frame(Species=association_dataset[!duplicated(association_dataset$Species),"Species"],
347+
Type=association_dataset[!duplicated(association_dataset$Species),"Type"])
348+
unique_pathogens.subsampled$Species <- as.character(unique_pathogens.subsampled$Species)
349+
# Calculate PHB
350+
unique_pathogens.subsampled$PHB <- sapply(unique_pathogens.subsampled$Species, function(x) PHB(x, cophenetic.matrix = cophenetic_distances, m = association_dataset))
351+
unique_pathogens.subsampled$PHB.median <- sapply(unique_pathogens.subsampled$Species, function(x) PHB(x, cophenetic.matrix = cophenetic_distances, m = association_dataset, FUN=median))
352+
unique_pathogens.subsampled$PHB.max <- sapply(unique_pathogens.subsampled$Species, function(x) PHB(x, cophenetic.matrix = cophenetic_distances, m = association_dataset, FUN = max))
353+
354+
355+
# Number of total hosts
356+
host_species_counts <- data.frame(association_dataset %>% group_by(Species) %>% summarise(count=length(HostSpeciesPHB)))
357+
rownames(host_species_counts) <- host_species_counts$Species
358+
unique_pathogens.subsampled$N.hosts <- host_species_counts[unique_pathogens.subsampled$Species,"count"]
359+
360+
# Gather data
361+
pathogen_range_histo <- data.frame(phb = 1:50/30-0.03,
362+
bacteria = hist(filter(unique_pathogens.subsampled, Type == "Bacteria")$PHB, breaks = 0:50/30, freq = TRUE, plot = FALSE)$count,
363+
virus = hist(filter(unique_pathogens.subsampled, Type == "Virus")$PHB, breaks = 0:50/30, freq = TRUE, plot = FALSE)$count) %>% pivot_longer(cols=c("bacteria", "virus"), names_to = "type", values_to = "count")
364+
pathogen_range_histo$count.norm <- pathogen_range_histo$count/sum(pathogen_range_histo$count)
365+
pathogen_range_histo$type <- ifelse(pathogen_range_histo$type=="bacteria", "Bacteria", "Virus")
366+
367+
pathogen_range_histo.total <- data.frame(pathogen_range_histo %>% group_by(phb) %>% summarise(count=sum(count)))
368+
# Pathogen range histogram, excluding zeros
369+
pathogen_range_histo$type <- ordered(pathogen_range_histo$type, levels=c("Virus", "Bacteria"))
370+
levels(pathogen_range_histo$type) <- c("(a) Viruses", "(b) Bacteria")
371+
p.histogram <- ggplot(pathogen_range_histo, aes(fill=type, x=phb, y=count))+
372+
geom_histogram(colour="black", aes(fill="All"), data=pathogen_range_histo.total, stat="identity", alpha=0.4)+
373+
ylim(c(0,75))+geom_bar(stat="identity")+
374+
facet_wrap(~type, ncol=1)+
375+
scale_fill_manual(values=c("#de2d26", "#252525", "#f7f7f7"))+
376+
xlim(c(0,1))+theme_basic()+
377+
xlab("Mean PHB")+
378+
ylab("Frequency")+
379+
labs(fill="Pathogen type")+
380+
geom_vline(xintercept = median(filter(unique_pathogens.subsampled, Type == "Bacteria")$PHB[which(filter(unique_pathogens.subsampled, Type == "Bacteria")$PHB!=0)]),
381+
colour="white", size=2, alpha=0.8)+
382+
geom_vline(xintercept = median(filter(unique_pathogens.subsampled, Type == "Bacteria")$PHB[which(filter(unique_pathogens.subsampled, Type == "Bacteria")$PHB!=0)]),
383+
colour="black", size=1, linetype="dashed")+ # Second to make colour clear
384+
geom_vline(xintercept = median(filter(unique_pathogens.subsampled, Type == "Virus")$PHB[which(filter(unique_pathogens.subsampled, Type == "Virus")$PHB!=0)]),
385+
colour="white", size=2, alpha=0.8)+
386+
geom_vline(xintercept = median(filter(unique_pathogens.subsampled, Type == "Virus")$PHB[which(filter(unique_pathogens.subsampled, Type == "Virus")$PHB!=0)]),
387+
colour="red", size=1, linetype="dashed")+ # Second to make colour clear
388+
theme(axis.text=element_text(size=16),
389+
legend.text =element_text(colour="black", size=16),
390+
legend.title=element_text(colour="black", size=18),
391+
strip.text.x=element_text(colour="black", size=22),
392+
axis.title=element_text(colour="black", size=22))+
393+
theme(legend.position="none")
394+
return(p.histogram)
395+
}
396+
```
397+
398+
399+
### Subsampling to non-human hosts
400+
```{r Figure_PHB_domestic, cache=cachedata}
401+
options(warn=-1) # Turn off warnings
402+
pathogen_vs_host_db_domestic <- pathogen_vs_host_db[which(pathogen_vs_host_db$Domestic=="Yes" & pathogen_vs_host_db$HostSpeciesPHB!="Homosapiens"),]
403+
cophenetic_domestic <- cophenetic_matrix[which(rownames(cophenetic_matrix) %in% pathogen_vs_host_db_domestic$HostSpeciesPHB), which(colnames(cophenetic_matrix) %in% pathogen_vs_host_db_domestic$HostSpeciesPHB)]
404+
p.PHB.domestic <- plotHistogram(pathogen_vs_host_db_domestic, cophenetic_domestic)
405+
```
406+
407+
408+
```{r Figure_PHB_non_domestic, cache=cachedata}
409+
options(warn=-1) # Turn off warnings
410+
pathogen_vs_host_db_non_domestic <- pathogen_vs_host_db[which(pathogen_vs_host_db$Domestic=="No" & pathogen_vs_host_db$HostSpeciesPHB!="Homosapiens"),]
411+
cophenetic_non_domestic <- cophenetic_matrix[which(rownames(cophenetic_matrix) %in% pathogen_vs_host_db_non_domestic$HostSpeciesPHB), which(colnames(cophenetic_matrix) %in% pathogen_vs_host_db_non_domestic$HostSpeciesPHB)]
412+
p.PHB.nondomestic <- plotHistogram(pathogen_vs_host_db_non_domestic, cophenetic_non_domestic)
413+
```
414+
415+
```{r Figure_PHB_subsampling_combine, cache=cachedata}
416+
cowplot::plot_grid(p.PHB.domestic+ggtitle("Domestic (n=2,270)"), p.PHB.nondomestic + ggtitle("Non-domestic (n=5,898)"))
417+
```
418+
339419

340420
## Figure 3
341421

342422
We want to produce an analagous version of Figure 1 from Olival et al. (2017) with our data.
343423

344424
```{r viral_richness_per_host_order, cache=cachedata}
345-
library(dplyr)
346-
library(ggplot2)
425+
options(warn=-1) # Turn off warnings
347426
348427
# First make a dataset per order of mammals
349428
# Note that we have Artiodactyla where Olival et al. have "Cetartiodactyla" - have changed this
@@ -444,7 +523,9 @@ png(file="figures/Figure-3-richness-per-species.png", width=2000, height=900)
444523
cowplot::plot_grid(p.virus.zoonotic, p.virus.richness,
445524
p.bacteria.zoonotic, p.bacteria.richness, nrow=1)
446525
dev.off()
447-
526+
# Show plot
527+
cowplot::plot_grid(p.virus.zoonotic, p.virus.richness,
528+
p.bacteria.zoonotic, p.bacteria.richness, nrow=1)
448529
# Correlation of bacterial and viral richness per host order
449530
boxplot.df.bacteria <- boxplot.df[which(boxplot.df$Type=="Bacteria"),]
450531
boxplot.df.virus <- boxplot.df[which(boxplot.df$Type=="Virus"),]
@@ -464,9 +545,11 @@ cor.test(mammal.bacterial.viral.richness$nBacteria, mammal.bacterial.viral.richn
464545

465546
First we do GAMs for host traits which predict viral and bacterial richness.
466547

467-
```{r GAMs, cache=cachedata}
548+
```{r GAMs, cache=cachedata, message=FALSE}
549+
options(warn=-1) # Turn off warnings
468550
source('scripts/04-fit-GAMs-host-traits.R')
469551
source('scripts/05-plot-GAMs-host-traits.R')
552+
allplots
470553
```
471554

472555
## Figure 5
@@ -475,7 +558,7 @@ source('scripts/05-plot-GAMs-host-traits.R')
475558

476559
Then GAMs for predicting zoonotic potential.
477560

478-
```{r GAMs-zoonotic-potential, cache=cachedata}
561+
```{r GAMs-zoonotic-potential, cache=cachedata, message=FALSE}
479562
detachAllPackages <- function() {
480563
481564
basic.packages <- c("package:stats","package:graphics","package:grDevices","package:utils","package:datasets","package:methods","package:base")
@@ -489,7 +572,7 @@ detachAllPackages <- function() {
489572
}
490573
```
491574

492-
```{r GAMs-2, message=FALSE, cache=cachedata}
575+
```{r GAMs-2, message=FALSE, cache=cachedata, message=FALSE}
493576
detachAllPackages()
494577
options(warn=-1)
495578
library(stringi)
@@ -520,6 +603,9 @@ dev.off()
520603
pdf(file="figures/Figure-5-gams-zoonotic-potential.pdf", width =7, height=5, pointsize=7)
521604
allplots
522605
dev.off()
606+
607+
# Show plot
608+
allplots
523609
```
524610

525611
## Figure 6
@@ -820,6 +906,8 @@ dev.off()
820906
png("figures/Figure-6-pathogen-sharing.png", width=1700, height=1000, pointsize = 14)
821907
cowplot::plot_grid(p.pathogen.sharing.orders.10, p.human.sharing)
822908
dev.off()
909+
# Show plot
910+
cowplot::plot_grid(p.pathogen.sharing.orders.10, p.human.sharing)
823911
```
824912

825913
It is useful to be able to match the points and names together, which can be done with the plot below.

Pathogen-host-range.html

Lines changed: 119 additions & 144 deletions
Large diffs are not rendered by default.

figures/Figure-2-histogram-PHB.pdf

0 Bytes
Binary file not shown.
-9 Bytes
Binary file not shown.
-1.74 KB
Loading
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.
95 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)