@@ -189,6 +189,7 @@ It is useful to have data frames with the summary statistics for each pathogen s
189189### Viruses
190190
191191``` {r Virus_data_frame, results=FALSE, cache=cachedata}
192+ options(warn=-1) # Turn off warnings
192193virus.names <- names(sort(table(virus$Species), decreasing=TRUE))
193194n.hosts <- as.numeric(sort(table(virus$Species), decreasing=TRUE))
194195virus.phbs <- sapply(virus.names, function(x) PHB(x))
@@ -271,6 +272,7 @@ This is the schematic overview figure, and was made manually in Inkscape using i
271272We get a dataset of just the unique pathogens and then calculate the mean phylogenetic host breadth (PHB) of each pathogen. We then use this to plot a histogram.
272273
273274``` {r Figure_PHB_histogram, results='show', cache=cachedata}
275+ options(warn=-1) # Turn off warnings
274276# Get unique pathogens
275277unique_pathogens <- data.frame(Species=pathogen_vs_host_db[!duplicated(pathogen_vs_host_db$Species),"Species"],
276278 Type=pathogen_vs_host_db[!duplicated(pathogen_vs_host_db$Species),"Type"])
@@ -336,14 +338,91 @@ p.histogram.figure.2
336338dev.off()
337339```
338340
341+ ``` {r Figure_PHB_histogram_subsample_function, cache=cachedata}
342+ options(warn=-1) # Turn off warnings
343+ # Function to plot PHB histogram for a given dataset
344+ plotHistogram <- function(association_dataset, cophenetic_distances){
345+ # Get unique pathogens
346+ unique_pathogens.subsampled <- data.frame(Species=association_dataset[!duplicated(association_dataset$Species),"Species"],
347+ Type=association_dataset[!duplicated(association_dataset$Species),"Type"])
348+ unique_pathogens.subsampled$Species <- as.character(unique_pathogens.subsampled$Species)
349+ # Calculate PHB
350+ unique_pathogens.subsampled$PHB <- sapply(unique_pathogens.subsampled$Species, function(x) PHB(x, cophenetic.matrix = cophenetic_distances, m = association_dataset))
351+ unique_pathogens.subsampled$PHB.median <- sapply(unique_pathogens.subsampled$Species, function(x) PHB(x, cophenetic.matrix = cophenetic_distances, m = association_dataset, FUN=median))
352+ unique_pathogens.subsampled$PHB.max <- sapply(unique_pathogens.subsampled$Species, function(x) PHB(x, cophenetic.matrix = cophenetic_distances, m = association_dataset, FUN = max))
353+
354+
355+ # Number of total hosts
356+ host_species_counts <- data.frame(association_dataset %>% group_by(Species) %>% summarise(count=length(HostSpeciesPHB)))
357+ rownames(host_species_counts) <- host_species_counts$Species
358+ unique_pathogens.subsampled$N.hosts <- host_species_counts[unique_pathogens.subsampled$Species,"count"]
359+
360+ # Gather data
361+ pathogen_range_histo <- data.frame(phb = 1:50/30-0.03,
362+ bacteria = hist(filter(unique_pathogens.subsampled, Type == "Bacteria")$PHB, breaks = 0:50/30, freq = TRUE, plot = FALSE)$count,
363+ virus = hist(filter(unique_pathogens.subsampled, Type == "Virus")$PHB, breaks = 0:50/30, freq = TRUE, plot = FALSE)$count) %>% pivot_longer(cols=c("bacteria", "virus"), names_to = "type", values_to = "count")
364+ pathogen_range_histo$count.norm <- pathogen_range_histo$count/sum(pathogen_range_histo$count)
365+ pathogen_range_histo$type <- ifelse(pathogen_range_histo$type=="bacteria", "Bacteria", "Virus")
366+
367+ pathogen_range_histo.total <- data.frame(pathogen_range_histo %>% group_by(phb) %>% summarise(count=sum(count)))
368+ # Pathogen range histogram, excluding zeros
369+ pathogen_range_histo$type <- ordered(pathogen_range_histo$type, levels=c("Virus", "Bacteria"))
370+ levels(pathogen_range_histo$type) <- c("(a) Viruses", "(b) Bacteria")
371+ p.histogram <- ggplot(pathogen_range_histo, aes(fill=type, x=phb, y=count))+
372+ geom_histogram(colour="black", aes(fill="All"), data=pathogen_range_histo.total, stat="identity", alpha=0.4)+
373+ ylim(c(0,75))+geom_bar(stat="identity")+
374+ facet_wrap(~type, ncol=1)+
375+ scale_fill_manual(values=c("#de2d26", "#252525", "#f7f7f7"))+
376+ xlim(c(0,1))+theme_basic()+
377+ xlab("Mean PHB")+
378+ ylab("Frequency")+
379+ labs(fill="Pathogen type")+
380+ geom_vline(xintercept = median(filter(unique_pathogens.subsampled, Type == "Bacteria")$PHB[which(filter(unique_pathogens.subsampled, Type == "Bacteria")$PHB!=0)]),
381+ colour="white", size=2, alpha=0.8)+
382+ geom_vline(xintercept = median(filter(unique_pathogens.subsampled, Type == "Bacteria")$PHB[which(filter(unique_pathogens.subsampled, Type == "Bacteria")$PHB!=0)]),
383+ colour="black", size=1, linetype="dashed")+ # Second to make colour clear
384+ geom_vline(xintercept = median(filter(unique_pathogens.subsampled, Type == "Virus")$PHB[which(filter(unique_pathogens.subsampled, Type == "Virus")$PHB!=0)]),
385+ colour="white", size=2, alpha=0.8)+
386+ geom_vline(xintercept = median(filter(unique_pathogens.subsampled, Type == "Virus")$PHB[which(filter(unique_pathogens.subsampled, Type == "Virus")$PHB!=0)]),
387+ colour="red", size=1, linetype="dashed")+ # Second to make colour clear
388+ theme(axis.text=element_text(size=16),
389+ legend.text =element_text(colour="black", size=16),
390+ legend.title=element_text(colour="black", size=18),
391+ strip.text.x=element_text(colour="black", size=22),
392+ axis.title=element_text(colour="black", size=22))+
393+ theme(legend.position="none")
394+ return(p.histogram)
395+ }
396+ ```
397+
398+
399+ ### Subsampling to non-human hosts
400+ ``` {r Figure_PHB_domestic, cache=cachedata}
401+ options(warn=-1) # Turn off warnings
402+ pathogen_vs_host_db_domestic <- pathogen_vs_host_db[which(pathogen_vs_host_db$Domestic=="Yes" & pathogen_vs_host_db$HostSpeciesPHB!="Homosapiens"),]
403+ cophenetic_domestic <- cophenetic_matrix[which(rownames(cophenetic_matrix) %in% pathogen_vs_host_db_domestic$HostSpeciesPHB), which(colnames(cophenetic_matrix) %in% pathogen_vs_host_db_domestic$HostSpeciesPHB)]
404+ p.PHB.domestic <- plotHistogram(pathogen_vs_host_db_domestic, cophenetic_domestic)
405+ ```
406+
407+
408+ ``` {r Figure_PHB_non_domestic, cache=cachedata}
409+ options(warn=-1) # Turn off warnings
410+ pathogen_vs_host_db_non_domestic <- pathogen_vs_host_db[which(pathogen_vs_host_db$Domestic=="No" & pathogen_vs_host_db$HostSpeciesPHB!="Homosapiens"),]
411+ cophenetic_non_domestic <- cophenetic_matrix[which(rownames(cophenetic_matrix) %in% pathogen_vs_host_db_non_domestic$HostSpeciesPHB), which(colnames(cophenetic_matrix) %in% pathogen_vs_host_db_non_domestic$HostSpeciesPHB)]
412+ p.PHB.nondomestic <- plotHistogram(pathogen_vs_host_db_non_domestic, cophenetic_non_domestic)
413+ ```
414+
415+ ``` {r Figure_PHB_subsampling_combine, cache=cachedata}
416+ cowplot::plot_grid(p.PHB.domestic+ggtitle("Domestic (n=2,270)"), p.PHB.nondomestic + ggtitle("Non-domestic (n=5,898)"))
417+ ```
418+
339419
340420## Figure 3
341421
342422We want to produce an analagous version of Figure 1 from Olival et al. (2017) with our data.
343423
344424``` {r viral_richness_per_host_order, cache=cachedata}
345- library(dplyr)
346- library(ggplot2)
425+ options(warn=-1) # Turn off warnings
347426
348427# First make a dataset per order of mammals
349428# Note that we have Artiodactyla where Olival et al. have "Cetartiodactyla" - have changed this
@@ -444,7 +523,9 @@ png(file="figures/Figure-3-richness-per-species.png", width=2000, height=900)
444523cowplot::plot_grid(p.virus.zoonotic, p.virus.richness,
445524 p.bacteria.zoonotic, p.bacteria.richness, nrow=1)
446525dev.off()
447-
526+ # Show plot
527+ cowplot::plot_grid(p.virus.zoonotic, p.virus.richness,
528+ p.bacteria.zoonotic, p.bacteria.richness, nrow=1)
448529# Correlation of bacterial and viral richness per host order
449530boxplot.df.bacteria <- boxplot.df[which(boxplot.df$Type=="Bacteria"),]
450531boxplot.df.virus <- boxplot.df[which(boxplot.df$Type=="Virus"),]
@@ -464,9 +545,11 @@ cor.test(mammal.bacterial.viral.richness$nBacteria, mammal.bacterial.viral.richn
464545
465546First we do GAMs for host traits which predict viral and bacterial richness.
466547
467- ``` {r GAMs, cache=cachedata}
548+ ``` {r GAMs, cache=cachedata, message=FALSE}
549+ options(warn=-1) # Turn off warnings
468550source('scripts/04-fit-GAMs-host-traits.R')
469551source('scripts/05-plot-GAMs-host-traits.R')
552+ allplots
470553```
471554
472555## Figure 5
@@ -475,7 +558,7 @@ source('scripts/05-plot-GAMs-host-traits.R')
475558
476559Then GAMs for predicting zoonotic potential.
477560
478- ``` {r GAMs-zoonotic-potential, cache=cachedata}
561+ ``` {r GAMs-zoonotic-potential, cache=cachedata, message=FALSE }
479562detachAllPackages <- function() {
480563
481564 basic.packages <- c("package:stats","package:graphics","package:grDevices","package:utils","package:datasets","package:methods","package:base")
@@ -489,7 +572,7 @@ detachAllPackages <- function() {
489572}
490573```
491574
492- ``` {r GAMs-2, message=FALSE, cache=cachedata}
575+ ``` {r GAMs-2, message=FALSE, cache=cachedata, message=FALSE }
493576detachAllPackages()
494577options(warn=-1)
495578library(stringi)
@@ -520,6 +603,9 @@ dev.off()
520603pdf(file="figures/Figure-5-gams-zoonotic-potential.pdf", width =7, height=5, pointsize=7)
521604allplots
522605dev.off()
606+
607+ # Show plot
608+ allplots
523609```
524610
525611## Figure 6
@@ -820,6 +906,8 @@ dev.off()
820906png("figures/Figure-6-pathogen-sharing.png", width=1700, height=1000, pointsize = 14)
821907cowplot::plot_grid(p.pathogen.sharing.orders.10, p.human.sharing)
822908dev.off()
909+ # Show plot
910+ cowplot::plot_grid(p.pathogen.sharing.orders.10, p.human.sharing)
823911```
824912
825913It is useful to be able to match the points and names together, which can be done with the plot below.
0 commit comments