Skip to content

Commit

Permalink
Merge pull request #122 from UW-GAC/admixMap_dosage
Browse files Browse the repository at this point in the history
Admix map dosage
  • Loading branch information
smgogarten authored Feb 1, 2025
2 parents e3930f2 + 0746dfa commit 7655885
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 6 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
^renv$
^renv\.lock$
^\.github$
^.*\.Rproj$
^\.Rproj\.user$
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ Type: Package
Title: GENetic EStimation and Inference in Structured samples
(GENESIS): Statistical methods for analyzing genetic data from
samples with population structure and/or relatedness
Version: 2.37.0
Date: 2024-03-26
Version: 2.37.1
Date: 2025-01-31
Author: Matthew P. Conomos, Stephanie M. Gogarten,
Lisa Brown, Han Chen, Thomas Lumley, Kenneth Rice, Tamar Sofer, Adrienne Stilp, Timothy Thornton, Chaoyu Yu
Maintainer: Stephanie M. Gogarten <[email protected]>
Expand Down
14 changes: 12 additions & 2 deletions R/admixMap.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
admixMap <- function(admixDataList,
null.model,
imputed=FALSE,
male.diploid=TRUE, genome.build=c("hg19", "hg38"),
BPPARAM=bpparam(), verbose=TRUE){

Expand All @@ -15,7 +16,7 @@ admixMap <- function(admixDataList,
if(is.null(names(admixDataList))){
names(admixDataList) <- paste("Anc",1:v,sep="")
}

# get sample index
if (is(admixDataList[[1]], "GenotypeIterator")) {
sample.index <- lapply(admixDataList, .sampleIndexNullModel, null.model)
Expand All @@ -30,6 +31,11 @@ admixMap <- function(admixDataList,
}
n.samp <- length(sample.index)

# if admixDataList contains GenotypeIterator, imputed=TRUE will be ignored
if (is(admixDataList[[1]], "GenotypeIterator") && imputed) {
message("admixDataList contains GenotypeIterator, imputed=TRUE is ignored")
}

# get sex for calculating allele freq
sex <- validateSex(admixDataList[[1]])[sample.index]

Expand Down Expand Up @@ -81,7 +87,11 @@ admixMap <- function(admixDataList,
if (is(admixDataList[[1]], "GenotypeIterator")) {
local[,,i] <- getGenotypeSelection(admixDataList[[i]], scan=sample.index, order="selection", transpose=TRUE, use.names=FALSE, drop=FALSE)
} else {
local[,,i] <- refDosage(admixDataList[[i]], use.names=FALSE)[sample.index,,drop=FALSE]
if (!imputed) {
local[,,i] <- refDosage(admixDataList[[i]], use.names=FALSE)[sample.index,,drop=FALSE]
} else {
local[,,i] <- imputedDosage(admixDataList[[i]], use.names=FALSE)[sample.index,,drop=FALSE]
}
}
}
if (any(is.na(local))) warning("missing values in local ancestry will produce NA output for this block")
Expand Down
6 changes: 6 additions & 0 deletions inst/NEWS.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

\title{NEWS for GENESIS}


\section{Version 2.37.1}{
\itemize{
\item Add option "imputed" to admixMap to allow testing imputed dosages.
}
}
\section{Version 2.33.2}{
\itemize{
\item Set extremely small p-values (\code{< Machine$double.xmin}) calculated with pchisq to \code{Machine$double.xmin}. This change prevents GENESIS from returning p-values equal to 0.
Expand Down
7 changes: 5 additions & 2 deletions man/admixMap.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,17 @@
Run admixture analyses
}
\usage{
admixMap(admixDataList, null.model, male.diploid=TRUE,
genome.build=c("hg19", "hg38"),
admixMap(admixDataList, null.model,
imputed=FALSE,
male.diploid=TRUE,
genome.build=c("hg19", "hg38"),
BPPARAM=bpparam(), verbose=TRUE)
}

\arguments{
\item{admixDataList}{named list of \code{\link{GenotypeIterator}} or \code{\link{SeqVarIterator}} objects for each ancestry}
\item{null.model}{A null model object returned by \code{\link{fitNullModel}}.}
\item{imputed}{Logical indicator of whether to read dosages from the "DS" field containing imputed dosages instead of counting the number of alternate alleles. Only used if admixDataList contains \code{\link{SeqVarIterator}} objects.}
\item{male.diploid}{Logical for whether males on sex chromosomes are coded as diploid. Default is `male.diploid=TRUE`, meaning sex chromosome genotypes for males have values 0/2. If the input object codes males as 0/1 on sex chromosomes, set `male.diploid=FALSE`.}
\item{genome.build}{A character sting indicating genome build; used to identify pseudoautosomal regions on the X and Y chromosomes. These regions are not treated as sex chromosomes when calculating allele frequencies.}
\item{BPPARAM}{A \code{\link{BiocParallelParam}} object to process blocks of variants in parallel. If not provided, the default back-end returned by \code{\link{bpparam}} will be used.}
Expand Down
46 changes: 46 additions & 0 deletions tests/testthat/test_admixMap.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,49 @@ test_that("admixMap", {
lapply(tmpfile, unlink)
lapply(tmpfile2, unlink)
})


test_that("admixMap - imputed dosage", {
gdsfmt::showfile.gds(closeall=TRUE, verbose=FALSE)
gdsfile <- gds <- SeqArray::seqExampleFileName("gds")
gds <- openfn.gds(gdsfile)
samp <- as.character(read.gdsn(index.gdsn(gds, "sample.id")))
nsnp <- objdesp.gdsn(index.gdsn(gds, "variant.id"))$dim
nsamp <- objdesp.gdsn(index.gdsn(gds, "sample.id"))$dim
closefn.gds(gds)
set.seed(200); dosage_eur <- sample(0:2, nsnp*nsamp, replace=TRUE)
set.seed(201); dosage_afr <- ifelse(dosage_eur == 2, 0, sample(0:1, nsnp*nsamp, replace=TRUE))
set.seed(202); dosage_amer <- 2 - dosage_eur - dosage_afr
dosage <- list(dosage_eur, dosage_afr, dosage_amer)
tmpfile <- character(3)
for (i in 1:3) {
tmpfile[i] <- tempfile()
file.copy(gdsfile, tmpfile[i])
gds <- openfn.gds(tmpfile[i], readonly=FALSE)
format_node <- index.gdsn(gds, "annotation/format")
folder <- addfolder.gdsn(format_node, "DS")
add.gdsn(folder, "data", matrix(dosage[[i]], nrow=nsamp, ncol=nsnp))
data2 <- read.gdsn(index.gdsn(gds, "annotation/format/DP/~data"))
add.gdsn(folder, "~data", data2)
closefn.gds(gds)
}

set.seed(203); pheno <- rnorm(nsamp, mean = 0, sd = 1)
set.seed(204); covar <- sample(0:1, nsamp, replace=TRUE)

annot <- AnnotatedDataFrame(data.frame(sample.id = samp,
covar, pheno, stringsAsFactors=FALSE))
seqIterators <- lapply(tmpfile, function(x) {
gr <- seqOpen(x)
gd <- SeqVarData(gr, sampleData=annot)
SeqVarBlockIterator(gd, verbose=FALSE)
})

null.model <- fitNullModel(annot, outcome = "pheno", covars = "covar")
myassoc <- admixMap(seqIterators, null.model, imputed=TRUE, BPPARAM=BPPARAM, verbose=FALSE)
expect_equal(nrow(myassoc), nsnp)

lapply(seqIterators, seqClose)

lapply(tmpfile, unlink)
})

0 comments on commit 7655885

Please sign in to comment.