Merge pull request #122 from UW-GAC/admixMap_dosage

Admix map dosage
UW-GAC · Feb 1, 2025 · 7655885 · 7655885
2 parents e3930f2 + 0746dfa
commit 7655885
Show file tree

Hide file tree

Showing 6 changed files with 73 additions and 6 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -1,3 +1,5 @@
 ^renv$
 ^renv\.lock$
 ^\.github$
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -3,8 +3,8 @@ Type: Package
 Title: GENetic EStimation and Inference in Structured samples
         (GENESIS): Statistical methods for analyzing genetic data from
         samples with population structure and/or relatedness
-Version: 2.37.0
-Date: 2024-03-26
+Version: 2.37.1
+Date: 2025-01-31
 Author: Matthew P. Conomos, Stephanie M. Gogarten,
 	Lisa Brown, Han Chen, Thomas Lumley, Kenneth Rice, Tamar Sofer, Adrienne Stilp, Timothy Thornton, Chaoyu Yu
 Maintainer: Stephanie M. Gogarten <[email protected]>

diff --git a/R/admixMap.R b/R/admixMap.R
@@ -1,5 +1,6 @@
 admixMap <- function(admixDataList,
                      null.model,
+                     imputed=FALSE,
                      male.diploid=TRUE, genome.build=c("hg19", "hg38"),
                      BPPARAM=bpparam(), verbose=TRUE){
 
@@ -15,7 +16,7 @@ admixMap <- function(admixDataList,
     if(is.null(names(admixDataList))){
         names(admixDataList) <- paste("Anc",1:v,sep="")
     }
-
+   
     # get sample index
     if (is(admixDataList[[1]], "GenotypeIterator")) {
         sample.index <- lapply(admixDataList, .sampleIndexNullModel, null.model)
@@ -30,6 +31,11 @@ admixMap <- function(admixDataList,
     }
     n.samp <- length(sample.index)
 
+    # if admixDataList contains GenotypeIterator, imputed=TRUE will be ignored
+    if (is(admixDataList[[1]], "GenotypeIterator") && imputed) {
+        message("admixDataList contains GenotypeIterator, imputed=TRUE is ignored")
+    }
+
     # get sex for calculating allele freq
     sex <- validateSex(admixDataList[[1]])[sample.index]
 
@@ -81,7 +87,11 @@ admixMap <- function(admixDataList,
             if (is(admixDataList[[1]], "GenotypeIterator")) {
                 local[,,i] <- getGenotypeSelection(admixDataList[[i]], scan=sample.index, order="selection", transpose=TRUE, use.names=FALSE, drop=FALSE)
             } else {
-                local[,,i] <- refDosage(admixDataList[[i]], use.names=FALSE)[sample.index,,drop=FALSE]
+               if (!imputed) {
+                   local[,,i] <- refDosage(admixDataList[[i]], use.names=FALSE)[sample.index,,drop=FALSE]
+               } else {
+                   local[,,i] <- imputedDosage(admixDataList[[i]], use.names=FALSE)[sample.index,,drop=FALSE]
+               }
             }
         }
         if (any(is.na(local))) warning("missing values in local ancestry will produce NA output for this block")

diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd
@@ -2,6 +2,12 @@
 
 \title{NEWS for GENESIS}
 
+
+\section{Version 2.37.1}{
+  \itemize{
+    \item Add option "imputed" to admixMap to allow testing imputed dosages.
+  }
+}
 \section{Version 2.33.2}{
   \itemize{
     \item Set extremely small p-values (\code{< Machine$double.xmin}) calculated with pchisq to \code{Machine$double.xmin}. This change prevents GENESIS from returning p-values equal to 0.

diff --git a/man/admixMap.Rd b/man/admixMap.Rd
@@ -6,14 +6,17 @@
 Run admixture analyses
 }
 \usage{
-admixMap(admixDataList, null.model, male.diploid=TRUE,
-         genome.build=c("hg19", "hg38"),
+admixMap(admixDataList, null.model, 
+         imputed=FALSE,
+         male.diploid=TRUE,
+         genome.build=c("hg19", "hg38"), 
          BPPARAM=bpparam(), verbose=TRUE)
 }
 
 \arguments{
   \item{admixDataList}{named list of \code{\link{GenotypeIterator}} or \code{\link{SeqVarIterator}} objects for each ancestry}
     \item{null.model}{A null model object returned by \code{\link{fitNullModel}}.}
+    \item{imputed}{Logical indicator of whether to read dosages from the "DS" field containing imputed dosages instead of counting the number of alternate alleles. Only used if admixDataList contains \code{\link{SeqVarIterator}} objects.}
     \item{male.diploid}{Logical for whether males on sex chromosomes are coded as diploid. Default is `male.diploid=TRUE`, meaning sex chromosome genotypes for males have values 0/2. If the input object codes males as 0/1 on sex chromosomes, set `male.diploid=FALSE`.}
   \item{genome.build}{A character sting indicating genome build; used to identify pseudoautosomal regions on the X and Y chromosomes. These regions are not treated as sex chromosomes when calculating allele frequencies.}
   \item{BPPARAM}{A \code{\link{BiocParallelParam}} object to process blocks of variants in parallel. If not provided, the default back-end returned by \code{\link{bpparam}} will be used.}

diff --git a/tests/testthat/test_admixMap.R b/tests/testthat/test_admixMap.R
@@ -71,3 +71,49 @@ test_that("admixMap", {
     lapply(tmpfile, unlink)
     lapply(tmpfile2, unlink)
 })
+
+
+test_that("admixMap - imputed dosage", {
+    gdsfmt::showfile.gds(closeall=TRUE, verbose=FALSE)
+    gdsfile <- gds <- SeqArray::seqExampleFileName("gds")
+    gds <- openfn.gds(gdsfile)
+    samp <- as.character(read.gdsn(index.gdsn(gds, "sample.id")))
+    nsnp <- objdesp.gdsn(index.gdsn(gds, "variant.id"))$dim
+    nsamp <- objdesp.gdsn(index.gdsn(gds, "sample.id"))$dim
+    closefn.gds(gds)
+    set.seed(200); dosage_eur <- sample(0:2, nsnp*nsamp, replace=TRUE)
+    set.seed(201); dosage_afr <- ifelse(dosage_eur == 2, 0, sample(0:1, nsnp*nsamp, replace=TRUE))
+    set.seed(202); dosage_amer <- 2 - dosage_eur - dosage_afr
+    dosage <- list(dosage_eur, dosage_afr, dosage_amer)
+    tmpfile <- character(3)
+    for (i in 1:3) {
+        tmpfile[i] <- tempfile()
+        file.copy(gdsfile, tmpfile[i])
+        gds <- openfn.gds(tmpfile[i], readonly=FALSE)
+        format_node <- index.gdsn(gds, "annotation/format")
+        folder <- addfolder.gdsn(format_node, "DS")
+        add.gdsn(folder, "data", matrix(dosage[[i]], nrow=nsamp, ncol=nsnp))
+        data2 <- read.gdsn(index.gdsn(gds, "annotation/format/DP/~data"))
+        add.gdsn(folder, "~data", data2)
+        closefn.gds(gds)
+    }
+
+    set.seed(203); pheno <- rnorm(nsamp, mean = 0, sd = 1)
+    set.seed(204); covar <- sample(0:1, nsamp, replace=TRUE)
+
+    annot <- AnnotatedDataFrame(data.frame(sample.id = samp, 
+                                           covar, pheno, stringsAsFactors=FALSE))
+    seqIterators <- lapply(tmpfile, function(x) {
+        gr <- seqOpen(x)
+        gd <- SeqVarData(gr, sampleData=annot)
+        SeqVarBlockIterator(gd, verbose=FALSE)
+    })
+
+    null.model <- fitNullModel(annot, outcome = "pheno", covars = "covar")
+    myassoc <- admixMap(seqIterators, null.model, imputed=TRUE, BPPARAM=BPPARAM, verbose=FALSE)
+    expect_equal(nrow(myassoc), nsnp)
+
+    lapply(seqIterators, seqClose)
+
+    lapply(tmpfile, unlink)
+})