Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Began adding an R-native writer. #48

Open
wants to merge 3 commits into
base: devel
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,18 @@ importFrom(DelayedArray,type)
importFrom(Matrix,sparseMatrix)
importFrom(Matrix,t)
importFrom(S4Vectors,DataFrame)
importFrom(S4Vectors,I)
importFrom(S4Vectors,make_zero_col_DFrame)
importFrom(S4Vectors,metadata)
importFrom(S4Vectors,wmsg)
importFrom(SingleCellExperiment,"colPairs<-")
importFrom(SingleCellExperiment,"reducedDims<-")
importFrom(SingleCellExperiment,"rowPairs<-")
importFrom(SingleCellExperiment,SingleCellExperiment)
importFrom(SummarizedExperiment,"colData<-")
importFrom(SummarizedExperiment,"rowData<-")
importFrom(SummarizedExperiment,colData)
importFrom(SummarizedExperiment,rowData)
importFrom(basilisk,basiliskRun)
importFrom(methods,as)
importFrom(methods,is)
Expand Down
179 changes: 179 additions & 0 deletions R/read.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,182 @@
adata <- anndata$read_h5ad(file, backed = if (backed) "r" else FALSE)
AnnData2SCE(adata, hdf5_backed = backed)
}

# Read an H5AD file into a SingleCellExperiment using only R HDF5 readers.
#
# file:   path to the H5AD (HDF5) file.
# backed: passed to .read_matrix() for 'X' and every layer; when FALSE the
#         matrices are realized in memory, otherwise they are left as
#         HDF5-backed objects.
#
# Returns a SingleCellExperiment. 'X' is mandatory (an error is raised if it
# is missing); every other component is best-effort: a failure to read it is
# reported as a warning and the component is left unset.
#
#' @importFrom S4Vectors I DataFrame wmsg
#' @importFrom SummarizedExperiment rowData colData rowData<- colData<-
#' @importFrom SingleCellExperiment SingleCellExperiment reducedDims<- colPairs<- rowPairs<-
.native_reader <- function(file, backed=FALSE) {
    contents <- .list_contents(file)

    # Let's read in the X matrix first... if it's there.
    if (!"X" %in% names(contents)) {
        stop("missing an 'X' entry in '", file, "'")
    }
    all.assays <- list(X = .read_matrix(file, "X", contents[["X"]], backed=backed))

    for (l in names(contents[["layers"]])) {
        tryCatch({
            all.assays[[l]] <- .read_matrix(file, file.path("layers", l), contents[["layers"]][[l]], backed=backed)
        }, error=function(e) {
            warning(wmsg("setting additional assays from 'layers' failed for '", file, "':\n  ", conditionMessage(e)))
        })
    }

    sce <- SingleCellExperiment(all.assays)

    # Adding the various pieces of data.
    tryCatch({
        cd <- .read_dim_data(file, "obs", contents[["obs"]])
        if (!is.null(cd)) {
            colData(sce) <- cd
        }
    }, error=function(e) {
        warning(wmsg("setting 'colData' failed for '", file, "':\n  ", conditionMessage(e)))
    })

    tryCatch({
        rd <- .read_dim_data(file, "var", contents[["var"]])
        if (!is.null(rd)) {
            rowData(sce) <- rd
        }
    }, error=function(e) {
        warning(wmsg("setting 'rowData' failed for '", file, "':\n  ", conditionMessage(e)))
    })

    # Adding the reduced dimensions and other bits and pieces.
    tryCatch({
        reducedDims(sce) <- .read_dim_mats(file, "obsm", contents[["obsm"]])
    }, error=function(e) {
        warning(wmsg("setting 'reducedDims' failed for '", file, "':\n  ", conditionMessage(e)))
    })

    # 'varm' has no dedicated slot here, so each matrix becomes a
    # matrix-valued column of the rowData (I() keeps it as a single column).
    tryCatch({
        row.mat <- .read_dim_mats(file, "varm", contents[["varm"]])
        if (length(row.mat)) {
            row.mat.df <- do.call(DataFrame, lapply(row.mat, I))
            rowData(sce) <- cbind(rowData(sce), row.mat.df)
        }
    }, error=function(e) {
        warning(wmsg("extracting 'varm' failed for '", file, "':\n  ", conditionMessage(e)))
    })

    # Adding pairings, if any exist.
    tryCatch({
        rowPairs(sce) <- .read_dim_pairs(file, "varp", contents[["varp"]])
    }, error=function(e) {
        warning(wmsg("setting 'rowPairs' failed for '", file, "':\n  ", conditionMessage(e)))
    })

    tryCatch({
        colPairs(sce) <- .read_dim_pairs(file, "obsp", contents[["obsp"]])
    }, error=function(e) {
        warning(wmsg("setting 'colPairs' failed for '", file, "':\n  ", conditionMessage(e)))
    })

    if ("uns" %in% names(contents)) {
        tryCatch({
            # The replacement function 'metadata<-' is not among the names
            # imported above (only the getter is), so it is called through
            # its namespace to avoid depending on S4Vectors being attached.
            S4Vectors::metadata(sce) <- rhdf5::h5read(file, "uns")
        }, error=function(e) {
            warning(wmsg("setting 'metadata' failed for '", file, "':\n  ", conditionMessage(e)))
        })
    }

    sce
}

# Build a nested list that mirrors the group/dataset hierarchy reported by
# rhdf5::h5ls() for 'file'.
#
# Groups become (possibly empty) lists holding their children; every other
# object becomes the one-row data frame of its 'otype', 'dclass' and 'dim'
# fields from the listing.
.list_contents <- function(file) {
    listing <- rhdf5::h5ls(file)

    # Return 'node' with 'value' stored at the nested position described by
    # 'keys', creating intermediate lists where nothing exists yet.
    insert_at <- function(node, keys, value) {
        first <- keys[1]
        if (length(keys) != 1) {
            value <- insert_at(node[[first]], keys[-1], value)
        }
        if (is.null(node)) {
            node <- list()
        }
        node[[first]] <- value
        node
    }

    tree <- list()
    for (row in seq_len(nrow(listing))) {
        parents <- strsplit(listing[row, "group"], "/")[[1]]
        keys <- c(parents, listing[row, "name"])

        # Splitting an absolute group path leaves an empty leading component.
        if (keys[1] == "") {
            keys <- keys[-1]
        }

        entry <- listing[row, c("otype", "dclass", "dim")]
        if (entry$otype == "H5I_GROUP") {
            entry <- list()
        }

        tree <- insert_at(tree, keys, entry)
    }

    tree
}

# Load the matrix stored at 'path' inside 'file'.
#
# fields: the entry for 'path' as produced by .list_contents(); a data frame
#         is read with HDF5Array::HDF5Array(), anything else with
#         HDF5Array::H5SparseMatrix().
# backed: if FALSE, the result is converted to an in-memory sparseMatrix or
#         ordinary matrix; otherwise the HDF5-backed object is returned.
.read_matrix <- function(file, path, fields, backed) {
    mat <- if (is.data.frame(fields)) {
        HDF5Array::HDF5Array(file, path)
    } else {
        HDF5Array::H5SparseMatrix(file, path)
    }

    if (backed) {
        return(mat)
    }

    if (DelayedArray::is_sparse(mat)) {
        as(mat, "sparseMatrix")
    } else {
        as.matrix(mat)
    }
}

# Read a per-dimension annotation table ('obs' or 'var') into a DataFrame.
#
# file:   path to the HDF5 file.
# path:   name of the group holding the table.
# fields: the entry for that group as produced by .list_contents().
#
# Every child other than '_index' and '__categories' is read as a column.
# Columns with an entry under '__categories' are converted to factors, and
# '_index' (if present) supplies the row names. Returns NULL when there are
# neither columns nor an index.
#
#' @importFrom S4Vectors DataFrame
.read_dim_data <- function(file, path, fields) {
    col.names <- setdiff(names(fields), c("__categories", "_index"))
    out.cols <- list()
    for (i in col.names) {
        out.cols[[i]] <- as.vector(rhdf5::h5read(file, file.path(path, i)))
    }

    cat.names <- names(fields[["__categories"]])
    for (i in cat.names) {
        values <- out.cols[[i]]
        if (is.null(values)) {
            # Categories without a matching column would otherwise create a
            # zero-length factor and break the DataFrame construction.
            next
        }
        levels <- as.vector(rhdf5::h5read(file, file.path(path, "__categories", i)))

        if (is.numeric(values)) {
            # NOTE(review): the legacy H5AD layout is understood to store
            # categoricals as 0-based integer codes (negative = missing) that
            # index into '__categories/<column>' -- confirm against the
            # anndata on-disk format documentation. Passing such codes
            # directly to factor(values, levels) would yield all-NA.
            codes <- as.integer(values) + 1L
            codes[!is.na(codes) & codes < 1L] <- NA_integer_
            values <- levels[codes]
        }
        # Columns that already hold the labels themselves are used as-is.
        out.cols[[i]] <- factor(values, levels)
    }

    if (!is.null(fields[["_index"]])) {
        indices <- as.vector(rhdf5::h5read(file, file.path(path, "_index")))
    } else {
        indices <- NULL
    }

    if (length(out.cols)) {
        # check.names=FALSE keeps the column names exactly as stored in the
        # file rather than letting them be rewritten into syntactic R names.
        df <- do.call(DataFrame, c(out.cols, list(check.names=FALSE)))
        rownames(df) <- indices
    } else if (!is.null(indices)) {
        df <- DataFrame(row.names=indices)
    } else {
        df <- NULL
    }

    df
}

# Read every dataset named in 'fields' from the group 'path' of 'file',
# returning a list (named by dataset) of the transposed contents.
.read_dim_mats <- function(file, path, fields) {
    entries <- names(fields)

    mats <- lapply(entries, function(entry) {
        # because everything's transposed.
        t(rhdf5::h5read(file, file.path(path, entry)))
    })
    names(mats) <- entries

    mats
}

# Read every entry named in 'fields' from the group 'path' of 'file' as an
# H5SparseMatrix and convert it to an in-memory sparseMatrix. Returns a list
# named by entry.
.read_dim_pairs <- function(file, path, fields) {
    entries <- names(fields)

    pairs <- lapply(entries, function(entry) {
        on.disk <- HDF5Array::H5SparseMatrix(file, file.path(path, entry))
        as(on.disk, "sparseMatrix")
    })
    names(pairs) <- entries

    pairs
}
133 changes: 133 additions & 0 deletions R/write.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,139 @@
adata$write_h5ad(file)
}

# Write a SingleCellExperiment to 'file' in an H5AD-style HDF5 layout using
# only R HDF5 writers.
#
# sce:  the SingleCellExperiment to save.
# file: destination path.
#
# The first assay is written as 'X', remaining (named) assays under 'layers',
# rowData under 'var' (matrix columns go to 'varm'), colData under 'obs',
# reducedDims under 'obsm' and metadata under 'uns'.
# Returns the result of file.rename(), i.e. TRUE if the move succeeded.
.native_writer <- function(sce, file) {
    # Save to an adjacent file and then move on success. This avoids
    # overwriting an existing file unless the entire write was okay.
    # The directory has to be passed as 'tmpdir'; tempfile()'s first argument
    # is the file name pattern, which would leave the file in tempdir().
    temp <- tempfile(tmpdir=dirname(file), fileext=".h5")
    on.exit(unlink(temp), add=TRUE)
    rhdf5::h5createFile(temp)

    # Saving the assays.
    # TODO: sparse array support, waiting on HDF5.
    # Accessors that are not visibly imported are called via their namespace.
    HDF5Array::writeHDF5Array(SummarizedExperiment::assay(sce, withDimnames=FALSE),
        temp, "X", with.dimnames = FALSE)

    all.ass <- SummarizedExperiment::assayNames(sce)[-1]
    if (length(all.ass)) {
        if (any(is.na(all.ass) | all.ass=="") || anyDuplicated(all.ass)) {
            stop("assay names must be non-NULL and unique")
        }
        # The group has to be created inside the file being written.
        rhdf5::h5createGroup(temp, "layers")
        for (i in all.ass) {
            HDF5Array::writeHDF5Array(SummarizedExperiment::assay(sce, i, withDimnames=FALSE),
                temp, file.path("layers", i), with.dimnames = FALSE)
        }
    }

    # Saving the rowData with a shift of matrices.
    row.mats <- .save_dim_data(rowData(sce), temp, "var", "rowData", ignore.mat=FALSE)
    .save_mat_list(as.list(row.mats), temp, "varm", "rowData")

    # Saving the colData; matrix columns are dropped with a warning.
    .save_dim_data(colData(sce), temp, "obs", "colData", ignore.mat=TRUE)

    # Saving the reducedDims.
    .save_mat_list(SingleCellExperiment::reducedDims(sce), temp, "obsm", "reducedDims")

    # Saving the metadata.
    .save_uns_list(metadata(sce), temp, "uns", "metadata")

    # TODO: skipping the pairs until we can save sparse attributes properly.

    file.rename(temp, file)
}

# Save a per-dimension annotation table into the group 'name' of 'file'.
#
# df:         data-frame-like object with unique, non-NULL row and column
#             names (an error is raised otherwise).
# file:       path to an existing HDF5 file.
# name:       name of the group to create ('obs' or 'var').
# msg:        label for 'df' used in error and warning messages.
# ignore.mat: if TRUE, a warning is issued for each matrix column.
#
# Row names go to '<name>/_index'. Factor columns are stored as integer codes
# with their levels in '<name>/__categories/<column>'. Matrix columns are
# never written here; they are returned (as a subset of 'df') so the caller
# can store them elsewhere. Other columns are written on a best-effort basis.
.save_dim_data <- function(df, file, name, msg, ignore.mat) {
    if (is.null(rownames(df)) || anyDuplicated(rownames(df))) {
        stop("'", msg, "' should have non-NULL and unique row names")
    }
    if (is.null(colnames(df)) || anyDuplicated(colnames(df))) {
        stop("'", msg, "' should have non-NULL and unique column names")
    }

    rhdf5::h5createGroup(file, name)
    rhdf5::h5write(rownames(df), file, file.path(name, "_index"))

    cat.group <- file.path(name, "__categories")
    is.mat <- logical(ncol(df))
    made.factor <- FALSE

    for (i in seq_len(ncol(df))) {
        x <- colnames(df)[i]
        current <- df[[x]]

        if (is.matrix(current)) {
            if (ignore.mat) {
                # There is no error condition in scope here, so the message
                # only names the column that is being skipped.
                warning("ignoring matrix-like column '", x, "' in the '", msg, "'")
            }
            is.mat[i] <- TRUE

        } else if (is.factor(current)) {
            if (!made.factor) {
                # '__categories' must be a group that holds one dataset of
                # levels per factor column.
                rhdf5::h5createGroup(file, cat.group)
                made.factor <- TRUE
            }
            rhdf5::h5write(levels(current), file, file.path(cat.group, x))

            # NOTE(review): the column is stored as 0-based codes with -1 for
            # missing values, which is understood to be the legacy H5AD
            # convention -- confirm against the anndata on-disk format docs.
            # TODO: anndata may additionally expect a 'categories' attribute
            # on the column that references the levels dataset.
            codes <- as.integer(current) - 1L
            codes[is.na(codes)] <- -1L
            rhdf5::h5write(codes, file, file.path(name, x))

        } else {
            # Who knows what weird crap we have here.
            tryCatch({
                rhdf5::h5write(current, file, file.path(name, x))
            }, error=function(e) {
                warning("failed to save column '", x, "' from the '", msg,
                    "':\n  ", conditionMessage(e))
            })
        }
    }

    df[,is.mat,drop=FALSE]
}

# Save a named list of matrices as datasets inside the group 'name' of 'file'.
#
# matlist: list-like collection of matrices; must have unique, non-NULL names
#          (an error is raised otherwise).
# file:    path to an existing HDF5 file.
# name:    name of the group to create ('obsm' or 'varm').
# msg:     label for 'matlist' used in error and warning messages.
#
# Returns NULL without touching the file when 'matlist' is empty. A matrix
# that cannot be written produces a warning rather than an error.
.save_mat_list <- function(matlist, file, name, msg) {
    if (!length(matlist)) {
        return(NULL)
    }
    if (is.null(names(matlist)) || anyDuplicated(names(matlist))) {
        stop("'", msg, "' should have non-NULL and unique names")
    }

    rhdf5::h5createGroup(file, name)
    for (i in seq_along(matlist)) {
        x <- names(matlist)[i]
        tryCatch({
            # .read_dim_mats() transposes each matrix after reading it, so
            # the matrix has to be transposed before writing for a round trip
            # to give back the original orientation.
            rhdf5::h5write(t(matlist[[i]]), file, file.path(name, x))
        }, error=function(e) {
            warning("failed to save matrix '", x, "' from the '", msg,
                "':\n  ", conditionMessage(e))
        })
    }

    invisible(NULL)
}

# Recursively save a named list of unstructured data into the group 'name'
# of 'file'.
#
# contents: list to save; must have unique, non-NULL names, otherwise a
#           warning is issued and nothing is written for this level.
# file:     path to an existing HDF5 file.
# name:     path of the group to create for this level.
# msg:      label for 'contents' used in warnings; extended with '->' and the
#           element name for nested elements.
#
# Atomic vectors and data frames are written as datasets (factors as
# character vectors), lists become nested groups, and anything else is
# skipped with a warning. Returns NULL invisibly (visibly on early exit).
.save_uns_list <- function(contents, file, name, msg) {
    if (!length(contents)) {
        return(NULL)
    }
    if (is.null(names(contents)) || anyDuplicated(names(contents))) {
        warning(wmsg("'", msg, "' should have non-NULL and unique names"))
        return(NULL)
    }

    rhdf5::h5createGroup(file, name)

    for (i in names(contents)) {
        current <- contents[[i]]
        path <- file.path(name, i)
        msg2 <- paste0(msg, "->", i)

        if (is.data.frame(current) || is.atomic(current)) {
            if (is.factor(current)) {
                # TODO: save this as an HDF5 enumeration type.
                current <- as.character(current)
            }
            # Best effort, like the other savers: one unwritable element
            # should not abort the whole write.
            tryCatch({
                rhdf5::h5write(current, file=file, name=path)
            }, error=function(e) {
                warning(wmsg("failed to save '", msg2, "':\n  ", conditionMessage(e)))
            })
        } else if (is.list(current)) {
            .save_uns_list(current, file, path, msg2)
        } else {
            # 'msg' is a character label, not a function; the message has to
            # be assembled with wmsg().
            warning(wmsg("ignoring '", msg2, "' of unknown type ", class(current)[1]))
        }
    }

    invisible(NULL)
}

# nocov start

# Skipping code coverage on these function because they aren't used until the
Expand Down