Skip to content

Commit cc75061

Browse files
authored
Merge pull request #204 from MoTrPAC/develop
v0.8.7: Metabolomics batching variables QC support
2 parents 32faf17 + 61f227a commit cc75061

File tree

98 files changed

+1500
-209
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

98 files changed

+1500
-209
lines changed

.circleci/config.yml

+37-4
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,36 @@
1-
version: 1
1+
version: 2.1
22
jobs:
3-
build:
3+
build_4_2_2:
44
docker:
5-
- image: rocker/verse:3.6.2
5+
- image: rocker/verse:4.2.2
66
steps:
77
- checkout
8+
- run:
9+
name: Install LaTeX packages
10+
command: |
11+
apt-get update
12+
apt-get install -y texlive
13+
apt-get install -y texlive-fonts-recommended
14+
- run:
15+
name: Install package dependencies
16+
command: R -e "devtools::install_deps(dep = TRUE)"
17+
- run:
18+
name: Build package
19+
command: R CMD build .
20+
- run:
21+
name: Check package
22+
command: R CMD check *tar.gz
23+
24+
build_4_1_0:
25+
docker:
26+
- image: rocker/verse:4.1.0
27+
steps:
28+
- checkout
29+
- run:
30+
name: Install LaTeX packages
31+
command: |
32+
apt-get update
33+
apt-get install -y texlive-fonts-recommended
834
- run:
935
name: Install package dependencies
1036
command: R -e "devtools::install_deps(dep = TRUE)"
@@ -13,4 +39,11 @@ jobs:
1339
command: R CMD build .
1440
- run:
1541
name: Check package
16-
command: R CMD check *tar.gz
42+
command: R CMD check *tar.gz
43+
44+
workflows:
45+
version: 2
46+
build_and_test:
47+
jobs:
48+
- build_4_2_2
49+
- build_4_1_0

DESCRIPTION

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Package: MotrpacBicQC
22
Type: Package
33
Title: QC/QA functions for the MoTrPAC community
4-
Version: 0.8.6
5-
Date: 2023-04-21
4+
Version: 0.8.7
5+
Date: 2023-05-22
66
Author: MoTrPAC Bioinformatics Center
77
Maintainer: David Jimenez-Morales <[email protected]>
88
Description: R Package for the analysis of MoTrPAC datasets.
@@ -19,7 +19,7 @@ URL: https://github.com/MoTrPAC/MotrpacBicQC
1919
Encoding: UTF-8
2020
LazyData: true
2121
BugReports: https://github.com/MoTrPAC/MotrpacBicQC/issues
22-
Depends: R (>= 3.6.0)
22+
Depends: R (>= 4.1.0)
2323
Imports:
2424
data.table,
2525
dplyr,
@@ -30,6 +30,7 @@ Imports:
3030
inspectdf,
3131
jsonlite,
3232
knitr,
33+
lubridate,
3334
naniar,
3435
progress,
3536
purrr,

NAMESPACE

+5
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,17 @@ export(set_phase)
3131
export(validate_assay)
3232
export(validate_batch)
3333
export(validate_cas)
34+
export(validate_dates_times)
35+
export(validate_lc_column_id)
3436
export(validate_metabolomics)
37+
export(validate_na_empty)
3538
export(validate_phase)
3639
export(validate_processFolder)
3740
export(validate_proteomics)
3841
export(validate_refmetname)
3942
export(validate_tissue)
4043
export(validate_two_phases)
44+
export(validate_yyyymmdd_dates)
4145
export(write_metabolomics_releases)
4246
export(write_proteomics_releases)
4347
import(dplyr)
@@ -60,6 +64,7 @@ importFrom(gridExtra,arrangeGrob)
6064
importFrom(gridExtra,grid.arrange)
6165
importFrom(inspectdf,inspect_na)
6266
importFrom(jsonlite,fromJSON)
67+
importFrom(lubridate,parse_date_time)
6368
importFrom(readr,read_lines)
6469
importFrom(scales,percent)
6570
importFrom(stats,median)

NEWS.md

+8-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,11 @@
1-
# MotrpacBicQC 0.8.5 (2023-04-21)
1+
# MotrpacBicQC 0.8.7 (2023-05-22)
2+
3+
* Add QC for the new required metabolomics batching variables (e.g., `extraction_date`, `acquisition_date`, `lc_column_id`)
4+
* Replace deprecated `ggplot2` `..count..` notation with `after_stat(count)`
5+
* Fix issues with `dl_read_gcp`
6+
* Other adjustments
7+
8+
# MotrpacBicQC 0.8.6 (2023-04-21)
29

310
* Minor adjustments
411

R/metabolomics_plots.R

+7-7
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ plot_basic_metabolomics_qc <- function(results,
5454
if(verbose) message(" - (p) Density distributions")
5555
mu <- results_long %>%
5656
group_by(sample_id) %>%
57-
reframe(grp.mean = mean(intensity))
57+
dplyr::reframe(grp.mean = mean(intensity))
5858

5959
den1 <- ggplot(data = results_long, aes(x = log2(intensity), color = sample_type)) +
6060
geom_density(na.rm = TRUE) +
@@ -83,7 +83,7 @@ plot_basic_metabolomics_qc <- function(results,
8383
if(verbose) message(" - (p) Plot sum of intensity/concentration values")
8484
sum_int <- results_long %>%
8585
group_by(sample_id, sample_type, sample_order) %>%
86-
reframe(sum_quant = sum(intensity))
86+
dplyr::reframe(sum_quant = sum(intensity))
8787

8888
psumint <- ggplot(sum_int, aes(x = reorder(sample_id, sample_order), y = sum_quant, fill = sample_type)) +
8989
geom_bar(stat = "identity") + theme_classic() +
@@ -178,7 +178,7 @@ plot_basic_metabolomics_qc <- function(results,
178178
if(verbose) message(" - (p) Plot ID counts")
179179
uid <- results_long %>%
180180
group_by(across(all_of(c("metabolite_name", "sample_id", "sample_type", "sample_order", "id_type")))) %>%
181-
reframe(total_intensity = intensity)
181+
dplyr::reframe(total_intensity = intensity)
182182
uid2 <- uid[which(!is.na(uid$total_intensity)),]
183183
uid3 <- unique(uid2[c("metabolite_name", "sample_id", "sample_type", "sample_order", "id_type")]) %>%
184184
count(sample_id, sample_type, sample_order, id_type)
@@ -268,9 +268,9 @@ plot_basic_metabolomics_qc <- function(results,
268268

269269

270270
ppids <- ggplot(results, aes(x = as.factor(id_type), fill = as.factor(id_type))) +
271-
geom_bar(aes(y = (..count..)/sum(..count..))) +
272-
geom_text(aes(y = ((..count..)/sum(..count..)),
273-
label = scales::percent((..count..)/sum(..count..))),
271+
geom_bar(aes(y = after_stat(count)/sum(after_stat(count)))) +
272+
geom_text(aes(y = after_stat(count)/sum(after_stat(count)),
273+
label = scales::percent(after_stat(count)/sum(after_stat(count)))),
274274
stat = "count", vjust = -0.25) +
275275
scale_y_continuous(labels = percent) +
276276
labs(title = "Proportion of Features Identified (named/unnamed)",
@@ -283,7 +283,7 @@ plot_basic_metabolomics_qc <- function(results,
283283

284284
pnids <- ggplot(results, aes(x = id_type, fill = id_type)) +
285285
geom_bar() +
286-
geom_text(stat = 'count',aes(label =..count.., vjust = -0.2)) +
286+
geom_text(stat = 'count',aes(label = after_stat(count), vjust = -0.2)) +
287287
labs(title = "Total Number of Features Identified (named/unnamed)",
288288
subtitle = paste(output_prefix), y = "", x = "") +
289289
theme_light() +

R/metabolomics_qc.R

+38-5
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
utils::globalVariables(
44
c("assay_codes",
55
"bic_animal_tissue_code",
6-
"phenotypes_pass1a06_short",
7-
"..count.."))
6+
"phenotypes_pass1a06_short"))
87

98

109
# METABOLOMICS DATASETS: PRIMARY QC
@@ -72,7 +71,7 @@ check_metadata_metabolites <- function(df,
7271
if(verbose) message(" + (+) `refmet_name` unique values: OK")
7372
}
7473

75-
if(verbose) message(" + Validating `refmet_name`")
74+
if(verbose) message(" + Validating `refmet_name` (it might take some time)")
7675
nrnna <- validate_refmetname(dataf = df, verbose = verbose)
7776
if(nrnna > 0){
7877
if(verbose) message(paste0(" - (-) SUMMARY: ", nrnna, " `refmet_name` not found in RefMet Metabolomics Data Dictionary: FAIL"))
@@ -184,7 +183,7 @@ check_metadata_samples <- function(df,
184183
# filter only expected columns
185184
df <- filter_required_columns(df = df,
186185
type = "m_s",
187-
verbose = FALSE)
186+
verbose = TRUE)
188187

189188
# Check every column
190189
# sample_id: si
@@ -264,6 +263,40 @@ check_metadata_samples <- function(df,
264263
if(verbose) message(" - (-) `raw_file` column missed: FAIL")
265264
ic <- ic + 1
266265
}
266+
267+
if("extraction_date" %in% colnames(df)){
268+
if(any(is.na(df$extraction_date))){
269+
if(verbose) message(" - (-) `extraction_date` has NA values: FAIL")
270+
ic <- ic + 1
271+
}else{
272+
icdate <- validate_yyyymmdd_dates(df = df, date_column = "extraction_date", verbose = verbose)
273+
ic <- ic + icdate
274+
}
275+
}else{
276+
if(verbose) message(" - (-) `extraction_date` column missed: FAIL")
277+
ic <- ic + 1
278+
}
279+
280+
if("acquisition_date" %in% colnames(df)){
281+
if( any(grepl(":", df$acquisition_date)) ){
282+
if(verbose) message(" + (i) Assuming `acquisition_date` is in `MM/DD/YYYY HH:MM:SS AM/PM` format. Validating:")
283+
icdt <- validate_dates_times(df = df, column_name = "acquisition_date", verbose = verbose)
284+
}else{
285+
icdate <- validate_yyyymmdd_dates(df = df, date_column = "acquisition_date", verbose = verbose)
286+
ic <- ic + icdate
287+
}
288+
}else{
289+
if(verbose) message(" - (-) `acquisition_date` column missed: FAIL")
290+
ic <- ic + 1
291+
}
292+
293+
# check if lc_column_id is in column names
294+
if ("lc_column_id" %in% colnames(df)) {
295+
validate_lc_column_id(df, column_name = "lc_column_id", verbose = verbose)
296+
}else{
297+
if(verbose) message(" - (-) `lc_column_id` column missed: FAIL")
298+
ic <- ic + 1
299+
}
267300

268301
if(return_n_issues) return(ic)
269302
} #end check_metadata_samples
@@ -658,7 +691,7 @@ validate_metabolomics <- function(input_results_folder,
658691
if(isTRUE(all.equal(m_s_n, m_s_u))){
659692
if(verbose) message(" + (+) Metadata samples: named and unnamed are identical: OK")
660693
}else{
661-
if(verbose) message(" - (-) Metadata samples: named and unnamed files differ")
694+
if(verbose) message(" - (-) Metadata samples: named and unnamed files differ: FAIL")
662695
ic <- ic + 1
663696
}
664697
}else{

0 commit comments

Comments
 (0)