Commit 741f748

Merge pull request #457 from OHDSI/develop
Develop
2 parents 162e709 + 7a2d0e5 commit 741f748

63 files changed: +8398 −158 lines

DESCRIPTION

+3 −3

@@ -1,8 +1,8 @@
 Package: DataQualityDashboard
 Type: Package
 Title: Execute and View Data Quality Checks on OMOP CDM Database
-Version: 2.2.0
-Date: 2023-05-05
+Version: 2.3.0
+Date: 2023-05-21
 Authors@R: c(
   person("Katy", "Sadowski", email = "[email protected]", role = c("aut", "cre")),
   person("Clair", "Blacketer", role = c("aut")),
@@ -27,7 +27,7 @@ Imports:
   dplyr,
   jsonlite,
   rJava,
-  SqlRender (>= 1.6.0),
+  SqlRender (>= 1.10.1),
   plyr,
   stringr,
   rlang,

NAMESPACE

+8
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand

+export(convertJsonResultsFileCase)
 export(executeDqChecks)
 export(listDqChecks)
 export(reEvaluateThresholds)
@@ -8,8 +9,14 @@ export(writeJsonResultsToCsv)
 export(writeJsonResultsToTable)
 import(DatabaseConnector)
 import(magrittr)
+importFrom(SqlRender,camelCaseToSnakeCase)
+importFrom(SqlRender,snakeCaseToCamelCase)
 importFrom(dplyr,case_when)
 importFrom(dplyr,mutate)
+importFrom(dplyr,rename_with)
+importFrom(jsonlite,fromJSON)
+importFrom(jsonlite,parse_json)
+importFrom(jsonlite,toJSON)
 importFrom(magrittr,"%>%")
 importFrom(readr,read_csv)
 importFrom(rlang,.data)
@@ -18,6 +25,7 @@ importFrom(stats,setNames)
 importFrom(stringr,regex)
 importFrom(stringr,str_detect)
 importFrom(tidyselect,all_of)
+importFrom(tools,file_path_sans_ext)
 importFrom(utils,install.packages)
 importFrom(utils,menu)
 importFrom(utils,packageVersion)

NEWS.md

+15 −1

@@ -1,3 +1,17 @@
+DataQualityDashboard 2.3.0
+==========================
+This release includes:
+
+### New features
+
+- *New SQL-only Mode:* Setting `sqlOnly` and `sqlOnlyIncrementalInsert` to TRUE in `executeDqChecks` will return (but not run) a set of SQL queries that, when executed, will calculate the results of the DQ checks and insert them into a database table. Additionally, `sqlOnlyUnionCount` can be used to specify a number of SQL queries to union for each check type, allowing for parallel execution of these queries and potentially large performance gains. See the [SqlOnly vignette](https://ohdsi.github.io/DataQualityDashboard/articles/SqlOnly.html) for details
+- *Results File Case Converter:* The new function `convertJsonResultsFileCase` can be used to convert the keys in a DQD results JSON file between snakecase and camelcase. This allows reading of v2.1.0+ JSON files in older DQD versions, and other conversions which may be necessary for secondary use of the DQD results file. See [function documentation](https://ohdsi.github.io/DataQualityDashboard/reference/convertJsonResultsFileCase.html) for details
+
+### Bugfixes
+
+- In the v2.1.0 release, all DQD variables were converted from snakecase to camelcase, including those in the results JSON file. This resulted in errors for users trying to view results files generated by older DQD versions in DQD v2.1.0+. This issue has now been fixed. `viewDqDashboard` will now automatically convert the case of pre-v2.1.0 results files to camelcase so that older results files may be viewed in v2.3.0+
+
+
 DataQualityDashboard 2.2.0
 ==========================
 This release includes:
@@ -60,7 +74,7 @@ This release includes:
 - **withinVisitDates** looks at clinical facts and the visits they are associated with to make sure that the visit dates occur within one week on either side of the visit
 - **plausibleUnitConceptIds** identifies records with invalid Unit_Concept_Ids by Measurement_Concept_Id

-### outputFolder input paramater
+### outputFolder input parameter

 - The `outputFolder` parameter for the `executeDqChecks` function is now REQUIRED and no longer has a default value. **This may be a breaking change for users who have not specified this parameter in their script to run DQD.**
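
For orientation, a minimal sketch of how the new SQL-only mode described in the release notes might be invoked. The connection details, schema names, and output folder below are illustrative placeholders, not part of this commit:

library(DataQualityDashboard)

# Hypothetical connection details; substitute your own dbms, server, and credentials.
connectionDetails <- DatabaseConnector::createConnectionDetails(
  dbms = "postgresql",
  server = "localhost/ohdsi",
  user = "user",
  password = "secret"
)

# With sqlOnly = TRUE and sqlOnlyIncrementalInsert = TRUE, executeDqChecks writes
# insert-style SQL scripts to outputFolder instead of running the checks;
# sqlOnlyUnionCount = 100 unions up to 100 check queries per insert statement,
# producing fewer, larger scripts that can be executed in parallel.
executeDqChecks(
  connectionDetails = connectionDetails,
  cdmDatabaseSchema = "cdm",
  resultsDatabaseSchema = "results",
  cdmSourceName = "MY_CDM",
  outputFolder = "output",
  sqlOnly = TRUE,
  sqlOnlyIncrementalInsert = TRUE,
  sqlOnlyUnionCount = 100
)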

R/convertResultsCase.R

+81
@@ -0,0 +1,81 @@
+# Copyright 2023 Observational Health Data Sciences and Informatics
+#
+# This file is part of DataQualityDashboard
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#' @title Convert JSON results file case
+#'
+#' @description Convert a DQD JSON results file between camelcase and (all-caps) snakecase. Enables viewing of pre-v.2.1.0 results files in later DQD versions, and vice versa
+#'
+#' @param jsonFilePath Path to the JSON results file to be converted
+#' @param writeToFile Whether or not to write the converted results back to a file (must be either TRUE or FALSE)
+#' @param outputFolder The folder to output the converted JSON results file to
+#' @param outputFile (OPTIONAL) File to write converted results JSON object to. Default is name of input file with a "_camel" or "_snake" postfix
+#' @param targetCase Case into which the results file parameters should be converted (must be either "camel" or "snake")
+#'
+#' @returns DQD results object (a named list)
+#'
+#' @importFrom jsonlite fromJSON
+#' @importFrom SqlRender snakeCaseToCamelCase camelCaseToSnakeCase
+#' @importFrom dplyr rename_with
+#' @importFrom tools file_path_sans_ext
+#'
+#' @export
+
+convertJsonResultsFileCase <- function(
+    jsonFilePath,
+    writeToFile,
+    outputFolder = NA,
+    outputFile = "",
+    targetCase) {
+  if (!any(targetCase %in% c("camel", "snake"))) {
+    stop("targetCase must be either 'camel' or 'snake'.")
+  }
+  stopifnot(is.logical(writeToFile))
+  if (writeToFile && is.na(outputFolder)) {
+    stop("You must specify an output folder if writing to file.")
+  }
+
+  results <- jsonlite::fromJSON(jsonFilePath)
+
+  if ("numViolatedRows" %in% names(results$CheckResults) && targetCase == "camel") {
+    warning("File is already in camelcase! No conversion will be performed.")
+    return(results)
+  }
+  if ("NUM_VIOLATED_ROWS" %in% names(results$CheckResults) && targetCase == "snake") {
+    warning("File is already in snakecase! No conversion will be performed.")
+    return(results)
+  }
+
+  if (targetCase == "camel") {
+    swapFunction <- SqlRender::snakeCaseToCamelCase
+  } else {
+    swapFunction <- function(x) {
+      toupper(SqlRender::camelCaseToSnakeCase(x))
+    }
+  }
+
+  results$Metadata <- dplyr::rename_with(results$Metadata, swapFunction)
+  results$CheckResults <- dplyr::rename_with(results$CheckResults, swapFunction, -c("checkId"))
+
+  if (writeToFile) {
+    if (nchar(outputFile) == 0) {
+      jsonFile <- tools::file_path_sans_ext(basename(jsonFilePath))
+      outputFile <- paste(jsonFile, "_", targetCase, ".json", sep = "")
+    }
+    .writeResultsToJson(results, outputFolder, outputFile)
+  }
+
+  return(results)
+}
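
A minimal usage sketch of the new converter; the file path below is hypothetical:

# Convert an older snakecase results file to camelcase. With outputFile left at
# its default, the converted file is written as "results_snake_camel.json".
results <- convertJsonResultsFileCase(
  jsonFilePath = "output/results_snake.json",
  writeToFile = TRUE,
  outputFolder = "output",
  targetCase = "camel"
)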

R/executeDqChecks.R

+17 −3

@@ -25,11 +25,13 @@
 #' @param numThreads The number of concurrent threads to use to execute the queries
 #' @param cdmSourceName The name of the CDM data source
 #' @param sqlOnly Should the SQLs be executed (FALSE) or just returned (TRUE)?
+#' @param sqlOnlyUnionCount (OPTIONAL) In sqlOnlyIncrementalInsert mode, how many SQL commands to union in each query to insert check results into results table (can speed processing when queries done in parallel). Default is 1.
+#' @param sqlOnlyIncrementalInsert (OPTIONAL) In sqlOnly mode, boolean to determine whether to generate SQL queries that insert check results and associated metadata into results table. Default is FALSE (for backwards compatibility to <= v2.2.0)
 #' @param outputFolder The folder to output logs, SQL files, and JSON results file to
 #' @param outputFile (OPTIONAL) File to write results JSON object
 #' @param verboseMode Boolean to determine if the console will show all execution steps. Default is FALSE
 #' @param writeToTable Boolean to indicate if the check results will be written to the dqdashboard_results table in the resultsDatabaseSchema. Default is TRUE
-#' @param writeTableName The name of the results table. Defaults to `dqdashboard_results`.
+#' @param writeTableName The name of the results table. Defaults to `dqdashboard_results`. Used when sqlOnly or writeToTable is True.
 #' @param writeToCsv Boolean to indicate if the check results will be written to a csv file. Default is FALSE
 #' @param csvFile (OPTIONAL) CSV file to write results
 #' @param checkLevels Choose which DQ check levels to execute. Default is all 3 (TABLE, FIELD, CONCEPT)
@@ -64,6 +66,8 @@ executeDqChecks <- function(connectionDetails,
                             cdmSourceName,
                             numThreads = 1,
                             sqlOnly = FALSE,
+                            sqlOnlyUnionCount = 1,
+                            sqlOnlyIncrementalInsert = FALSE,
                             outputFolder,
                             outputFile = "",
                             verboseMode = FALSE,
@@ -93,6 +97,8 @@ executeDqChecks <- function(connectionDetails,
   stopifnot(is.character(cdmDatabaseSchema), is.character(resultsDatabaseSchema), is.numeric(numThreads))
   stopifnot(is.character(cdmSourceName), is.logical(sqlOnly), is.character(outputFolder), is.logical(verboseMode))
   stopifnot(is.logical(writeToTable), is.character(checkLevels))
+  stopifnot(is.numeric(sqlOnlyUnionCount) && sqlOnlyUnionCount > 0)
+  stopifnot(is.logical(sqlOnlyIncrementalInsert))
   stopifnot(is.character(cohortDatabaseSchema), is.character(cohortTableName))

   if (!all(checkLevels %in% c("TABLE", "FIELD", "CONCEPT"))) {
@@ -128,7 +134,10 @@
     metadata$dqdVersion <- as.character(packageVersion("DataQualityDashboard"))
     DatabaseConnector::disconnect(connection)
   } else {
-    metadata <- NA
+    metadata <- data.frame(
+      dqdVersion = as.character(packageVersion("DataQualityDashboard")),
+      cdmSourceName = cdmSourceName
+    )
   }

   # Setup output folder ------------------------------------------------------------------------------------------------------------
@@ -259,10 +268,14 @@
       connection,
       cdmDatabaseSchema,
       vocabDatabaseSchema,
+      resultsDatabaseSchema,
+      writeTableName,
       cohortDatabaseSchema,
       cohortTableName,
       cohortDefinitionId,
       outputFolder,
+      sqlOnlyUnionCount,
+      sqlOnlyIncrementalInsert,
       sqlOnly,
       progressBar = TRUE
     )
@@ -310,9 +323,10 @@
     .writeResultsToJson(allResults, outputFolder, outputFile)

     ParallelLogger::logInfo("Execution Complete")
+  } else {
+    .writeDDL(resultsDatabaseSchema, writeTableName, connectionDetails$dbms, outputFolder)
   }

-
   # write to table ----------------------------------------------------------------------

   if (!sqlOnly && writeToTable) {
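
As a quick sketch of the two new argument guards added above, pulled out into a standalone helper for illustration (the helper name is hypothetical; executeDqChecks performs these checks inline via stopifnot):

# Hypothetical standalone version of the new guards: sqlOnlyUnionCount must be
# a positive number, sqlOnlyIncrementalInsert must be a logical.
validateSqlOnlyArgs <- function(sqlOnlyUnionCount, sqlOnlyIncrementalInsert) {
  stopifnot(is.numeric(sqlOnlyUnionCount) && sqlOnlyUnionCount > 0)
  stopifnot(is.logical(sqlOnlyIncrementalInsert))
  invisible(TRUE)
}

validateSqlOnlyArgs(100, TRUE) # passes
# validateSqlOnlyArgs(0, TRUE) # errors: sqlOnlyUnionCount is not > 0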

R/listChecks.R

+12 −56

@@ -35,65 +35,21 @@ listDqChecks <- function(cdmVersion = "5.3", tableCheckThresholdLoc = "default",
     sprintf("OMOP_CDMv%s_Check_Descriptions.csv", cdmVersion),
     package = "DataQualityDashboard"
   ))
-  dqChecks$checkDescriptions <- as.data.frame(dqChecks$checkDescriptions)

+  dqChecks$tableChecks <- .readThresholdFile(
+    checkThresholdLoc = tableCheckThresholdLoc,
+    defaultLoc = sprintf("OMOP_CDMv%s_Table_Level.csv", cdmVersion)
+  )

-  if (tableCheckThresholdLoc == "default") {
-    dqChecks$tableChecks <-
-      read_csv(
-        system.file(
-          "csv",
-          sprintf("OMOP_CDMv%s_Table_Level.csv", cdmVersion),
-          package = "DataQualityDashboard"
-        ),
-        na = c(" ", "")
-      )
-    dqChecks$tableChecks <- as.data.frame(dqChecks$tableChecks)
-  } else {
-    dqChecks$tableChecks <- read_csv(
-      tableCheckThresholdLoc,
-      na = c(" ", "")
-    )
-    dqChecks$tableChecks <- as.data.frame(dqChecks$tableChecks)
-  }
+  dqChecks$fieldChecks <- .readThresholdFile(
+    checkThresholdLoc = fieldCheckThresholdLoc,
+    defaultLoc = sprintf("OMOP_CDMv%s_Field_Level.csv", cdmVersion)
+  )

-  if (fieldCheckThresholdLoc == "default") {
-    dqChecks$fieldChecks <-
-      read_csv(
-        system.file(
-          "csv",
-          sprintf("OMOP_CDMv%s_Field_Level.csv", cdmVersion),
-          package = "DataQualityDashboard"
-        ),
-        na = c(" ", "")
-      )
-    dqChecks$fieldChecks <- as.data.frame(dqChecks$fieldChecks)
-  } else {
-    dqChecks$fieldChecks <- read_csv(
-      fieldCheckThresholdLoc,
-      na = c(" ", "")
-    )
-    dqChecks$fieldChecks <- as.data.frame(dqChecks$fieldChecks)
-  }
-
-  if (conceptCheckThresholdLoc == "default") {
-    dqChecks$conceptChecks <-
-      read_csv(
-        system.file(
-          "csv",
-          sprintf("OMOP_CDMv%s_Concept_Level.csv", cdmVersion),
-          package = "DataQualityDashboard"
-        ),
-        na = c(" ", "")
-      )
-    dqChecks$conceptChecks <- as.data.frame(dqChecks$conceptChecks)
-  } else {
-    dqChecks$conceptChecks <- read_csv(
-      conceptCheckThresholdLoc,
-      na = c(" ", "")
-    )
-    dqChecks$conceptChecks <- as.data.frame(dqChecks$conceptChecks)
-  }
+  dqChecks$conceptChecks <- .readThresholdFile(
+    checkThresholdLoc = conceptCheckThresholdLoc,
+    defaultLoc = sprintf("OMOP_CDMv%s_Concept_Level.csv", cdmVersion)
+  )

   return(dqChecks)
 }
R/runCheck.R

+32 −6

@@ -24,10 +24,14 @@
 #' @param connection A connection for connecting to the CDM database using the DatabaseConnector::connect(connectionDetails) function.
 #' @param cdmDatabaseSchema The fully qualified database name of the CDM schema
 #' @param vocabDatabaseSchema The fully qualified database name of the vocabulary schema (default is to set it as the cdmDatabaseSchema)
+#' @param resultsDatabaseSchema The fully qualified database name of the results schema
+#' @param writeTableName The table to write DQD results to. Used when sqlOnly or writeToTable is True.
 #' @param cohortDatabaseSchema The schema where the cohort table is located.
 #' @param cohortTableName The name of the cohort table.
 #' @param cohortDefinitionId The cohort definition id for the cohort you wish to run the DQD on. The package assumes a standard OHDSI cohort table called 'Cohort'
 #' @param outputFolder The folder to output logs and SQL files to
+#' @param sqlOnlyUnionCount (OPTIONAL) How many SQL commands to union before inserting them into output table (speeds processing when queries done in parallel). Default is 1.
+#' @param sqlOnlyIncrementalInsert (OPTIONAL) Boolean to determine whether to insert check results and associated metadata into output table. Default is FALSE (for backwards compatibility to <= v2.2.0)
 #' @param sqlOnly Should the SQLs be executed (FALSE) or just returned (TRUE)?
 #'
 #' @import magrittr
@@ -42,10 +42,14 @@
                       connection,
                       cdmDatabaseSchema,
                       vocabDatabaseSchema,
+                      resultsDatabaseSchema,
+                      writeTableName,
                       cohortDatabaseSchema,
                       cohortTableName,
                       cohortDefinitionId,
                       outputFolder,
+                      sqlOnlyUnionCount,
+                      sqlOnlyIncrementalInsert,
                       sqlOnly) {
   ParallelLogger::logInfo(sprintf("Processing check description: %s", checkDescription$checkName))

@@ -62,10 +70,6 @@
     cohort <- FALSE
   }

-  if (sqlOnly) {
-    unlink(file.path(outputFolder, sprintf("%s.sql", checkDescription$checkName)))
-  }
-
   if (nrow(checks) > 0) {
     dfs <- apply(X = checks, MARGIN = 1, function(check) {
       columns <- lapply(names(check), function(c) {
@@ -88,7 +92,19 @@

       sql <- do.call(SqlRender::loadRenderTranslateSql, params)

-      if (sqlOnly) {
+      if (sqlOnly && sqlOnlyIncrementalInsert) {
+        checkQuery <- .createSqlOnlyQueries(
+          params,
+          check,
+          tableChecks,
+          fieldChecks,
+          conceptChecks,
+          sql,
+          connectionDetails,
+          checkDescription
+        )
+        data.frame(query = checkQuery)
+      } else if (sqlOnly) {
         write(x = sql, file = file.path(
           outputFolder,
           sprintf("%s.sql", checkDescription$checkName)
@@ -105,7 +121,17 @@
         )
       }
     })
-    do.call(rbind, dfs)
+
+    dfs <- do.call(rbind, dfs)
+
+    if (sqlOnlyIncrementalInsert) {
+      sqlToUnion <- dfs$query
+      if (length(sqlToUnion) > 0) {
+        .writeSqlOnlyQueries(sqlToUnion, sqlOnlyUnionCount, resultsDatabaseSchema, writeTableName, connectionDetails$dbms, outputFolder, checkDescription)
+      }
+    } else {
+      dfs
+    }
   } else {
     ParallelLogger::logWarn(paste0("Warning: Evaluation resulted in no checks: ", filterExpression))
     data.frame()
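
The union step delegated to .writeSqlOnlyQueries is not shown in this excerpt. A hypothetical illustration of the batching idea behind sqlOnlyUnionCount, not the package's actual implementation:

# Toy sketch: group per-check queries into batches of at most `unionCount`
# and combine each batch with UNION ALL, so fewer, larger statements can be
# executed (and parallelized) downstream.
batchQueries <- function(queries, unionCount) {
  batches <- split(queries, ceiling(seq_along(queries) / unionCount))
  vapply(batches, function(batch) paste(batch, collapse = "\nUNION ALL\n"), character(1))
}

batchQueries(paste("SELECT", 1:5), unionCount = 2)
# Yields 3 combined query strings: checks 1-2, 3-4, and 5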
