Skip to content

Commit c734b7e

Browse files
authored
Merge pull request #70 from OHDSI/develop
Release 0.5.10
2 parents d939010 + 65ebc9b commit c734b7e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+530
-145
lines changed

.Rbuildignore

+3-1
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,6 @@ compare_versions
3535
LICENSE
3636
.git
3737
..Rcheck
38-
errorReportSql.txt
38+
errorReportSql.txt
39+
^CRAN-SUBMISSION$
40+
^cran-comments\.md$

CRAN-SUBMISSION

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Version: 0.5.10
2+
Date: 2024-08-21 04:10:48 UTC
3+
SHA: bb001b6745dfbc303db44ef96b9eb8aaa2f67901

DESCRIPTION

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: ResultModelManager
22
Title: Result Model Manager
3-
Version: 0.5.9
3+
Version: 0.5.10
44
Authors@R:
55
person("Jamie", "Gilbert", , "[email protected]", role = c("aut", "cre"))
66
Description: Database data model management utilities for R packages in the Observational Health Data Sciences and
@@ -35,7 +35,6 @@ Suggests:
3535
testthat (>= 3.0.0),
3636
RSQLite,
3737
duckdb,
38-
RPostgres,
3938
knitr,
4039
rmarkdown,
4140
keyring,

NEWS.md

+20
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,23 @@
1+
# ResultModelManager 0.5.10
2+
3+
Changes:
4+
5+
1. Using readr column types to work around issues with inconsistent type conversion between DBI and JDBC drivers.
6+
7+
Bug fixes:
8+
9+
1. Resolved issue where failed queries were being aborted inside the wrong connection
10+
in PooledConnectionHandler
11+
12+
2. Refactored pooled connection handler to better ensure checkout connections are returned
13+
14+
15+
# ResultModelManager 0.5.9
16+
17+
Changes:
18+
19+
1. More tidy cleanup of PooledConnectionHandlers to prevent leaked connections.
20+
121
# ResultModelManager 0.5.9
222

323
Changes:

R/ConnectionHandler.R

+22-18
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,20 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616

17+
# Limit row count is intended for web applications that may cause a denial of service
18+
.limitRowCount <- function(sql, overrideRowLimit) {
19+
limitRowCount <- as.integer(Sys.getenv("LIMIT_ROW_COUNT"))
20+
if (!is.na(limitRowCount) &
21+
limitRowCount > 0 &
22+
!overrideRowLimit) {
23+
sql <- SqlRender::render("SELECT TOP @limit_row_count * FROM (@query) result;",
24+
query = gsub(";$", "", sql), # Remove last semi-colon
25+
limit_row_count = limitRowCount
26+
)
27+
}
28+
return(sql)
29+
}
30+
1731
#' ConnectionHandler
1832
#' @description
1933
#' Class for handling DatabaseConnector:connection objects with consistent R6 interfaces for pooled and non-pooled connections.
@@ -111,10 +125,6 @@ ConnectionHandler <- R6::R6Class(
111125
#' Connects automatically if it isn't yet loaded
112126
#' @returns DatabaseConnector Connection instance
113127
getConnection = function() {
114-
if (is.null(self$con)) {
115-
self$initConnection()
116-
}
117-
118128
if (!self$dbIsValid()) {
119129
self$initConnection()
120130
}
@@ -168,15 +178,7 @@ ConnectionHandler <- R6::R6Class(
168178
#' @param ... Additional query parameters
169179
#' @returns boolean TRUE if connection is valid
170180
queryDb = function(sql, snakeCaseToCamelCase = self$snakeCaseToCamelCase, overrideRowLimit = FALSE, ...) {
171-
# Limit row count is intended for web applications that may cause a denial of service if they consume too many
172-
# resources.
173-
limitRowCount <- as.integer(Sys.getenv("LIMIT_ROW_COUNT"))
174-
if (!is.na(limitRowCount) & limitRowCount > 0 & !overrideRowLimit) {
175-
sql <- SqlRender::render("SELECT TOP @limit_row_count * FROM (@query) result;",
176-
query = gsub(";$", "", sql), # Remove last semi-colon
177-
limit_row_count = limitRowCount
178-
)
179-
}
181+
sql <- .limitRowCount(sql, overrideRowLimit)
180182
sql <- self$renderTranslateSql(sql, ...)
181183

182184
tryCatch(
@@ -203,7 +205,7 @@ ConnectionHandler <- R6::R6Class(
203205

204206
tryCatch(
205207
{
206-
data <- self$executeFunction(sql)
208+
self$executeFunction(sql)
207209
},
208210
error = function(error) {
209211
if (self$dbms() %in% c("postgresql", "redshift")) {
@@ -223,17 +225,19 @@ ConnectionHandler <- R6::R6Class(
223225
#' Does not translate or render sql.
224226
#' @param sql sql query string
225227
#' @param snakeCaseToCamelCase (Optional) Boolean. return the results columns in camel case (default)
226-
queryFunction = function(sql, snakeCaseToCamelCase = self$snakeCaseToCamelCase) {
227-
DatabaseConnector::querySql(self$getConnection(), sql, snakeCaseToCamelCase = snakeCaseToCamelCase)
228+
#' @param connection (Optional) connection object
229+
queryFunction = function(sql, snakeCaseToCamelCase = self$snakeCaseToCamelCase, connection = self$getConnection()) {
230+
DatabaseConnector::querySql(connection, sql, snakeCaseToCamelCase = snakeCaseToCamelCase)
228231
},
229232

230233
#' execute Function
231234
#' @description
232235
#' exec query Function that can be overriden with subclasses (e.g. use different base function or intercept query)
233236
#' Does not translate or render sql.
234237
#' @param sql sql query string
235-
executeFunction = function(sql) {
236-
DatabaseConnector::executeSql(self$getConnection(), sql)
238+
#' @param connection connection object
239+
executeFunction = function(sql, connection = self$getConnection()) {
240+
DatabaseConnector::executeSql(connection, sql)
237241
}
238242
)
239243
)

R/DataModel.R

+28-7
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ checkAndFixDataTypes <-
110110
table <- dplyr::mutate_at(table, i, as.numeric)
111111
}
112112
} else if (expectedType == "int") {
113-
if (observedTypes[i] != "integer") {
113+
if (!observedTypes[i] %in% c("integer", "numeric")) {
114114
ParallelLogger::logDebug(
115115
sprintf(
116116
"Column %s in table %s in results folder %s is of type %s, but was expecting %s. Attempting to convert.",
@@ -121,7 +121,7 @@ checkAndFixDataTypes <-
121121
expectedType
122122
)
123123
)
124-
table <- dplyr::mutate_at(table, i, as.integer)
124+
table <- dplyr::mutate_at(table, i, as.numeric)
125125
}
126126
} else if (expectedType == "varchar") {
127127
if (observedTypes[i] != "character") {
@@ -423,12 +423,14 @@ uploadTable <- function(tableName,
423423
purgeSiteDataBeforeUploading,
424424
warnOnMissingTable) {
425425
csvFileName <- paste0(tableName, ".csv")
426+
specifications <- specifications %>%
427+
dplyr::filter(.data$tableName == !!tableName)
428+
426429
if (csvFileName %in% list.files(resultsFolder)) {
427430
rlang::inform(paste0("Uploading file: ", csvFileName, " to table: ", tableName))
428431

429432
primaryKey <- specifications %>%
430-
dplyr::filter(.data$tableName == !!tableName &
431-
tolower(.data$primaryKey) == "yes") %>%
433+
dplyr::filter(tolower(.data$primaryKey) == "yes") %>%
432434
dplyr::select("columnName") %>%
433435
dplyr::pull()
434436

@@ -443,8 +445,7 @@ uploadTable <- function(tableName,
443445
env$purgeSiteDataBeforeUploading <- purgeSiteDataBeforeUploading
444446
if (purgeSiteDataBeforeUploading && "database_id" %in% primaryKey) {
445447
type <- specifications %>%
446-
dplyr::filter(.data$tableName == !!tableName &
447-
.data$columnName == "database_id") %>%
448+
dplyr::filter(.data$columnName == "database_id") %>%
448449
dplyr::select("dataType") %>%
449450
dplyr::pull()
450451
# Remove the existing data for the databaseId
@@ -477,12 +478,32 @@ uploadTable <- function(tableName,
477478
env$primaryKeyValuesInDb <- primaryKeyValuesInDb
478479
}
479480

481+
# Remove data size or types
482+
types <- sub(" ", "", sub("\\(.*\\)", "", specifications$dataType))
483+
484+
# Convert the types to readr's col_types format
485+
convertType <- Vectorize(
486+
function(type) {
487+
switch(type,
488+
varchar = "c",
489+
bigint = "n",
490+
int = "n",
491+
date = "D",
492+
"?"
493+
) # default to guess if type not matched
494+
}
495+
)
496+
497+
types <- convertType(types)
498+
# Create a named vector of column types
499+
names(types) <- specifications$columnName
500+
colTypes <- do.call(readr::cols, as.list(types))
480501

481502
readr::read_csv_chunked(
482503
file = file.path(resultsFolder, csvFileName),
483504
callback = function(chunk, pos) uploadChunk(chunk, pos, env, specifications, resultsFolder, connection, runCheckAndFixCommands, forceOverWriteOfSpecifications),
484505
chunk_size = 1e7,
485-
col_types = readr::cols(),
506+
col_types = colTypes,
486507
guess_max = 1e6,
487508
progress = FALSE
488509
)

0 commit comments

Comments
 (0)