From d2fbc0fe86bfb1761ecc9f802e4ae9c8e74ead98 Mon Sep 17 00:00:00 2001 From: Julian Gilbey Date: Sat, 27 Nov 2021 22:16:20 +0000 Subject: [PATCH 01/11] Allow PostgreSQL port setting and slightly simplify Makefile in the process --- mimic_direct_extract.py | 3 +++ utils/Makefile | 8 ++++---- utils/build_curated_from_psql.sh | 1 + utils/setup_user_env.sh | 12 +++++++----- 4 files changed, 15 insertions(+), 9 deletions(-) mode change 100644 => 100755 utils/build_curated_from_psql.sh diff --git a/mimic_direct_extract.py b/mimic_direct_extract.py index 9c6a66f..6e13eb8 100644 --- a/mimic_direct_extract.py +++ b/mimic_direct_extract.py @@ -732,6 +732,8 @@ def plot_variable_histograms(col_names, df): ap.add_argument('--psql_host', type=str, default=None, help='Postgres host. Try "/var/run/postgresql/" for Unix domain socket errors.') + ap.add_argument('--psql_port', type=int, default=None, + help='Postgres port. Defaults to 5432 if not provided.') ap.add_argument('--psql_dbname', type=str, default='mimic', help='Postgres database name.') ap.add_argument('--psql_schema_name', type=str, default='mimiciii', @@ -804,6 +806,7 @@ def plot_variable_histograms(col_names, df): schema_name = args['psql_schema_name'] query_args = {'dbname': dbname} if args['psql_host'] is not None: query_args['host'] = args['psql_host'] + if args['psql_port'] is not None: query_args['port'] = args['psql_port'] if args['psql_user'] is not None: query_args['user'] = args['psql_user'] if args['psql_password'] is not None: query_args['password'] = args['psql_password'] diff --git a/utils/Makefile b/utils/Makefile index f8c9d35..c37cee4 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -37,7 +37,7 @@ build_concepts_mimic_code: setup_user_env.sh clone_mimic_code_repo { \ source ./setup_user_env.sh; \ cd ${MIMIC_CODE_DIR}/concepts; \ - psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./make-concepts.sql; \ + psql "${DBSTRING}" -f ./make-concepts.sql; \ cd ../../MIMIC_Extract/utils; \ } @@ -45,9 +45,9 @@ 
build_concepts_mimic_code: setup_user_env.sh clone_mimic_code_repo build_extra_concepts: setup_user_env.sh niv-durations.sql crystalloid-bolus.sql colloid-bolus.sql { \ source ./setup_user_env.sh; \ - psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./niv-durations.sql; \ - psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./crystalloid-bolus.sql; \ - psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./colloid-bolus.sql; \ + psql "${DBSTRING}" -f ./niv-durations.sql; \ + psql "${DBSTRING}" -f ./crystalloid-bolus.sql; \ + psql "${DBSTRING}" -f ./colloid-bolus.sql; \ } #=== Env Checks diff --git a/utils/build_curated_from_psql.sh b/utils/build_curated_from_psql.sh old mode 100644 new mode 100755 index 342decb..0d8fd4c --- a/utils/build_curated_from_psql.sh +++ b/utils/build_curated_from_psql.sh @@ -26,4 +26,5 @@ python -u $MIMIC_EXTRACT_CODE_DIR/mimic_direct_extract.py \ --pop_size $POP_SIZE \ --psql_password $PGPASSWORD \ --psql_host $HOST \ + --psql_port $PORT \ --min_percent 0 \ diff --git a/utils/setup_user_env.sh b/utils/setup_user_env.sh index 2dfb58a..0069a54 100755 --- a/utils/setup_user_env.sh +++ b/utils/setup_user_env.sh @@ -8,14 +8,16 @@ export MIMIC_DATA_DIR=$MIMIC_EXTRACT_CODE_DIR/data/ export MIMIC_EXTRACT_OUTPUT_DIR=$MIMIC_DATA_DIR/curated/ mkdir -p $MIMIC_EXTRACT_OUTPUT_DIR -export DBUSER=bnestor +export DBUSER=mimic export DBNAME=mimic export SCHEMA=mimiciii -export HOST=mimic -export DBSTRING="dbname=$DBNAME options=--search_path=$SCHEMA" -alias psql="psql -h $HOST -U $DBUSER " +export HOST=localhost +export PORT=5432 +export PGPASSWORD=mimic + +export DBSTRING="host=$HOST port=$PORT user=$DBUSER password=$DBPASSWORD dbname=$DBNAME options=--search_path=$SCHEMA" export PGHOST=$HOST +export PGPORT=$PORT export PGUSER=$DBUSER -export PGPASSWORD=$1 From 12b000175f71d3c0fd8e6b2d70c0928e8d0287c2 Mon Sep 17 00:00:00 2001 From: Julian Gilbey Date: Sat, 27 Nov 2021 22:45:27 +0000 Subject: [PATCH 02/11] Modify postgres_make_extended_concepts.sh to allow more flexible 
psql connection strings --- README.md | 4 +++- utils/postgres_make_extended_concepts.sh | 18 ++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index b753b55..f317ae4 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,9 @@ bash postgres_make_concepts.sh Next, you'll need to build 3 additional materialized views necessary for this pipeline. To do this (again with schema edit permission), navigate to `utils` and run `bash postgres_make_extended_concepts.sh` followed by -`psql -d mimic -f niv-durations.sql`. +`psql -d mimic -f niv-durations.sql`. (You can add extra `psql` +connection parameters; see the start of +`postgres_make_extended_concepts.sh` for details.) ## Step 4: Set Cohort Selection and Extraction Criteria diff --git a/utils/postgres_make_extended_concepts.sh b/utils/postgres_make_extended_concepts.sh index e4d049d..f38f37d 100644 --- a/utils/postgres_make_extended_concepts.sh +++ b/utils/postgres_make_extended_concepts.sh @@ -1,16 +1,22 @@ # This file makes tables for the concepts in this subfolder. # Be sure to run postgres-functions.sql first, as the concepts rely on those function definitions. # Note that this may take a large amount of time and hard drive space. +# +# Exporting DBCONNEXTRA before calling this script will add this to the +# connection string. For example, running: +# DBCONNEXTRA="user=mimic password=mimic" bash postgres_make_extended_concepts.sh +# will add these settings to all of the psql calls. (Note that "dbname" +# and "search_path" do not need to be set.) 
# string replacements are necessary for some queries -export REGEX_DATETIME_DIFF="s/DATETIME_DIFF\((.+?),\s?(.+?),\s?(DAY|MINUTE|SECOND|HOUR|YEAR)\)/DATETIME_DIFF(\1, \2, '\3')/g" -export REGEX_SCHEMA='s/`physionet-data.(mimiciii_clinical|mimiciii_derived|mimiciii_notes).(.+?)`/\2/g' -export CONNSTR='-d mimic' +REGEX_DATETIME_DIFF="s/DATETIME_DIFF\((.+?),\s?(.+?),\s?(DAY|MINUTE|SECOND|HOUR|YEAR)\)/DATETIME_DIFF(\1, \2, '\3')/g" +REGEX_SCHEMA='s/`physionet-data.(mimiciii_clinical|mimiciii_derived|mimiciii_notes).(.+?)`/\2/g' +CONNSTR="dbname=mimic $DBCONNEXTRA" # this is set as the search_path variable for psql # a search path of "public,mimiciii" will search both public and mimiciii # schemas for data, but will create tables on the public schema -export PSQL_PREAMBLE='SET search_path TO public,mimiciii' +PSQL_PREAMBLE='SET search_path TO public,mimiciii' echo '' echo '===' @@ -21,7 +27,7 @@ echo '===' echo '' echo 'Directory 5 of 9: fluid_balance' -{ echo "${PSQL_PREAMBLE}; DROP TABLE IF EXISTS colloid_bolus; CREATE TABLE colloid_bolus AS "; cat $MIMIC_CODE_DIR/concepts/fluid_balance/colloid_bolus.sql; } | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_SCHEMA}" | psql ${CONNSTR} -{ echo "${PSQL_PREAMBLE}; DROP TABLE IF EXISTS crystalloid_bolus; CREATE TABLE crystalloid_bolus AS "; cat $MIMIC_CODE_DIR/concepts/fluid_balance/crystalloid_bolus.sql; } | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_SCHEMA}" | psql ${CONNSTR} +{ echo "${PSQL_PREAMBLE}; DROP TABLE IF EXISTS colloid_bolus; CREATE TABLE colloid_bolus AS "; cat $MIMIC_CODE_DIR/concepts/fluid_balance/colloid_bolus.sql; } | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_SCHEMA}" | psql "${CONNSTR}" +{ echo "${PSQL_PREAMBLE}; DROP TABLE IF EXISTS crystalloid_bolus; CREATE TABLE crystalloid_bolus AS "; cat $MIMIC_CODE_DIR/concepts/fluid_balance/crystalloid_bolus.sql; } | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_SCHEMA}" | psql "${CONNSTR}" echo 'Finished creating 
tables.' From d71fc4622a705629ac27c6a36787b1ab403202bf Mon Sep 17 00:00:00 2001 From: Julian Gilbey Date: Sun, 28 Nov 2021 07:47:50 +0000 Subject: [PATCH 03/11] Allow make build_curated_from_psql to use the local socket by default --- mimic_direct_extract.py | 2 ++ utils/setup_user_env.sh | 14 +++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/mimic_direct_extract.py b/mimic_direct_extract.py index 6e13eb8..938e438 100644 --- a/mimic_direct_extract.py +++ b/mimic_direct_extract.py @@ -764,6 +764,8 @@ def plot_variable_histograms(col_names, df): args = vars(ap.parse_args()) for key in sorted(args.keys()): print(key, args[key]) + if args["psql_host"] == "SOCKET": + args["psql_host"] = None if not isdir(args['resource_path']): raise ValueError("Invalid resource_path: %s" % args['resource_path']) diff --git a/utils/setup_user_env.sh b/utils/setup_user_env.sh index 0069a54..bd31ffd 100755 --- a/utils/setup_user_env.sh +++ b/utils/setup_user_env.sh @@ -11,13 +11,13 @@ mkdir -p $MIMIC_EXTRACT_OUTPUT_DIR export DBUSER=mimic export DBNAME=mimic export SCHEMA=mimiciii -export HOST=localhost +export HOST=SOCKET export PORT=5432 export PGPASSWORD=mimic -export DBSTRING="host=$HOST port=$PORT user=$DBUSER password=$DBPASSWORD dbname=$DBNAME options=--search_path=$SCHEMA" - -export PGHOST=$HOST -export PGPORT=$PORT -export PGUSER=$DBUSER - +if [ $HOST = SOCKET ] +then + export DBSTRING="port=$PORT user=$DBUSER password=$DBPASSWORD dbname=$DBNAME options=--search_path=$SCHEMA" +else + export DBSTRING="host=$HOST port=$PORT user=$DBUSER password=$DBPASSWORD dbname=$DBNAME options=--search_path=$SCHEMA" +fi From 96aecadc34b0c648dcda39e3b6d16a3fa69e6583 Mon Sep 17 00:00:00 2001 From: Julian Gilbey Date: Sun, 28 Nov 2021 07:49:07 +0000 Subject: [PATCH 04/11] Use consistent quoting --- mimic_direct_extract.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mimic_direct_extract.py b/mimic_direct_extract.py index 938e438..0f6b9c4 100644 
--- a/mimic_direct_extract.py +++ b/mimic_direct_extract.py @@ -764,8 +764,8 @@ def plot_variable_histograms(col_names, df): args = vars(ap.parse_args()) for key in sorted(args.keys()): print(key, args[key]) - if args["psql_host"] == "SOCKET": - args["psql_host"] = None + if args['psql_host'] == "SOCKET": + args['psql_host'] = None if not isdir(args['resource_path']): raise ValueError("Invalid resource_path: %s" % args['resource_path']) From f7166d270df66771f9ff5b5fe29cf0ff1d941190 Mon Sep 17 00:00:00 2001 From: Julian Gilbey Date: Mon, 29 Nov 2021 09:18:41 +0000 Subject: [PATCH 05/11] Fix username/password variable name issues --- utils/build_curated_from_psql.sh | 3 ++- utils/setup_user_env.sh | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/build_curated_from_psql.sh b/utils/build_curated_from_psql.sh index 0d8fd4c..91b2d82 100755 --- a/utils/build_curated_from_psql.sh +++ b/utils/build_curated_from_psql.sh @@ -24,7 +24,8 @@ python -u $MIMIC_EXTRACT_CODE_DIR/mimic_direct_extract.py \ --exit_after_loading 0 \ --plot_hist 0 \ --pop_size $POP_SIZE \ - --psql_password $PGPASSWORD \ + --psql_user $DBUSER \ + --psql_password $DBPASSWORD \ --psql_host $HOST \ --psql_port $PORT \ --min_percent 0 \ diff --git a/utils/setup_user_env.sh b/utils/setup_user_env.sh index bd31ffd..ffa1c8d 100755 --- a/utils/setup_user_env.sh +++ b/utils/setup_user_env.sh @@ -10,10 +10,10 @@ mkdir -p $MIMIC_EXTRACT_OUTPUT_DIR export DBUSER=mimic export DBNAME=mimic +export DBPASSWORD=mimic export SCHEMA=mimiciii export HOST=SOCKET export PORT=5432 -export PGPASSWORD=mimic if [ $HOST = SOCKET ] then From 848f672ae75821f683b78b8565ea760b2bb29cc4 Mon Sep 17 00:00:00 2001 From: Julian Gilbey Date: Mon, 29 Nov 2021 09:21:59 +0000 Subject: [PATCH 06/11] Add public to search_path, as that is where mimic-code stores the concepts --- mimic_direct_extract.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mimic_direct_extract.py b/mimic_direct_extract.py index 
0f6b9c4..627944f 100644 --- a/mimic_direct_extract.py +++ b/mimic_direct_extract.py @@ -805,7 +805,7 @@ def plot_variable_histograms(col_names, df): idx_hd5_filename = splitext(idx_hd5_filename)[0] + '_' + pop_size + splitext(idx_hd5_filename)[1] dbname = args['psql_dbname'] - schema_name = args['psql_schema_name'] + schema_name = 'public,' + args['psql_schema_name'] query_args = {'dbname': dbname} if args['psql_host'] is not None: query_args['host'] = args['psql_host'] if args['psql_port'] is not None: query_args['port'] = args['psql_port'] From 591f354bf52db508f2b3b079afa384a96b7cd6ae Mon Sep 17 00:00:00 2001 From: Julian Gilbey Date: Mon, 29 Nov 2021 11:00:50 +0000 Subject: [PATCH 07/11] Handle deprecation and removal of .ix() method from pandas --- mimic_direct_extract.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mimic_direct_extract.py b/mimic_direct_extract.py index 627944f..5519de8 100644 --- a/mimic_direct_extract.py +++ b/mimic_direct_extract.py @@ -146,8 +146,8 @@ def save_pop( def get_variable_mapping(mimic_mapping_filename): # Read in the second level mapping of the itemids var_map = pd.read_csv(mimic_mapping_filename, index_col=None) - var_map = var_map.ix[(var_map['LEVEL2'] != '') & (var_map['COUNT']>0)] - var_map = var_map.ix[(var_map['STATUS'] == 'ready')] + var_map = var_map[(var_map['LEVEL2'] != '') & (var_map['COUNT']>0)] + var_map = var_map[(var_map['STATUS'] == 'ready')] var_map['ITEMID'] = var_map['ITEMID'].astype(int) return var_map From 8ca8618cc56b8b186d4afef9f7d355be897b8adb Mon Sep 17 00:00:00 2001 From: Julian Gilbey Date: Mon, 29 Nov 2021 11:04:51 +0000 Subject: [PATCH 08/11] Replace rename_axis() method by correct rename() method --- mimic_direct_extract.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mimic_direct_extract.py b/mimic_direct_extract.py index 5519de8..3d3db8c 100644 --- a/mimic_direct_extract.py +++ b/mimic_direct_extract.py @@ -231,8 +231,8 @@ def save_numerics( 
var_map = var_map[ ['LEVEL2', 'ITEMID', 'LEVEL1'] - ].rename_axis( - {'LEVEL2': 'LEVEL2', 'LEVEL1': 'LEVEL1', 'ITEMID': 'itemid'}, axis=1 + ].rename( + columns={'LEVEL2': 'LEVEL2', 'LEVEL1': 'LEVEL1', 'ITEMID': 'itemid'} ).set_index('itemid') X['value'] = pd.to_numeric(X['value'], 'coerce') From 84c762e20010bcb08d7c167e26dd458745825e78 Mon Sep 17 00:00:00 2001 From: Julian Gilbey Date: Mon, 29 Nov 2021 18:43:53 +0000 Subject: [PATCH 09/11] Further pandas updates --- mimic_direct_extract.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mimic_direct_extract.py b/mimic_direct_extract.py index 3d3db8c..6ae2553 100644 --- a/mimic_direct_extract.py +++ b/mimic_direct_extract.py @@ -236,7 +236,7 @@ def save_numerics( ).set_index('itemid') X['value'] = pd.to_numeric(X['value'], 'coerce') - X.astype({k: int for k in ID_COLS}, inplace=True) + X = X.astype({k: int for k in ID_COLS}) to_hours = lambda x: max(0, x.days*24 + x.seconds // 3600) @@ -300,9 +300,9 @@ def save_numerics( # Get the max time for each of the subjects so we can reconstruct! 
if subjects_filename is not None: - np.save(os.path.join(outPath, subjects_filename), data['subject_id'].as_matrix()) + np.save(os.path.join(outPath, subjects_filename), data['subject_id'].to_numpy()) if times_filename is not None: - np.save(os.path.join(outPath, times_filename), data['max_hours'].as_matrix()) + np.save(os.path.join(outPath, times_filename), data['max_hours'].to_numpy()) #fix nan in count to be zero idx = pd.IndexSlice @@ -321,7 +321,7 @@ def save_numerics( X = X.drop(columns = drop_col) ######## - if dynamic_filename is not None: np.save(os.path.join(outPath, dynamic_filename), X.as_matrix()) + if dynamic_filename is not None: np.save(os.path.join(outPath, dynamic_filename), X.to_numpy()) if dynamic_hd5_filename is not None: X.to_hdf(os.path.join(outPath, dynamic_hd5_filename), 'X') return X From d52b979cd380725700d15a568dd4553615b7ac47 Mon Sep 17 00:00:00 2001 From: Julian Gilbey Date: Sun, 26 Dec 2021 20:04:10 +0000 Subject: [PATCH 10/11] Allow local settings to override setup_user_env.sh --- .gitignore | 3 +++ utils/setup_user_env.sh | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/.gitignore b/.gitignore index a2d2331..e702c82 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Local settings +utils/setup_user_env_local.sh + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/utils/setup_user_env.sh b/utils/setup_user_env.sh index ffa1c8d..cb26669 100755 --- a/utils/setup_user_env.sh +++ b/utils/setup_user_env.sh @@ -15,6 +15,11 @@ export SCHEMA=mimiciii export HOST=SOCKET export PORT=5432 +if [ -f "setup_user_env_local.sh" ] +then + . 
setup_user_env_local.sh +fi + if [ $HOST = SOCKET ] then export DBSTRING="port=$PORT user=$DBUSER password=$DBPASSWORD dbname=$DBNAME options=--search_path=$SCHEMA" From c077df95db9e053e6996dbae25389708d3feef93 Mon Sep 17 00:00:00 2001 From: Julian Gilbey Date: Sun, 2 Jan 2022 07:48:54 +0000 Subject: [PATCH 11/11] Fix local settings facility: only perform actions once local settings read --- utils/Makefile | 8 ++++---- utils/setup_user_env.sh | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/utils/Makefile b/utils/Makefile index c37cee4..1364113 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -1,8 +1,8 @@ SHELL:=/bin/bash -PSQL_EXECUTABLE:=$(shell which psql) +PSQL_EXECUTABLE:=${shell which psql} -MIMIC_CODE_DIR:=${shell grep "MIMIC_CODE_DIR" setup_user_env.sh | cut -d'=' -f2} +MIMIC_CODE_DIR:=${shell source ./setup_user_env.sh && echo $$MIMIC_CODE_DIR} #=== Commands @@ -53,7 +53,7 @@ build_extra_concepts: setup_user_env.sh niv-durations.sql crystalloid-bolus.sql #=== Env Checks .PHONY: has_psql_exe -has_psql_exe: setup_user_env.sh +has_psql_exe: ifndef PSQL_EXECUTABLE - $(error "Error: 'psql' is undefined. Please install/add to current path.") + ${error "Error: 'psql' is undefined. Please install/add to current path."} endif diff --git a/utils/setup_user_env.sh b/utils/setup_user_env.sh index cb26669..98df3b7 100755 --- a/utils/setup_user_env.sh +++ b/utils/setup_user_env.sh @@ -1,12 +1,9 @@ #!/bin/bash export MIMIC_CODE_DIR=$(realpath ../../mimic-code) -export MIMIC_EXTRACT_CODE_DIR=$(realpath ../) -export MIMIC_DATA_DIR=$MIMIC_EXTRACT_CODE_DIR/data/ - -export MIMIC_EXTRACT_OUTPUT_DIR=$MIMIC_DATA_DIR/curated/ -mkdir -p $MIMIC_EXTRACT_OUTPUT_DIR +export MIMIC_EXTRACT_CODE_DIR=$(realpath ..) 
+export MIMIC_EXTRACT_OUTPUT_DIR=$MIMIC_EXTRACT_CODE_DIR/data/curated export DBUSER=mimic export DBNAME=mimic @@ -15,11 +12,14 @@ export SCHEMA=mimiciii export HOST=SOCKET export PORT=5432 +# Allow users to override any of the above in a local configuration file if [ -f "setup_user_env_local.sh" ] then . setup_user_env_local.sh fi +mkdir -p $MIMIC_EXTRACT_OUTPUT_DIR + if [ $HOST = SOCKET ] then export DBSTRING="port=$PORT user=$DBUSER password=$DBPASSWORD dbname=$DBNAME options=--search_path=$SCHEMA"