-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #14 from OpenKBC/engineering_dev
Added docker-compose, dockerfile for jupyter notebook setting, confirmed
- Loading branch information
Showing
8 changed files
with
191 additions
and
111 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Jupyter notebook image extended with the project's R and Python dependencies.
#
# The base image tag can be pinned at build time for reproducible builds:
#   docker build --build-arg BASE_TAG=<tag> -f Dockerfile_jupyterNotebook .
# It defaults to "latest", which is what the previously untagged FROM resolved to.
ARG BASE_TAG=latest
FROM jupyter/datascience-notebook:${BASE_TAG}

# Copy only the installer inputs so the install layers below stay cached
# until one of these two files changes.
COPY notebook/installers/installer_Rpackage.R /installer_Rpackage.R
COPY notebook/installers/requirements.txt /requirements.txt

# Run the R package installer script.
RUN Rscript /installer_Rpackage.R

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r /requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Builds the Jupyter notebook image and runs it with the local notebook
# folders and data directory bind-mounted into the container.
#
# Set OPENKBC_PROJ_DIR to the multiple_sclerosis_proj checkout on your machine;
# when it is unset, the original author's path is used as the default.
version: "3"
services:
  notebook:
    build:
      # An explicit build context is required by older docker-compose releases.
      # NOTE(review): assumes this file sits in the directory that contains
      # notebook/, since the Dockerfile copies notebook/installers/... - confirm.
      context: .
      dockerfile: Dockerfile_jupyterNotebook
    volumes:
      - ${OPENKBC_PROJ_DIR:-/Users/junheeyun/OpenKBC/multiple_sclerosis_proj}/notebook/notebook_lib:/home/jovyan/work/notebook_lib
      - ${OPENKBC_PROJ_DIR:-/Users/junheeyun/OpenKBC/multiple_sclerosis_proj}/notebook/notebook_utils:/home/jovyan/work/notebook_utils
      - ${OPENKBC_PROJ_DIR:-/Users/junheeyun/OpenKBC/multiple_sclerosis_proj}/notebook/resultFiles:/home/jovyan/work/resultFiles
      - ${OPENKBC_PROJ_DIR:-/Users/junheeyun/OpenKBC/multiple_sclerosis_proj}/data:/home/jovyan/data
    ports:
      # Quoted so YAML always reads the mapping as a string.
      - "8888:8888"
    container_name: datascience-notebook-container
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,18 @@ | ||
## For running in conda
## Create the environment with: conda env create -f <this file>
name: utils_v1
channels:
  - defaults
  - conda-forge
  - bioconda
dependencies:
  - ipykernel=6.2.0
  - r-essentials=3.6.1
  # pip is listed explicitly because the section below installs packages with it.
  - pip
  - pip:
    - feather-format==0.4.1
    - numpy==1.21.2
    - pandas==1.3.2
    - pyarrow==5.0.0
    - pytz==2021.1
    - scikit-learn==0.24.2
    - matplotlib==3.4.3
    - seaborn==0.11.2
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
{ | ||
"cells": [ | ||
{
 "cell_type": "code",
 "execution_count": 1,
 "source": [
  "import pandas as pd\n",
  "\n",
  "## Project helpers used by this notebook\n",
  "from notebook_lib.nwpv.nwpv import nwpv_calculation\n",
  "from notebook_utils.OpenKbcMSToolkit import ExtractionToolkit as exttoolkit\n",
  "\n",
  "# Directory that holds the input data\n",
  "DATA_PATH = '../data/'\n",
  "\n",
  "# Load the normalized CD8 expression table and the sample metadata\n",
  "expr_path = DATA_PATH+\"counts_normalized/counts_vst_CD8.feather\"\n",
  "meta_path = DATA_PATH+'EPIC_HCvB_metadata_baseline_updated-share.csv'\n",
  "gene_tpm = pd.read_feather(expr_path).set_index('index')\n",
  "meta_data = pd.read_csv(meta_path)\n",
  "\n",
  "# Group sample names by a metadata column. Candidate columns:\n",
  "#   DiseaseStatus: ['MS' 'Unknown' 'CIS' 'Healthy']\n",
  "#   DiseaseDuration(Early?)\n",
  "#   DiseaseCourse: ['RR' 'PP' 'SP' 'RIS' 'CIS' 'Unknown' 'Healthy']\n",
  "sample_list, sample_category = exttoolkit.get_sample_name_by_category(\n",
  "    dataframe=meta_data,\n",
  "    sampleColumn='HCVB_ID',\n",
  "    dataColname='DiseaseCourse',\n",
  ")\n",
  "\n",
  "# Report how many samples fall into each category and its list position\n",
  "print(\"Sample Count\")\n",
  "count = 0\n",
  "for category, values in zip(sample_category, sample_list):\n",
  "    print(\"{} : {}, List number : {}\".format(category, len(values), count))\n",
  "    count += 1"
 ],
 "outputs": [
  {
   "output_type": "stream",
   "name": "stdout",
   "text": [
    "Sample Count\n",
    "RR : 82, List number : 0\n",
    "PP : 14, List number : 1\n",
    "SP : 1, List number : 2\n",
    "RIS : 2, List number : 3\n",
    "CIS : 40, List number : 4\n",
    "Unknown : 1, List number : 5\n",
    "Healthy : 22, List number : 6\n"
   ]
  }
 ],
 "metadata": {}
},
{
 "cell_type": "code",
 "execution_count": 2,
 "source": [
  "# Split samples into long / short disease-duration groups around the median duration.\n",
  "# .copy() makes duration_df an independent frame, so the column assignment below\n",
  "# no longer raises SettingWithCopyWarning.\n",
  "duration_df = meta_data.dropna(subset=['DiseaseDuration']).copy() # data for patient's duration set\n",
  "duration_df['DiseaseDuration'] = duration_df['DiseaseDuration'].astype(float) # make float\n",
  "\n",
  "# Compute the threshold once and reuse it for both groups\n",
  "duration_median = duration_df['DiseaseDuration'].median()\n",
  "expr_samples = set(gene_tpm.columns.tolist()) # sample IDs present in the expression table\n",
  "\n",
  "## Long DD\n",
  "longDD_samples = duration_df.loc[ duration_df['DiseaseDuration'] >= duration_median, 'HCVB_ID'] # Get sampleData which has longDD from metadata\n",
  "longDD_sampleList = list(expr_samples.intersection(longDD_samples.values.tolist())) # Get intersected sampleID between expr and longDD\n",
  "longDD_meta = duration_df[duration_df['HCVB_ID'].isin(longDD_sampleList)] # Get meta with sampleNames\n",
  "longDD_gene_expr = gene_tpm[longDD_sampleList] # Get expr with sampleNames\n",
  "\n",
  "## Short DD\n",
  "shortDD_samples = duration_df.loc[ duration_df['DiseaseDuration'] < duration_median, 'HCVB_ID']\n",
  "shortDD_sampleList = list(expr_samples.intersection(shortDD_samples.values.tolist()))\n",
  "# Filter on the intersected list, mirroring longDD_meta, so the metadata rows match the expression columns\n",
  "shortDD_meta = duration_df[duration_df['HCVB_ID'].isin(shortDD_sampleList)]\n",
  "shortDD_gene_expr = gene_tpm[shortDD_sampleList]"
 ],
 "outputs": [],
 "metadata": {}
},
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"source": [ | ||
"# NWPV calculation\n", | ||
"#nwpv_class = nwpv_calculation(gene_tpm, shortDD_sampleList, longDD_sampleList)\n", | ||
"#nwpv_class.get_result().to_csv(\"nwpv_result_CD8_vst.csv\")\n", | ||
"#nwpv_df = pd.read_csv('resultFiles/nwpv_result_CD8_vst.csv')\n", | ||
"#nwpv_df[(nwpv_df['combined_pvalue_adj'] < 0.05)]" | ||
], | ||
"outputs": [], | ||
"metadata": {} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"source": [ | ||
"X = longDD_gene_expr.values[0]" | ||
], | ||
"outputs": [ | ||
{ | ||
"output_type": "execute_result", | ||
"data": { | ||
"text/plain": [ | ||
"72" | ||
] | ||
}, | ||
"metadata": {}, | ||
"execution_count": 10 | ||
} | ||
], | ||
"metadata": {} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"source": [], | ||
"outputs": [], | ||
"metadata": {} | ||
} | ||
], | ||
"metadata": { | ||
"orig_nbformat": 4, | ||
"language_info": { | ||
"name": "python", | ||
"version": "3.9.6", | ||
"mimetype": "text/x-python", | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"pygments_lexer": "ipython3", | ||
"nbconvert_exporter": "python", | ||
"file_extension": ".py" | ||
}, | ||
"kernelspec": { | ||
"name": "python3", | ||
"display_name": "Python 3.9.6 64-bit ('utils_v1': conda)" | ||
}, | ||
"interpreter": { | ||
"hash": "77a526a359b8fd796eb09814c2228805e7076f62d8d78ef70c860dff672df599" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
### Install DESeq2 and tximport from Bioconductor.
### Stops with an error when a package is still missing afterwards, so an
### automated caller (e.g. an image build running this via Rscript) fails loudly.

# Bootstrap BiocManager from CRAN over HTTPS when it is not installed yet.
if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager", repos = "https://cloud.r-project.org")

bioc_packages <- c("DESeq2", "tximport")

# update = FALSE / ask = FALSE: install only the requested packages, leave
# already-installed packages alone, and never wait for a prompt.
BiocManager::install(bioc_packages, update = FALSE, ask = FALSE)

# Failed installs are only reported as warnings, so verify the result explicitly.
missing_packages <- setdiff(bioc_packages, rownames(installed.packages()))
if (length(missing_packages) > 0)
    stop("Failed to install: ", paste(missing_packages, collapse = ", "))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Python packages installed with `pip install -r` on top of the notebook image.
feather-format==0.4.1