OpenKBC
diff --git a/‎R/IDconverter.R
Lines changed: 22 additions & 0 deletions b/‎R/IDconverter.R
Lines changed: 22 additions & 0 deletions
diff --git a/‎R/README.md
Lines changed: 17 additions & 0 deletions b/‎R/README.md
Lines changed: 17 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 7 additions & 0 deletions b/‎README.md
Lines changed: 7 additions & 0 deletions
diff --git a/‎notebook/README.md
Lines changed: 5 additions & 1 deletion b/‎notebook/README.md
Lines changed: 5 additions & 1 deletion
diff --git a/‎notebook/RFECV_with_allgenes.ipynb
Lines changed: 233 additions & 0 deletions b/‎notebook/RFECV_with_allgenes.ipynb
Lines changed: 233 additions & 0 deletions
diff --git a/‎notebook/getDEG_with_nwpv.ipynb
Lines changed: 0 additions & 149 deletions b/‎notebook/getDEG_with_nwpv.ipynb
Lines changed: 0 additions & 149 deletions
diff --git a/‎notebook/installers/installer_Rpackage.R
Lines changed: 3 additions & 1 deletion b/‎notebook/installers/installer_Rpackage.R
Lines changed: 3 additions & 1 deletion
diff --git a/‎notebook/installers/requirements.txt
Lines changed: 2 additions & 1 deletion b/‎notebook/installers/requirements.txt
Lines changed: 2 additions & 1 deletion
diff --git a/‎notebook/notebook_lib/gene_zscore/threaded_gzscore.py
Lines changed: 80 additions & 0 deletions b/‎notebook/notebook_lib/gene_zscore/threaded_gzscore.py
Lines changed: 80 additions & 0 deletions
diff --git a/‎notebook/notebook_lib/nwpv/nwpv.py
Lines changed: 8 additions & 4 deletions b/‎notebook/notebook_lib/nwpv/nwpv.py
Lines changed: 8 additions & 4 deletions
diff --git a/‎notebook/notebook_utils/OpenKbcMSCalculator.py
Lines changed: 40 additions & 0 deletions b/‎notebook/notebook_utils/OpenKbcMSCalculator.py
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,22 @@
+# ID converter from Ensembl ID to Entrez ID. Please clean up the data with the python code before using this script.
+# Only works with csv files.
+# Usage:
+# Rscript IDconverter.R path/counts_norm_CD8.csv path/outputfile.csv
+
+library("AnnotationDbi")
+library("org.Hs.eg.db")
+
+args = commandArgs(trailingOnly=TRUE)
+if (length(args) < 2) {
+  # Fail early with a usage hint instead of a cryptic error from read.table/write.table
+  stop("Usage: Rscript IDconverter.R <input.csv> <output.csv>", call.=FALSE)
+}
+inputFile = args[1] # with path
+outputFile = args[2] # with path
+
+# Read data: the first column is used as row names (expected to hold the Ensembl IDs looked up below)
+data<-read.table(inputFile, row.names=1, sep=',', header=TRUE) # Read data
+names(data) <- sub("^X", "", names(data)) # drop the leading "X" from column names (presumably added by read.table to names starting with a digit)
+
+### Warning ###
+# Several Ensembl IDs can map to the same Entrez ID, so Entrez IDs may be duplicated.
+# multiVals="first" keeps only the first Entrez ID when one Ensembl ID has several matches.
+data$entrez = mapIds(org.Hs.eg.db,  keys=row.names(data), column="ENTREZID", keytype="ENSEMBL", multiVals="first")
+# make.names(unique=TRUE) turns the IDs into unique, syntactically valid row names
+row.names(data)<-make.names(data$entrez, unique=TRUE)
+row.names(data) <- sub("^X", "", row.names(data)) # drop the leading "X" from row names
+write.table(data, outputFile, sep=',', row.names = TRUE, col.names = TRUE) # Write result
@@ -0,0 +1,17 @@
+# R Utils for the project
+
+### Requirements
+```shell
+Rscript notebook/installers/installer_Rpackage.R
+```
+
+#### 1. deseq2_normalizaiton.R
+This script is an example of how to get a normalized matrix from raw files; it has no usage instructions.
+
+#### 2. IDconverter.R
+This script converts Ensembl IDs to Entrez IDs. The input must be cleaned up beforehand and must be a CSV file.
+
+**Example:**
+```shell
+Rscript IDconverter.R inputpath/input.csv outputpath/output.csv
+```
@@ -17,3 +17,10 @@
 * Slides (Ask to members)
 * S3 Bucket (Ask to members)
 * https://openkbc.github.io/multiple_sclerosis_proj/
+
+### Usage of docker container
+* Use docker-compose to start the Jupyter notebook
+```
+docker-compose up
+```
+* Access http://localhost:8888/token
@@ -4,6 +4,7 @@
 
 ## Guide for docker volumes
 * Please mount or bind with this information
+* To get the data, please ask the project members for S3 access
 ```yaml
 ## Local path:container path
 - notebook/notebook_lib:/home/jovyan/work/notebook_lib
@@ -15,9 +16,12 @@
 ## Library List
 | Name | Description | Reference or link |
 |---------|---------|---------|
-| NWPV2 | DEG function with pvalue integration | [github](https://github.com/swiri021/NWPV2/blob/master/README.md), [paper](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3135688/) |
+| NWPV2 | DEG function with pvalue integration | [github](https://github.com/swiri021/NWPV2), [paper](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3135688/) |
+| gene_zscore | Gene-set z-score (activation score) calculation for the data | [github](https://github.com/swiri021/Threaded_gsZscore), [paper](https://genomebiology.biomedcentral.com/articles/10.1186/gb-2006-7-10-r93) |
+
 
 ## Utils List
 | Name | Description | Reference or link |
 |---------|---------|---------|
 | OpenKbcMSToolkit | Handy toolkit for data extraction | No reference |
+| OpenKbcMSCalculator | Advanced calculators for producing analysis results | No reference |
@@ -2,4 +2,6 @@
 if (!requireNamespace("BiocManager", quietly = TRUE))
   install.packages("BiocManager", repos='http://cran.us.r-project.org')
   BiocManager::install("DESeq2")
-  BiocManager::install("tximport")
+  BiocManager::install("tximport")
+  BiocManager::install("AnnotationDbi")
+  BiocManager::install("org.Hs.eg.db")
@@ -1 +1,2 @@
-feather-format==0.4.1
+feather-format==0.4.1
+scikit-learn==0.24.2
@@ -0,0 +1,80 @@
+__author__ = "Junhee Yoon"
+__version__ = "1.0.0"
+__maintainer__ = "Junhee Yoon"
+__email__ = "[email protected]"
+
+"""
+Manual: https://github.com/swiri021/Threaded_gsZscore
+Reference: https://genomebiology.biomedcentral.com/articles/10.1186/gb-2006-7-10-r93
+Description: calculating activation score by using threaded z score
+"""
+import pandas as pd
+import numpy as np
+import threading
+import functools
+import itertools
+
+class funcThread(object):
+    def __init__(self):
+        print ("Loaded Threads")
+
+    def __call__(self, func):
+        @functools.wraps(func)
+        def run(*args, **kwargs):
+            print ("Number of Threads : %d"%(kwargs['nthread']))
+
+            threads = [None]*kwargs['nthread']
+            container = [None]*kwargs['nthread']
+
+            ####Divide Samples by number of threads
+            i_col = len(args[1].columns.tolist())
+            contents_numb = i_col/kwargs['nthread']
+            split_columns = [args[1].columns.tolist()[i:i+contents_numb] for i in range(0, len(args[1].columns.tolist()), contents_numb)]
+            if len(split_columns)>kwargs['nthread']:
+                split_columns = split_columns[:kwargs['nthread']-1] + [list(itertools.chain(*split_columns[kwargs['nthread']-1:]))]
+                #split_columns[len(split_columns)-2] = split_columns[len(split_columns)-2]+split_columns[len(split_columns)-1]
+                #split_columns = split_columns[:len(split_columns)-1]
+
+            ####Running threads
+            for i, item in enumerate(split_columns):
+                threads[i] = threading.Thread(target = func, args=(args[0], args[1].ix[:,item], container, i), kwargs=kwargs)
+                threads[i].start()
+            for i in range(len(threads)):
+                threads[i].join()
+
+            return pd.concat(container, axis=0)
+
+        return run
+
+
+class calculator(object):
+
+    def __init__(self, df):
+        if df.empty:
+            raise ValueError("Input Dataframe is empty, please try with different one.")
+        else:
+            self.df = df
+
+    # Wrapper for controlling Threads
+    def gs_zscore(self, nthread=5, gene_set=[]):
+        arr1 = self.df
+        container = None
+        i = None
+
+        return self._calculating(arr1, container, i, nthread=nthread, gene_set=gene_set)
+
+    # function structure
+    # args(input, container, thread_index , **kwargs)
+    @funcThread()
+    def _calculating(self, arr1, container, i, nthread=5, gene_set=[]):
+        zscore=[]
+        arr1_index = arr1.index.tolist()
+        inter = list(set(arr1_index).intersection(gene_set))
+
+        diff_mean = arr1.loc[inter].mean(axis=0).subtract(arr1.mean(axis=0))
+        len_norm = arr1.std(ddof=1, axis=0).apply(lambda x: np.sqrt(len(inter))/x)
+        zscore = diff_mean*len_norm
+        zscore = zscore.to_frame()
+        zscore.columns = ['Zscore']
+        container[i] = zscore
+        ##No Return
@@ -1,13 +1,17 @@
-from .statistics import STAT
-from scipy import stats
-import numpy as np
+__author__ = "Junhee Yoon"
+__version__ = "1.0.0"
+__maintainer__ = "Junhee Yoon"
+__email__ = "[email protected]"
+
 """
 Manual: https://github.com/swiri021/NWPV2
 Reference: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3135688/
 Description: Method of combined p-values for getting DEG in dataset
 """
 
-
+from .statistics import STAT
+from scipy import stats
+import numpy as np
 
 class nwpv_calculation(object):
 	def _preprocessing(self, min_adj=1e-16, max_adj=0.9999999999999999):
 
@@ -0,0 +1,40 @@
+from notebook_lib.nwpv.nwpv import nwpv_calculation
+from notebook_lib.gene_zscore.threaded_gzscore import calculator as gzscore_class
+
+class AdvancedCalculators(object):
+    def nwpv_calculator(self, test : list, contol : list, data, save : bool = True):
+        """
+        NWPV calculator
+        Input
+        test : test sample list
+        control : control sample list
+        data : actual input data
+        save : saving output or not ?
+        
+        """
+        #NWPV calculation
+        nwpv_class = nwpv_calculation(data, test, contol)
+        result = nwpv_class.get_result()
+        
+        if save==True:
+            result.to_csv("resultFiles/nwpv_result.csv")
+        
+        return result
+
+    def activation_score(self, data, gene_set : list):
+        
+        """
+        gene zscore calculator
+        Input
+        data : actual input data
+        gene set : gene set input for calculating activate score
+        save : saving output or not ?
+        
+        """
+
+        #### Init Class and check input file
+        zscore_calculator = gzscore_class(data)
+
+        #### Input list should be EntrezIDs(Pathways)
+        result = zscore_calculator.gs_zscore(nthread=4, gene_set=gene_set)
+        return result
Original file line number	Diff line number	Diff line change
`@@ -1 +1,2 @@`
`1`		`-feather-format==0.4.1`
	`1`	`+feather-format==0.4.1`
	`2`	`+scikit-learn==0.24.2`