1+ __author__ = "Junhee Yoon"
2+ __version__ = "1.0.0"
3+ __maintainer__ = "Junhee Yoon"
4+ __email__ = "swiri021@gmail.com"
5+
6+ """
7+ Manual: https://github.com/swiri021/Threaded_gsZscore
8+ Reference: https://genomebiology.biomedcentral.com/articles/10.1186/gb-2006-7-10-r93
9+ Description: Calculates gene-set activation scores (z-scores), splitting the columns of the input table across threads.
10+ """
11+ import pandas as pd
12+ import numpy as np
13+ import threading
14+ import functools
15+ import itertools
16+
class funcThread(object):
    """Decorator that runs a DataFrame-processing method on several threads.

    The decorated callable is invoked as
    ``func(owner, frame_part, container, slot, **kwargs)`` and must store its
    result (a pandas object) in ``container[slot]`` rather than return it.

    The wrapper takes the DataFrame found at positional argument 1, cuts its
    columns into contiguous groups, runs one thread per group and returns the
    per-group results stacked with ``pd.concat(..., axis=0)``.  Whatever the
    caller passes as positional arguments 2 and 3 is ignored and replaced by
    the shared result list and the slot number.

    The thread count is read from the ``nthread`` keyword argument, so it has
    to be passed by keyword (``KeyError`` otherwise).
    """

    def __init__(self):
        print("Loaded Threads")

    def __call__(self, func):
        """Return ``func`` wrapped so that it is applied per column group."""

        @functools.wraps(func)
        def run(*args, **kwargs):
            requested = kwargs['nthread']
            if requested < 1:
                raise ValueError("nthread must be at least 1, got %r" % (requested,))

            owner, frame = args[0], args[1]
            n_cols = len(frame.columns)
            if n_cols == 0:
                raise ValueError("Input Dataframe has no columns to distribute over threads.")

            # Never start more threads than there are columns; otherwise the
            # chunk size below would be zero.
            workers = min(requested, n_cols)
            print("Number of Threads : %d" % workers)

            # Floor division keeps the chunk size an integer on Python 3.
            # The first ``workers - 1`` groups hold ``chunk`` columns each and
            # the last group absorbs the remainder.
            chunk = n_cols // workers
            bounds = [(k * chunk, (k + 1) * chunk) for k in range(workers - 1)]
            bounds.append(((workers - 1) * chunk, n_cols))

            container = [None] * workers
            failures = [None] * workers

            def guarded(slot, part):
                # Capture the exception so it can be re-raised in the calling
                # thread instead of leaving a silent ``None`` in the results.
                try:
                    func(owner, part, container, slot, **kwargs)
                except Exception as exc:
                    failures[slot] = exc

            threads = []
            for slot, (start, stop) in enumerate(bounds):
                # Positional slicing replaces the removed ``DataFrame.ix`` and
                # stays correct when column labels are duplicated.
                worker = threading.Thread(
                    target=guarded,
                    args=(slot, frame.iloc[:, start:stop]),
                )
                worker.start()
                threads.append(worker)
            for worker in threads:
                worker.join()

            for exc in failures:
                if exc is not None:
                    raise exc

            return pd.concat(container, axis=0)

        return run
48+
49+
class calculator(object):
    """Gene-set z-score calculator built around a single DataFrame.

    Row labels of the DataFrame are matched against the gene set and one
    score is produced per column.  For every column the score is::

        (mean of gene-set rows - mean of all rows) * sqrt(m) / std of all rows

    where ``m`` is the number of distinct row labels found in the gene set and
    the standard deviation uses ``ddof=1``.
    """

    def __init__(self, df):
        """Store ``df``; raise ``ValueError`` when it is empty."""
        if df.empty:
            raise ValueError("Input Dataframe is empty, please try with different one.")
        self.df = df

    # Wrapper for controlling Threads
    def gs_zscore(self, nthread=5, gene_set=None):
        """Score every column of the stored DataFrame against ``gene_set``.

        Args:
            nthread: number of threads requested from the ``funcThread``
                wrapper around ``_calculating``.
            gene_set: collection of row labels forming the gene set; ``None``
                is treated as an empty collection.

        Returns:
            DataFrame with a single ``'Zscore'`` column, indexed by the column
            labels of the stored DataFrame.  Scores are NaN when no row label
            matches ``gene_set``.
        """
        # The container and thread-index slots are placeholders: the
        # funcThread wrapper substitutes its own values for both.
        return self._calculating(self.df, None, None, nthread=nthread, gene_set=gene_set)

    # function structure
    # args(input, container, thread_index , **kwargs)
    @funcThread()
    def _calculating(self, arr1, container, i, nthread=5, gene_set=None):
        """Compute the z-scores for ``arr1`` and store them in ``container[i]``.

        ``nthread`` is not used here; it is consumed by the ``funcThread``
        wrapper.  Nothing is returned: the result, a one-column ``'Zscore'``
        DataFrame, is written into ``container[i]``.
        """
        wanted = [] if gene_set is None else gene_set

        # A boolean mask selects the matching rows in their original order,
        # which keeps the computation deterministic from run to run.
        in_set = arr1.index.isin(wanted)
        # Count distinct labels, so duplicated row labels count only once.
        n_hits = len(arr1.index[in_set].unique())

        diff_mean = arr1.loc[in_set].mean(axis=0).subtract(arr1.mean(axis=0))
        len_norm = np.sqrt(n_hits) / arr1.std(ddof=1, axis=0)

        container[i] = (diff_mean * len_norm).to_frame(name='Zscore')
        ##No Return
80+ ##No Return
0 commit comments