-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlowmutatedness.py
66 lines (39 loc) · 2.13 KB
/
lowmutatedness.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
__author__ = 'jlu96'
import random
from scipy import stats
class CooccurPatientDistribution:
    """
    Empirical distribution of low mutated scores over random patient sets.

    For a requested set size, ``dist_num`` random subsets of ``patients`` are
    drawn and scored with ``low_mutated_score``. The scores are cached per set
    size and used to rank an observed score.
    """

    def __init__(self, patients, patientToGenes, dist_num=1000):
        """
        :param patients: Iterable of patients that random sets are drawn from.
        :param patientToGenes: Mapping from patient to a sized collection of
            that patient's genes.
        :param dist_num: Number of random patient sets drawn per set size.
        """
        self.patients = patients
        self.patientToGenes = patientToGenes
        self.mut_freqs = [len(patientToGenes[patient]) for patient in patients]
        # Cache: set size -> list of low mutated scores of random patient sets
        self.low_mutated_score_dist_dict = {}
        self.dist_num = dist_num

    def calc_low_mutated_score_dist(self, numPatients):
        """
        Build and cache the score distribution for sets of ``numPatients``.

        :param numPatients: Size of each random patient set.
        :raises ValueError: Propagated from ``random.sample`` when
            ``numPatients`` exceeds the number of available patients.
        """
        # random.sample needs a sequence: sets are rejected on Python 3.11+.
        # Materialise the population once rather than on every draw.
        population = list(self.patients)
        self.low_mutated_score_dist_dict[numPatients] = [
            low_mutated_score(random.sample(population, numPatients), self.patientToGenes)
            for _ in range(self.dist_num)
        ]

    def get_percentile(self, low_mutated_score, numPatients):
        """
        Rank a score against the random distribution for its set size.

        :param low_mutated_score: Observed score to rank. Inside this method
            the name shadows the module-level function of the same name.
        :param numPatients: Set size whose distribution is used; the
            distribution is computed on first use and cached afterwards.
        :return: Result of ``stats.percentileofscore`` for the observed score
            against the cached distribution.
        """
        if numPatients not in self.low_mutated_score_dist_dict:
            self.calc_low_mutated_score_dist(numPatients)
        return stats.percentileofscore(self.low_mutated_score_dist_dict[numPatients], low_mutated_score)
def low_mutated_score(patients, patientToGenes):
    """
    Sum the reciprocal gene counts of the given patients.

    :param patients: Iterable of patients to score.
    :param patientToGenes: Mapping from patient to a sized collection of that
        patient's genes.
    :return: Sum of ``1.0 / len(patientToGenes[patient])`` over ``patients``;
        ``0`` when ``patients`` is empty.
    """
    # Look up every gene count before dividing, so a missing patient is
    # reported ahead of any division error.
    gene_counts = []
    for patient in patients:
        gene_counts.append(len(patientToGenes[patient]))
    return sum(1.0 / count for count in gene_counts)
def add_low_mutated_scores(cpairsdict, geneToCases, patientToGenes, dist_num=1000):
    """
    Annotate each co-occurring gene pair with a low mutated score and percentile.

    Each entry of ``cpairsdict`` is modified in place: 'LowMutatedScore' is the
    score of the patients shared by 'Gene0' and 'Gene1', and
    'LowMutatedScorePercentile' ranks that score against random patient sets
    drawn from all patients shared by any pair.

    :param cpairsdict: Mapping from pair to a dict holding at least 'Gene0',
        'Gene1' and 'Overlap'. 'Overlap' is used as the random set size;
        presumably it equals the number of shared patients -- verify against
        the caller.
    :param geneToCases: Mapping from gene to the set of patients carrying it.
    :param patientToGenes: Mapping from patient to a sized collection of that
        patient's genes.
    :param dist_num: Number of random patient sets drawn per set size.
    :return: The co-occurring pairs with low mutated scores and percentiles added.
    """
    cooccur_patients = set()
    for pair in cpairsdict:
        entry = cpairsdict[pair]
        shared_patients = geneToCases[entry['Gene0']].intersection(geneToCases[entry['Gene1']])
        entry['LowMutatedScore'] = low_mutated_score(shared_patients, patientToGenes)
        # Grow the pool in place instead of rebuilding the set for every pair.
        cooccur_patients.update(shared_patients)

    # Pass a list: the distribution samples with random.sample, which rejects
    # sets on Python 3.11+.
    CPD = CooccurPatientDistribution(list(cooccur_patients), patientToGenes, dist_num=dist_num)

    for pair in cpairsdict:
        entry = cpairsdict[pair]
        entry['LowMutatedScorePercentile'] = CPD.get_percentile(entry['LowMutatedScore'], entry['Overlap'])

    return cpairsdict