Skip to content

Commit 7d739df

Browse files
committed
testing new species addition to methods
1 parent 04f9ba6 commit 7d739df

File tree

3 files changed

+35
-30
lines changed

3 files changed

+35
-30
lines changed

classes/hypergeometric_distribution_class_V3.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def predict(
8181
pos_n = len(positive_pro_pro_neighbor) #Number of protein neighbors the protein of interest has
8282
K = len(positive_go_neighbor) - 1 #Number of protein neighbors the GO term of interest has, same for pos & neg, does not include the protein of interest
8383
pos_k = positive_go_annotated_pro_pro_neighbor_count #The overlap between the GO term and the protein of interest's neighbor proteins
84-
84+
print("pos_N: ", pos_N, "pos_n: ", pos_n, "K: ", K, "pos_k: ", pos_k)
8585
#The hypergeometric function using variables above, math.comb(n,k) is an n choose k function
8686
positive_score = 1 - ((math.comb(K,pos_k)*math.comb(pos_N-K,pos_n-pos_k))/math.comb(pos_N,pos_n))
8787

main.py

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -38,47 +38,52 @@ def main():
3838
if not os.path.exists("output/images"):
3939
os.makedirs("output/images")
4040

41-
interactome_path = Path("./network/interactome-flybase-collapsed-weighted.txt")
42-
go_association_path = Path("./network/fly_proGo.csv")
41+
fly_interactome_path = Path("./network/fly_propro.csv")
42+
fly_go_association_path = Path("./network/fly_proGo.csv")
43+
zfish_interactome_path = Path("./network/zfish_propro.csv")
44+
zfish_go_association_path = Path("./network/zfish_proGo.csv")
45+
bsub_interactome_path = Path("./network/bsub_propro.csv")
46+
bsub_go_association_path = Path("./network/bsub_proGo.csv")
47+
4348
output_data_path = Path("./output/data/")
4449
output_image_path = Path("./output/images/")
4550
dataset_directory_path = Path("./output/dataset")
4651
graph_file_path = Path(dataset_directory_path, "graph.pickle")
47-
sample_size = 1000
52+
sample_size = 10000
4853

4954
testing_output_data_path = Path("./output/data/")
5055
testing_output_image_path = Path("./output/images/")
5156
testing_input_directory_path = Path("./tests/testing-dataset/")
5257
testing_graph_file_path = Path(testing_input_directory_path, "graph.pickle")
5358

54-
interactome_columns = [0, 1, 4, 5]
55-
interactome = read_specific_columns(interactome_path, interactome_columns, "\t")
59+
interactome_columns = [0, 1]
60+
interactome = read_specific_columns(zfish_interactome_path, interactome_columns, ",")
5661

5762
go_inferred_columns = [0, 2]
5863
go_protein_pairs = read_specific_columns(
59-
go_association_path, go_inferred_columns, ","
64+
zfish_go_association_path, go_inferred_columns, ","
6065
)
6166

6267
protein_list = []
6368

6469
# if there is no graph.pickle file in the output/dataset directory, uncomment the following lines
65-
# G, protein_list = create_ppi_network(interactome, go_protein_pairs)
66-
# export_graph_to_pickle(G, graph_file_path)
70+
G, protein_list = create_ppi_network(interactome, go_protein_pairs)
71+
export_graph_to_pickle(G, graph_file_path)
6772

6873
# if there is no sample dataset, uncomment the following lines. otherwise, the dataset in outputs will be used
69-
# positive_dataset, negative_dataset = sample_data(
70-
# go_protein_pairs, sample_size, protein_list, G, dataset_directory_path
71-
# )
74+
positive_dataset, negative_dataset = sample_data(
75+
go_protein_pairs, sample_size, protein_list, G, dataset_directory_path
76+
)
7277

7378
# Define algorithm classes and their names
7479
algorithm_classes = {
75-
"OverlappingNeighbors": OverlappingNeighbors,
76-
"OverlappingNeighborsV2": OverlappingNeighborsV2,
77-
"OverlappingNeighborsV3": OverlappingNeighborsV3,
78-
"ProteinDegree": ProteinDegree,
79-
"ProteinDegreeV2": ProteinDegreeV2,
80-
"ProteinDegreeV3": ProteinDegreeV3,
81-
"SampleAlgorithm": SampleAlgorithm,
80+
# "OverlappingNeighbors": OverlappingNeighbors,
81+
# "OverlappingNeighborsV2": OverlappingNeighborsV2,
82+
# "OverlappingNeighborsV3": OverlappingNeighborsV3,
83+
# "ProteinDegree": ProteinDegree,
84+
# "ProteinDegreeV2": ProteinDegreeV2,
85+
# "ProteinDegreeV3": ProteinDegreeV3,
86+
# "SampleAlgorithm": SampleAlgorithm,
8287
"HypergeometricDistribution": HypergeometricDistribution,
8388
"HypergeometricDistributionV2": HypergeometricDistributionV2,
8489
"HypergeometricDistributionV3": HypergeometricDistributionV3,
@@ -87,10 +92,10 @@ def main():
8792

8893
results = run_workflow(
8994
algorithm_classes,
90-
testing_input_directory_path,
91-
testing_graph_file_path,
92-
testing_output_data_path,
93-
testing_output_image_path,
95+
dataset_directory_path,
96+
graph_file_path,
97+
output_data_path,
98+
output_image_path,
9499
True,
95100
True,
96101
)

tools/helper.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,17 +39,17 @@ def create_ppi_network(fly_interactome, fly_GO_term):
3939

4040
# go through fly interactome, add a new node if it doesn't exist already, then add their physical interactions as edges
4141
for line in fly_interactome:
42-
if not G.has_node(line[2]):
43-
G.add_node(line[2], name=line[0], type="protein")
44-
protein_list.append({"id": line[2], "name": line[0]})
42+
if not G.has_node(line[0]):
43+
G.add_node(line[0], name=line[0], type="protein")
44+
protein_list.append({"id": line[0], "name": line[0]})
4545
protein_node += 1
4646

47-
if not G.has_node(line[3]):
48-
G.add_node(line[3], name=line[1], type="protein")
49-
protein_list.append({"id": line[3], "name": line[1]})
47+
if not G.has_node(line[1]):
48+
G.add_node(line[1], name=line[1], type="protein")
49+
protein_list.append({"id": line[1], "name": line[1]})
5050
protein_node += 1
5151

52-
G.add_edge(line[2], line[3], type="protein_protein")
52+
G.add_edge(line[0], line[1], type="protein_protein")
5353
protein_protein_edge += 1
5454
print_progress(i, total_progress)
5555
i += 1

0 commit comments

Comments
 (0)