Skip to content

Commit 44ee511

Browse files
committed
testing
1 parent 7d739df commit 44ee511

File tree

5 files changed

+97
-19
lines changed

5 files changed

+97
-19
lines changed

classes/hypergeometric_distribution_class_V3.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,6 @@ def predict(
8181
pos_n = len(positive_pro_pro_neighbor) #Number of protein neighbors the protein of interest has
8282
K = len(positive_go_neighbor) - 1 #Number of protein neighbors the GO term of interest has, same for pos & neg, does not include the protein of interest
8383
pos_k = positive_go_annotated_pro_pro_neighbor_count #The overlap between the GO term and the protein of interest's neighbor proteins
84-
print("pos_N: ", pos_N, "pos_n: ", pos_n, "K: ", K, "pos_k: ", pos_k)
8584
#The hypergeometric function using variables above, math.comb(n,k) is an n choose k function
8685
positive_score = 1 - ((math.comb(K,pos_k)*math.comb(pos_N-K,pos_n-pos_k))/math.comb(pos_N,pos_n))
8786

interactome_testing.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from pathlib import Path
2+
3+
from tools.helper import create_ppi_network, read_specific_columns
4+
5+
6+
def main():
    """Count the proteins in one interactome that have an edge to themselves.

    Reads the protein-protein and protein-GO CSV files of the selected
    dataset, builds the network with ``create_ppi_network``, prints every
    protein id (followed by "self edge" when the graph has an edge from that
    protein to itself), and finally prints the total number of such proteins.
    """
    print("interactome testing")

    # (protein-protein file, protein-GO file) for each available dataset.
    # Only the entry selected below is read during a run.
    dataset_paths = {
        "fly": (
            Path("./network/fly_propro.csv"),
            Path("./network/fly_proGo.csv"),
        ),
        "zfish": (
            Path("./network/zfish_propro.csv"),
            Path("./network/zfish_proGo.csv"),
        ),
        "bsub": (
            Path("./network/bsub_propro.csv"),
            Path("./network/bsub_proGo.csv"),
        ),
    }
    interactome_path, go_association_path = dataset_paths["bsub"]

    # Columns 0 and 1 of the interactome file; columns 0 and 2 of the GO file.
    interactome = read_specific_columns(interactome_path, [0, 1], ",")
    go_protein_pairs = read_specific_columns(go_association_path, [0, 2], ",")

    # The network is always rebuilt from the CSV data here.
    G, protein_list = create_ppi_network(interactome, go_protein_pairs)

    self_edge_count = 0
    for protein in protein_list:
        protein_id = protein["id"]
        print(protein_id)
        if not G.has_edge(protein_id, protein_id):
            continue
        self_edge_count += 1
        print("self edge")

    print(self_edge_count)


# Results recorded from earlier runs:
# fly has no pro pro self edge
# zfish has 31 pro pro self edges
# bsub has 278 pro pro self edges


if __name__ == "__main__":
    main()

main.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
read_specific_columns,
2424
print_progress,
2525
export_graph_to_pickle,
26+
read_pro_go_data,
2627
)
2728
from tools.workflow import run_workflow, sample_data
2829

@@ -55,13 +56,17 @@ def main():
5556
testing_output_image_path = Path("./output/images/")
5657
testing_input_directory_path = Path("./tests/testing-dataset/")
5758
testing_graph_file_path = Path(testing_input_directory_path, "graph.pickle")
58-
59+
60+
namespace = ["molecular_function", "biological_process", "cellular_component"]
61+
# change the go_term_type variable to include which go term namespace you want
62+
go_term_type = [namespace[2]]
63+
5964
interactome_columns = [0, 1]
60-
interactome = read_specific_columns(zfish_interactome_path, interactome_columns, ",")
65+
interactome = read_specific_columns(fly_interactome_path, interactome_columns, ",")
6166

62-
go_inferred_columns = [0, 2]
63-
go_protein_pairs = read_specific_columns(
64-
zfish_go_association_path, go_inferred_columns, ","
67+
go_inferred_columns = [0, 2, 3]
68+
go_protein_pairs = read_pro_go_data(
69+
fly_go_association_path, go_inferred_columns, go_term_type, ","
6570
)
6671

6772
protein_list = []
@@ -77,13 +82,13 @@ def main():
7782

7883
# Define algorithm classes and their names
7984
algorithm_classes = {
80-
# "OverlappingNeighbors": OverlappingNeighbors,
81-
# "OverlappingNeighborsV2": OverlappingNeighborsV2,
82-
# "OverlappingNeighborsV3": OverlappingNeighborsV3,
83-
# "ProteinDegree": ProteinDegree,
84-
# "ProteinDegreeV2": ProteinDegreeV2,
85-
# "ProteinDegreeV3": ProteinDegreeV3,
86-
# "SampleAlgorithm": SampleAlgorithm,
85+
"OverlappingNeighbors": OverlappingNeighbors,
86+
"OverlappingNeighborsV2": OverlappingNeighborsV2,
87+
"OverlappingNeighborsV3": OverlappingNeighborsV3,
88+
"ProteinDegree": ProteinDegree,
89+
"ProteinDegreeV2": ProteinDegreeV2,
90+
"ProteinDegreeV3": ProteinDegreeV3,
91+
"SampleAlgorithm": SampleAlgorithm,
8792
"HypergeometricDistribution": HypergeometricDistribution,
8893
"HypergeometricDistributionV2": HypergeometricDistributionV2,
8994
"HypergeometricDistributionV3": HypergeometricDistributionV3,

tools/helper.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,27 @@ def read_specific_columns(file_path, columns, delimit):
105105
return None
106106

107107

108+
def read_pro_go_data(file_path, columns, namespace, delimit):
    """Read selected columns of a delimited file, keeping rows by namespace.

    The first line of the file is skipped (presumably a header row). Every
    remaining non-blank line is stripped, split on ``delimit`` and reduced to
    the fields at the indices listed in ``columns``, with all double-quote
    characters removed. A row is kept only when its third selected field
    (``selected_columns[2]``) is contained in ``namespace``.

    Args:
        file_path: Path of the file to read.
        columns: Indices of the fields to keep, in output order. At least
            three indices are required because the third selected field is
            the one tested against ``namespace``.
        namespace: Container of accepted values for the third selected field.
        delimit: Field separator handed to ``str.split``.

    Returns:
        A list of the rows (each a list of strings) that passed the filter.
        The list is empty when the file has no data lines. ``None`` is
        returned, after printing a message, when the file does not exist or
        any other error occurs (for example a line with too few fields).
    """
    # NOTE(review): lines are split naively, so a quoted field that itself
    # contains the delimiter is broken into several parts - confirm the input
    # files never contain such fields.
    try:
        with open(file_path, "r") as file:
            # The None default keeps an empty file from raising StopIteration,
            # which would otherwise be reported as an error with no message.
            next(file, None)
            data = []
            for line in file:
                stripped = line.strip()
                if not stripped:
                    # A blank line (e.g. a trailing newline) holds no record;
                    # skip it instead of failing the whole read.
                    continue
                parts = stripped.split(delimit)
                selected_columns = [
                    parts[col].replace('"', "") for col in columns
                ]
                if selected_columns[2] in namespace:
                    data.append(selected_columns)
            return data
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        return None
    except Exception as e:
        # Kept broad on purpose: callers rely on None rather than an exception.
        print(f"An error occurred: {e}")
        return None
127+
128+
108129
def generate_random_colors(num_colors):
109130
colors = []
110131
for _ in range(num_colors):
@@ -145,10 +166,10 @@ def add_print_statements(filename, statements):
145166

146167

147168
def export_graph_to_pickle(graph, filename):
    """Serialize ``graph`` with pickle and write it to ``filename``.

    The target file is opened in binary write mode, so an existing file at
    that path is overwritten.
    """
    with open(filename, "wb") as pickle_file:
        pickle.dump(graph, pickle_file)
150171

151172

152173
def import_graph_from_pickle(filename):
    """Load and return the object pickled in ``filename``.

    The file is opened in binary read mode and deserialized with
    ``pickle.load``.
    """
    # NOTE(review): unpickling can execute arbitrary code - only load files
    # that this project wrote itself.
    with open(filename, "rb") as pickle_file:
        loaded_graph = pickle.load(pickle_file)
    return loaded_graph

tools/workflow.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,15 @@ def generate_figures(algorithm_classes, results, output_image_path, output_data_
141141
colors = generate_random_colors(len(algorithm_classes))
142142

143143
sorted_results = sort_results_by(results, "roc_auc", output_data_path)
144-
i = 0
145-
plt.figure()
144+
# Initialize your parameters
145+
fig_width = 10 # width in inches
146+
fig_height = 7 # height in inches
147+
fig_dpi = 100 # dots per inch for the figure
148+
save_dpi = 200 # dots per inch for the saved image
149+
150+
plt.figure(figsize=(fig_width, fig_height), dpi=fig_dpi)
151+
152+
i = 0 # Initialize your index for colors
146153
for algorithm_name, metrics in sorted_results.items():
147154
plt.plot(
148155
metrics["fpr"],
@@ -164,7 +171,7 @@ def generate_figures(algorithm_classes, results, output_image_path, output_data_
164171

165172
sorted_results = sort_results_by(results, "pr_auc", output_data_path)
166173
i = 0
167-
plt.figure()
174+
plt.figure(figsize=(fig_width, fig_height), dpi=fig_dpi)
168175
for algorithm_name, metrics in sorted_results.items():
169176
plt.plot(
170177
metrics["recall"],
@@ -177,7 +184,7 @@ def generate_figures(algorithm_classes, results, output_image_path, output_data_
177184
plt.xlabel("Recall")
178185
plt.ylabel("Precision")
179186
plt.title("Precision-Recall Curve")
180-
plt.legend(loc="lower left")
187+
plt.legend(loc="lower right")
181188
plt.savefig(Path(output_image_path, "multiple_pr_curves.png"))
182189
plt.show()
183190

0 commit comments

Comments
 (0)