Skip to content

Commit 3049dc4

Browse files
author
Maria Littmann
committed
Embedding files switched to h5
1 parent a109091 commit 3049dc4

6 files changed

+22
-66
lines changed

config.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
go: data/go_cafa3.obo
2-
lookup_set: data/seqvec_goa_2017.npz # please download from ftp://rostlab.org/goPredSim
2+
lookup_set: data/seqvec_goa_2017.h5 # please download from ftp://rostlab.org/goPredSim
33
annotations: data/goa_annotations_2017.txt
4-
targets: data/seqvec_cafa3_targets.npz
4+
targets: data/seqvec_cafa3_targets.h5
55
onto: all
66
thresh: 1
77
modus: num

embedding_lookup.py

-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from sklearn.metrics import pairwise_distances, pairwise
2-
import torch
32
import numpy
43
import sys
54

file_utils.py

+16
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from collections import defaultdict
2+
import h5py
23

34

45
def read_config_file(file_in):
@@ -35,6 +36,21 @@ def read_go_annotations(file_in):
3536
return go_annotations
3637

3738

39+
def read_embeddings(embeddings_in):
    """
    Read embeddings from h5 file generated by bio_embeddings pipeline

    Each dataset in the file is loaded into a numpy array and stored under the
    identifier found in the dataset's 'original_id' attribute.

    :param embeddings_in: h5 file to read (passed to h5py.File, opened read-only)
    :return: dictionary mapping each 'original_id' to its embedding as a numpy array
    :raises KeyError: if a dataset does not carry an 'original_id' attribute
    """
    # Imported locally: the visible module header only imports defaultdict and h5py,
    # so without this the np.array call below fails with a NameError.
    import numpy as np

    embeddings = dict()
    with h5py.File(embeddings_in, 'r') as f:
        # Dataset names are not used; entries are keyed by the 'original_id' attribute.
        for embedding in f.values():
            original_id = embedding.attrs['original_id']
            # Copy the data into memory while the file is still open.
            embeddings[original_id] = np.array(embedding)

    return embeddings
52+
53+
3854
def write_predictions_cafa(predictions, out_file, model_num):
3955
"""
4056
Write predictions in CAFA format

npy2npz.py

-59
This file was deleted.

predict_go_embedding_inference.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import sys
22
import file_utils as fu
3-
import npy2npz as n2n
43
from gene_ontology import GeneOntology
54
from function_prediction import FunctionPrediction
65
from pathlib import Path
@@ -13,8 +12,9 @@ def main():
1312
print(config_data)
1413

1514
# read in embeddings, annotations, and GO
16-
test_embeddings = n2n.get_dataset(Path(config_data['targets']), False)
17-
embeddings = n2n.get_dataset(Path(config_data['lookup_set']), False)
15+
test_embeddings = fu.read_embeddings(config_data['targets'])
16+
embeddings = fu.read_embeddings(config_data['lookup_set'])
17+
1818
go = GeneOntology(config_data['go'])
1919
go_annotations = fu.read_go_annotations(config_data['annotations'])
2020

requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
numpy
2-
torch
2+
h5py
33
pathlib
44
sklearn

0 commit comments

Comments
 (0)