Skip to content

Commit b6c3328

Browse files
authored
Merge branch 'dev' into dev
2 parents f3f4516 + e019633 commit b6c3328

File tree

12 files changed

+343
-64
lines changed

12 files changed

+343
-64
lines changed

modelseedpy/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
c_handler.setFormatter(c_format)
2828
logger.addHandler(c_handler)
2929
if config.get("logging","log_file") == "yes":
30-
f_handler = logging.FileHandler(config.get("logging","filename"),mode="w")
30+
f_handler = logging.FileHandler(config.get("logging","filename"), mode="a")
3131
f_handler.setLevel(logging_hash[config.get("logging","file_level")])
3232
f_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
3333
f_handler.setFormatter(f_format)

modelseedpy/core/msatpcorrection.py

Lines changed: 95 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717

1818
class MSATPCorrection:
1919

20-
def __init__(self, model, core_template, atp_medias,compartment="c0",
20+
DEBUG = False
21+
22+
def __init__(self, model, core_template, atp_medias, compartment="c0",
2123
max_gapfilling=None, gapfilling_delta=0, atp_hydrolysis_id=None):
2224
"""
2325
@@ -60,56 +62,89 @@ def __init__(self, model, core_template, atp_medias,compartment="c0",
6062
self.lp_filename = None
6163
self.multiplier = 1.2
6264

65+
@staticmethod
66+
def find_reaction_in_template(model_reaction, template, compartment):
67+
template_reaction = None # we save lookup result here
68+
if model_reaction.id in template.reactions:
69+
template_reaction = template.reactions.get_by_id(model_reaction.id)
70+
else:
71+
msid = FBAHelper.modelseed_id_from_cobra_reaction(model_reaction)
72+
if msid is not None:
73+
msid += "_" + compartment
74+
if msid in template.reactions:
75+
template_reaction = template.reactions.get_by_id(model_reaction.id[0:-1])
76+
else:
77+
# will leave this here for now
78+
def split_id_from_index(s):
79+
"""
80+
Extracts the last digits of a string example: rxn12345, returns rxn 12345
81+
82+
@param s: any string
83+
@return: string split into head (remaining) + tail (digits)
84+
"""
85+
str_pos = len(s) - 1
86+
while str_pos >= 0:
87+
if not s[str_pos].isdigit():
88+
break
89+
str_pos -= 1
90+
91+
return s[:str_pos + 1], s[str_pos + 1:]
92+
93+
rxn_id, index = split_id_from_index(model_reaction.id)
94+
if rxn_id in template.reactions:
95+
template_reaction = template.reactions.get_by_id(rxn_id)
96+
97+
return template_reaction
98+
6399
def disable_noncore_reactions(self):
64100
"""
65-
Disables all noncore reactions in the model
101+
Disables all non core reactions in the model
66102
:return:
67103
"""
68-
#Must restore reactions before disabling to ensure bounds are not overwritten
104+
# Must restore reactions before disabling to ensure bounds are not overwritten
69105
if len(self.noncore_reactions) > 0:
70-
self.restore_noncore_reactions(noncore = True,othercompartment = True)
71-
#Now clearing the existing noncore datastructures
106+
self.restore_noncore_reactions(noncore=True, othercompartment=True)
107+
# Now clearing the existing noncore data structures
72108
self.original_bounds = {}
73109
self.noncore_reactions = []
74110
self.other_compartments = []
75-
#Iterating through reactions and disabling
111+
# Iterating through reactions and disabling
76112
for reaction in self.model.reactions:
77113
if reaction.id == self.atp_hydrolysis.id:
78114
continue
79115
if FBAHelper.is_ex(reaction):
80116
continue
81117
if FBAHelper.is_biomass(reaction):
82118
continue
83-
msid = FBAHelper.modelseed_id_from_cobra_reaction(reaction)
84-
if msid != None:
85-
msid += "_"+self.compartment[0:1]
86-
if FBAHelper.rxn_compartment(reaction) != self.compartment:
87-
logger.debug(reaction.id+" noncore")
88-
self.original_bounds[reaction.id] = (reaction.lower_bound, reaction.upper_bound)
89-
if reaction.lower_bound < 0:
90-
self.other_compartments.append([reaction, "<"])
91-
if reaction.upper_bound > 0:
92-
self.other_compartments.append([reaction, ">"])
93-
reaction.lower_bound = 0
94-
reaction.upper_bound = 0
95-
elif msid in self.coretemplate.reactions:
96-
self.original_bounds[reaction.id] = (reaction.lower_bound, reaction.upper_bound)
97-
logger.debug(reaction.id+" core")
98-
if reaction.lower_bound < 0 and self.coretemplate.reactions.get_by_id(reaction.id[0:-1]).lower_bound >= 0:
99-
logger.debug(reaction.id+" core but reversible")
119+
120+
self.original_bounds[reaction.id] = (reaction.lower_bound, reaction.upper_bound)
121+
122+
# check if reaction is in core template
123+
template_reaction = self.find_reaction_in_template(reaction, self.coretemplate, self.compartment[0:1])
124+
125+
# update bounds to reaction
126+
if template_reaction is not None:
127+
logger.debug(f"{reaction.id} core")
128+
if reaction.lower_bound < 0 and template_reaction.lower_bound >= 0:
129+
logger.debug(reaction.id + " core but reversible")
100130
self.noncore_reactions.append([reaction, "<"])
101131
reaction.lower_bound = 0
102-
if reaction.upper_bound > 0 and self.coretemplate.reactions.get_by_id(reaction.id[0:-1]).upper_bound <= 0:
103-
logger.debug(reaction.id+" core but reversible")
132+
if reaction.upper_bound > 0 and template_reaction.upper_bound <= 0:
133+
logger.debug(reaction.id + " core but reversible")
104134
self.noncore_reactions.append([reaction, ">"])
105135
reaction.upper_bound = 0
106136
else:
107-
logger.debug(reaction.id+" noncore")
108-
self.original_bounds[reaction.id] = (reaction.lower_bound, reaction.upper_bound)
109-
if reaction.lower_bound < 0:
110-
self.noncore_reactions.append([reaction, "<"])
111-
if reaction.upper_bound > 0:
112-
self.noncore_reactions.append([reaction, ">"])
137+
logger.debug(f"{reaction.id} non core")
138+
if FBAHelper.rxn_compartment(reaction) != self.compartment:
139+
if reaction.lower_bound < 0:
140+
self.other_compartments.append([reaction, "<"])
141+
if reaction.upper_bound > 0:
142+
self.other_compartments.append([reaction, ">"])
143+
else:
144+
if reaction.lower_bound < 0:
145+
self.noncore_reactions.append([reaction, "<"])
146+
if reaction.upper_bound > 0:
147+
self.noncore_reactions.append([reaction, ">"])
113148
reaction.lower_bound = 0
114149
reaction.upper_bound = 0
115150

@@ -129,13 +164,14 @@ def evaluate_growth_media(self):
129164
self.model.objective = self.atp_hydrolysis.id
130165
#self.model.objective = self.model.problem.Objective(Zero,direction="max")
131166
#self.atp_hydrolysis.update_variable_bounds()
132-
logger.debug("ATP bounds:"+str(self.atp_hydrolysis.lower_bound)+":"+str(self.atp_hydrolysis.upper_bound))
167+
logger.debug(f'ATP bounds: ({self.atp_hydrolysis.lower_bound}, {self.atp_hydrolysis.upper_bound})')
133168
#self.model.objective.set_linear_coefficients({self.atp_hydrolysis.forward_variable:1})
134169
pkgmgr = MSPackageManager.get_pkg_mgr(self.model)
135170
for media_tuple in self.atp_medias:
136171
media = media_tuple[0]
137172
logger.debug('evaluate media %s', media)
138173
pkgmgr.getpkg("KBaseMediaPkg").build_package(media)
174+
logger.debug('model.medium %s', self.model.medium)
139175
solution = self.model.optimize()
140176
logger.debug('evaluate media %s - %f (%s)', media.id, solution.objective_value, solution.status)
141177
self.media_gapfill_stats[media] = None
@@ -146,6 +182,11 @@ def evaluate_growth_media(self):
146182
elif solution.objective_value >= media_tuple[1]:
147183
self.media_gapfill_stats[media] = {'reversed': {}, 'new': {}}
148184
logger.debug('gapfilling stats:',json.dumps(self.media_gapfill_stats[media],indent=2))
185+
186+
if MSATPCorrection.DEBUG:
187+
with open('debug.json', 'w') as outfile:
188+
json.dump(self.media_gapfill_stats[media], outfile)
189+
149190
return output
150191

151192
def determine_growth_media(self):
@@ -163,10 +204,15 @@ def determine_growth_media(self):
163204
best_score = gfscore
164205
if self.max_gapfilling is None:
165206
self.max_gapfilling = best_score
207+
208+
logger.debug(f'max_gapfilling: {self.max_gapfilling}, best_score: {best_score}')
209+
166210
for media in self.media_gapfill_stats:
167211
gfscore = 0
168212
if self.media_gapfill_stats[media]:
169213
gfscore = len(self.media_gapfill_stats[media]["new"].keys()) + 0.5*len(self.media_gapfill_stats[media]["reversed"].keys())
214+
215+
logger.debug(f'media gapfilling score: {media.id}: {gfscore}')
170216
if gfscore <= self.max_gapfilling and gfscore <= (best_score+self.gapfilling_delta):
171217
self.selected_media.append(media)
172218

@@ -211,9 +257,10 @@ def expand_model_to_genome_scale(self):
211257
"""
212258
self.filtered_noncore = []
213259
tests = self.build_tests()
214-
#Must restore noncore reactions and NOT other compartment reactions before running this function - it is not detrimental to run this twice
215-
self.restore_noncore_reactions(noncore = True,othercompartment = False)
216-
# Extending model with noncore reactions while retaining ATP accuracy
260+
# Must restore non core reactions and NOT other compartment reactions before running this function
261+
# it is not detrimental to run this twice
262+
self.restore_noncore_reactions(noncore=True, othercompartment=False)
263+
# Extending model with non core reactions while retaining ATP accuracy
217264
self.filtered_noncore = self.modelutl.reaction_expansion_test(self.noncore_reactions,tests)
218265
# Removing filtered reactions
219266
for item in self.filtered_noncore:
@@ -225,8 +272,8 @@ def expand_model_to_genome_scale(self):
225272
# reaction.update_variable_bounds()
226273
if item[0].lower_bound == 0 and item[0].upper_bound == 0:
227274
self.model.remove_reactions([item[0]])
228-
#Restoring other compartment reactions but not the core because this would undo reaction filtering
229-
self.restore_noncore_reactions(noncore = False,othercompartment = True)
275+
# Restoring other compartment reactions but not the core because this would undo reaction filtering
276+
self.restore_noncore_reactions(noncore=False, othercompartment=True)
230277

231278
def restore_noncore_reactions(self,noncore = True,othercompartment = True):
232279
"""
@@ -247,8 +294,7 @@ def restore_noncore_reactions(self,noncore = True,othercompartment = True):
247294
reaction.lower_bound = self.original_bounds[reaction.id][0]
248295
reaction.upper_bound = self.original_bounds[reaction.id][1]
249296

250-
251-
def build_tests(self,multiplier=None):
297+
def build_tests(self, multiplier=None):
252298
"""Build tests based on ATP media evaluations
253299
254300
Parameters
@@ -264,23 +310,28 @@ def build_tests(self,multiplier=None):
264310
Raises
265311
------
266312
"""
267-
if multiplier == None:
313+
if multiplier is None:
268314
multiplier = self.multiplier
269315
tests = []
270316
self.model.objective = self.atp_hydrolysis.id
271317
for media in self.selected_media:
272318
self.modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(media)
273-
obj_value = model.slim_optimize()
274-
logger.debug(media.name," = ",obj_value)
275-
tests.append({"media":media,"is_max_threshold": True,"threshold":multiplier*obj_value,"objective":self.atp_hydrolysis.id})
319+
obj_value = self.model.slim_optimize()
320+
logger.debug(f'{media.name} = {obj_value}')
321+
tests.append({
322+
"media": media,
323+
"is_max_threshold": True,
324+
"threshold": multiplier*obj_value,
325+
"objective": self.atp_hydrolysis.id
326+
})
276327
return tests
277328

278329
def run_atp_correction(self):
279330
"""
280331
Runs the entire ATP method
281332
:return:
282333
"""
283-
#Ensure all specified media work
334+
# Ensure all specified media work
284335
self.evaluate_growth_media()
285336
self.determine_growth_media()
286337
self.apply_growth_media_gapfilling()

modelseedpy/core/msgenome.py

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
import logging
2-
logger = logging.getLogger(__name__)
3-
42
import re
53
import copy # !!! the import is never used
64
from cobra.core.dictlist import DictList
75

6+
logger = logging.getLogger(__name__)
87

98

109
def normalize_role(s):
@@ -13,15 +12,12 @@ def normalize_role(s):
1312
s = re.sub('[\W_]+', '', s)
1413
return s
1514

16-
#Static factory functions:
17-
18-
#def build_from_kbase_gto:
19-
2015

2116
def read_fasta(f, split='|', h_func=None):
2217
with open(f, 'r') as fh:
2318
return parse_fasta_str(fh.read(), split, h_func)
2419

20+
2521
def parse_fasta_str(faa_str, split='|', h_func=None):
2622
features = []
2723
seq = None
@@ -48,23 +44,54 @@ def parse_fasta_str(faa_str, split='|', h_func=None):
4844

4945

5046
class MSFeature:
47+
5148
def __init__(self, feature_id, sequence, description=None):
52-
self.id = feature_id; self.seq = sequence
49+
"""
50+
51+
@param feature_id: identifier for the protein coding feature
52+
@param sequence: protein sequence
53+
@param description: description of the feature
54+
"""
55+
56+
self.id = feature_id
57+
self.seq = sequence
5358
self.description = description # temporary replace with proper parsing
5459
self.ontology_terms = {}
5560
self.aliases = []
5661

5762
def add_ontology_term(self, ontology_term, value):
63+
"""
64+
Add functional term to the feature
65+
66+
@param ontology_term: type of the ontology (e.g., RAST, EC)
67+
@param value: value for the ontology (e.g., pyruvate kinase)
68+
"""
5869
if ontology_term not in self.ontology_terms:
5970
self.ontology_terms[ontology_term] = []
6071
if value not in self.ontology_terms[ontology_term]:
6172
self.ontology_terms[ontology_term].append(value)
6273

6374

6475
class MSGenome:
76+
6577
def __init__(self):
6678
self.features = DictList()
6779

80+
def add_features(self, feature_list: list):
81+
"""
82+
83+
:param feature_list:
84+
:return:
85+
"""
86+
duplicates = list(filter(lambda o: o.id in self.features, feature_list))
87+
if len(duplicates) > 0:
88+
raise ValueError(f"unable to add features {duplicates} already present in the genome")
89+
90+
for f in feature_list:
91+
f._genome = self
92+
93+
self.features += feature_list
94+
6895
@staticmethod
6996
def from_fasta(filename, contigs=0, split='|', h_func=None): # !!! the contigs argument is never used
7097
genome = MSGenome()
@@ -83,10 +110,10 @@ def from_protein_sequences_hash(sequences):
83110
return genome
84111

85112
def alias_hash(self):
86-
return {alias:gene for gene in self.features for alias in gene.aliases}
113+
return {alias: gene for gene in self.features for alias in gene.aliases}
87114

88-
def search_for_gene(self,query):
115+
def search_for_gene(self, query):
89116
if query in self.features:
90117
return self.features.get_by_id(query)
91118
aliases = self.alias_hash()
92-
return aliases[query] if query in aliases else None
119+
return aliases[query] if query in aliases else None

modelseedpy/core/msgenomeclassifier.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from modelseedpy.ml.predict_phenotype import create_indicator_matrix
2+
from modelseedpy.core.msgenome import MSGenome
23

34

45
class MSGenomeClassifier:
@@ -22,7 +23,10 @@ def extract_features_from_genome(genome, ontology_term):
2223
return {'genome': list(features)}
2324

2425
def classify(self, genome_or_roles, ontology_term='RAST'):
25-
if isinstance(genome_or_roles,"MSGenome"):
26+
"""
27+
param genome_or_roles:
28+
"""
29+
if isinstance(genome_or_roles, MSGenome):
2630
genome_or_roles = self.extract_features_from_genome(genome_or_roles, ontology_term)
2731
indicator_df, master_role_list = create_indicator_matrix(genome_or_roles, self.features)
2832
predictions_numerical = self.model.predict(indicator_df[master_role_list].values)

modelseedpy/core/msmedia.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
logger = logging.getLogger(__name__)
55

6+
67
class MediaCompound:
78

89
def __init__(self, compound_id, lower_bound, upper_bound, concentration=None):
@@ -23,16 +24,18 @@ def minFlux(self):
2324

2425

2526
class MSMedia:
26-
def __init__(self, media_id):
27+
28+
def __init__(self, media_id, name=""):
2729
self.id = media_id
30+
self.name = name
2831
self.mediacompounds = DictList()
2932

3033
@staticmethod
3134
def from_dict(media_dict):
3235
"""
3336
Either dict with exchange bounds (example: {'cpd00027': (-10, 1000)}) or
3437
just absolute value of uptake (example: {''cpd00027': 10})
35-
:param d:
38+
:param media_dict:
3639
:return:
3740
"""
3841
media = MSMedia('media')

0 commit comments

Comments
 (0)