ModelSEED
diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py
Lines changed: 95 additions & 44 deletions
diff --git a/modelseedpy/core/msgenome.py b/modelseedpy/core/msgenome.py
Lines changed: 37 additions & 10 deletions
diff --git a/modelseedpy/core/msgenomeclassifier.py b/modelseedpy/core/msgenomeclassifier.py
Lines changed: 5 additions & 1 deletion
diff --git a/modelseedpy/core/msmedia.py b/modelseedpy/core/msmedia.py
Lines changed: 5 additions & 2 deletions
@@ -27,7 +27,7 @@
 c_handler.setFormatter(c_format)
 logger.addHandler(c_handler)
 if config.get("logging","log_file") == "yes":
-    f_handler = logging.FileHandler(config.get("logging","filename"),mode="w")
+    f_handler = logging.FileHandler(config.get("logging","filename"), mode="a")
     f_handler.setLevel(logging_hash[config.get("logging","file_level")])
     f_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
     f_handler.setFormatter(f_format)
 
@@ -17,7 +17,9 @@
 
 class MSATPCorrection:
 
-    def __init__(self, model, core_template, atp_medias,compartment="c0",
+    DEBUG = False
+
+    def __init__(self, model, core_template, atp_medias, compartment="c0",
                  max_gapfilling=None, gapfilling_delta=0, atp_hydrolysis_id=None):
         """
 
@@ -60,56 +62,89 @@ def __init__(self, model, core_template, atp_medias,compartment="c0",
         self.lp_filename = None
         self.multiplier = 1.2
 
+    @staticmethod
+    def find_reaction_in_template(model_reaction, template, compartment):
+        """
+        Look up the template reaction matching a model reaction.
+
+        Tries the model reaction id, then the ModelSEED id suffixed with
+        "_" + compartment, then the model reaction id minus trailing digits.
+
+        @param model_reaction: reaction whose template counterpart is wanted
+        @param template: object whose `reactions` supports `in` and `get_by_id`
+        @param compartment: suffix appended to the ModelSEED id (after "_")
+        @return: the matching template reaction, or None if no lookup hits
+        """
+        if model_reaction.id in template.reactions:
+            return template.reactions.get_by_id(model_reaction.id)
+
+        msid = FBAHelper.modelseed_id_from_cobra_reaction(model_reaction)
+        if msid is not None:
+            msid += "_" + compartment
+            if msid in template.reactions:
+                # fetch by the id that was just matched; the previous lookup of
+                # model_reaction.id[0:-1] only worked when that slice equalled msid
+                return template.reactions.get_by_id(msid)
+
+        # last resort: model id without its trailing digits (rxn00001_c0 -> rxn00001_c)
+        str_pos = len(model_reaction.id)
+        while str_pos > 0 and model_reaction.id[str_pos - 1].isdigit():
+            str_pos -= 1
+        rxn_id = model_reaction.id[:str_pos]
+        if rxn_id in template.reactions:
+            return template.reactions.get_by_id(rxn_id)
+
+        return None
+
     def disable_noncore_reactions(self):
         """
-        Disables all noncore reactions in the model
+        Disables all non core reactions in the model
         :return:
         """
-        #Must restore reactions before disabling to ensure bounds are not overwritten
+        # Must restore reactions before disabling to ensure bounds are not overwritten
         if len(self.noncore_reactions) > 0:
-            self.restore_noncore_reactions(noncore = True,othercompartment = True)
-        #Now clearing the existing noncore datastructures
+            self.restore_noncore_reactions(noncore=True, othercompartment=True)
+        # Now clearing the existing noncore data structures
         self.original_bounds = {}
         self.noncore_reactions = []
         self.other_compartments = []
-        #Iterating through reactions and disabling
+        # Iterating through reactions and disabling
         for reaction in self.model.reactions:
             if reaction.id == self.atp_hydrolysis.id:
                 continue
             if FBAHelper.is_ex(reaction):
                 continue
             if FBAHelper.is_biomass(reaction):
                 continue
-            msid = FBAHelper.modelseed_id_from_cobra_reaction(reaction)
-            if msid != None:
-                msid += "_"+self.compartment[0:1]
-            if FBAHelper.rxn_compartment(reaction) != self.compartment:
-                logger.debug(reaction.id+" noncore")
-                self.original_bounds[reaction.id] = (reaction.lower_bound, reaction.upper_bound)
-                if reaction.lower_bound < 0:
-                    self.other_compartments.append([reaction, "<"])
-                if reaction.upper_bound > 0:
-                    self.other_compartments.append([reaction, ">"])
-                reaction.lower_bound = 0
-                reaction.upper_bound = 0
-            elif msid in self.coretemplate.reactions:
-                self.original_bounds[reaction.id] = (reaction.lower_bound, reaction.upper_bound)
-                logger.debug(reaction.id+" core")
-                if reaction.lower_bound < 0 and self.coretemplate.reactions.get_by_id(reaction.id[0:-1]).lower_bound >= 0:
-                    logger.debug(reaction.id+" core but reversible")
+
+            self.original_bounds[reaction.id] = (reaction.lower_bound, reaction.upper_bound)
+
+            # check if reaction is in core template
+            template_reaction = self.find_reaction_in_template(reaction, self.coretemplate, self.compartment[0:1])
+
+            # update bounds to reaction
+            if template_reaction is not None:
+                logger.debug(f"{reaction.id} core")
+                if reaction.lower_bound < 0 and template_reaction.lower_bound >= 0:
+                    logger.debug(reaction.id + " core but reversible")
                     self.noncore_reactions.append([reaction, "<"])
                     reaction.lower_bound = 0
-                if reaction.upper_bound > 0 and self.coretemplate.reactions.get_by_id(reaction.id[0:-1]).upper_bound <= 0:
-                    logger.debug(reaction.id+" core but reversible")
+                if reaction.upper_bound > 0 and template_reaction.upper_bound <= 0:
+                    logger.debug(reaction.id + " core but reversible")
                     self.noncore_reactions.append([reaction, ">"])
                     reaction.upper_bound = 0
             else:
-                logger.debug(reaction.id+" noncore")
-                self.original_bounds[reaction.id] = (reaction.lower_bound, reaction.upper_bound)
-                if reaction.lower_bound < 0:
-                    self.noncore_reactions.append([reaction, "<"])
-                if reaction.upper_bound > 0:
-                    self.noncore_reactions.append([reaction, ">"])
+                logger.debug(f"{reaction.id} non core")
+                if FBAHelper.rxn_compartment(reaction) != self.compartment:
+                    if reaction.lower_bound < 0:
+                        self.other_compartments.append([reaction, "<"])
+                    if reaction.upper_bound > 0:
+                        self.other_compartments.append([reaction, ">"])
+                else:
+                    if reaction.lower_bound < 0:
+                        self.noncore_reactions.append([reaction, "<"])
+                    if reaction.upper_bound > 0:
+                        self.noncore_reactions.append([reaction, ">"])
                 reaction.lower_bound = 0
                 reaction.upper_bound = 0
 
@@ -129,13 +164,14 @@ def evaluate_growth_media(self):
             self.model.objective = self.atp_hydrolysis.id
             #self.model.objective = self.model.problem.Objective(Zero,direction="max")
             #self.atp_hydrolysis.update_variable_bounds()
-            logger.debug("ATP bounds:"+str(self.atp_hydrolysis.lower_bound)+":"+str(self.atp_hydrolysis.upper_bound))
+            logger.debug(f'ATP bounds: ({self.atp_hydrolysis.lower_bound}, {self.atp_hydrolysis.upper_bound})')
             #self.model.objective.set_linear_coefficients({self.atp_hydrolysis.forward_variable:1})
             pkgmgr = MSPackageManager.get_pkg_mgr(self.model)
             for media_tuple in self.atp_medias:
                 media = media_tuple[0]
                 logger.debug('evaluate media %s', media)
                 pkgmgr.getpkg("KBaseMediaPkg").build_package(media)
+                logger.debug('model.medium %s', self.model.medium)
                 solution = self.model.optimize()
                 logger.debug('evaluate media %s - %f (%s)', media.id, solution.objective_value, solution.status)
                 self.media_gapfill_stats[media] = None
@@ -146,6 +182,11 @@ def evaluate_growth_media(self):
                 elif solution.objective_value >= media_tuple[1]:
                     self.media_gapfill_stats[media] = {'reversed': {}, 'new': {}}
                 logger.debug('gapfilling stats:',json.dumps(self.media_gapfill_stats[media],indent=2))
+
+        if MSATPCorrection.DEBUG:
+            with open('debug.json', 'w') as outfile:
+                json.dump(self.media_gapfill_stats[media], outfile)
+
         return output
 
     def determine_growth_media(self):
@@ -163,10 +204,15 @@ def determine_growth_media(self):
                 best_score = gfscore
         if self.max_gapfilling is None:
             self.max_gapfilling = best_score
+
+        logger.debug(f'max_gapfilling: {self.max_gapfilling}, best_score: {best_score}')
+
         for media in self.media_gapfill_stats:
             gfscore = 0
             if self.media_gapfill_stats[media]:
                 gfscore = len(self.media_gapfill_stats[media]["new"].keys()) + 0.5*len(self.media_gapfill_stats[media]["reversed"].keys())
+
+            logger.debug(f'media gapfilling score: {media.id}: {gfscore}')
             if gfscore <= self.max_gapfilling and gfscore <= (best_score+self.gapfilling_delta):
                 self.selected_media.append(media)
 
@@ -211,9 +257,10 @@ def expand_model_to_genome_scale(self):
         """
         self.filtered_noncore = []
         tests = self.build_tests()
-        #Must restore noncore reactions and NOT other compartment reactions before running this function - it is not detrimental to run this twice
-        self.restore_noncore_reactions(noncore = True,othercompartment = False)
-        # Extending model with noncore reactions while retaining ATP accuracy
+        # Must restore non core reactions and NOT other compartment reactions before running this function
+        # it is not detrimental to run this twice
+        self.restore_noncore_reactions(noncore=True, othercompartment=False)
+        # Extending model with non core reactions while retaining ATP accuracy
         self.filtered_noncore = self.modelutl.reaction_expansion_test(self.noncore_reactions,tests)        
         # Removing filtered reactions
         for item in self.filtered_noncore:
@@ -225,8 +272,8 @@ def expand_model_to_genome_scale(self):
             # reaction.update_variable_bounds()
             if item[0].lower_bound == 0 and item[0].upper_bound == 0:
                 self.model.remove_reactions([item[0]])
-        #Restoring other compartment reactions but not the core because this would undo reaction filtering
-        self.restore_noncore_reactions(noncore = False,othercompartment = True)
+        # Restoring other compartment reactions but not the core because this would undo reaction filtering
+        self.restore_noncore_reactions(noncore=False, othercompartment=True)
 
     def restore_noncore_reactions(self,noncore = True,othercompartment = True):
         """
@@ -247,8 +294,7 @@ def restore_noncore_reactions(self,noncore = True,othercompartment = True):
                     reaction.lower_bound = self.original_bounds[reaction.id][0]
                     reaction.upper_bound = self.original_bounds[reaction.id][1]
 
-    
-    def build_tests(self,multiplier=None):
+    def build_tests(self, multiplier=None):
         """Build tests based on ATP media evaluations
         
         Parameters
@@ -264,23 +310,28 @@ def build_tests(self,multiplier=None):
         Raises
         ------
         """
-        if multiplier == None:
+        if multiplier is None:
             multiplier = self.multiplier
         tests = []
         self.model.objective = self.atp_hydrolysis.id
         for media in self.selected_media:
             self.modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(media)
-            obj_value = model.slim_optimize()
-            logger.debug(media.name," = ",obj_value)
-            tests.append({"media":media,"is_max_threshold": True,"threshold":multiplier*obj_value,"objective":self.atp_hydrolysis.id})
+            obj_value = self.model.slim_optimize()
+            logger.debug(f'{media.name} = {obj_value}')
+            tests.append({
+                "media": media,
+                "is_max_threshold": True,
+                "threshold": multiplier*obj_value,
+                "objective": self.atp_hydrolysis.id
+            })
         return tests
 
     def run_atp_correction(self):
         """
         Runs the entire ATP method
         :return:
         """
-        #Ensure all specified media work
+        # Ensure all specified media work
         self.evaluate_growth_media()
         self.determine_growth_media()
         self.apply_growth_media_gapfilling()
 
@@ -1,10 +1,9 @@
 import logging
-logger = logging.getLogger(__name__)
-
 import re
 import copy  # !!! the import is never used
 from cobra.core.dictlist import DictList
 
+logger = logging.getLogger(__name__)
 
 
 def normalize_role(s):
@@ -13,15 +12,12 @@ def normalize_role(s):
     s = re.sub('[\W_]+', '', s)
     return s
 
-#Static factory functions:
-            
-#def build_from_kbase_gto:
-
 
 def read_fasta(f, split='|', h_func=None):
     with open(f, 'r') as fh:
         return parse_fasta_str(fh.read(), split, h_func)
 
+
 def parse_fasta_str(faa_str, split='|', h_func=None):
     features = []
     seq = None
@@ -48,23 +44,54 @@ def parse_fasta_str(faa_str, split='|', h_func=None):
 
 
 class MSFeature:
+
     def __init__(self, feature_id, sequence, description=None):
-        self.id = feature_id; self.seq = sequence
+        """
+
+        @param feature_id: identifier for the protein coding feature
+        @param sequence: protein sequence
+        @param description: description of the feature
+        """
+
+        self.id = feature_id
+        self.seq = sequence
         self.description = description  # temporary replace with proper parsing
         self.ontology_terms = {}
         self.aliases = []
 
     def add_ontology_term(self, ontology_term, value):
+        """
+        Add functional term to the feature
+
+        @param ontology_term: type of the ontology (e.g., RAST, EC)
+        @param value: value for the ontology (e.g., pyruvate kinase)
+        """
         if ontology_term not in self.ontology_terms:
             self.ontology_terms[ontology_term] = []
         if value not in self.ontology_terms[ontology_term]:
             self.ontology_terms[ontology_term].append(value)
 
 
 class MSGenome:
+
     def __init__(self):
         self.features = DictList()
 
+    def add_features(self, feature_list: list):
+        """
+        Attach a batch of features to this genome.
+
+        :param feature_list: features to add; on success each gets `_genome` set to this genome
+        :return: None; raises ValueError if any id is already in `self.features`
+        """
+        duplicates = [feature for feature in feature_list if feature.id in self.features]
+        if duplicates:
+            raise ValueError(f"unable to add features {duplicates} already present in the genome")
+
+        for feature in feature_list:
+            feature._genome = self
+        self.features += feature_list
+
     @staticmethod
     def from_fasta(filename, contigs=0, split='|', h_func=None):  # !!! the contigs argument is never used
         genome = MSGenome()
@@ -83,10 +110,10 @@ def from_protein_sequences_hash(sequences):
         return genome
 
     def alias_hash(self):
-        return {alias:gene for gene in self.features for alias in gene.aliases}
+        return {alias: gene for gene in self.features for alias in gene.aliases}
 
-    def search_for_gene(self,query):
+    def search_for_gene(self, query):
         if query in self.features:
             return self.features.get_by_id(query)
         aliases = self.alias_hash()
-        return aliases[query] if query in aliases else None
+        return aliases[query] if query in aliases else None
@@ -1,4 +1,5 @@
 from modelseedpy.ml.predict_phenotype import create_indicator_matrix
+from modelseedpy.core.msgenome import MSGenome
 
 
 class MSGenomeClassifier:
@@ -22,7 +23,10 @@ def extract_features_from_genome(genome, ontology_term):
         return {'genome': list(features)}
 
     def classify(self, genome_or_roles, ontology_term='RAST'):
-        if isinstance(genome_or_roles,"MSGenome"):
+        """
+        param genome_or_roles:
+        """
+        if isinstance(genome_or_roles, MSGenome):
             genome_or_roles = self.extract_features_from_genome(genome_or_roles, ontology_term)
         indicator_df, master_role_list = create_indicator_matrix(genome_or_roles, self.features)
         predictions_numerical = self.model.predict(indicator_df[master_role_list].values)
 
@@ -3,6 +3,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class MediaCompound:
 
     def __init__(self, compound_id, lower_bound, upper_bound, concentration=None):
@@ -23,16 +24,18 @@ def minFlux(self):
 
 
 class MSMedia:
-    def __init__(self, media_id):
+
+    def __init__(self, media_id, name=""):
         self.id = media_id
+        self.name = name
         self.mediacompounds = DictList()
 
     @staticmethod
     def from_dict(media_dict):
         """
         Either dict with exchange bounds (example: {'cpd00027': (-10, 1000)}) or
         just absolute value of uptake (example: {''cpd00027': 10})
-        :param d:
+        :param media_dict:
         :return:
         """
         media = MSMedia('media')