@@ -598,6 +598,74 @@ def translate_data(self, target_type: str) -> 'MSExpression':
598598 new_expression ._data .loc [feature .id , condition .id ] = value
599599 return new_expression
600600
def average_expression_replicates(self, strain_list: list) -> 'MSExpression':
    """Average expression replicates for each strain.

    Takes an MSExpression object with replicate columns (e.g., ACN2586_1,
    ACN2586_2, ...) and averages them row-wise to create a single column per
    strain (e.g., ACN2586). A strain that has no ``<strain>_*`` columns but
    does have a column named exactly ``<strain>`` is carried over unchanged.
    Strains with no matching column at all are skipped with a warning.

    Args:
        strain_list: List of strain names (e.g., ["ACN2586", "ACN2821", ...]).
            Repeated names are processed once; output columns follow the
            order of first appearance.

    Returns:
        New MSExpression object (a deep copy of this one) whose data holds
        one column per strain that was found and whose conditions list is
        rebuilt to match those columns. This object is not modified.

    Raises:
        ValueError: If none of the strains in ``strain_list`` has any data
            (this includes an empty ``strain_list``).
    """
    try:
        # Read-only access: every value stored below is either a freshly
        # computed mean or is copied when the new DataFrame is assembled.
        expression_df = self._data

        # strain name -> averaged (or carried-over) column, in request order
        averaged_data = {}

        # dict.fromkeys drops repeated strain names while keeping order, so
        # each strain yields at most one column and one condition.
        for strain in dict.fromkeys(strain_list):
            # NOTE: this is a plain prefix match, so a strain "A" also
            # captures columns such as "A_B_1" that belong to a strain "A_B".
            prefix = f"{strain}_"
            replicate_cols = [
                col
                for col in expression_df.columns
                if isinstance(col, str) and col.startswith(prefix)
            ]

            if replicate_cols:
                averaged_data[strain] = expression_df[replicate_cols].mean(axis=1)
                logger.info(f"Averaged {len(replicate_cols)} replicates for strain {strain}")
            elif strain in expression_df.columns:
                # No replicates found - the strain column exists as-is
                averaged_data[strain] = expression_df[strain]
                logger.info(f"No replicates found for {strain}, using existing column")
            else:
                logger.warning(f"No data found for strain {strain}")

        if not averaged_data:
            raise ValueError(
                "No expression data found for any of the requested strains"
            )

        # All collected Series share the source index, so the frame keeps the
        # original row labels (and index name) without a helper column.
        averaged_df = pd.DataFrame(averaged_data)

        # Copy the whole expression object so everything other than the data
        # and conditions is carried over independently of this instance.
        averaged_expression = copy.deepcopy(self)
        averaged_expression._data = averaged_df

        # Rebuild the conditions list so it matches the new columns exactly
        averaged_expression.conditions = DictList()
        for strain in averaged_data:
            averaged_expression.conditions.append(
                MSCondition(strain, averaged_expression)
            )

        logger.info(
            f"Created averaged expression data with {len(averaged_expression.conditions)} conditions"
        )

        return averaged_expression

    except Exception as e:
        # Log for context, then let the caller see the original exception
        logger.error(f"Error averaging expression replicates: {str(e)}")
        raise
668+
601669 def fit_model_flux_to_data (
602670 self ,
603671 model : 'MSModelUtil' ,
0 commit comments