Skip to content

Commit 01f9e30

Browse files
authored
Merge pull request #23 from cshenry/claude/add-function-msexpression-011CUoKZ7YGo72duYdrs7oTT
Add average_expression_replicates method to MSExpression
2 parents ac62f9b + da8e975 commit 01f9e30

File tree

1 file changed

+68
-0
lines changed

1 file changed

+68
-0
lines changed

modelseedpy/multiomics/msexpression.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,74 @@ def translate_data(self, target_type: str) -> 'MSExpression':
598598
new_expression._data.loc[feature.id, condition.id] = value
599599
return new_expression
600600

601+
def average_expression_replicates(self, strain_list: list) -> 'MSExpression':
    """Average expression replicates for each strain.

    Takes an MSExpression object with replicate columns (e.g., ACN2586_1,
    ACN2586_2, ...) and averages them to create single columns per strain
    (e.g., ACN2586). A strain that has no ``<strain>_*`` replicate columns
    but does have a column named exactly ``<strain>`` is carried over
    unchanged. Strains with no matching column at all are skipped with a
    warning. Repeated names in ``strain_list`` are processed only once.

    Args:
        strain_list: List of strain names (e.g., ["ACN2586", "ACN2821", ...])

    Returns:
        New MSExpression object (a deep copy of this one) whose data holds
        one column per strain that had data, in ``strain_list`` order, and
        whose conditions are rebuilt to match those columns. The row index
        (and its name) is the same as in the source data.

    Raises:
        ValueError: If no data found for any strain in the list
    """
    try:
        # Read-only access: every column stored below is a freshly computed
        # or explicitly copied Series, so the source frame is never mutated.
        expression_df = self._data

        # strain name -> averaged (or passed-through) column
        averaged_data = {}

        # For each strain, find and average its replicates
        for strain in strain_list:
            if strain in averaged_data:
                # Duplicate entry in strain_list; already handled.
                continue

            # Columns that match this strain pattern (e.g., ACN2586_1, ...).
            # Non-string labels can never match and must not break the scan.
            prefix = f"{strain}_"
            replicate_cols = [
                col
                for col in expression_df.columns
                if isinstance(col, str) and col.startswith(prefix)
            ]

            if replicate_cols:
                # Row-wise mean across the replicate columns
                averaged_data[strain] = expression_df[replicate_cols].mean(axis=1)
                logger.info(f"Averaged {len(replicate_cols)} replicates for strain {strain}")
            elif strain in expression_df.columns:
                # No replicates found - the strain column exists as-is
                averaged_data[strain] = expression_df[strain].copy()
                logger.info(f"No replicates found for {strain}, using existing column")
            else:
                logger.warning(f"No data found for strain {strain}")

        if not averaged_data:
            # Honour the documented contract instead of returning an
            # expression object with an empty frame and no conditions.
            raise ValueError("No data found for any strain in the list")

        # Build the frame directly on the source index. This keeps the
        # original index name and avoids a helper 'index' column that
        # would clash with a strain that happens to be called "index".
        averaged_df = pd.DataFrame(averaged_data, index=expression_df.index)

        # Deep copy so the result shares no mutable state with self
        averaged_expression = copy.deepcopy(self)

        # Replace the data with averaged data
        averaged_expression._data = averaged_df

        # Rebuild the conditions from the resulting columns; these are
        # unique and keep strain_list order, so no duplicate ids are added.
        averaged_expression.conditions = DictList()
        for strain in averaged_df.columns:
            condition = MSCondition(strain, averaged_expression)
            averaged_expression.conditions.append(condition)

        logger.info(f"Created averaged expression data with {len(averaged_expression.conditions)} conditions")

        return averaged_expression

    except Exception as e:
        # Log for context, then let the caller see the original exception.
        logger.error(f"Error averaging expression replicates: {str(e)}")
        raise
668+
601669
def fit_model_flux_to_data(
602670
self,
603671
model: 'MSModelUtil',

0 commit comments

Comments
 (0)