From 852d862cb94eb9d5708f6a536f1e3c7c14a81c5e Mon Sep 17 00:00:00 2001 From: bunnech Date: Sat, 13 Aug 2022 17:47:31 -0400 Subject: [PATCH 1/9] Remove temporary df in merge function and add test mode. --- debug.py | 38 +++++++++++++++++++++++++++++ pycytominer/cyto_utils/cells.py | 42 ++++++++++++++++++++++++++------- 2 files changed, 71 insertions(+), 9 deletions(-) create mode 100644 debug.py diff --git a/debug.py b/debug.py new file mode 100644 index 00000000..508d8a92 --- /dev/null +++ b/debug.py @@ -0,0 +1,38 @@ +#!/usr/bin/python3 + +# imports +import os +import pandas as pd +from pycytominer.cyto_utils.cells import SingleCells + + +# define test SQL file +sql_file = "sqlite:////" + os.path.abspath( + "../perturbmatch/datasets/BR00117010.sqlite") +add_file = "sqlite:////" + os.path.abspath( + "../perturbmatch/datasets/BR00117010.sqlite") + +# define dataframe +ap = SingleCells( + sql_file=sql_file, + image_cols=["TableNumber", "ImageNumber", "Metadata_Site"], + strata=["Metadata_Plate", "Metadata_Well"] +) + +# merge comparments and metainformation into one dataframe +df_merged_sc = ap.merge_single_cells( + sc_output_file="none", + compute_subsample=False, + compression_options=None, + float_format=None, + single_cell_normalize=True, + normalize_args=None, + test=True, + test_n=100 +) + +# load additional information of file +df_info = pd.read_csv("../perturbmatch/datasets/BR00117010_augmented.csv") + +# merge single cell dataframe with additional information +df_merged_sc = df_merged_sc.merge(right=df_info, how="left", on="Metadata_Well") diff --git a/pycytominer/cyto_utils/cells.py b/pycytominer/cyto_utils/cells.py index 7fd8a7e4..b56e8374 100644 --- a/pycytominer/cyto_utils/cells.py +++ b/pycytominer/cyto_utils/cells.py @@ -397,13 +397,17 @@ def get_subsample(self, df=None, compartment="cells", rename_col=True): self.is_subset_computed = True - def load_compartment(self, compartment): + def load_compartment(self, compartment, test=False, test_n=None): """Creates the compartment dataframe. Parameters ---------- compartment : str The compartment to process. + test : bool + If true, run in test mode and only load small chunk of data + test_n : int + Set chunk size of the `read_sql` function. Returns ------- @@ -411,6 +415,10 @@ def load_compartment(self, compartment): Compartment dataframe. """ compartment_query = "select * from {}".format(compartment) + if test: + df = pd.read_sql( + sql=compartment_query, con=self.conn, chunksize=test_n) + return next(df) df = pd.read_sql(sql=compartment_query, con=self.conn) return df @@ -609,6 +617,8 @@ def merge_single_cells( float_format=None, single_cell_normalize=False, normalize_args=None, + test=False, + test_n=None, ): """Given the linking columns, merge single cell data. Normalization is also supported. @@ -626,12 +636,20 @@ def merge_single_cells( Whether or not to normalize the single cell data. normalize_args : dict, optional Additional arguments passed as input to pycytominer.normalize(). + test : bool, optional + If function run in test mode, read dataframe in chunks. + test_n : int, optional + Specifies size of test chunk loaded in test mode. + Returns ------- pandas.core.frame.DataFrame Either a dataframe (if output_file="none") or will write to file. """ + # When running in test mode, do not compute subsample + if test: + self.compute_subsample = False # Load the single cell dataframe by merging on the specific linking columns sc_df = "" @@ -658,29 +676,35 @@ def merge_single_cells( ] if isinstance(sc_df, str): - initial_df = self.load_compartment(compartment=left_compartment) + sc_df = self.load_compartment( + compartment=left_compartment, test=test, test_n=test_n) if compute_subsample: # Sample cells proportionally by self.strata - self.get_subsample(df=initial_df, rename_col=False) + self.get_subsample(df=sc_df, rename_col=False) subset_logic_df = self.subset_data_df.drop( self.image_df.columns, axis="columns" ) - initial_df = subset_logic_df.merge( - initial_df, how="left", on=subset_logic_df.columns.tolist() - ).reindex(initial_df.columns, axis="columns") + sc_df = subset_logic_df.merge( + sc_df, how="left", on=subset_logic_df.columns.tolist() + ).reindex(sc_df.columns, axis="columns") - sc_df = initial_df.merge( - self.load_compartment(compartment=right_compartment), + sc_df = sc_df.merge( + self.load_compartment( + compartment=right_compartment, + test=test, test_n=test_n), left_on=self.merge_cols + [left_link_col], right_on=self.merge_cols + [right_link_col], suffixes=merge_suffix, ) + else: sc_df = sc_df.merge( - self.load_compartment(compartment=right_compartment), + self.load_compartment( + compartment=right_compartment, + test=test, test_n=test_n), left_on=self.merge_cols + [left_link_col], right_on=self.merge_cols + [right_link_col], suffixes=merge_suffix, From 766e4c2ea57230b2432fbc8bb684de6ad03964df Mon Sep 17 00:00:00 2001 From: bunnech Date: Mon, 15 Aug 2022 19:08:51 -0400 Subject: [PATCH 2/9] Remove test environment and replace Pandas read_sql function. --- debug.py | 6 +-- pycytominer/cyto_utils/cells.py | 84 +++++++++++++++++++++------------ 2 files changed, 57 insertions(+), 33 deletions(-) diff --git a/debug.py b/debug.py index 508d8a92..69ef1615 100644 --- a/debug.py +++ b/debug.py @@ -27,12 +27,12 @@ float_format=None, single_cell_normalize=True, normalize_args=None, - test=True, - test_n=100 ) # load additional information of file df_info = pd.read_csv("../perturbmatch/datasets/BR00117010_augmented.csv") +df_info_meta = [m for m in df_info.columns if m.startswith("Metadata_")] # merge single cell dataframe with additional information -df_merged_sc = df_merged_sc.merge(right=df_info, how="left", on="Metadata_Well") +df_merged_sc = df_merged_sc.merge( + right=df_info_meta, how="left", on=["Metadata_Plate", "Metadata_Well"]) diff --git a/pycytominer/cyto_utils/cells.py b/pycytominer/cyto_utils/cells.py index b56e8374..4ab10093 100644 --- a/pycytominer/cyto_utils/cells.py +++ b/pycytominer/cyto_utils/cells.py @@ -397,30 +397,69 @@ def get_subsample(self, df=None, compartment="cells", rename_col=True): self.is_subset_computed = True - def load_compartment(self, compartment, test=False, test_n=None): + def is_feature_col(self, col): + """Check if column is a feature.""" + return ( + col.startswith("Cell") + or col.startswith("Cytoplasm") + or col.startswith("Nuclei") + ) + + def count(self, table): + """Count total number of rows for a table.""" + (num_rows,) = next(self.conn.execute(f"SELECT COUNT(*) FROM {table}")) + return num_rows + + def get_columns(self, table): + """Get feature and metadata columns lists.""" + ptr = self.conn.execute(f"SELECT * FROM {table} LIMIT 1").cursor + col_names = [obj[0] for obj in ptr.description] + + feat_cols = [] + meta_cols = [] + for col in col_names: + if self.is_feature_col(col): + feat_cols.append(col) + else: + meta_cols.append(col) + + return meta_cols, feat_cols + + def load_compartment(self, compartment): """Creates the compartment dataframe. Parameters ---------- compartment : str The compartment to process. - test : bool - If true, run in test mode and only load small chunk of data - test_n : int - Set chunk size of the `read_sql` function. Returns ------- pandas.core.frame.DataFrame Compartment dataframe. """ - compartment_query = "select * from {}".format(compartment) - if test: - df = pd.read_sql( - sql=compartment_query, con=self.conn, chunksize=test_n) - return next(df) - df = pd.read_sql(sql=compartment_query, con=self.conn) - return df + + # Get data useful to pre-alloc memory + num_cells = self.count(compartment) + meta_cols, feat_cols = self.get_columns(compartment) + num_meta, num_feats = len(meta_cols), len(feat_cols) + + feats = np.empty(shape=(num_cells, num_feats), dtype=np.float32) + metas = pd.DataFrame(columns=meta_cols, index=range(num_cells)) + + # Load data row by row for both meta information and features + columns = ", ".join(meta_cols + feat_cols) + query = f"select {columns} from {compartment}" + resultset = self.conn.execute(query) + + print(f"Loading compartment {compartment}.") + for i, row in enumerate(resultset): + metas.loc[i] = row[:num_meta] + feats[i] = row[num_meta:] + + # Concatenate both into final output per compartment + return pd.concat( + [pd.DataFrame(columns=feat_cols, data=feats), metas], axis=1) def aggregate_compartment( self, @@ -617,8 +656,6 @@ def merge_single_cells( float_format=None, single_cell_normalize=False, normalize_args=None, - test=False, - test_n=None, ): """Given the linking columns, merge single cell data. Normalization is also supported. @@ -636,20 +673,12 @@ def merge_single_cells( Whether or not to normalize the single cell data. normalize_args : dict, optional Additional arguments passed as input to pycytominer.normalize(). - test : bool, optional - If function run in test mode, read dataframe in chunks. - test_n : int, optional - Specifies size of test chunk loaded in test mode. - Returns ------- pandas.core.frame.DataFrame Either a dataframe (if output_file="none") or will write to file. """ - # When running in test mode, do not compute subsample - if test: - self.compute_subsample = False # Load the single cell dataframe by merging on the specific linking columns sc_df = "" @@ -676,8 +705,7 @@ def merge_single_cells( ] if isinstance(sc_df, str): - sc_df = self.load_compartment( - compartment=left_compartment, test=test, test_n=test_n) + sc_df = self.load_compartment(compartment=left_compartment) if compute_subsample: # Sample cells proportionally by self.strata @@ -692,9 +720,7 @@ def merge_single_cells( ).reindex(sc_df.columns, axis="columns") sc_df = sc_df.merge( - self.load_compartment( - compartment=right_compartment, - test=test, test_n=test_n), + self.load_compartment(compartment=right_compartment), left_on=self.merge_cols + [left_link_col], right_on=self.merge_cols + [right_link_col], suffixes=merge_suffix, @@ -702,9 +728,7 @@ def merge_single_cells( else: sc_df = sc_df.merge( - self.load_compartment( - compartment=right_compartment, - test=test, test_n=test_n), + self.load_compartment(compartment=right_compartment), left_on=self.merge_cols + [left_link_col], right_on=self.merge_cols + [right_link_col], suffixes=merge_suffix, From 0118ccd0c96355291ed497f984fbb4f65c519676 Mon Sep 17 00:00:00 2001 From: bunnech Date: Mon, 15 Aug 2022 19:14:59 -0400 Subject: [PATCH 3/9] Dump to parquet and add filename. --- debug.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/debug.py b/debug.py index 69ef1615..5886ce75 100644 --- a/debug.py +++ b/debug.py @@ -7,10 +7,9 @@ # define test SQL file +filename = "BR00117010" sql_file = "sqlite:////" + os.path.abspath( - "../perturbmatch/datasets/BR00117010.sqlite") -add_file = "sqlite:////" + os.path.abspath( - "../perturbmatch/datasets/BR00117010.sqlite") + f"../perturbmatch/datasets/{filename}.sqlite") # define dataframe ap = SingleCells( @@ -30,9 +29,13 @@ ) # load additional information of file -df_info = pd.read_csv("../perturbmatch/datasets/BR00117010_augmented.csv") +df_info = pd.read_csv(f"../perturbmatch/datasets/{filename}_augmented.csv") + +# select only metadata df_info_meta = [m for m in df_info.columns if m.startswith("Metadata_")] # merge single cell dataframe with additional information df_merged_sc = df_merged_sc.merge( right=df_info_meta, how="left", on=["Metadata_Plate", "Metadata_Well"]) + +df_merged_sc.to_parquet(f"../perturbmatch/datasets/{filename}.parquet") From abb2774c104f3249e0382b45810ef53e742e8e1e Mon Sep 17 00:00:00 2001 From: bunnech Date: Tue, 16 Aug 2022 09:58:31 -0400 Subject: [PATCH 4/9] Correct dtype and adapt pytest. --- debug.py | 41 ------------------- pycytominer/cyto_utils/cells.py | 10 +++-- .../tests/test_cyto_utils/test_cells.py | 10 +++-- 3 files changed, 13 insertions(+), 48 deletions(-) delete mode 100644 debug.py diff --git a/debug.py b/debug.py deleted file mode 100644 index 5886ce75..00000000 --- a/debug.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/python3 - -# imports -import os -import pandas as pd -from pycytominer.cyto_utils.cells import SingleCells - - -# define test SQL file -filename = "BR00117010" -sql_file = "sqlite:////" + os.path.abspath( - f"../perturbmatch/datasets/{filename}.sqlite") - -# define dataframe -ap = SingleCells( - sql_file=sql_file, - image_cols=["TableNumber", "ImageNumber", "Metadata_Site"], - strata=["Metadata_Plate", "Metadata_Well"] -) - -# merge comparments and metainformation into one dataframe -df_merged_sc = ap.merge_single_cells( - sc_output_file="none", - compute_subsample=False, - compression_options=None, - float_format=None, - single_cell_normalize=True, - normalize_args=None, -) - -# load additional information of file -df_info = pd.read_csv(f"../perturbmatch/datasets/{filename}_augmented.csv") - -# select only metadata -df_info_meta = [m for m in df_info.columns if m.startswith("Metadata_")] - -# merge single cell dataframe with additional information -df_merged_sc = df_merged_sc.merge( - right=df_info_meta, how="left", on=["Metadata_Plate", "Metadata_Well"]) - -df_merged_sc.to_parquet(f"../perturbmatch/datasets/{filename}.parquet") diff --git a/pycytominer/cyto_utils/cells.py b/pycytominer/cyto_utils/cells.py index 4ab10093..46d4fbdb 100644 --- a/pycytominer/cyto_utils/cells.py +++ b/pycytominer/cyto_utils/cells.py @@ -444,20 +444,22 @@ def load_compartment(self, compartment): meta_cols, feat_cols = self.get_columns(compartment) num_meta, num_feats = len(meta_cols), len(feat_cols) - feats = np.empty(shape=(num_cells, num_feats), dtype=np.float32) + # Use pre-allocated np.array for data + feats = np.empty(shape=(num_cells, num_feats), dtype=np.float64) + # Use pre-allocated pd.DataFrame for metadata metas = pd.DataFrame(columns=meta_cols, index=range(num_cells)) - # Load data row by row for both meta information and features + # Query database for selected columns of chosen compartment columns = ", ".join(meta_cols + feat_cols) query = f"select {columns} from {compartment}" resultset = self.conn.execute(query) - print(f"Loading compartment {compartment}.") + # Load data row by row for both meta information and features for i, row in enumerate(resultset): metas.loc[i] = row[:num_meta] feats[i] = row[num_meta:] - # Concatenate both into final output per compartment + # Return concatenated data and metainformation of compartment return pd.concat( [pd.DataFrame(columns=feat_cols, data=feats), metas], axis=1) diff --git a/pycytominer/tests/test_cyto_utils/test_cells.py b/pycytominer/tests/test_cyto_utils/test_cells.py index 2f86849e..fd41bfea 100644 --- a/pycytominer/tests/test_cyto_utils/test_cells.py +++ b/pycytominer/tests/test_cyto_utils/test_cells.py @@ -235,10 +235,12 @@ def test_SingleCells_count(): def test_load_compartment(): loaded_compartment_df = ap.load_compartment(compartment="cells") - pd.testing.assert_frame_equal(loaded_compartment_df, cells_df) + pd.testing.assert_frame_equal( + loaded_compartment_df, cells_df, check_dtype=False) # Test non-canonical compartment loading - pd.testing.assert_frame_equal(new_compartment_df, ap_new.load_compartment("new")) + pd.testing.assert_frame_equal( + new_compartment_df, ap_new.load_compartment("new"), check_dtype=False) def test_merge_single_cells(): @@ -307,6 +309,7 @@ def test_merge_single_cells(): pd.testing.assert_frame_equal( norm_method_df.sort_index(axis=1), manual_merge_normalize.sort_index(axis=1), + check_dtype=False ) # Test non-canonical compartment merging @@ -337,7 +340,8 @@ def test_merge_single_cells(): default_feature_infer_df = ap_new.merge_single_cells(single_cell_normalize=True) - pd.testing.assert_frame_equal(norm_new_method_df, default_feature_infer_df) + pd.testing.assert_frame_equal( + norm_new_method_df, default_feature_infer_df, check_dtype=False) pd.testing.assert_frame_equal( norm_new_method_df, norm_new_method_no_feature_infer_df ) From 0ef040e1afcf90e6236e775fb06e6fdd1ecb23f9 Mon Sep 17 00:00:00 2001 From: bunnech Date: Tue, 16 Aug 2022 14:04:22 -0400 Subject: [PATCH 5/9] Add feedback. --- pycytominer/cyto_utils/cells.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/pycytominer/cyto_utils/cells.py b/pycytominer/cyto_utils/cells.py index 46d4fbdb..85a2965f 100644 --- a/pycytominer/cyto_utils/cells.py +++ b/pycytominer/cyto_utils/cells.py @@ -397,20 +397,12 @@ def get_subsample(self, df=None, compartment="cells", rename_col=True): self.is_subset_computed = True - def is_feature_col(self, col): - """Check if column is a feature.""" - return ( - col.startswith("Cell") - or col.startswith("Cytoplasm") - or col.startswith("Nuclei") - ) - - def count(self, table): + def count_sql_table_rows(self, table): """Count total number of rows for a table.""" (num_rows,) = next(self.conn.execute(f"SELECT COUNT(*) FROM {table}")) return num_rows - def get_columns(self, table): + def get_sql_table_col_names(self, table): """Get feature and metadata columns lists.""" ptr = self.conn.execute(f"SELECT * FROM {table} LIMIT 1").cursor col_names = [obj[0] for obj in ptr.description] @@ -418,7 +410,7 @@ def get_columns(self, table): feat_cols = [] meta_cols = [] for col in col_names: - if self.is_feature_col(col): + if col.lower().startswith(tuple(self.compartments)): feat_cols.append(col) else: meta_cols.append(col) @@ -440,8 +432,8 @@ def load_compartment(self, compartment): """ # Get data useful to pre-alloc memory - num_cells = self.count(compartment) - meta_cols, feat_cols = self.get_columns(compartment) + num_cells = self.count_sql_table_rows(compartment) + meta_cols, feat_cols = self.get_sql_table_col_names(compartment) num_meta, num_feats = len(meta_cols), len(feat_cols) # Use pre-allocated np.array for data @@ -452,10 +444,10 @@ def load_compartment(self, compartment): # Query database for selected columns of chosen compartment columns = ", ".join(meta_cols + feat_cols) query = f"select {columns} from {compartment}" - resultset = self.conn.execute(query) + query_result = self.conn.execute(query) # Load data row by row for both meta information and features - for i, row in enumerate(resultset): + for i, row in enumerate(query_result): metas.loc[i] = row[:num_meta] feats[i] = row[num_meta:] From f41d72af380f6ed13c032cad1fba066c1bc358e7 Mon Sep 17 00:00:00 2001 From: bunnech Date: Tue, 16 Aug 2022 14:46:00 -0400 Subject: [PATCH 6/9] Add additional pytests. --- pycytominer/tests/test_cyto_utils/test_cells.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pycytominer/tests/test_cyto_utils/test_cells.py b/pycytominer/tests/test_cyto_utils/test_cells.py index fd41bfea..8dafe2fb 100644 --- a/pycytominer/tests/test_cyto_utils/test_cells.py +++ b/pycytominer/tests/test_cyto_utils/test_cells.py @@ -243,6 +243,20 @@ def test_load_compartment(): new_compartment_df, ap_new.load_compartment("new"), check_dtype=False) +def test_sc_count_sql_table(): # or something + # Iterate over initialized compartments + for compartment in ap.compartments: + result_row_count = ap.count_sql_table_rows(table=compartment) + assert result_row_count == 100 + + +def test_get_sql_table_col_names(): # or something + # Iterate over initialized compartments + for compartment in ap.compartments: + meta_cols, _ = ap.get_sql_table_col_names(table=compartment) + assert meta_cols == ['ObjectNumber', 'ImageNumber', 'TableNumber'] + + def test_merge_single_cells(): sc_merged_df = ap.merge_single_cells() From 1c6c000ca8c0617a75f65d11d3a23daa6e48f9b3 Mon Sep 17 00:00:00 2001 From: bunnech Date: Tue, 16 Aug 2022 14:59:42 -0400 Subject: [PATCH 7/9] Add additional pytests. --- pycytominer/tests/test_cyto_utils/test_cells.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pycytominer/tests/test_cyto_utils/test_cells.py b/pycytominer/tests/test_cyto_utils/test_cells.py index 8dafe2fb..e9c0a154 100644 --- a/pycytominer/tests/test_cyto_utils/test_cells.py +++ b/pycytominer/tests/test_cyto_utils/test_cells.py @@ -243,14 +243,14 @@ def test_load_compartment(): new_compartment_df, ap_new.load_compartment("new"), check_dtype=False) -def test_sc_count_sql_table(): # or something +def test_sc_count_sql_table(): # Iterate over initialized compartments for compartment in ap.compartments: result_row_count = ap.count_sql_table_rows(table=compartment) assert result_row_count == 100 -def test_get_sql_table_col_names(): # or something +def test_get_sql_table_col_names(): # Iterate over initialized compartments for compartment in ap.compartments: meta_cols, _ = ap.get_sql_table_col_names(table=compartment) From e9d9edb2615333f75d0cb5ff089fc53310e4650f Mon Sep 17 00:00:00 2001 From: bunnech Date: Tue, 16 Aug 2022 15:07:52 -0400 Subject: [PATCH 8/9] Add additional pytests. --- pycytominer/tests/test_cyto_utils/test_cells.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pycytominer/tests/test_cyto_utils/test_cells.py b/pycytominer/tests/test_cyto_utils/test_cells.py index e9c0a154..462b4936 100644 --- a/pycytominer/tests/test_cyto_utils/test_cells.py +++ b/pycytominer/tests/test_cyto_utils/test_cells.py @@ -253,8 +253,10 @@ def test_sc_count_sql_table(): def test_get_sql_table_col_names(): # Iterate over initialized compartments for compartment in ap.compartments: - meta_cols, _ = ap.get_sql_table_col_names(table=compartment) + meta_cols, feat_cols = ap.get_sql_table_col_names(table=compartment) assert meta_cols == ['ObjectNumber', 'ImageNumber', 'TableNumber'] + for i in ['a', 'b', 'c', 'd']: + assert f"{compartment.capitalize()}_{i}" in feat_cols def test_merge_single_cells(): From b8ad4c83e88b22e0a4e43f99cfc745bd94d4765d Mon Sep 17 00:00:00 2001 From: bunnech Date: Tue, 16 Aug 2022 15:24:42 -0400 Subject: [PATCH 9/9] Change existing pytests to consider order meta features, then morphological features. --- pycytominer/cyto_utils/cells.py | 2 +- pycytominer/tests/test_cyto_utils/test_cells.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pycytominer/cyto_utils/cells.py b/pycytominer/cyto_utils/cells.py index 85a2965f..e8271c70 100644 --- a/pycytominer/cyto_utils/cells.py +++ b/pycytominer/cyto_utils/cells.py @@ -453,7 +453,7 @@ def load_compartment(self, compartment): # Return concatenated data and metainformation of compartment return pd.concat( - [pd.DataFrame(columns=feat_cols, data=feats), metas], axis=1) + [metas, pd.DataFrame(columns=feat_cols, data=feats)], axis=1) def aggregate_compartment( self, diff --git a/pycytominer/tests/test_cyto_utils/test_cells.py b/pycytominer/tests/test_cyto_utils/test_cells.py index 462b4936..910b6734 100644 --- a/pycytominer/tests/test_cyto_utils/test_cells.py +++ b/pycytominer/tests/test_cyto_utils/test_cells.py @@ -236,11 +236,16 @@ def test_SingleCells_count(): def test_load_compartment(): loaded_compartment_df = ap.load_compartment(compartment="cells") pd.testing.assert_frame_equal( - loaded_compartment_df, cells_df, check_dtype=False) + loaded_compartment_df, + cells_df.reindex(columns=loaded_compartment_df.columns), + check_dtype=False) # Test non-canonical compartment loading + loaded_compartment_df = ap_new.load_compartment("new") pd.testing.assert_frame_equal( - new_compartment_df, ap_new.load_compartment("new"), check_dtype=False) + new_compartment_df.reindex(columns=loaded_compartment_df.columns), + loaded_compartment_df, + check_dtype=False) def test_sc_count_sql_table():