From 852d862cb94eb9d5708f6a536f1e3c7c14a81c5e Mon Sep 17 00:00:00 2001
From: bunnech <bunnec@inf.ethz.ch>
Date: Sat, 13 Aug 2022 17:47:31 -0400
Subject: [PATCH 1/9] Remove temporary df in merge function and add test mode.

---
 debug.py                        | 38 +++++++++++++++++++++++++++++
 pycytominer/cyto_utils/cells.py | 42 ++++++++++++++++++++++++++-------
 2 files changed, 71 insertions(+), 9 deletions(-)
 create mode 100644 debug.py

diff --git a/debug.py b/debug.py
new file mode 100644
index 00000000..508d8a92
--- /dev/null
+++ b/debug.py
@@ -0,0 +1,38 @@
+#!/usr/bin/python3
+
+# imports
+import os
+import pandas as pd
+from pycytominer.cyto_utils.cells import SingleCells
+
+
+# define test SQL file
+sql_file = "sqlite:////" + os.path.abspath(
+    "../perturbmatch/datasets/BR00117010.sqlite")
+add_file = "sqlite:////" + os.path.abspath(
+    "../perturbmatch/datasets/BR00117010.sqlite")
+
+# define dataframe
+ap = SingleCells(
+    sql_file=sql_file,
+    image_cols=["TableNumber", "ImageNumber", "Metadata_Site"],
+    strata=["Metadata_Plate", "Metadata_Well"]
+)
+
+# merge comparments and metainformation into one dataframe
+df_merged_sc = ap.merge_single_cells(
+    sc_output_file="none",
+    compute_subsample=False,
+    compression_options=None,
+    float_format=None,
+    single_cell_normalize=True,
+    normalize_args=None,
+    test=True,
+    test_n=100
+)
+
+# load additional information of file
+df_info = pd.read_csv("../perturbmatch/datasets/BR00117010_augmented.csv")
+
+# merge single cell dataframe with additional information
+df_merged_sc = df_merged_sc.merge(right=df_info, how="left", on="Metadata_Well")
diff --git a/pycytominer/cyto_utils/cells.py b/pycytominer/cyto_utils/cells.py
index 7fd8a7e4..b56e8374 100644
--- a/pycytominer/cyto_utils/cells.py
+++ b/pycytominer/cyto_utils/cells.py
@@ -397,13 +397,17 @@ def get_subsample(self, df=None, compartment="cells", rename_col=True):
 
         self.is_subset_computed = True
 
-    def load_compartment(self, compartment):
+    def load_compartment(self, compartment, test=False, test_n=None):
         """Creates the compartment dataframe.
 
         Parameters
         ----------
         compartment : str
             The compartment to process.
+        test : bool
+            If true, run in test mode and only load small chunk of data
+        test_n : int
+            Set chunk size of the `read_sql` function.
 
         Returns
         -------
@@ -411,6 +415,10 @@ def load_compartment(self, compartment):
             Compartment dataframe.
         """
         compartment_query = "select * from {}".format(compartment)
+        if test:
+            df = pd.read_sql(
+                sql=compartment_query, con=self.conn, chunksize=test_n)
+            return next(df)
         df = pd.read_sql(sql=compartment_query, con=self.conn)
         return df
 
@@ -609,6 +617,8 @@ def merge_single_cells(
         float_format=None,
         single_cell_normalize=False,
         normalize_args=None,
+        test=False,
+        test_n=None,
     ):
         """Given the linking columns, merge single cell data. Normalization is also supported.
 
@@ -626,12 +636,20 @@ def merge_single_cells(
             Whether or not to normalize the single cell data.
         normalize_args : dict, optional
             Additional arguments passed as input to pycytominer.normalize().
+        test : bool, optional
+            If function run in test mode, read dataframe in chunks.
+        test_n : int, optional
+            Specifies size of test chunk loaded in test mode.
+
 
         Returns
         -------
         pandas.core.frame.DataFrame
             Either a dataframe (if output_file="none") or will write to file.
         """
+        # When running in test mode, do not compute subsample
+        if test:
+            self.compute_subsample = False
 
         # Load the single cell dataframe by merging on the specific linking columns
         sc_df = ""
@@ -658,29 +676,35 @@ def merge_single_cells(
                 ]
 
                 if isinstance(sc_df, str):
-                    initial_df = self.load_compartment(compartment=left_compartment)
+                    sc_df = self.load_compartment(
+                        compartment=left_compartment, test=test, test_n=test_n)
 
                     if compute_subsample:
                         # Sample cells proportionally by self.strata
-                        self.get_subsample(df=initial_df, rename_col=False)
+                        self.get_subsample(df=sc_df, rename_col=False)
 
                         subset_logic_df = self.subset_data_df.drop(
                             self.image_df.columns, axis="columns"
                         )
 
-                        initial_df = subset_logic_df.merge(
-                            initial_df, how="left", on=subset_logic_df.columns.tolist()
-                        ).reindex(initial_df.columns, axis="columns")
+                        sc_df = subset_logic_df.merge(
+                            sc_df, how="left", on=subset_logic_df.columns.tolist()
+                        ).reindex(sc_df.columns, axis="columns")
 
-                    sc_df = initial_df.merge(
-                        self.load_compartment(compartment=right_compartment),
+                    sc_df = sc_df.merge(
+                        self.load_compartment(
+                            compartment=right_compartment,
+                            test=test, test_n=test_n),
                         left_on=self.merge_cols + [left_link_col],
                         right_on=self.merge_cols + [right_link_col],
                         suffixes=merge_suffix,
                     )
+
                 else:
                     sc_df = sc_df.merge(
-                        self.load_compartment(compartment=right_compartment),
+                        self.load_compartment(
+                            compartment=right_compartment,
+                            test=test, test_n=test_n),
                         left_on=self.merge_cols + [left_link_col],
                         right_on=self.merge_cols + [right_link_col],
                         suffixes=merge_suffix,

From 766e4c2ea57230b2432fbc8bb684de6ad03964df Mon Sep 17 00:00:00 2001
From: bunnech <bunnec@inf.ethz.ch>
Date: Mon, 15 Aug 2022 19:08:51 -0400
Subject: [PATCH 2/9] Remove test mode and replace Pandas read_sql
 function.

---
 debug.py                        |  6 +--
 pycytominer/cyto_utils/cells.py | 84 +++++++++++++++++++++------------
 2 files changed, 57 insertions(+), 33 deletions(-)

diff --git a/debug.py b/debug.py
index 508d8a92..69ef1615 100644
--- a/debug.py
+++ b/debug.py
@@ -27,12 +27,12 @@
     float_format=None,
     single_cell_normalize=True,
     normalize_args=None,
-    test=True,
-    test_n=100
 )
 
 # load additional information of file
 df_info = pd.read_csv("../perturbmatch/datasets/BR00117010_augmented.csv")
+df_info_meta = [m for m in df_info.columns if m.startswith("Metadata_")]
 
 # merge single cell dataframe with additional information
-df_merged_sc = df_merged_sc.merge(right=df_info, how="left", on="Metadata_Well")
+df_merged_sc = df_merged_sc.merge(
+    right=df_info_meta, how="left", on=["Metadata_Plate", "Metadata_Well"])
diff --git a/pycytominer/cyto_utils/cells.py b/pycytominer/cyto_utils/cells.py
index b56e8374..4ab10093 100644
--- a/pycytominer/cyto_utils/cells.py
+++ b/pycytominer/cyto_utils/cells.py
@@ -397,30 +397,69 @@ def get_subsample(self, df=None, compartment="cells", rename_col=True):
 
         self.is_subset_computed = True
 
-    def load_compartment(self, compartment, test=False, test_n=None):
+    def is_feature_col(self, col):
+        """Check if column is a feature."""
+        return (
+            col.startswith("Cell")
+            or col.startswith("Cytoplasm")
+            or col.startswith("Nuclei")
+        )
+
+    def count(self, table):
+        """Count total number of rows for a table."""
+        (num_rows,) = next(self.conn.execute(f"SELECT COUNT(*) FROM {table}"))
+        return num_rows
+
+    def get_columns(self, table):
+        """Get feature and metadata columns lists."""
+        ptr = self.conn.execute(f"SELECT * FROM {table} LIMIT 1").cursor
+        col_names = [obj[0] for obj in ptr.description]
+
+        feat_cols = []
+        meta_cols = []
+        for col in col_names:
+            if self.is_feature_col(col):
+                feat_cols.append(col)
+            else:
+                meta_cols.append(col)
+
+        return meta_cols, feat_cols
+
+    def load_compartment(self, compartment):
         """Creates the compartment dataframe.
 
         Parameters
         ----------
         compartment : str
             The compartment to process.
-        test : bool
-            If true, run in test mode and only load small chunk of data
-        test_n : int
-            Set chunk size of the `read_sql` function.
 
         Returns
         -------
         pandas.core.frame.DataFrame
             Compartment dataframe.
         """
-        compartment_query = "select * from {}".format(compartment)
-        if test:
-            df = pd.read_sql(
-                sql=compartment_query, con=self.conn, chunksize=test_n)
-            return next(df)
-        df = pd.read_sql(sql=compartment_query, con=self.conn)
-        return df
+
+        # Get data useful to pre-alloc memory
+        num_cells = self.count(compartment)
+        meta_cols, feat_cols = self.get_columns(compartment)
+        num_meta, num_feats = len(meta_cols), len(feat_cols)
+
+        feats = np.empty(shape=(num_cells, num_feats), dtype=np.float32)
+        metas = pd.DataFrame(columns=meta_cols, index=range(num_cells))
+
+        # Load data row by row for both meta information and features
+        columns = ", ".join(meta_cols + feat_cols)
+        query = f"select {columns} from {compartment}"
+        resultset = self.conn.execute(query)
+
+        print(f"Loading compartment {compartment}.")
+        for i, row in enumerate(resultset):
+            metas.loc[i] = row[:num_meta]
+            feats[i] = row[num_meta:]
+
+        # Concatenate both into final output per compartment
+        return pd.concat(
+            [pd.DataFrame(columns=feat_cols, data=feats), metas], axis=1)
 
     def aggregate_compartment(
         self,
@@ -617,8 +656,6 @@ def merge_single_cells(
         float_format=None,
         single_cell_normalize=False,
         normalize_args=None,
-        test=False,
-        test_n=None,
     ):
         """Given the linking columns, merge single cell data. Normalization is also supported.
 
@@ -636,20 +673,12 @@ def merge_single_cells(
             Whether or not to normalize the single cell data.
         normalize_args : dict, optional
             Additional arguments passed as input to pycytominer.normalize().
-        test : bool, optional
-            If function run in test mode, read dataframe in chunks.
-        test_n : int, optional
-            Specifies size of test chunk loaded in test mode.
-
 
         Returns
         -------
         pandas.core.frame.DataFrame
             Either a dataframe (if output_file="none") or will write to file.
         """
-        # When running in test mode, do not compute subsample
-        if test:
-            self.compute_subsample = False
 
         # Load the single cell dataframe by merging on the specific linking columns
         sc_df = ""
@@ -676,8 +705,7 @@ def merge_single_cells(
                 ]
 
                 if isinstance(sc_df, str):
-                    sc_df = self.load_compartment(
-                        compartment=left_compartment, test=test, test_n=test_n)
+                    sc_df = self.load_compartment(compartment=left_compartment)
 
                     if compute_subsample:
                         # Sample cells proportionally by self.strata
@@ -692,9 +720,7 @@ def merge_single_cells(
                         ).reindex(sc_df.columns, axis="columns")
 
                     sc_df = sc_df.merge(
-                        self.load_compartment(
-                            compartment=right_compartment,
-                            test=test, test_n=test_n),
+                        self.load_compartment(compartment=right_compartment),
                         left_on=self.merge_cols + [left_link_col],
                         right_on=self.merge_cols + [right_link_col],
                         suffixes=merge_suffix,
@@ -702,9 +728,7 @@ def merge_single_cells(
 
                 else:
                     sc_df = sc_df.merge(
-                        self.load_compartment(
-                            compartment=right_compartment,
-                            test=test, test_n=test_n),
+                        self.load_compartment(compartment=right_compartment),
                         left_on=self.merge_cols + [left_link_col],
                         right_on=self.merge_cols + [right_link_col],
                         suffixes=merge_suffix,

From 0118ccd0c96355291ed497f984fbb4f65c519676 Mon Sep 17 00:00:00 2001
From: bunnech <bunnec@inf.ethz.ch>
Date: Mon, 15 Aug 2022 19:14:59 -0400
Subject: [PATCH 3/9] Dump to parquet and add filename.

---
 debug.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/debug.py b/debug.py
index 69ef1615..5886ce75 100644
--- a/debug.py
+++ b/debug.py
@@ -7,10 +7,9 @@
 
 
 # define test SQL file
+filename = "BR00117010"
 sql_file = "sqlite:////" + os.path.abspath(
-    "../perturbmatch/datasets/BR00117010.sqlite")
-add_file = "sqlite:////" + os.path.abspath(
-    "../perturbmatch/datasets/BR00117010.sqlite")
+    f"../perturbmatch/datasets/{filename}.sqlite")
 
 # define dataframe
 ap = SingleCells(
@@ -30,9 +29,13 @@
 )
 
 # load additional information of file
-df_info = pd.read_csv("../perturbmatch/datasets/BR00117010_augmented.csv")
+df_info = pd.read_csv(f"../perturbmatch/datasets/{filename}_augmented.csv")
+
+# select only metadata
 df_info_meta = [m for m in df_info.columns if m.startswith("Metadata_")]
 
 # merge single cell dataframe with additional information
 df_merged_sc = df_merged_sc.merge(
     right=df_info_meta, how="left", on=["Metadata_Plate", "Metadata_Well"])
+
+df_merged_sc.to_parquet(f"../perturbmatch/datasets/{filename}.parquet")

From abb2774c104f3249e0382b45810ef53e742e8e1e Mon Sep 17 00:00:00 2001
From: bunnech <bunnec@inf.ethz.ch>
Date: Tue, 16 Aug 2022 09:58:31 -0400
Subject: [PATCH 4/9] Correct dtype and adapt pytest.

---
 debug.py                                      | 41 -------------------
 pycytominer/cyto_utils/cells.py               | 10 +++--
 .../tests/test_cyto_utils/test_cells.py       | 10 +++--
 3 files changed, 13 insertions(+), 48 deletions(-)
 delete mode 100644 debug.py

diff --git a/debug.py b/debug.py
deleted file mode 100644
index 5886ce75..00000000
--- a/debug.py
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/usr/bin/python3
-
-# imports
-import os
-import pandas as pd
-from pycytominer.cyto_utils.cells import SingleCells
-
-
-# define test SQL file
-filename = "BR00117010"
-sql_file = "sqlite:////" + os.path.abspath(
-    f"../perturbmatch/datasets/{filename}.sqlite")
-
-# define dataframe
-ap = SingleCells(
-    sql_file=sql_file,
-    image_cols=["TableNumber", "ImageNumber", "Metadata_Site"],
-    strata=["Metadata_Plate", "Metadata_Well"]
-)
-
-# merge comparments and metainformation into one dataframe
-df_merged_sc = ap.merge_single_cells(
-    sc_output_file="none",
-    compute_subsample=False,
-    compression_options=None,
-    float_format=None,
-    single_cell_normalize=True,
-    normalize_args=None,
-)
-
-# load additional information of file
-df_info = pd.read_csv(f"../perturbmatch/datasets/{filename}_augmented.csv")
-
-# select only metadata
-df_info_meta = [m for m in df_info.columns if m.startswith("Metadata_")]
-
-# merge single cell dataframe with additional information
-df_merged_sc = df_merged_sc.merge(
-    right=df_info_meta, how="left", on=["Metadata_Plate", "Metadata_Well"])
-
-df_merged_sc.to_parquet(f"../perturbmatch/datasets/{filename}.parquet")
diff --git a/pycytominer/cyto_utils/cells.py b/pycytominer/cyto_utils/cells.py
index 4ab10093..46d4fbdb 100644
--- a/pycytominer/cyto_utils/cells.py
+++ b/pycytominer/cyto_utils/cells.py
@@ -444,20 +444,22 @@ def load_compartment(self, compartment):
         meta_cols, feat_cols = self.get_columns(compartment)
         num_meta, num_feats = len(meta_cols), len(feat_cols)
 
-        feats = np.empty(shape=(num_cells, num_feats), dtype=np.float32)
+        # Use pre-allocated np.array for data
+        feats = np.empty(shape=(num_cells, num_feats), dtype=np.float64)
+        # Use pre-allocated pd.DataFrame for metadata
         metas = pd.DataFrame(columns=meta_cols, index=range(num_cells))
 
-        # Load data row by row for both meta information and features
+        # Query database for selected columns of chosen compartment
         columns = ", ".join(meta_cols + feat_cols)
         query = f"select {columns} from {compartment}"
         resultset = self.conn.execute(query)
 
-        print(f"Loading compartment {compartment}.")
+        # Load data row by row for both meta information and features
         for i, row in enumerate(resultset):
             metas.loc[i] = row[:num_meta]
             feats[i] = row[num_meta:]
 
-        # Concatenate both into final output per compartment
+        # Return concatenated data and metainformation of compartment
         return pd.concat(
             [pd.DataFrame(columns=feat_cols, data=feats), metas], axis=1)
 
diff --git a/pycytominer/tests/test_cyto_utils/test_cells.py b/pycytominer/tests/test_cyto_utils/test_cells.py
index 2f86849e..fd41bfea 100644
--- a/pycytominer/tests/test_cyto_utils/test_cells.py
+++ b/pycytominer/tests/test_cyto_utils/test_cells.py
@@ -235,10 +235,12 @@ def test_SingleCells_count():
 
 def test_load_compartment():
     loaded_compartment_df = ap.load_compartment(compartment="cells")
-    pd.testing.assert_frame_equal(loaded_compartment_df, cells_df)
+    pd.testing.assert_frame_equal(
+        loaded_compartment_df, cells_df, check_dtype=False)
 
     # Test non-canonical compartment loading
-    pd.testing.assert_frame_equal(new_compartment_df, ap_new.load_compartment("new"))
+    pd.testing.assert_frame_equal(
+        new_compartment_df, ap_new.load_compartment("new"), check_dtype=False)
 
 
 def test_merge_single_cells():
@@ -307,6 +309,7 @@ def test_merge_single_cells():
                 pd.testing.assert_frame_equal(
                     norm_method_df.sort_index(axis=1),
                     manual_merge_normalize.sort_index(axis=1),
+                    check_dtype=False
                 )
 
     # Test non-canonical compartment merging
@@ -337,7 +340,8 @@ def test_merge_single_cells():
 
     default_feature_infer_df = ap_new.merge_single_cells(single_cell_normalize=True)
 
-    pd.testing.assert_frame_equal(norm_new_method_df, default_feature_infer_df)
+    pd.testing.assert_frame_equal(
+        norm_new_method_df, default_feature_infer_df, check_dtype=False)
     pd.testing.assert_frame_equal(
         norm_new_method_df, norm_new_method_no_feature_infer_df
     )

From 0ef040e1afcf90e6236e775fb06e6fdd1ecb23f9 Mon Sep 17 00:00:00 2001
From: bunnech <bunnec@inf.ethz.ch>
Date: Tue, 16 Aug 2022 14:04:22 -0400
Subject: [PATCH 5/9] Address feedback: rename SQL helpers, drop is_feature_col.

---
 pycytominer/cyto_utils/cells.py | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/pycytominer/cyto_utils/cells.py b/pycytominer/cyto_utils/cells.py
index 46d4fbdb..85a2965f 100644
--- a/pycytominer/cyto_utils/cells.py
+++ b/pycytominer/cyto_utils/cells.py
@@ -397,20 +397,12 @@ def get_subsample(self, df=None, compartment="cells", rename_col=True):
 
         self.is_subset_computed = True
 
-    def is_feature_col(self, col):
-        """Check if column is a feature."""
-        return (
-            col.startswith("Cell")
-            or col.startswith("Cytoplasm")
-            or col.startswith("Nuclei")
-        )
-
-    def count(self, table):
+    def count_sql_table_rows(self, table):
         """Count total number of rows for a table."""
         (num_rows,) = next(self.conn.execute(f"SELECT COUNT(*) FROM {table}"))
         return num_rows
 
-    def get_columns(self, table):
+    def get_sql_table_col_names(self, table):
         """Get feature and metadata columns lists."""
         ptr = self.conn.execute(f"SELECT * FROM {table} LIMIT 1").cursor
         col_names = [obj[0] for obj in ptr.description]
@@ -418,7 +410,7 @@ def get_columns(self, table):
         feat_cols = []
         meta_cols = []
         for col in col_names:
-            if self.is_feature_col(col):
+            if col.lower().startswith(tuple(self.compartments)):
                 feat_cols.append(col)
             else:
                 meta_cols.append(col)
@@ -440,8 +432,8 @@ def load_compartment(self, compartment):
         """
 
         # Get data useful to pre-alloc memory
-        num_cells = self.count(compartment)
-        meta_cols, feat_cols = self.get_columns(compartment)
+        num_cells = self.count_sql_table_rows(compartment)
+        meta_cols, feat_cols = self.get_sql_table_col_names(compartment)
         num_meta, num_feats = len(meta_cols), len(feat_cols)
 
         # Use pre-allocated np.array for data
@@ -452,10 +444,10 @@ def load_compartment(self, compartment):
         # Query database for selected columns of chosen compartment
         columns = ", ".join(meta_cols + feat_cols)
         query = f"select {columns} from {compartment}"
-        resultset = self.conn.execute(query)
+        query_result = self.conn.execute(query)
 
         # Load data row by row for both meta information and features
-        for i, row in enumerate(resultset):
+        for i, row in enumerate(query_result):
             metas.loc[i] = row[:num_meta]
             feats[i] = row[num_meta:]
 

From f41d72af380f6ed13c032cad1fba066c1bc358e7 Mon Sep 17 00:00:00 2001
From: bunnech <bunnec@inf.ethz.ch>
Date: Tue, 16 Aug 2022 14:46:00 -0400
Subject: [PATCH 6/9] Add pytests for the SQL row count and column name helpers.

---
 pycytominer/tests/test_cyto_utils/test_cells.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/pycytominer/tests/test_cyto_utils/test_cells.py b/pycytominer/tests/test_cyto_utils/test_cells.py
index fd41bfea..8dafe2fb 100644
--- a/pycytominer/tests/test_cyto_utils/test_cells.py
+++ b/pycytominer/tests/test_cyto_utils/test_cells.py
@@ -243,6 +243,20 @@ def test_load_compartment():
         new_compartment_df, ap_new.load_compartment("new"), check_dtype=False)
 
 
+def test_sc_count_sql_table():  # or something
+    # Iterate over initialized compartments
+    for compartment in ap.compartments:
+        result_row_count = ap.count_sql_table_rows(table=compartment)
+        assert result_row_count == 100
+
+
+def test_get_sql_table_col_names():  # or something
+    # Iterate over initialized compartments
+    for compartment in ap.compartments:
+        meta_cols, _ = ap.get_sql_table_col_names(table=compartment)
+        assert meta_cols == ['ObjectNumber', 'ImageNumber', 'TableNumber']
+
+
 def test_merge_single_cells():
     sc_merged_df = ap.merge_single_cells()
 

From 1c6c000ca8c0617a75f65d11d3a23daa6e48f9b3 Mon Sep 17 00:00:00 2001
From: bunnech <bunnec@inf.ethz.ch>
Date: Tue, 16 Aug 2022 14:59:42 -0400
Subject: [PATCH 7/9] Remove placeholder comments from new pytests.

---
 pycytominer/tests/test_cyto_utils/test_cells.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pycytominer/tests/test_cyto_utils/test_cells.py b/pycytominer/tests/test_cyto_utils/test_cells.py
index 8dafe2fb..e9c0a154 100644
--- a/pycytominer/tests/test_cyto_utils/test_cells.py
+++ b/pycytominer/tests/test_cyto_utils/test_cells.py
@@ -243,14 +243,14 @@ def test_load_compartment():
         new_compartment_df, ap_new.load_compartment("new"), check_dtype=False)
 
 
-def test_sc_count_sql_table():  # or something
+def test_sc_count_sql_table():
     # Iterate over initialized compartments
     for compartment in ap.compartments:
         result_row_count = ap.count_sql_table_rows(table=compartment)
         assert result_row_count == 100
 
 
-def test_get_sql_table_col_names():  # or something
+def test_get_sql_table_col_names():
     # Iterate over initialized compartments
     for compartment in ap.compartments:
         meta_cols, _ = ap.get_sql_table_col_names(table=compartment)

From e9d9edb2615333f75d0cb5ff089fc53310e4650f Mon Sep 17 00:00:00 2001
From: bunnech <bunnec@inf.ethz.ch>
Date: Tue, 16 Aug 2022 15:07:52 -0400
Subject: [PATCH 8/9] Check feature column names in get_sql_table_col_names pytest.

---
 pycytominer/tests/test_cyto_utils/test_cells.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pycytominer/tests/test_cyto_utils/test_cells.py b/pycytominer/tests/test_cyto_utils/test_cells.py
index e9c0a154..462b4936 100644
--- a/pycytominer/tests/test_cyto_utils/test_cells.py
+++ b/pycytominer/tests/test_cyto_utils/test_cells.py
@@ -253,8 +253,10 @@ def test_sc_count_sql_table():
 def test_get_sql_table_col_names():
     # Iterate over initialized compartments
     for compartment in ap.compartments:
-        meta_cols, _ = ap.get_sql_table_col_names(table=compartment)
+        meta_cols, feat_cols = ap.get_sql_table_col_names(table=compartment)
         assert meta_cols == ['ObjectNumber', 'ImageNumber', 'TableNumber']
+        for i in ['a', 'b', 'c', 'd']:
+            assert f"{compartment.capitalize()}_{i}" in feat_cols
 
 
 def test_merge_single_cells():

From b8ad4c83e88b22e0a4e43f99cfc745bd94d4765d Mon Sep 17 00:00:00 2001
From: bunnech <bunnec@inf.ethz.ch>
Date: Tue, 16 Aug 2022 15:24:42 -0400
Subject: [PATCH 9/9] Return metadata columns before morphological features
 and adapt existing pytests to the new column order.

---
 pycytominer/cyto_utils/cells.py                 | 2 +-
 pycytominer/tests/test_cyto_utils/test_cells.py | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/pycytominer/cyto_utils/cells.py b/pycytominer/cyto_utils/cells.py
index 85a2965f..e8271c70 100644
--- a/pycytominer/cyto_utils/cells.py
+++ b/pycytominer/cyto_utils/cells.py
@@ -453,7 +453,7 @@ def load_compartment(self, compartment):
 
         # Return concatenated data and metainformation of compartment
         return pd.concat(
-            [pd.DataFrame(columns=feat_cols, data=feats), metas], axis=1)
+            [metas, pd.DataFrame(columns=feat_cols, data=feats)], axis=1)
 
     def aggregate_compartment(
         self,
diff --git a/pycytominer/tests/test_cyto_utils/test_cells.py b/pycytominer/tests/test_cyto_utils/test_cells.py
index 462b4936..910b6734 100644
--- a/pycytominer/tests/test_cyto_utils/test_cells.py
+++ b/pycytominer/tests/test_cyto_utils/test_cells.py
@@ -236,11 +236,16 @@ def test_SingleCells_count():
 def test_load_compartment():
     loaded_compartment_df = ap.load_compartment(compartment="cells")
     pd.testing.assert_frame_equal(
-        loaded_compartment_df, cells_df, check_dtype=False)
+        loaded_compartment_df,
+        cells_df.reindex(columns=loaded_compartment_df.columns),
+        check_dtype=False)
 
     # Test non-canonical compartment loading
+    loaded_compartment_df = ap_new.load_compartment("new")
     pd.testing.assert_frame_equal(
-        new_compartment_df, ap_new.load_compartment("new"), check_dtype=False)
+        new_compartment_df.reindex(columns=loaded_compartment_df.columns),
+        loaded_compartment_df,
+        check_dtype=False)
 
 
 def test_sc_count_sql_table():