Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Otoferlin #80

Draft
wants to merge 35 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
456f99a
Add first version of otoferlin processing code
constantinpape Dec 6, 2024
f10df21
Enable local file paths for otoferlin experiments
constantinpape Dec 6, 2024
c97c47a
Merge branch 'main' into otoferlin
constantinpape Dec 7, 2024
5048422
Update visualization scripts
constantinpape Dec 7, 2024
964cf2c
Add vesicle comparison and domain adaptation
constantinpape Dec 7, 2024
10181ce
Update inference for otoferlin WIP
constantinpape Dec 8, 2024
b6e223f
Update otoferlin inference WIP
constantinpape Dec 8, 2024
fd11ee8
Add script for structure postprocessing
constantinpape Dec 8, 2024
e7288b5
More ribbon inference updates
constantinpape Dec 8, 2024
224ef6c
Fix issues in structure processing
constantinpape Dec 8, 2024
451ff69
Implement vesicle post-processing
constantinpape Dec 8, 2024
3bcca74
Add first version of correction script
constantinpape Dec 8, 2024
70a5856
Update otoferlin correction script
constantinpape Dec 9, 2024
03dc031
Add path to DA model on the WS
constantinpape Dec 9, 2024
69d5021
Implement vesicle pool correction WIP
constantinpape Dec 10, 2024
f630ee1
Update otoferlin analysis
constantinpape Dec 10, 2024
ea84a3d
Add vesicle postprocessing scripts
constantinpape Dec 11, 2024
9b8be05
Update vesicle pool correction script
constantinpape Dec 11, 2024
6eb5d12
Implement vesicle labeling
constantinpape Dec 11, 2024
504144c
merge on WS
constantinpape Dec 11, 2024
e99ab19
Add overview table
constantinpape Dec 11, 2024
7cdfb4d
Merge branch 'otoferlin' of https://github.com/computational-cell-ana…
constantinpape Dec 11, 2024
0b602ec
Fix table loading
constantinpape Dec 11, 2024
81ff67f
Merge branch 'otoferlin' of https://github.com/computational-cell-ana…
constantinpape Dec 11, 2024
079444f
Fix table
constantinpape Dec 11, 2024
ae83e74
Merge branch 'otoferlin' of https://github.com/computational-cell-ana…
constantinpape Dec 11, 2024
5a22bc4
More name fixes
constantinpape Dec 11, 2024
66f296f
Merge branch 'otoferlin' of https://github.com/computational-cell-ana…
constantinpape Dec 11, 2024
adbfdc9
Update otoferlin analysis
constantinpape Dec 11, 2024
791f635
Implement IMOD export
constantinpape Dec 11, 2024
c4fe694
Update otoferlin analyse
constantinpape Dec 12, 2024
0bdf4c1
Finish imodexport and implement napari vis
constantinpape Dec 12, 2024
a44ab05
Update figure script
constantinpape Dec 12, 2024
5d4f7cb
Update otoferlin analysis
constantinpape Dec 14, 2024
6c3c431
Add object filter logic
constantinpape Jan 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions scripts/inner_ear/processing/filter_objects.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import os
from pathlib import Path
from tqdm import tqdm

import h5py
import imageio.v3 as imageio
import numpy as np
from skimage.measure import label
from skimage.segmentation import relabel_sequential

from synapse_net.file_utils import get_data_path
from parse_table import parse_table, get_data_root, _match_correction_folder, _match_correction_file


def _load_segmentation(seg_path):
ext = Path(seg_path).suffix
assert ext in (".h5", ".tif"), ext
if ext == ".tif":
seg = imageio.imread(seg_path)
else:
with h5py.File(seg_path, "r") as f:
seg = f["segmentation"][:]
return seg


def _save_segmentation(seg_path, seg):
ext = Path(seg_path).suffix
assert ext in (".h5", ".tif"), ext
if ext == ".tif":
imageio.imwrite(seg_path, seg, compression="zlib")
else:
with h5py.File(seg_path, "a") as f:
f.create_dataset("segmentation", data=seg, compression="gzip")
return seg


def _filter_n_objects(segmentation, num_objects):
    """Keep only the 'num_objects' largest connected components of the segmentation.

    The result is relabeled sequentially (ids 1..num_objects). Raises an
    AssertionError if fewer than 'num_objects' components are present.
    """
    # Split the input into connected components so each disconnected piece is its own object.
    segmentation = label(segmentation)
    # Measure object sizes, dropping the background entry at index 0.
    object_ids, object_sizes = np.unique(segmentation, return_counts=True)
    object_ids, object_sizes = object_ids[1:], object_sizes[1:]
    # Determine the ids of the largest objects and erase everything else.
    size_order = np.argsort(object_sizes)[::-1]
    keep_ids = object_ids[size_order][:num_objects]
    segmentation[~np.isin(segmentation, keep_ids)] = 0
    # Relabel sequentially and verify the requested object count remains.
    segmentation, _, _ = relabel_sequential(segmentation)
    assert int(segmentation.max()) == num_objects
    return segmentation


def process_tomogram(folder, num_ribbon, num_pd):
    """Filter the ribbon and PD segmentation of one tomogram to the expected object counts.

    Manually corrected segmentations take precedence over the automatic results
    in 'automatisch/v2'. The previous file is kept as a '.bkp' backup before the
    filtered segmentation is written in its place.
    """
    data_path = get_data_path(folder)
    output_folder = os.path.join(folder, "automatisch", "v2")
    fname = Path(data_path).stem
    correction_folder = _match_correction_folder(folder)

    def _resolve(structure_name, suffix):
        # Prefer a manual correction file; otherwise fall back to the automatic result.
        path = _match_correction_file(correction_folder, structure_name)
        if not os.path.exists(path):
            path = os.path.join(output_folder, f"{fname}_{suffix}.h5")
        assert os.path.exists(path), path
        return path

    # Load both segmentations up front, so nothing is modified if one is missing.
    ribbon_path = _resolve("ribbon", "ribbon")
    ribbon = _load_segmentation(ribbon_path)
    pd_path = _resolve("PD", "pd")
    pd_seg = _load_segmentation(pd_path)

    # Filter the ribbon and the PD, backing up the previous files.
    print("Filtering number of ribbons:", num_ribbon)
    ribbon = _filter_n_objects(ribbon, num_ribbon)
    os.rename(ribbon_path, ribbon_path + ".bkp")
    _save_segmentation(ribbon_path, ribbon)

    print("Filtering number of PDs:", num_pd)
    pd_seg = _filter_n_objects(pd_seg, num_pd)
    os.rename(pd_path, pd_path + ".bkp")
    _save_segmentation(pd_path, pd_seg)


def filter_objects(table, version):
    """Apply the object-count filtering to every tomogram listed in the table.

    Rows without a local copy or without a PD are skipped, as is the (not yet
    supported) combination of two ribbons and a single PD.

    NOTE(review): 'version' is currently unused — confirm whether it should
    select the output folder version.
    """
    for _, row in tqdm(table.iterrows(), total=len(table)):
        folder = row["Local Path"]
        if folder == "":
            continue

        # We have to handle the segmentation without ribbon separately.
        if row["PD vorhanden? "] == "nein":
            continue

        # Treat an unclear PD count as a single PD.
        n_pds = row["Anzahl PDs"]
        n_pds = int(1 if n_pds == "unklar" else n_pds)

        n_ribbons = int(row["Anzahl Ribbons"])
        if n_ribbons == 2 and n_pds == 1:
            print(f"The tomogram {folder} has {n_ribbons} ribbons and {n_pds} PDs.")
            print("The structure post-processing for this case is not yet implemented and will be skipped.")
            continue

        micro = row["EM alt vs. Neu"]
        if micro == "beides":
            # Tomograms recorded on both microscopes have an extra sub-folder
            # for the data from the new microscope.
            process_tomogram(folder, n_ribbons, n_pds)
            folder_new = os.path.join(folder, "Tomo neues EM")
            if not os.path.exists(folder_new):
                folder_new = os.path.join(folder, "neues EM")
            assert os.path.exists(folder_new), folder_new
            process_tomogram(folder_new, n_ribbons, n_pds)
        elif micro in ("alt", "neu"):
            process_tomogram(folder, n_ribbons, n_pds)


def main():
    """Run the object filtering for all tomograms in the overview table."""
    data_root = get_data_root()
    table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Übersicht.xlsx")
    filter_objects(parse_table(table_path, data_root), version=2)


if __name__ == "__main__":
main()
4 changes: 4 additions & 0 deletions scripts/otoferlin/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
data/
sync_segmentation.sh
segmentation/
results/
9 changes: 9 additions & 0 deletions scripts/otoferlin/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Otoferlin Analysis


## Notes on improvements:

- Try excluding fewer than 20 slices
- Update the boundary post-processing (it is not robust when the PD is not found and then selects the wrong objects)
- Can we find fiducials with ilastik and mask them out? They are interfering with ribbon finding.
- Alternative: just restrict the processing to a center crop by default.
201 changes: 201 additions & 0 deletions scripts/otoferlin/automatic_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
import os

import h5py
import numpy as np

from skimage.measure import label
from skimage.segmentation import relabel_sequential

from synapse_net.distance_measurements import measure_segmentation_to_object_distances
from synapse_net.file_utils import read_mrc
from synapse_net.inference.vesicles import segment_vesicles
from synapse_net.tools.util import get_model, compute_scale_from_voxel_size, _segment_ribbon_AZ
from tqdm import tqdm

from common import get_all_tomograms, get_seg_path, get_adapted_model, load_segmentations

# These are tomograms for which the sophisticated membrane processing fails.
# In this case, we just select the largest boundary piece.
# NOTE: entries are matched against the basename of the mrc path, see
# 'os.path.basename(mrc_path)' checks further below in this file.
SIMPLE_MEM_POSTPROCESSING = [
    "Otof_TDAKO1blockA_GridN5_2_rec.mrc", "Otof_TDAKO2blockC_GridF5_1_rec.mrc", "Otof_TDAKO2blockC_GridF5_2_rec.mrc",
    "Bl6_NtoTDAWT1_blockH_GridF3_1_rec.mrc", "Bl6_NtoTDAWT1_blockH_GridG2_3_rec.mrc", "Otof_TDAKO1blockA_GridN5_5_rec.mrc",
    "Otof_TDAKO2blockC_GridE2_1_rec.mrc", "Otof_TDAKO2blockC_GridE2_2_rec.mrc",

]


def _get_center_crop(input_):
halo_xy = (600, 600)
bb_xy = tuple(
slice(max(sh // 2 - ha, 0), min(sh // 2 + ha, sh)) for sh, ha in zip(input_.shape[1:], halo_xy)
)
bb = (np.s_[:],) + bb_xy
return bb, input_.shape


def _get_tiling():
# tile = {"x": 768, "y": 768, "z": 48}
tile = {"x": 512, "y": 512, "z": 48}
halo = {"x": 128, "y": 128, "z": 8}
return {"tile": tile, "halo": halo}


def process_vesicles(mrc_path, output_path, process_center_crop):
    """Segment vesicles in a tomogram and store the result in the output h5 file.

    The computation is skipped if 'segmentation/vesicles' already exists in the
    output file. If process_center_crop is set, only a central xy-crop is
    processed and the result is padded back to the full volume shape with zeros.
    """
    key = "segmentation/vesicles"
    # Skip tomograms that were already processed.
    if os.path.exists(output_path):
        with h5py.File(output_path, "r") as f:
            if key in f:
                return

    input_, voxel_size = read_mrc(mrc_path)
    if process_center_crop:
        bb, full_shape = _get_center_crop(input_)
        input_ = input_[bb]

    model = get_adapted_model()
    # NOTE(review): the scale is computed for the "ribbon" model's voxel size —
    # presumably the adapted vesicle model was trained at that resolution; confirm.
    scale = compute_scale_from_voxel_size(voxel_size, "ribbon")
    print("Rescaling volume for vesicle segmentation with factor:", scale)
    tiling = _get_tiling()
    segmentation = segment_vesicles(input_, model=model, scale=scale, tiling=tiling)

    if process_center_crop:
        # Paste the cropped result back into a zero-initialized full-size volume.
        full_seg = np.zeros(full_shape, dtype=segmentation.dtype)
        full_seg[bb] = segmentation
        segmentation = full_seg

    with h5py.File(output_path, "a") as f:
        f.create_dataset(key, data=segmentation, compression="gzip")


def _simple_membrane_postprocessing(membrane_prediction):
    """Select the largest connected component of the membrane prediction.

    Returns a binary uint8 mask of the largest component. An all-zero
    prediction yields an all-zero mask instead of raising (the previous
    implementation crashed on np.argmax over an empty size array).
    """
    seg = label(membrane_prediction)
    # Object ids and sizes, excluding the background entry at index 0.
    ids, sizes = np.unique(seg, return_counts=True)
    ids, sizes = ids[1:], sizes[1:]
    # Guard against an empty prediction, for which no component exists.
    if len(ids) == 0:
        return np.zeros_like(membrane_prediction, dtype="uint8")
    return (seg == ids[np.argmax(sizes)]).astype("uint8")


def process_ribbon_structures(mrc_path, output_path, process_center_crop):
    """Segment the ribbon synapse structures and store segmentations and predictions.

    Skips the tomogram if 'segmentation/ribbon' already exists in the output.
    Uses the vesicle segmentation (which must already be present in the output
    file) as extra input. If process_center_crop is set, only a central xy-crop
    is processed and all results are padded back to the full volume shape.
    """
    key = "segmentation/ribbon"
    with h5py.File(output_path, "r") as f:
        if key in f:
            return
        vesicles = f["segmentation/vesicles"][:]

    input_, voxel_size = read_mrc(mrc_path)
    if process_center_crop:
        bb, full_shape = _get_center_crop(input_)
        input_, vesicles = input_[bb], vesicles[bb]
        assert input_.shape == vesicles.shape

    model_name = "ribbon"
    model = get_model(model_name)
    scale = compute_scale_from_voxel_size(voxel_size, model_name)
    tiling = _get_tiling()

    segmentations, predictions = _segment_ribbon_AZ(
        input_, model, tiling=tiling, scale=scale, verbose=True, extra_segmentation=vesicles,
        return_predictions=True, n_slices_exclude=5,
    )

    # The distance based post-processing for membranes fails for some tomograms.
    # In these cases, just choose the largest membrane piece.
    fname = os.path.basename(mrc_path)
    if fname in SIMPLE_MEM_POSTPROCESSING:
        segmentations["membrane"] = _simple_membrane_postprocessing(predictions["membrane"])

    if process_center_crop:
        # Paste segmentations and predictions back into zero-initialized full-size volumes.
        for name, seg in segmentations.items():
            full_seg = np.zeros(full_shape, dtype=seg.dtype)
            full_seg[bb] = seg
            segmentations[name] = full_seg
        for name, pred in predictions.items():
            # BUGFIX: use the prediction's own dtype here. The previous code used
            # 'seg.dtype' (the stale loop variable from the segmentation loop above),
            # which would truncate floating point predictions to an integer dtype.
            full_pred = np.zeros(full_shape, dtype=pred.dtype)
            full_pred[bb] = pred
            predictions[name] = full_pred

    with h5py.File(output_path, "a") as f:
        for name, seg in segmentations.items():
            f.create_dataset(f"segmentation/{name}", data=seg, compression="gzip")
            f.create_dataset(f"prediction/{name}", data=predictions[name], compression="gzip")


def postprocess_vesicles(
    mrc_path, output_path, process_center_crop, force=False
):
    """Filter the vesicle segmentation by size and by distance to ribbon and membrane.

    Vesicles smaller than 5000 voxels or farther than 120 nm from the combined
    ribbon + membrane mask are removed. The result is relabeled sequentially and
    written to the output file. Existing results are only overwritten if 'force'.
    """
    # NOTE(review): the dataset key contains a typo ("veiscles"); it is kept
    # unchanged because downstream consumers read the same key.
    key = "segmentation/veiscles_postprocessed"
    with h5py.File(output_path, "r") as f:
        if key in f and not force:
            return
        vesicles = f["segmentation/vesicles"][:]
    if process_center_crop:
        bb, full_shape = _get_center_crop(vesicles)
        vesicles = vesicles[bb]
    else:
        bb = np.s_[:]

    segs = load_segmentations(output_path)
    ribbon = segs["ribbon"][bb]
    membrane = segs["membrane"][bb]

    # Filter out small vesicle fragments.
    min_size = 5000
    ids, sizes = np.unique(vesicles, return_counts=True)
    ids, sizes = ids[1:], sizes[1:]
    filter_ids = ids[sizes < min_size]
    vesicles[np.isin(vesicles, filter_ids)] = 0

    # The tomogram data itself is not needed here; we only read the voxel size.
    # (The previous version also cropped the unused image data.)
    _, voxel_size = read_mrc(mrc_path)
    voxel_size = tuple(voxel_size[ax] for ax in "zyx")

    # Filter out all vesicles farther than 120 nm from the membrane or ribbon.
    max_dist = 120
    seg = (ribbon + membrane) > 0
    distances, _, _, seg_ids = measure_segmentation_to_object_distances(vesicles, seg, resolution=voxel_size)
    filter_ids = seg_ids[distances > max_dist]
    vesicles[np.isin(vesicles, filter_ids)] = 0

    vesicles, _, _ = relabel_sequential(vesicles)

    if process_center_crop:
        # Paste the cropped result back into a zero-initialized full-size volume.
        full_seg = np.zeros(full_shape, dtype=vesicles.dtype)
        full_seg[bb] = vesicles
        vesicles = full_seg
    with h5py.File(output_path, "a") as f:
        if key in f:
            f[key][:] = vesicles
        else:
            f.create_dataset(key, data=vesicles, compression="gzip")


def process_tomogram(mrc_path):
    """Run the full automatic segmentation pipeline for a single tomogram."""
    output_path = get_seg_path(mrc_path)
    os.makedirs(os.path.split(output_path)[0], exist_ok=True)

    # Restrict all steps to a central crop of the volume.
    process_center_crop = True
    process_vesicles(mrc_path, output_path, process_center_crop)
    process_ribbon_structures(mrc_path, output_path, process_center_crop)
    postprocess_vesicles(mrc_path, output_path, process_center_crop)


def main():
    """Re-run the simple membrane postprocessing for the affected tomograms.

    The full pipeline run is deliberately disabled below; this entry point only
    fixes the membrane segmentation for the tomograms listed in
    SIMPLE_MEM_POSTPROCESSING, using their stored membrane predictions.
    """
    tomograms = get_all_tomograms()
    # for tomogram in tqdm(tomograms, desc="Process tomograms"):
    #     process_tomogram(tomogram)

    # Update the membrane postprocessing for the tomograms where this went wrong.
    # (Typos in the progress message were fixed: "membrame postprocesing".)
    for tomo in tqdm(tomograms, desc="Fix membrane postprocessing"):
        if os.path.basename(tomo) not in SIMPLE_MEM_POSTPROCESSING:
            continue
        seg_path = get_seg_path(tomo)
        with h5py.File(seg_path, "r") as f:
            pred = f["prediction/membrane"][:]
        seg = _simple_membrane_postprocessing(pred)
        with h5py.File(seg_path, "a") as f:
            f["segmentation/membrane"][:] = seg


if __name__ == "__main__":
main()
Loading