From 456f99a59160140ed54f30665b18de29571e57ee Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Fri, 6 Dec 2024 20:48:51 +0100 Subject: [PATCH 01/30] Add first version of otoferlin processing code --- scripts/otoferlin/automatic_processing.py | 150 ++++++++++++++++++++ scripts/otoferlin/check_automatic_result.py | 65 +++++++++ scripts/otoferlin/common.py | 49 +++++++ 3 files changed, 264 insertions(+) create mode 100644 scripts/otoferlin/automatic_processing.py create mode 100644 scripts/otoferlin/check_automatic_result.py create mode 100644 scripts/otoferlin/common.py diff --git a/scripts/otoferlin/automatic_processing.py b/scripts/otoferlin/automatic_processing.py new file mode 100644 index 0000000..fb8b95a --- /dev/null +++ b/scripts/otoferlin/automatic_processing.py @@ -0,0 +1,150 @@ +import os + +import h5py +import numpy as np +import pandas as pd + +from synaptic_reconstruction.distance_measurements import measure_segmentation_to_object_distances, load_distances +from synaptic_reconstruction.file_utils import read_mrc +from synaptic_reconstruction.inference.vesicles import segment_vesicles +from synaptic_reconstruction.tools.util import get_model, compute_scale_from_voxel_size, _segment_ribbon_AZ +from tqdm import tqdm + +from common import INPUT_ROOT, OUTPUT_ROOT, STRUCTURE_NAMES, get_all_tomograms + + +def process_vesicles(mrc_path, output_path, version): + key = "segmentation/vesicles" + if os.path.exists(output_path): + with h5py.File(output_path, "r") as f: + if key in f: + return + + input_, voxel_size = read_mrc(mrc_path) + + model_name = "vesicles_3d" + model = get_model(model_name) + scale = compute_scale_from_voxel_size(voxel_size, model_name) + print("Rescaling volume for vesicle segmentation with factor:", scale) + segmentation = segment_vesicles(input_, model=model, scale=scale) + + with h5py.File(output_path, "a") as f: + f.create_dataset(key, data=segmentation, compression="gzip") + + +def process_ribbon_structures(mrc_path, output_path, 
version): + key = "segmentation/ribbon" + with h5py.File(output_path, "r") as f: + if key in f: + return + vesicles = f["segmentation/vesicles"][:] + + input_, voxel_size = read_mrc(mrc_path) + model_name = "ribbon" + model = get_model(model_name) + scale = compute_scale_from_voxel_size(voxel_size, model_name) + segmentations = _segment_ribbon_AZ(input_, model, tiling=None, scale=scale, verbose=True, extra_segmentation=vesicles) + + with h5py.File(output_path, "a") as f: + for name, seg in segmentations.items(): + f.create_dataset(f"segmentation/{name}", data=seg, compression="gzip") + + +def measure_distances(mrc_path, seg_path, output_folder): + result_folder = os.path.join(output_folder, "distances") + if os.path.exists(result_folder): + return + + # Get the voxel size. + _, voxel_size = read_mrc(mrc_path) + resolution = tuple(voxel_size[ax] for ax in "zyx") + + # Load the segmentations. + with h5py.File(seg_path, "r") as f: + g = f["segmentation"] + vesicles = g["vesicles"][:] + structures = {name: g[name][:] for name in STRUCTURE_NAMES} + + # Measure all the object distances. 
+ os.makedirs(result_folder, exist_ok=True) + for name, seg in structures.items(): + if seg.sum() == 0: + print(name, "was not found, skipping the distance computation.") + continue + print("Compute vesicle distances to", name) + save_path = os.path.join(result_folder, f"{name}.npz") + measure_segmentation_to_object_distances(vesicles, seg, save_path=save_path, resolution=resolution) + + +def assign_vesicle_pools(output_folder): + assignment_path = os.path.join(output_folder, "vesicle_pools.csv") + if os.path.exists(assignment_path): + return + + distance_folder = os.path.join(output_folder, "distances") + distance_paths = {name: os.path.join(distance_folder, f"{name}.npz") for name in STRUCTURE_NAMES} + if not all(os.path.exists(path) for path in distance_paths.values()): + print("Skip vesicle pool assignment, because some distances are missing.") + print("This is probably due to the fact that the corresponding structures were not found.") + return + distances = {name: load_distances(path) for name, path in distance_paths.items()} + + # The distance criteria. + rav_ribbon_distance = 80 # nm + mpv_pd_distance = 100 # nm + mpv_mem_distance = 50 # nm + docked_pd_distance = 100 # nm + docked_mem_distance = 2 # nm + + rav_distances, seg_ids = distances["ribbon"][0], np.array(distances["ribbon"][-1]) + rav_ids = seg_ids[rav_distances < rav_ribbon_distance] + + pd_distances, mem_distances = distances["PD"][0], distances["membrane"][0] + assert len(pd_distances) == len(mem_distances) == len(rav_distances) + + mpv_ids = seg_ids[np.logical_and(pd_distances < mpv_pd_distance, mem_distances < mpv_mem_distance)] + docked_ids = seg_ids[np.logical_and(pd_distances < docked_pd_distance, mem_distances < docked_mem_distance)] + + # Keep only the vesicle ids that are in one of the three categories. + vesicle_ids = np.unique(np.concatenate([rav_ids, mpv_ids, docked_ids])) + + # Create a dictionary to map vesicle ids to their corresponding pool. 
+ # (RA-V get's over-written by MP-V, which is correct). + pool_assignments = {vid: "RA-V" for vid in rav_ids} + pool_assignments.update({vid: "MP-V" for vid in mpv_ids}) + pool_assignments.update({vid: "Docked-V" for vid in docked_ids}) + + pool_assignments = pd.DataFrame({ + "vesicle_id": list(pool_assignments.keys()), + "pool": list(pool_assignments.values()), + }) + pool_assignments.to_csv(assignment_path, index=False) + + +def process_tomogram(mrc_path, version): + relative_path = os.path.relpath(mrc_path, INPUT_ROOT) + relative_folder = os.path.split(relative_path)[0] + + output_folder = os.path.join(OUTPUT_ROOT, f"v{version}", relative_folder) + os.makedirs(output_folder, exist_ok=True) + relative_name = os.path.splitext(relative_path)[0] + output_path = os.path.join(OUTPUT_ROOT, f"v{version}", f"{relative_name}.h5") + + process_vesicles(mrc_path, output_path, version) + process_ribbon_structures(mrc_path, output_path, version) + + measure_distances(mrc_path, output_path, output_folder) + assign_vesicle_pools(output_folder) + + +def main(): + # The version of automatic processing. 
Current versions: + # 1: process everything with the synapse net default models + version = 1 + tomograms = get_all_tomograms() + for tomogram in tqdm(tomograms, desc="Process tomograms"): + process_tomogram(tomogram, version) + + +if __name__: + main() diff --git a/scripts/otoferlin/check_automatic_result.py b/scripts/otoferlin/check_automatic_result.py new file mode 100644 index 0000000..8a10798 --- /dev/null +++ b/scripts/otoferlin/check_automatic_result.py @@ -0,0 +1,65 @@ +import os + +import h5py +import napari +import numpy as np +import pandas as pd + +from common import get_all_tomograms, get_seg_path, get_colormaps +from synaptic_reconstruction.file_utils import read_mrc + +from tqdm import tqdm + + +def _get_vesicle_pools(seg, assignment_path): + assignments = pd.read_csv(assignment_path) + pool_names = pd.unique(assignments.pool).tolist() + pools = np.zeros_like(seg) + + pool_colors = get_colormaps()["pools"] + colormap = {} + for pool_id, pool_name in enumerate(pool_names, 1): + pool_vesicle_ids = assignments[assignments.pool == pool_name].vesicle_id + pool_mask = np.isin(seg, pool_vesicle_ids) + pools[pool_mask] = pool_id + colormap[pool_id] = pool_colors[pool_name] + + return pools, colormap + + +def check_automatic_result(mrc_path): + + seg_path = get_seg_path(mrc_path) + + segmentations, colormaps = {}, {} + with h5py.File(seg_path, "r") as f: + g = f["segmentation"] + for name, ds in g.items(): + segmentations[name] = ds[:] + + output_folder = os.path.split(seg_path)[0] + assignment_path = os.path.join(output_folder, "vesicle_pools.csv") + if os.path.exists(assignment_path): + segmentations["pools"], colormaps["pools"] = _get_vesicle_pools(segmentations["vesicles"], assignment_path) + + tomogram, _ = read_mrc(mrc_path) + + v = napari.Viewer() + v.add_image(tomogram) + for name, seg in segmentations.items(): + v.add_labels(seg, name=name, colormap=colormaps.get(name)) + v.title = os.path.basename(mrc_path) + napari.run() + + +def main(): + # The 
version of automatic processing. Current versions: + # 1: process everything with the synapse net default models + version = 1 + tomograms = get_all_tomograms() + for tomogram in tqdm(tomograms, desc="Visualize automatic segmentation results"): + check_automatic_result(tomogram) + + +if __name__: + main() diff --git a/scripts/otoferlin/common.py b/scripts/otoferlin/common.py new file mode 100644 index 0000000..a691fe3 --- /dev/null +++ b/scripts/otoferlin/common.py @@ -0,0 +1,49 @@ +import os +from glob import glob + + +# These are the files just for the test data. +# INPUT_ROOT = "/home/ag-wichmann/data/test-data/tomograms" +# OUTPUT_ROOT = "/home/ag-wichmann/data/test-data/segmentation" + +# These are the otoferlin tomograms. +INPUT_ROOT = "/home/ag-wichmann/data/otoferlin/tomograms" +OUTPUT_ROOT = "/home/ag-wichmann/data/otoferlin/segmentation" + +STRUCTURE_NAMES = ("ribbon", "PD", "membrane") + + +def get_all_tomograms(): + tomograms = glob(os.path.join(INPUT_ROOT, "**", "*.mrc"), recursive=True) + tomograms += glob(os.path.join(INPUT_ROOT, "**", "*.rec"), recursive=True) + tomograms = sorted(tomograms) + return tomograms + + +def get_seg_path(mrc_path, version=1): + rel_path = os.path.relpath(mrc_path, INPUT_ROOT) + rel_folder, fname = os.path.split(rel_path) + fname = os.path.splitext(fname)[0] + seg_path = os.path.join(OUTPUT_ROOT, f"v{version}", rel_folder, f"{fname}.h5") + return seg_path + + +def get_colormaps(): + pool_map = { + "RA-V": (0, 0.33, 0), + "MP-V": (1.0, 0.549, 0.0), + "Docked-V": (1, 1, 0), + } + return {"pools": pool_map} + + +# TODO: sync the ukon folder with the tomograms. 
+# UKON Path: +# /run/user/1000/gvfs/smb-share:server=wfs-medizin.top.gwdg.de,share=ukon-all$/UKON100/archiv/EM/For Segmentation +def sync_tomograms(): + pass + + +if __name__ == "__main__": + tomos = get_all_tomograms() + print("We have", len(tomos), "tomograms") From f10df219a4c92cc94bc3d243310b55f36f3038b6 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Fri, 6 Dec 2024 22:11:47 +0100 Subject: [PATCH 02/30] Enable local file paths for otoferlin experiments --- scripts/otoferlin/.gitignore | 1 + scripts/otoferlin/automatic_processing.py | 33 +++++++++------------ scripts/otoferlin/check_automatic_result.py | 28 ++++++++++------- scripts/otoferlin/common.py | 21 +++++++++---- 4 files changed, 48 insertions(+), 35 deletions(-) create mode 100644 scripts/otoferlin/.gitignore diff --git a/scripts/otoferlin/.gitignore b/scripts/otoferlin/.gitignore new file mode 100644 index 0000000..8fce603 --- /dev/null +++ b/scripts/otoferlin/.gitignore @@ -0,0 +1 @@ +data/ diff --git a/scripts/otoferlin/automatic_processing.py b/scripts/otoferlin/automatic_processing.py index fb8b95a..8038cc4 100644 --- a/scripts/otoferlin/automatic_processing.py +++ b/scripts/otoferlin/automatic_processing.py @@ -4,13 +4,13 @@ import numpy as np import pandas as pd -from synaptic_reconstruction.distance_measurements import measure_segmentation_to_object_distances, load_distances -from synaptic_reconstruction.file_utils import read_mrc -from synaptic_reconstruction.inference.vesicles import segment_vesicles -from synaptic_reconstruction.tools.util import get_model, compute_scale_from_voxel_size, _segment_ribbon_AZ +from synapse_net.distance_measurements import measure_segmentation_to_object_distances, load_distances +from synapse_net.file_utils import read_mrc +from synapse_net.inference.vesicles import segment_vesicles +from synapse_net.tools.util import get_model, compute_scale_from_voxel_size, _segment_ribbon_AZ from tqdm import tqdm -from common import INPUT_ROOT, OUTPUT_ROOT, 
STRUCTURE_NAMES, get_all_tomograms +from common import STRUCTURE_NAMES, get_all_tomograms, get_seg_path def process_vesicles(mrc_path, output_path, version): @@ -38,12 +38,14 @@ def process_ribbon_structures(mrc_path, output_path, version): if key in f: return vesicles = f["segmentation/vesicles"][:] - + input_, voxel_size = read_mrc(mrc_path) model_name = "ribbon" model = get_model(model_name) scale = compute_scale_from_voxel_size(voxel_size, model_name) - segmentations = _segment_ribbon_AZ(input_, model, tiling=None, scale=scale, verbose=True, extra_segmentation=vesicles) + segmentations = _segment_ribbon_AZ( + input_, model, tiling=None, scale=scale, verbose=True, extra_segmentation=vesicles + ) with h5py.File(output_path, "a") as f: for name, seg in segmentations.items(): @@ -57,7 +59,7 @@ def measure_distances(mrc_path, seg_path, output_folder): # Get the voxel size. _, voxel_size = read_mrc(mrc_path) - resolution = tuple(voxel_size[ax] for ax in "zyx") + resolution = tuple(voxel_size[ax] for ax in "zyx") # Load the segmentations. with h5py.File(seg_path, "r") as f: @@ -98,16 +100,13 @@ def assign_vesicle_pools(output_folder): rav_distances, seg_ids = distances["ribbon"][0], np.array(distances["ribbon"][-1]) rav_ids = seg_ids[rav_distances < rav_ribbon_distance] - + pd_distances, mem_distances = distances["PD"][0], distances["membrane"][0] assert len(pd_distances) == len(mem_distances) == len(rav_distances) mpv_ids = seg_ids[np.logical_and(pd_distances < mpv_pd_distance, mem_distances < mpv_mem_distance)] docked_ids = seg_ids[np.logical_and(pd_distances < docked_pd_distance, mem_distances < docked_mem_distance)] - - # Keep only the vesicle ids that are in one of the three categories. - vesicle_ids = np.unique(np.concatenate([rav_ids, mpv_ids, docked_ids])) - + # Create a dictionary to map vesicle ids to their corresponding pool. # (RA-V get's over-written by MP-V, which is correct). 
pool_assignments = {vid: "RA-V" for vid in rav_ids} @@ -122,13 +121,9 @@ def assign_vesicle_pools(output_folder): def process_tomogram(mrc_path, version): - relative_path = os.path.relpath(mrc_path, INPUT_ROOT) - relative_folder = os.path.split(relative_path)[0] - - output_folder = os.path.join(OUTPUT_ROOT, f"v{version}", relative_folder) + output_path = get_seg_path(mrc_path, version) + output_folder = os.path.split(output_path)[0] os.makedirs(output_folder, exist_ok=True) - relative_name = os.path.splitext(relative_path)[0] - output_path = os.path.join(OUTPUT_ROOT, f"v{version}", f"{relative_name}.h5") process_vesicles(mrc_path, output_path, version) process_ribbon_structures(mrc_path, output_path, version) diff --git a/scripts/otoferlin/check_automatic_result.py b/scripts/otoferlin/check_automatic_result.py index 8a10798..9a4b2d5 100644 --- a/scripts/otoferlin/check_automatic_result.py +++ b/scripts/otoferlin/check_automatic_result.py @@ -5,11 +5,12 @@ import numpy as np import pandas as pd -from common import get_all_tomograms, get_seg_path, get_colormaps -from synaptic_reconstruction.file_utils import read_mrc - +from synapse_net.file_utils import read_mrc +from skimage.exposure import equalize_adapthist from tqdm import tqdm +from common import get_all_tomograms, get_seg_path, get_colormaps + def _get_vesicle_pools(seg, assignment_path): assignments = pd.read_csv(assignment_path) @@ -27,22 +28,27 @@ def _get_vesicle_pools(seg, assignment_path): return pools, colormap -def check_automatic_result(mrc_path): +def check_automatic_result(mrc_path, version, use_clahe=False): - seg_path = get_seg_path(mrc_path) + seg_path = get_seg_path(mrc_path, version) segmentations, colormaps = {}, {} - with h5py.File(seg_path, "r") as f: - g = f["segmentation"] - for name, ds in g.items(): - segmentations[name] = ds[:] + if os.path.exists(seg_path): + with h5py.File(seg_path, "r") as f: + g = f["segmentation"] + for name, ds in g.items(): + segmentations[name] = ds[:] 
output_folder = os.path.split(seg_path)[0] assignment_path = os.path.join(output_folder, "vesicle_pools.csv") if os.path.exists(assignment_path): segmentations["pools"], colormaps["pools"] = _get_vesicle_pools(segmentations["vesicles"], assignment_path) - + tomogram, _ = read_mrc(mrc_path) + if use_clahe: + print("Run CLAHE ...") + tomogram = equalize_adapthist(tomogram, clip_limit=0.03) + print("... done") v = napari.Viewer() v.add_image(tomogram) @@ -58,7 +64,7 @@ def main(): version = 1 tomograms = get_all_tomograms() for tomogram in tqdm(tomograms, desc="Visualize automatic segmentation results"): - check_automatic_result(tomogram) + check_automatic_result(tomogram, version) if __name__: diff --git a/scripts/otoferlin/common.py b/scripts/otoferlin/common.py index a691fe3..55785b0 100644 --- a/scripts/otoferlin/common.py +++ b/scripts/otoferlin/common.py @@ -11,20 +11,31 @@ OUTPUT_ROOT = "/home/ag-wichmann/data/otoferlin/segmentation" STRUCTURE_NAMES = ("ribbon", "PD", "membrane") - + + +def get_folders(): + if os.path.exists(INPUT_ROOT): + return INPUT_ROOT, OUTPUT_ROOT + root_in = "./data/tomograms" + assert os.path.exists(root_in) + root_out = "./data/segmentation" + return root_in, root_out + def get_all_tomograms(): - tomograms = glob(os.path.join(INPUT_ROOT, "**", "*.mrc"), recursive=True) - tomograms += glob(os.path.join(INPUT_ROOT, "**", "*.rec"), recursive=True) + root, _ = get_folders() + tomograms = glob(os.path.join(root, "**", "*.mrc"), recursive=True) + tomograms += glob(os.path.join(root, "**", "*.rec"), recursive=True) tomograms = sorted(tomograms) return tomograms def get_seg_path(mrc_path, version=1): - rel_path = os.path.relpath(mrc_path, INPUT_ROOT) + input_root, output_root = get_folders() + rel_path = os.path.relpath(mrc_path, input_root) rel_folder, fname = os.path.split(rel_path) fname = os.path.splitext(fname)[0] - seg_path = os.path.join(OUTPUT_ROOT, f"v{version}", rel_folder, f"{fname}.h5") + seg_path = os.path.join(output_root, 
f"v{version}", rel_folder, f"{fname}.h5") return seg_path From 504842271cb028998aa43758c9b079c23c0a9dd7 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sat, 7 Dec 2024 21:45:47 +0100 Subject: [PATCH 03/30] Update visualization scripts --- scripts/otoferlin/README.md | 9 +++++++ scripts/otoferlin/check_automatic_result.py | 30 +++++++++++++-------- scripts/otoferlin/common.py | 1 + 3 files changed, 29 insertions(+), 11 deletions(-) create mode 100644 scripts/otoferlin/README.md diff --git a/scripts/otoferlin/README.md b/scripts/otoferlin/README.md new file mode 100644 index 0000000..a96eda3 --- /dev/null +++ b/scripts/otoferlin/README.md @@ -0,0 +1,9 @@ +# Otoferlin Analysis + + +## Notes on improvements: + +- Try less than 20 exclude slices +- Update boundary post-proc (not robust when PD not found and selects wrong objects) +- Can we find fiducials with ilastik and mask them out? They are interfering with Ribbon finding. + - Alternative: just restrict the processing to a center crop by default. diff --git a/scripts/otoferlin/check_automatic_result.py b/scripts/otoferlin/check_automatic_result.py index 9a4b2d5..99b349f 100644 --- a/scripts/otoferlin/check_automatic_result.py +++ b/scripts/otoferlin/check_automatic_result.py @@ -28,28 +28,35 @@ def _get_vesicle_pools(seg, assignment_path): return pools, colormap -def check_automatic_result(mrc_path, version, use_clahe=False): +def check_automatic_result(mrc_path, version, use_clahe=False, center_crop=True, segmentation_group="segmentation"): + tomogram, _ = read_mrc(mrc_path) + if center_crop: + halo = (50, 512, 512) + bb = tuple( + slice(max(sh // 2 - ha, 0), min(sh // 2 + ha, sh)) for sh, ha in zip(tomogram.shape, halo) + ) + tomogram = tomogram[bb] + else: + bb = np.s_[:] - seg_path = get_seg_path(mrc_path, version) + if use_clahe: + print("Run CLAHE ...") + tomogram = equalize_adapthist(tomogram, clip_limit=0.03) + print("... 
done") + seg_path = get_seg_path(mrc_path, version) segmentations, colormaps = {}, {} if os.path.exists(seg_path): with h5py.File(seg_path, "r") as f: - g = f["segmentation"] + g = f[segmentation_group] for name, ds in g.items(): - segmentations[name] = ds[:] + segmentations[name] = ds[bb] output_folder = os.path.split(seg_path)[0] assignment_path = os.path.join(output_folder, "vesicle_pools.csv") - if os.path.exists(assignment_path): + if os.path.exists(assignment_path) and "vesicles" in segmentations: segmentations["pools"], colormaps["pools"] = _get_vesicle_pools(segmentations["vesicles"], assignment_path) - tomogram, _ = read_mrc(mrc_path) - if use_clahe: - print("Run CLAHE ...") - tomogram = equalize_adapthist(tomogram, clip_limit=0.03) - print("... done") - v = napari.Viewer() v.add_image(tomogram) for name, seg in segmentations.items(): @@ -64,6 +71,7 @@ def main(): version = 1 tomograms = get_all_tomograms() for tomogram in tqdm(tomograms, desc="Visualize automatic segmentation results"): + # check_automatic_result(tomogram, version, segmentation_group="vesicles") check_automatic_result(tomogram, version) diff --git a/scripts/otoferlin/common.py b/scripts/otoferlin/common.py index 55785b0..4780305 100644 --- a/scripts/otoferlin/common.py +++ b/scripts/otoferlin/common.py @@ -44,6 +44,7 @@ def get_colormaps(): "RA-V": (0, 0.33, 0), "MP-V": (1.0, 0.549, 0.0), "Docked-V": (1, 1, 0), + None: "Gray", } return {"pools": pool_map} From 964cf2c41d4bd94b87947d6e6034e60aa1ae95dd Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sat, 7 Dec 2024 22:12:55 +0100 Subject: [PATCH 04/30] Add vesicle comparison and domain adaptation --- .../otoferlin/compare_vesicle_segmentation.py | 54 +++++++++++++++ .../train_domain_adaptation.py | 66 +++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 scripts/otoferlin/compare_vesicle_segmentation.py create mode 100644 scripts/otoferlin/domain_adaptation/train_domain_adaptation.py diff --git 
a/scripts/otoferlin/compare_vesicle_segmentation.py b/scripts/otoferlin/compare_vesicle_segmentation.py new file mode 100644 index 0000000..fe33cc1 --- /dev/null +++ b/scripts/otoferlin/compare_vesicle_segmentation.py @@ -0,0 +1,54 @@ +import os + +import h5py + +from skimage.exposure import equalize_adapthist +from synapse_net.inference.vesicles import segment_vesicles +from synapse_net.file_utils import read_mrc +from synapse_net.tools.util import get_model, compute_scale_from_voxel_size, load_custom_model +from tqdm import tqdm + +from common import get_all_tomograms, get_seg_path + + +def compare_vesicles(tomo_path): + seg_path = get_seg_path(tomo_path) + seg_folder = os.path.split(seg_path)[0] + os.makedirs(seg_folder, exist_ok=True) + + model_path = "/mnt/vast-nhr/home/pape41/u12086/inner-ear-da.pt" + for model_type in ("vesicles_3d", "adapted_v1"): + for use_clahe in (False, True): + seg_key = f"vesicles/{model_type}" + if use_clahe: + seg_key += "_clahe" + + if os.path.exists(seg_path): + with h5py.File(seg_path, "r") as f: + if seg_key in f: + continue + + tomogram, voxel_size = read_mrc(tomo_path) + if use_clahe: + tomogram = equalize_adapthist(tomogram, clip_limit=0.03) + + if model_type == "vesicles_3d": + model = get_model(model_type) + scale = compute_scale_from_voxel_size(voxel_size, model_type) + else: + model = load_custom_model(model_path) + scale = compute_scale_from_voxel_size(voxel_size, "ribbon") + + seg = segment_vesicles(tomogram, model=model, scale=scale) + with h5py.File(seg_path, "a") as f: + f.create_dataset(seg_key, data=seg, compression="gzip") + + +def main(): + tomograms = get_all_tomograms() + for tomo in tqdm(tomograms): + compare_vesicles(tomo) + + +if __name__ == "__main__": + main() diff --git a/scripts/otoferlin/domain_adaptation/train_domain_adaptation.py b/scripts/otoferlin/domain_adaptation/train_domain_adaptation.py new file mode 100644 index 0000000..99b32f7 --- /dev/null +++ 
b/scripts/otoferlin/domain_adaptation/train_domain_adaptation.py @@ -0,0 +1,66 @@ +import os +from glob import glob + +import h5py + +from synapse_net.file_utils import read_mrc +from synapse_net.training.domain_adaptation import mean_teacher_adaptation +from synapse_net.tools.util import compute_scale_from_voxel_size +from synapse_net.inference.util import _Scaler + + +# Apply rescaling, depending on what the segmentation comparison shows. +def preprocess_training_data(): + root = "../data/tomograms" + tomograms = glob(os.path.join(root, "**", "*.mrc"), recursive=True) + tomograms += glob(os.path.join(root, "**", "*.rec"), recursive=True) + tomograms = sorted(tomograms) + + train_folder = "./train_data" + os.makedirs(train_folder, exist_ok=True) + + all_paths = [] + for i, tomo_path in enumerate(tomograms): + out_path = os.path.join(train_folder, f"tomo{i}.h5") + if os.path.exists(out_path): + all_paths.append(out_path) + continue + + data, voxel_size = read_mrc(tomo_path) + scale = compute_scale_from_voxel_size(voxel_size, "ribbon") + print("Scale factor:", scale) + scaler = _Scaler(scale, verbose=True) + data = scaler.scale_input(data) + + with h5py.File(out_path, "a") as f: + f.create_dataset("raw", data=data, compression="gzip") + all_paths.append(out_path) + + train_paths, val_paths = all_paths[:-1], all_paths[-1:] + return train_paths, val_paths + + +def train_domain_adaptation(train_paths, val_paths): + model_path = "/mnt/vast-nhr/home/pape41/u12086/inner-ear-da.pt" + model_name = "adapted_otoferlin" + + patch_shape = [48, 384, 384] + mean_teacher_adaptation( + name=model_name, + unsupervised_train_paths=train_paths, + unsupervised_val_paths=val_paths, + raw_key="raw", + patch_shape=patch_shape, + source_checkpoint=model_path, + confidence_threshold=0.75, + n_iterations=int(2.5*1e4), + ) + + +def main(): + train_paths, val_paths = preprocess_training_data() + train_domain_adaptation(train_paths, val_paths) + + +if __name__ == "__main__": + main() From 
10181ced2f7f9a67615238095687f256b7fd7937 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 8 Dec 2024 09:50:53 +0100 Subject: [PATCH 05/30] Update inference for otoferlin WIP --- scripts/otoferlin/automatic_processing.py | 34 +++++++++++---------- scripts/otoferlin/check_automatic_result.py | 11 ++++--- scripts/otoferlin/common.py | 14 +++++++-- synapse_net/tools/util.py | 4 ++- 4 files changed, 40 insertions(+), 23 deletions(-) diff --git a/scripts/otoferlin/automatic_processing.py b/scripts/otoferlin/automatic_processing.py index 8038cc4..0fd4cac 100644 --- a/scripts/otoferlin/automatic_processing.py +++ b/scripts/otoferlin/automatic_processing.py @@ -10,10 +10,10 @@ from synapse_net.tools.util import get_model, compute_scale_from_voxel_size, _segment_ribbon_AZ from tqdm import tqdm -from common import STRUCTURE_NAMES, get_all_tomograms, get_seg_path +from common import STRUCTURE_NAMES, get_all_tomograms, get_seg_path, get_adapted_model -def process_vesicles(mrc_path, output_path, version): +def process_vesicles(mrc_path, output_path): key = "segmentation/vesicles" if os.path.exists(output_path): with h5py.File(output_path, "r") as f: @@ -22,9 +22,8 @@ def process_vesicles(mrc_path, output_path, version): input_, voxel_size = read_mrc(mrc_path) - model_name = "vesicles_3d" - model = get_model(model_name) - scale = compute_scale_from_voxel_size(voxel_size, model_name) + model = get_adapted_model() + scale = compute_scale_from_voxel_size(voxel_size, "ribbon") print("Rescaling volume for vesicle segmentation with factor:", scale) segmentation = segment_vesicles(input_, model=model, scale=scale) @@ -32,7 +31,7 @@ def process_vesicles(mrc_path, output_path, version): f.create_dataset(key, data=segmentation, compression="gzip") -def process_ribbon_structures(mrc_path, output_path, version): +def process_ribbon_structures(mrc_path, output_path): key = "segmentation/ribbon" with h5py.File(output_path, "r") as f: if key in f: @@ -43,13 +42,18 @@ def 
process_ribbon_structures(mrc_path, output_path, version): model_name = "ribbon" model = get_model(model_name) scale = compute_scale_from_voxel_size(voxel_size, model_name) - segmentations = _segment_ribbon_AZ( - input_, model, tiling=None, scale=scale, verbose=True, extra_segmentation=vesicles + segmentations, predictions = _segment_ribbon_AZ( + input_, model, tiling=None, scale=scale, verbose=True, extra_segmentation=vesicles, + return_predictions=True, ) + # TODO binarize the predictions + breakpoint() + with h5py.File(output_path, "a") as f: for name, seg in segmentations.items(): f.create_dataset(f"segmentation/{name}", data=seg, compression="gzip") + f.create_dataset(f"prediction/{name}", data=predictions[name], compression="gzip") def measure_distances(mrc_path, seg_path, output_folder): @@ -120,25 +124,23 @@ def assign_vesicle_pools(output_folder): pool_assignments.to_csv(assignment_path, index=False) -def process_tomogram(mrc_path, version): - output_path = get_seg_path(mrc_path, version) +def process_tomogram(mrc_path): + output_path = get_seg_path(mrc_path) output_folder = os.path.split(output_path)[0] os.makedirs(output_folder, exist_ok=True) - process_vesicles(mrc_path, output_path, version) - process_ribbon_structures(mrc_path, output_path, version) + process_vesicles(mrc_path, output_path) + process_ribbon_structures(mrc_path, output_path) + return measure_distances(mrc_path, output_path, output_folder) assign_vesicle_pools(output_folder) def main(): - # The version of automatic processing. 
Current versions: - # 1: process everything with the synapse net default models - version = 1 tomograms = get_all_tomograms() for tomogram in tqdm(tomograms, desc="Process tomograms"): - process_tomogram(tomogram, version) + process_tomogram(tomogram) if __name__: diff --git a/scripts/otoferlin/check_automatic_result.py b/scripts/otoferlin/check_automatic_result.py index 99b349f..b78b986 100644 --- a/scripts/otoferlin/check_automatic_result.py +++ b/scripts/otoferlin/check_automatic_result.py @@ -66,11 +66,14 @@ def check_automatic_result(mrc_path, version, use_clahe=False, center_crop=True, def main(): - # The version of automatic processing. Current versions: - # 1: process everything with the synapse net default models - version = 1 + version = 2 tomograms = get_all_tomograms() - for tomogram in tqdm(tomograms, desc="Visualize automatic segmentation results"): + for i, tomogram in tqdm( + enumerate(tomograms), total=len(tomograms), desc="Visualize automatic segmentation results" + ): + if i < 3: + continue + print("Checking tomogram", tomogram) # check_automatic_result(tomogram, version, segmentation_group="vesicles") check_automatic_result(tomogram, version) diff --git a/scripts/otoferlin/common.py b/scripts/otoferlin/common.py index 4780305..4cb9fd7 100644 --- a/scripts/otoferlin/common.py +++ b/scripts/otoferlin/common.py @@ -12,6 +12,16 @@ STRUCTURE_NAMES = ("ribbon", "PD", "membrane") +# The version of the automatic segmentation. We have: +# - version 1: using the default models for all structures and the initial version of post-processing. +# - version 2: using the adapted model for vesicles in the otoferlin and updating the post-processing. +VERSION = 2 + + +def get_adapted_model(): + # Path on nhr, need to put the model on the WS and update this. 
+ return "/mnt/vast-nhr/home/pape41/u12086/Work/my_projects/synaptic-reconstruction/scripts/otoferlin/domain_adaptation/checkpoints/otoferlin_da.pt" # noqa + def get_folders(): if os.path.exists(INPUT_ROOT): @@ -30,12 +40,12 @@ def get_all_tomograms(): return tomograms -def get_seg_path(mrc_path, version=1): +def get_seg_path(mrc_path, version=VERSION): input_root, output_root = get_folders() rel_path = os.path.relpath(mrc_path, input_root) rel_folder, fname = os.path.split(rel_path) fname = os.path.splitext(fname)[0] - seg_path = os.path.join(output_root, f"v{version}", rel_folder, f"{fname}.h5") + seg_path = os.path.join(output_root, f"v{VERSION}", rel_folder, f"{fname}.h5") return seg_path diff --git a/synapse_net/tools/util.py b/synapse_net/tools/util.py index 1495112..0160993 100644 --- a/synapse_net/tools/util.py +++ b/synapse_net/tools/util.py @@ -59,7 +59,7 @@ def get_model(model_type: str, device: Optional[Union[str, torch.device]] = None return model -def _segment_ribbon_AZ(image, model, tiling, scale, verbose, **kwargs): +def _segment_ribbon_AZ(image, model, tiling, scale, verbose, return_predictions=False, **kwargs): # Parse additional keyword arguments from the kwargs. 
vesicles = kwargs.pop("extra_segmentation") threshold = kwargs.pop("threshold", 0.5) @@ -94,6 +94,8 @@ def _segment_ribbon_AZ(image, model, tiling, scale, verbose, **kwargs): segmentation = {"ribbon": ribbon, "PD": PD, "membrane": membrane} + if return_predictions: + return predictions, segmentation return segmentation From b6e223f96bf96e26d0a0e05daf20e0d7d53a3bb7 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 8 Dec 2024 14:33:08 +0100 Subject: [PATCH 06/30] Update otoferlin inference WIP --- scripts/otoferlin/automatic_processing.py | 6 +++--- scripts/otoferlin/common.py | 6 +++++- scripts/otoferlin/compare_vesicle_segmentation.py | 8 ++++++-- synapse_net/tools/util.py | 13 +++++++++---- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/scripts/otoferlin/automatic_processing.py b/scripts/otoferlin/automatic_processing.py index 0fd4cac..d65bf95 100644 --- a/scripts/otoferlin/automatic_processing.py +++ b/scripts/otoferlin/automatic_processing.py @@ -47,9 +47,6 @@ def process_ribbon_structures(mrc_path, output_path): return_predictions=True, ) - # TODO binarize the predictions - breakpoint() - with h5py.File(output_path, "a") as f: for name, seg in segmentations.items(): f.create_dataset(f"segmentation/{name}", data=seg, compression="gzip") @@ -132,6 +129,9 @@ def process_tomogram(mrc_path): process_vesicles(mrc_path, output_path) process_ribbon_structures(mrc_path, output_path) return + # TODO vesicle post-processing: + # snap to boundaries? + # remove vesicles in ribbon measure_distances(mrc_path, output_path, output_folder) assign_vesicle_pools(output_folder) diff --git a/scripts/otoferlin/common.py b/scripts/otoferlin/common.py index 4cb9fd7..bb00af5 100644 --- a/scripts/otoferlin/common.py +++ b/scripts/otoferlin/common.py @@ -1,6 +1,8 @@ import os from glob import glob +from synapse_net.tools.util import load_custom_model + # These are the files just for the test data. 
# INPUT_ROOT = "/home/ag-wichmann/data/test-data/tomograms" @@ -20,7 +22,9 @@ def get_adapted_model(): # Path on nhr, need to put the model on the WS and update this. - return "/mnt/vast-nhr/home/pape41/u12086/Work/my_projects/synaptic-reconstruction/scripts/otoferlin/domain_adaptation/checkpoints/otoferlin_da.pt" # noqa + model_path = "/mnt/vast-nhr/home/pape41/u12086/Work/my_projects/synaptic-reconstruction/scripts/otoferlin/domain_adaptation/checkpoints/otoferlin_da.pt" # noqa + model = load_custom_model(model_path) + return model def get_folders(): diff --git a/scripts/otoferlin/compare_vesicle_segmentation.py b/scripts/otoferlin/compare_vesicle_segmentation.py index fe33cc1..555947d 100644 --- a/scripts/otoferlin/compare_vesicle_segmentation.py +++ b/scripts/otoferlin/compare_vesicle_segmentation.py @@ -16,8 +16,11 @@ def compare_vesicles(tomo_path): seg_folder = os.path.split(seg_path)[0] os.makedirs(seg_folder, exist_ok=True) - model_path = "/mnt/vast-nhr/home/pape41/u12086/inner-ear-da.pt" - for model_type in ("vesicles_3d", "adapted_v1"): + model_paths = { + "adapted_v1": "/mnt/vast-nhr/home/pape41/u12086/inner-ear-da.pt", + "adapted_v2": "./domain_adaptation/checkpoints/otoferlin_da.pt" + } + for model_type in ("vesicles_3d", "adapted_v1", "adapted_v2"): for use_clahe in (False, True): seg_key = f"vesicles/{model_type}" if use_clahe: @@ -36,6 +39,7 @@ def compare_vesicles(tomo_path): model = get_model(model_type) scale = compute_scale_from_voxel_size(voxel_size, model_type) else: + model_path = model_paths[model_type] model = load_custom_model(model_path) scale = compute_scale_from_voxel_size(voxel_size, "ribbon") diff --git a/synapse_net/tools/util.py b/synapse_net/tools/util.py index 0160993..7a0d149 100644 --- a/synapse_net/tools/util.py +++ b/synapse_net/tools/util.py @@ -72,7 +72,9 @@ def _segment_ribbon_AZ(image, model, tiling, scale, verbose, return_predictions= # If the vesicles were passed then run additional post-processing. 
if vesicles is None: - segmentation = predictions + if verbose: + print("Vesicle segmentation was not passed, WILL NOT run post-processing.") + segmentations = predictions # Otherwise, just return the predictions. else: @@ -80,6 +82,9 @@ def _segment_ribbon_AZ(image, model, tiling, scale, verbose, return_predictions= segment_ribbon, segment_presynaptic_density, segment_membrane_distance_based, ) + if verbose: + print("Vesicle segmentation was passed, WILL run post-processing.") + ribbon = segment_ribbon( predictions["ribbon"], vesicles, n_slices_exclude=n_slices_exclude, n_ribbons=n_ribbons, max_vesicle_distance=40, @@ -92,11 +97,11 @@ def _segment_ribbon_AZ(image, model, tiling, scale, verbose, return_predictions= predictions["membrane"], ref_segmentation, max_distance=500, n_slices_exclude=n_slices_exclude, ) - segmentation = {"ribbon": ribbon, "PD": PD, "membrane": membrane} + segmentations = {"ribbon": ribbon, "PD": PD, "membrane": membrane} if return_predictions: - return predictions, segmentation - return segmentation + return segmentations, predictions + return segmentations def run_segmentation( From fd11ee8f2b9612470e2e099028debb97cfcb3516 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 8 Dec 2024 14:33:53 +0100 Subject: [PATCH 07/30] Add script for structure postprocessing --- .../check_structure_postprocessing.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 scripts/otoferlin/check_structure_postprocessing.py diff --git a/scripts/otoferlin/check_structure_postprocessing.py b/scripts/otoferlin/check_structure_postprocessing.py new file mode 100644 index 0000000..52b8e70 --- /dev/null +++ b/scripts/otoferlin/check_structure_postprocessing.py @@ -0,0 +1,53 @@ +import os + +import h5py +import napari +import numpy as np + +from synapse_net.file_utils import read_mrc +from tqdm import tqdm + +from common import get_seg_path, get_all_tomograms, STRUCTURE_NAMES + + +def check_structure_postprocessing(mrc_path, 
def check_structure_postprocessing(mrc_path, center_crop=True):
    """Open a tomogram together with its structure segmentations and network
    predictions in napari for visual inspection.

    Args:
        mrc_path: Path to the tomogram (.mrc / .rec file).
        center_crop: Whether to restrict the view to a central crop with
            half-widths (50, 512, 512) in (z, y, x) around the volume center.
    """
    tomogram, _ = read_mrc(mrc_path)
    if center_crop:
        halo = (50, 512, 512)
        bb = tuple(
            slice(max(sh // 2 - ha, 0), min(sh // 2 + ha, sh)) for sh, ha in zip(tomogram.shape, halo)
        )
        tomogram = tomogram[bb]
    else:
        bb = np.s_[:]

    seg_path = get_seg_path(mrc_path)
    assert os.path.exists(seg_path)

    # NOTE(review): `colormaps` is never populated, so `colormaps.get(name)`
    # below always yields None and napari falls back to its default colors.
    segmentations, predictions, colormaps = {}, {}, {}
    with h5py.File(seg_path, "r") as f:
        g = f["segmentation"]
        for name in STRUCTURE_NAMES:
            segmentations[name] = g[name][bb]
        g = f["prediction"]
        for name in STRUCTURE_NAMES:
            predictions[name] = g[name][bb]

    v = napari.Viewer()
    v.add_image(tomogram)
    for name, seg in segmentations.items():
        v.add_labels(seg, name=name, colormap=colormaps.get(name))
    for name, seg in predictions.items():
        v.add_labels(seg, name=name, colormap=colormaps.get(name))
    v.title = os.path.basename(mrc_path)
    napari.run()


def main():
    """Visually check the structure post-processing for all tomograms."""
    tomograms = get_all_tomograms()
    for tomogram in tqdm(tomograms, total=len(tomograms), desc="Check structure postproc"):
        check_structure_postprocessing(tomogram)


# FIX: `if __name__:` is always truthy and would launch the viewer on import.
if __name__ == "__main__":
    main()
def _get_center_crop(input_, halo_xy=(600, 600)):
    """Compute a bounding box for a central xy-crop of a 3d volume.

    The crop spans the full z-extent and a window of +- halo around the
    center in y and x, clipped to the volume boundaries.

    Args:
        input_: The 3d input volume with axis order (z, y, x).
        halo_xy: Half-widths of the crop in y and x. Defaults to (600, 600),
            the value that was previously hard-coded here.

    Returns:
        A tuple of (bounding box slices, shape of the full input volume).
    """
    bb_xy = tuple(
        slice(max(sh // 2 - ha, 0), min(sh // 2 + ha, sh)) for sh, ha in zip(input_.shape[1:], halo_xy)
    )
    bb = (np.s_[:],) + bb_xy
    return bb, input_.shape


def _get_tiling():
    """Return the tile and halo shapes used for block-wise inference."""
    tile = {"x": 768, "y": 768, "z": 64}
    halo = {"x": 128, "y": 128, "z": 8}
    return {"tile": tile, "halo": halo}


def process_vesicles(mrc_path, output_path, process_center_crop):
    """Run vesicle segmentation for one tomogram and store it in the output file.

    Does nothing if 'segmentation/vesicles' already exists in the output file.

    Args:
        mrc_path: Path to the input tomogram.
        output_path: Path to the h5 output file for the segmentation.
        process_center_crop: Whether to restrict processing to a central crop.
    """
    key = "segmentation/vesicles"
    if os.path.exists(output_path):
        with h5py.File(output_path, "r") as f:
            if key in f:  # This tomogram was already segmented.
                return

    input_, voxel_size = read_mrc(mrc_path)
    if process_center_crop:
        bb, full_shape = _get_center_crop(input_)
        input_ = input_[bb]

    model = get_adapted_model()
    # The scale is derived from the "ribbon" model settings — presumably the
    # adapted vesicle model expects the same rescaling; TODO confirm.
    scale = compute_scale_from_voxel_size(voxel_size, "ribbon")
    print("Rescaling volume for vesicle segmentation with factor:", scale)
    tiling = _get_tiling()
    segmentation = segment_vesicles(input_, model=model, scale=scale, tiling=tiling)

    if process_center_crop:
        # Embed the cropped segmentation back into the full tomogram shape.
        full_seg = np.zeros(full_shape, dtype=segmentation.dtype)
        full_seg[bb] = segmentation
        segmentation = full_seg

    with h5py.File(output_path, "a") as f:
        f.create_dataset(key, data=segmentation, compression="gzip")
scale=scale, verbose=True, extra_segmentation=vesicles, - return_predictions=True, + input_, model, tiling=tiling, scale=scale, verbose=True, extra_segmentation=vesicles, + return_predictions=True, n_slices_exclude=5, ) + if process_center_crop: + for name, seg in segmentations: + full_seg = np.zeros(full_shape, dtype=seg.dtype) + full_seg[bb] = seg + segmentations[name] = full_seg + for name, pred in predictions: + full_pred = np.zeros(full_shape, dtype=seg.dtype) + full_pred[bb] = pred + predictions[name] = full_pred + with h5py.File(output_path, "a") as f: for name, seg in segmentations.items(): f.create_dataset(f"segmentation/{name}", data=seg, compression="gzip") @@ -126,8 +165,10 @@ def process_tomogram(mrc_path): output_folder = os.path.split(output_path)[0] os.makedirs(output_folder, exist_ok=True) - process_vesicles(mrc_path, output_path) - process_ribbon_structures(mrc_path, output_path) + process_center_crop = True + + process_vesicles(mrc_path, output_path, process_center_crop) + process_ribbon_structures(mrc_path, output_path, process_center_crop) return # TODO vesicle post-processing: # snap to boundaries? 
diff --git a/scripts/otoferlin/check_automatic_result.py b/scripts/otoferlin/check_automatic_result.py index b78b986..04aba4c 100644 --- a/scripts/otoferlin/check_automatic_result.py +++ b/scripts/otoferlin/check_automatic_result.py @@ -51,6 +51,7 @@ def check_automatic_result(mrc_path, version, use_clahe=False, center_crop=True, g = f[segmentation_group] for name, ds in g.items(): segmentations[name] = ds[bb] + colormaps[name] = get_colormaps().get(name, None) output_folder = os.path.split(seg_path)[0] assignment_path = os.path.join(output_folder, "vesicle_pools.csv") @@ -71,11 +72,9 @@ def main(): for i, tomogram in tqdm( enumerate(tomograms), total=len(tomograms), desc="Visualize automatic segmentation results" ): - if i < 3: - continue print("Checking tomogram", tomogram) # check_automatic_result(tomogram, version, segmentation_group="vesicles") - check_automatic_result(tomogram, version) + check_automatic_result(tomogram, version, segmentation_group="prediction") if __name__: diff --git a/scripts/otoferlin/check_structure_postprocessing.py b/scripts/otoferlin/check_structure_postprocessing.py index 52b8e70..00e4d2d 100644 --- a/scripts/otoferlin/check_structure_postprocessing.py +++ b/scripts/otoferlin/check_structure_postprocessing.py @@ -7,7 +7,7 @@ from synapse_net.file_utils import read_mrc from tqdm import tqdm -from common import get_seg_path, get_all_tomograms, STRUCTURE_NAMES +from common import get_seg_path, get_all_tomograms, get_colormaps, STRUCTURE_NAMES def check_structure_postprocessing(mrc_path, center_crop=True): @@ -28,17 +28,18 @@ def check_structure_postprocessing(mrc_path, center_crop=True): with h5py.File(seg_path, "r") as f: g = f["segmentation"] for name in STRUCTURE_NAMES: - segmentations[name] = g[name][bb] + segmentations[f"seg/{name}"] = g[name][bb] g = f["prediction"] for name in STRUCTURE_NAMES: - predictions[name] = g[name][bb] + predictions[f"pred/{name}"] = g[name][bb] + colormaps[name] = get_colormaps().get(name, None) v = 
napari.Viewer() v.add_image(tomogram) for name, seg in segmentations.items(): - v.add_labels(seg, name=name, colormap=colormaps.get(name)) + v.add_labels(seg, name=name, colormap=colormaps.get(name.split("/")[1])) for name, seg in predictions.items(): - v.add_labels(seg, name=name, colormap=colormaps.get(name)) + v.add_labels(seg, name=name, colormap=colormaps.get(name.split("/")[1]), visible=False) v.title = os.path.basename(mrc_path) napari.run() @@ -46,6 +47,7 @@ def check_structure_postprocessing(mrc_path, center_crop=True): def main(): tomograms = get_all_tomograms() for i, tomogram in tqdm(enumerate(tomograms), total=len(tomograms), desc="Check structure postproc"): + print(tomogram) check_structure_postprocessing(tomogram) diff --git a/scripts/otoferlin/common.py b/scripts/otoferlin/common.py index bb00af5..d1c4f4e 100644 --- a/scripts/otoferlin/common.py +++ b/scripts/otoferlin/common.py @@ -58,9 +58,11 @@ def get_colormaps(): "RA-V": (0, 0.33, 0), "MP-V": (1.0, 0.549, 0.0), "Docked-V": (1, 1, 0), - None: "Gray", + None: "gray", } - return {"pools": pool_map} + membrane_map = {1: "purple", None: "gray"} + pd_map = {1: "magenta", None: "gray"} + return {"pools": pool_map, "membrane": membrane_map, "PD": pd_map} # TODO: sync the ukon folder with the tomograms. 
def _ribbon_AZ_postprocessing(predictions, vesicles, n_slices_exclude, n_ribbons):
    """Derive ribbon, PD and membrane segmentations from the network predictions.

    The vesicle segmentation serves as auxiliary input for the ribbon
    post-processing; each subsequent structure is segmented conditioned on
    the previously processed ones.
    """
    # Imported lazily so module load does not pull in the postprocessing stack.
    from synapse_net.inference.postprocessing import (
        segment_ribbon, segment_presynaptic_density, segment_membrane_distance_based,
    )

    ribbon_seg = segment_ribbon(
        predictions["ribbon"], vesicles, n_slices_exclude=n_slices_exclude, n_ribbons=n_ribbons,
        max_vesicle_distance=40,
    )
    pd_seg = segment_presynaptic_density(
        predictions["PD"], ribbon_seg, n_slices_exclude=n_slices_exclude, max_distance_to_ribbon=40,
    )
    # Fall back to the ribbon as reference when no presynaptic density was found.
    reference = ribbon_seg if pd_seg.sum() == 0 else pd_seg
    membrane_seg = segment_membrane_distance_based(
        predictions["membrane"], reference, max_distance=500, n_slices_exclude=n_slices_exclude,
    )

    return {"ribbon": ribbon_seg, "PD": pd_seg, "membrane": membrane_seg}
else: - from synapse_net.inference.postprocessing import ( - segment_ribbon, segment_presynaptic_density, segment_membrane_distance_based, - ) - if verbose: print("Vesicle segmentation was passed, WILL run post-processing.") - - ribbon = segment_ribbon( - predictions["ribbon"], vesicles, n_slices_exclude=n_slices_exclude, n_ribbons=n_ribbons, - max_vesicle_distance=40, - ) - PD = segment_presynaptic_density( - predictions["PD"], ribbon, n_slices_exclude=n_slices_exclude, max_distance_to_ribbon=40, - ) - ref_segmentation = PD if PD.sum() > 0 else ribbon - membrane = segment_membrane_distance_based( - predictions["membrane"], ref_segmentation, max_distance=500, n_slices_exclude=n_slices_exclude, - ) - - segmentations = {"ribbon": ribbon, "PD": PD, "membrane": membrane} + segmentations = _ribbon_AZ_postprocessing(predictions, vesicles, n_slices_exclude, n_ribbons) if return_predictions: return segmentations, predictions From 224ef6ced5315dc1a9a3c1887f38af660c178bda Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 8 Dec 2024 16:14:58 +0100 Subject: [PATCH 09/30] Fix issues in structure processing --- scripts/otoferlin/automatic_processing.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/otoferlin/automatic_processing.py b/scripts/otoferlin/automatic_processing.py index cd6dec3..cdfc123 100644 --- a/scripts/otoferlin/automatic_processing.py +++ b/scripts/otoferlin/automatic_processing.py @@ -23,7 +23,7 @@ def _get_center_crop(input_): def _get_tiling(): - tile = {"x": 768, "y": 768, "z": 64} + tile = {"x": 768, "y": 768, "z": 48} halo = {"x": 128, "y": 128, "z": 8} return {"tile": tile, "halo": halo} @@ -65,23 +65,25 @@ def process_ribbon_structures(mrc_path, output_path, process_center_crop): input_, voxel_size = read_mrc(mrc_path) if process_center_crop: bb, full_shape = _get_center_crop(input_) - input_ = input_[bb] + input_, vesicles = input_[bb], vesicles[bb] + assert input_.shape == vesicles.shape model_name = 
def process_ribbon_structures(mrc_path, output_path, process_center_crop):
    """Segment ribbon, PD and membrane for one tomogram and store the results.

    Requires that the vesicle segmentation was already computed and stored at
    'segmentation/vesicles' in the output file, since it is passed as extra
    input to the ribbon post-processing.

    Args:
        mrc_path: Path to the input tomogram.
        output_path: Path to the h5 file holding the segmentation results.
        process_center_crop: Whether to restrict processing to a central crop.
    """
    key = "segmentation/ribbon"
    with h5py.File(output_path, "r") as f:
        if key in f:  # This tomogram was already processed.
            return
        vesicles = f["segmentation/vesicles"][:]

    input_, voxel_size = read_mrc(mrc_path)
    if process_center_crop:
        bb, full_shape = _get_center_crop(input_)
        input_, vesicles = input_[bb], vesicles[bb]
        assert input_.shape == vesicles.shape

    model_name = "ribbon"
    model = get_model(model_name)
    scale = compute_scale_from_voxel_size(voxel_size, model_name)
    tiling = _get_tiling()

    segmentations, predictions = _segment_ribbon_AZ(
        input_, model, tiling=tiling, scale=scale, verbose=True, extra_segmentation=vesicles,
        return_predictions=True, n_slices_exclude=5,
    )

    if process_center_crop:
        # Embed the cropped results back into volumes of the full tomogram shape.
        for name, seg in segmentations.items():
            full_seg = np.zeros(full_shape, dtype=seg.dtype)
            full_seg[bb] = seg
            segmentations[name] = full_seg
        for name, pred in predictions.items():
            # FIX: use the dtype of the prediction. The original used
            # `seg.dtype`, the stale loop variable from the segmentation
            # loop above, which could silently change the prediction dtype.
            full_pred = np.zeros(full_shape, dtype=pred.dtype)
            full_pred[bb] = pred
            predictions[name] = full_pred

    with h5py.File(output_path, "a") as f:
        for name, seg in segmentations.items():
            f.create_dataset(f"segmentation/{name}", data=seg, compression="gzip")
            f.create_dataset(f"prediction/{name}", data=predictions[name], compression="gzip")
# These are tomograms for which the sophisticated membrane processing fails.
# In this case, we just select the largest boundary piece.
SIMPLE_MEM_POSTPROCESSING = [
    "Otof_TDAKO1blockA_GridN5_2_rec.mrc", "Otof_TDAKO2blockC_GridF5_1_rec.mrc", "Otof_TDAKO2blockC_GridF5_2_rec.mrc"
]


def _simple_membrane_postprocessing(membrane_prediction):
    """Select the largest connected component of the membrane prediction.

    Fallback for tomograms where the distance-based membrane post-processing
    fails (listed in SIMPLE_MEM_POSTPROCESSING).

    Args:
        membrane_prediction: The binary membrane prediction volume.

    Returns:
        A uint8 mask of the largest connected component; all zeros if the
        prediction contains no foreground.
    """
    seg = label(membrane_prediction)
    ids, sizes = np.unique(seg, return_counts=True)
    # Discard the background component (id 0).
    ids, sizes = ids[1:], sizes[1:]
    # FIX: guard against an empty prediction — np.argmax on an empty array
    # raises a ValueError.
    if len(ids) == 0:
        return np.zeros_like(seg, dtype="uint8")
    return (seg == ids[np.argmax(sizes)]).astype("uint8")
def postprocess_vesicles(mrc_path, output_path, process_center_crop):
    """Post-process the vesicle segmentation: remove small fragments and
    vesicles that are too far away from the ribbon or membrane.

    The result is written to 'segmentation/veiscles_postprocessed' in the
    output file. NOTE: the typo in the key is kept on purpose, since other
    scripts already reference segmentations stored under this name.

    Args:
        mrc_path: Path to the input tomogram (only read for its voxel size).
        output_path: Path to the h5 file holding the segmentation results.
        process_center_crop: Whether to restrict processing to a central crop.
    """
    key = "segmentation/veiscles_postprocessed"
    with h5py.File(output_path, "r") as f:
        if key in f:  # The post-processing was already run.
            return
        vesicles = f["segmentation/vesicles"][:]
        if process_center_crop:
            bb, full_shape = _get_center_crop(vesicles)
            vesicles = vesicles[bb]
        else:
            bb = np.s_[:]

        ribbon = f["segmentation/ribbon"][bb]
        membrane = f["segmentation/membrane"][bb]

    # Filter out small vesicle fragments.
    min_size = 5000
    ids, sizes = np.unique(vesicles, return_counts=True)
    ids, sizes = ids[1:], sizes[1:]
    filter_ids = ids[sizes < min_size]
    vesicles[np.isin(vesicles, filter_ids)] = 0

    # FIX: only the voxel size is needed from the tomogram; the original also
    # kept and cropped the image data, which was never used afterwards.
    _, voxel_size = read_mrc(mrc_path)
    voxel_size = tuple(voxel_size[ax] for ax in "zyx")

    # Filter out all vesicles farther than 120 nm from the membrane or ribbon.
    max_dist = 120
    seg = (ribbon + membrane) > 0
    distances, _, _, seg_ids = measure_segmentation_to_object_distances(vesicles, seg, resolution=voxel_size)
    filter_ids = seg_ids[distances > max_dist]
    vesicles[np.isin(vesicles, filter_ids)] = 0

    vesicles, _, _ = relabel_sequential(vesicles)

    if process_center_crop:
        # Embed the cropped result back into the full tomogram shape.
        full_seg = np.zeros(full_shape, dtype=vesicles.dtype)
        full_seg[bb] = vesicles
        vesicles = full_seg
    with h5py.File(output_path, "a") as f:
        f.create_dataset(key, data=vesicles, compression="gzip")
+ + # measure_distances(mrc_path, output_path, output_folder) + # assign_vesicle_pools(output_folder) def main(): @@ -185,6 +251,17 @@ def main(): for tomogram in tqdm(tomograms, desc="Process tomograms"): process_tomogram(tomogram) + # Update the membrane postprocessing for the tomograms where this went wrong. + # for tomo in tqdm(tomograms, desc="Fix membrame postprocesing"): + # if os.path.basename(tomo) not in SIMPLE_MEM_POSTPROCESSING: + # continue + # seg_path = get_seg_path(tomo) + # with h5py.File(seg_path, "r") as f: + # pred = f["prediction/membrane"][:] + # seg = _simple_membrane_postprocessing(pred) + # with h5py.File(seg_path, "a") as f: + # f["segmentation/membrane"][:] = seg + if __name__: main() diff --git a/scripts/otoferlin/check_automatic_result.py b/scripts/otoferlin/check_automatic_result.py index 04aba4c..bf1f4c2 100644 --- a/scripts/otoferlin/check_automatic_result.py +++ b/scripts/otoferlin/check_automatic_result.py @@ -73,8 +73,9 @@ def main(): enumerate(tomograms), total=len(tomograms), desc="Visualize automatic segmentation results" ): print("Checking tomogram", tomogram) + check_automatic_result(tomogram, version) # check_automatic_result(tomogram, version, segmentation_group="vesicles") - check_automatic_result(tomogram, version, segmentation_group="prediction") + # check_automatic_result(tomogram, version, segmentation_group="prediction") if __name__: diff --git a/scripts/otoferlin/check_structure_postprocessing.py b/scripts/otoferlin/check_structure_postprocessing.py index 00e4d2d..6b8d4de 100644 --- a/scripts/otoferlin/check_structure_postprocessing.py +++ b/scripts/otoferlin/check_structure_postprocessing.py @@ -29,17 +29,18 @@ def check_structure_postprocessing(mrc_path, center_crop=True): g = f["segmentation"] for name in STRUCTURE_NAMES: segmentations[f"seg/{name}"] = g[name][bb] + colormaps[name] = get_colormaps().get(name, None) + g = f["prediction"] for name in STRUCTURE_NAMES: predictions[f"pred/{name}"] = g[name][bb] - 
colormaps[name] = get_colormaps().get(name, None) v = napari.Viewer() v.add_image(tomogram) for name, seg in segmentations.items(): v.add_labels(seg, name=name, colormap=colormaps.get(name.split("/")[1])) - for name, seg in predictions.items(): - v.add_labels(seg, name=name, colormap=colormaps.get(name.split("/")[1]), visible=False) + for name, pred in predictions.items(): + v.add_labels(pred, name=name, colormap=colormaps.get(name.split("/")[1]), visible=False) v.title = os.path.basename(mrc_path) napari.run() diff --git a/synapse_net/ground_truth/shape_refinement.py b/synapse_net/ground_truth/shape_refinement.py index 8c357ae..26e8e56 100644 --- a/synapse_net/ground_truth/shape_refinement.py +++ b/synapse_net/ground_truth/shape_refinement.py @@ -203,6 +203,7 @@ def refine_individual_vesicle_shapes( edge_map: np.ndarray, foreground_erosion: int = 4, background_erosion: int = 8, + compactness: float = 0.5, ) -> np.ndarray: """Refine vesicle shapes by fitting vesicles to a boundary map. @@ -215,6 +216,8 @@ def refine_individual_vesicle_shapes( You can use `edge_filter` to compute this based on the tomogram. foreground_erosion: By how many pixels the foreground should be eroded in the seeds. background_erosion: By how many pixels the background should be eroded in the seeds. + compactness: The compactness parameter passed to the watershed function. + Higher compactness leads to more regular sized vesicles. Returns: The refined vesicles. """ @@ -250,7 +253,7 @@ def fit_vesicle(prop): # Run seeded watershed to fit the shapes. 
seeds = fg_seed + 2 * bg_seed - seg[z] = watershed(hmap[z], seeds) == 1 + seg[z] = watershed(hmap[z], seeds, compactness=compactness) == 1 # import napari # v = napari.Viewer() From 3bcca740ef65c70845301a822c1f0cefac7277ad Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 8 Dec 2024 23:11:18 +0100 Subject: [PATCH 11/30] Add first version of correction script --- .../correct_structure_segmentation.py | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 scripts/otoferlin/correct_structure_segmentation.py diff --git a/scripts/otoferlin/correct_structure_segmentation.py b/scripts/otoferlin/correct_structure_segmentation.py new file mode 100644 index 0000000..ab5cebc --- /dev/null +++ b/scripts/otoferlin/correct_structure_segmentation.py @@ -0,0 +1,64 @@ +import os +from glob import glob + +import imageio.v3 as imageio +import h5py +import mrcfile +import napari +import numpy as np + +# TODO refactor everything once things are merged +ROOT = "/home/ag-wichmann/data/otoferlin/tomograms" +if not os.path.exists(ROOT): + ROOT = "./data/tomograms" + +SEG_ROOT = "./segmentation/v2" + + +def correct_structure_segmentation(mrc_path): + rel_path = os.path.relpath(mrc_path, ROOT) + rel_folder, fname = os.path.split(rel_path) + fname = os.path.splitext(fname)[0] + seg_path = os.path.join(SEG_ROOT, rel_folder, f"{fname}.h5") + + with mrcfile.open(mrc_path, permissive=True) as mrc: + data = np.asarray(mrc.data[:]) + data = np.flip(data, axis=1) + + correction_folder = os.path.join(SEG_ROOT, rel_folder, "correction") + + names = ("ribbon", "PD", "membrane", "veiscles_postprocessed") + segmentations = {} + with h5py.File(seg_path, "r") as f: + for name in names: + correction_path = os.path.join(correction_folder, f"{name}.tif") + if os.path.exists(correction_path): + print("Loading segmentation for", name, "from", correction_path) + segmentations[name] = imageio.imread(correction_path) + else: + segmentations[name] = f[f"segmentation/{name}"][:] + 
color_maps = { + "ribbon": {1: "red", None: "gray"}, + "PD": {1: "purple", None: "gray"}, + "membrane": {1: "magenta", None: "gray"}, + } + + v = napari.Viewer() + v.add_image(data) + for name, seg in segmentations.items(): + v.add_labels(seg, name=name, colormap=color_maps.get(name, None)) + v.title = fname + napari.run() + + +def main(): + tomograms = glob(os.path.join(ROOT, "**", "*.mrc"), recursive=True) + tomograms += glob(os.path.join(ROOT, "**", "*.rec"), recursive=True) + tomograms = sorted(tomograms) + + for tomo in tomograms: + correct_structure_segmentation(tomo) + + +if __name__ == "__main__": + main() From 70a585601f8c78f8d154de4b7ec98747bbfca3f2 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Mon, 9 Dec 2024 17:16:05 +0100 Subject: [PATCH 12/30] Update otoferlin correction script --- scripts/otoferlin/common.py | 2 +- .../correct_structure_segmentation.py | 30 +++++-------------- 2 files changed, 9 insertions(+), 23 deletions(-) diff --git a/scripts/otoferlin/common.py b/scripts/otoferlin/common.py index d1c4f4e..c2b4f36 100644 --- a/scripts/otoferlin/common.py +++ b/scripts/otoferlin/common.py @@ -10,7 +10,7 @@ # These are the otoferlin tomograms. 
INPUT_ROOT = "/home/ag-wichmann/data/otoferlin/tomograms" -OUTPUT_ROOT = "/home/ag-wichmann/data/otoferlin/segmentation" +OUTPUT_ROOT = "./segmentation" STRUCTURE_NAMES = ("ribbon", "PD", "membrane") diff --git a/scripts/otoferlin/correct_structure_segmentation.py b/scripts/otoferlin/correct_structure_segmentation.py index ab5cebc..267d863 100644 --- a/scripts/otoferlin/correct_structure_segmentation.py +++ b/scripts/otoferlin/correct_structure_segmentation.py @@ -1,31 +1,20 @@ import os -from glob import glob +from pathlib import Path import imageio.v3 as imageio import h5py -import mrcfile import napari -import numpy as np -# TODO refactor everything once things are merged -ROOT = "/home/ag-wichmann/data/otoferlin/tomograms" -if not os.path.exists(ROOT): - ROOT = "./data/tomograms" - -SEG_ROOT = "./segmentation/v2" +from synapse_net.file_utils import read_mrc +from common import get_all_tomograms, get_seg_path def correct_structure_segmentation(mrc_path): - rel_path = os.path.relpath(mrc_path, ROOT) - rel_folder, fname = os.path.split(rel_path) - fname = os.path.splitext(fname)[0] - seg_path = os.path.join(SEG_ROOT, rel_folder, f"{fname}.h5") - - with mrcfile.open(mrc_path, permissive=True) as mrc: - data = np.asarray(mrc.data[:]) - data = np.flip(data, axis=1) + seg_path = get_seg_path(mrc_path) - correction_folder = os.path.join(SEG_ROOT, rel_folder, "correction") + data, _ = read_mrc(mrc_path) + correction_folder = os.path.join(os.path.split(seg_path)[0], "correction") + fname = Path(mrc_path).stem names = ("ribbon", "PD", "membrane", "veiscles_postprocessed") segmentations = {} @@ -52,10 +41,7 @@ def correct_structure_segmentation(mrc_path): def main(): - tomograms = glob(os.path.join(ROOT, "**", "*.mrc"), recursive=True) - tomograms += glob(os.path.join(ROOT, "**", "*.rec"), recursive=True) - tomograms = sorted(tomograms) - + tomograms = get_all_tomograms() for tomo in tomograms: correct_structure_segmentation(tomo) From 
03dc0317b1943cd7146d448967d55cec91155592 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Mon, 9 Dec 2024 17:29:24 +0100 Subject: [PATCH 13/30] Add path to DA model on the WS --- scripts/otoferlin/common.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/otoferlin/common.py b/scripts/otoferlin/common.py index c2b4f36..17d7617 100644 --- a/scripts/otoferlin/common.py +++ b/scripts/otoferlin/common.py @@ -21,8 +21,10 @@ def get_adapted_model(): - # Path on nhr, need to put the model on the WS and update this. - model_path = "/mnt/vast-nhr/home/pape41/u12086/Work/my_projects/synaptic-reconstruction/scripts/otoferlin/domain_adaptation/checkpoints/otoferlin_da.pt" # noqa + # Path on nhr. + # model_path = "/mnt/vast-nhr/home/pape41/u12086/Work/my_projects/synaptic-reconstruction/scripts/otoferlin/domain_adaptation/checkpoints/otoferlin_da.pt" # noqa + # Path on the Workstation. + model_path = "/home/ag-wichmann/Downloads/otoferlin_da.pt" model = load_custom_model(model_path) return model From 69d502117abdea3d940296af612f01e18a7672e3 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Tue, 10 Dec 2024 19:30:48 +0100 Subject: [PATCH 14/30] Implement vesicle pool correction WIP --- scripts/otoferlin/automatic_processing.py | 80 +----------- scripts/otoferlin/check_automatic_result.py | 22 ---- scripts/otoferlin/common.py | 31 +++-- .../correct_structure_segmentation.py | 28 +---- scripts/otoferlin/correct_vesicle_pools.py | 100 +++++++++++++++ scripts/otoferlin/export_to_imod.py | 70 +++++++++++ .../pool_assignments_and_measurements.py | 118 ++++++++++++++++++ 7 files changed, 319 insertions(+), 130 deletions(-) create mode 100644 scripts/otoferlin/correct_vesicle_pools.py create mode 100644 scripts/otoferlin/export_to_imod.py create mode 100644 scripts/otoferlin/pool_assignments_and_measurements.py diff --git a/scripts/otoferlin/automatic_processing.py b/scripts/otoferlin/automatic_processing.py index 84ecb46..fbe1b00 100644 --- 
a/scripts/otoferlin/automatic_processing.py +++ b/scripts/otoferlin/automatic_processing.py @@ -2,18 +2,17 @@ import h5py import numpy as np -import pandas as pd from skimage.measure import label from skimage.segmentation import relabel_sequential -from synapse_net.distance_measurements import measure_segmentation_to_object_distances, load_distances +from synapse_net.distance_measurements import measure_segmentation_to_object_distances from synapse_net.file_utils import read_mrc from synapse_net.inference.vesicles import segment_vesicles from synapse_net.tools.util import get_model, compute_scale_from_voxel_size, _segment_ribbon_AZ from tqdm import tqdm -from common import STRUCTURE_NAMES, get_all_tomograms, get_seg_path, get_adapted_model +from common import get_all_tomograms, get_seg_path, get_adapted_model # These are tomograms for which the sophisticated membrane processing fails. # In this case, we just select the largest boundary piece. @@ -159,74 +158,6 @@ def postprocess_vesicles(mrc_path, output_path, process_center_crop): f.create_dataset(key, data=vesicles, compression="gzip") -def measure_distances(mrc_path, seg_path, output_folder): - result_folder = os.path.join(output_folder, "distances") - if os.path.exists(result_folder): - return - - # Get the voxel size. - _, voxel_size = read_mrc(mrc_path) - resolution = tuple(voxel_size[ax] for ax in "zyx") - - # Load the segmentations. - with h5py.File(seg_path, "r") as f: - g = f["segmentation"] - vesicles = g["vesicles"][:] - structures = {name: g[name][:] for name in STRUCTURE_NAMES} - - # Measure all the object distances. 
- os.makedirs(result_folder, exist_ok=True) - for name, seg in structures.items(): - if seg.sum() == 0: - print(name, "was not found, skipping the distance computation.") - continue - print("Compute vesicle distances to", name) - save_path = os.path.join(result_folder, f"{name}.npz") - measure_segmentation_to_object_distances(vesicles, seg, save_path=save_path, resolution=resolution) - - -def assign_vesicle_pools(output_folder): - assignment_path = os.path.join(output_folder, "vesicle_pools.csv") - if os.path.exists(assignment_path): - return - - distance_folder = os.path.join(output_folder, "distances") - distance_paths = {name: os.path.join(distance_folder, f"{name}.npz") for name in STRUCTURE_NAMES} - if not all(os.path.exists(path) for path in distance_paths.values()): - print("Skip vesicle pool assignment, because some distances are missing.") - print("This is probably due to the fact that the corresponding structures were not found.") - return - distances = {name: load_distances(path) for name, path in distance_paths.items()} - - # The distance criteria. - rav_ribbon_distance = 80 # nm - mpv_pd_distance = 100 # nm - mpv_mem_distance = 50 # nm - docked_pd_distance = 100 # nm - docked_mem_distance = 2 # nm - - rav_distances, seg_ids = distances["ribbon"][0], np.array(distances["ribbon"][-1]) - rav_ids = seg_ids[rav_distances < rav_ribbon_distance] - - pd_distances, mem_distances = distances["PD"][0], distances["membrane"][0] - assert len(pd_distances) == len(mem_distances) == len(rav_distances) - - mpv_ids = seg_ids[np.logical_and(pd_distances < mpv_pd_distance, mem_distances < mpv_mem_distance)] - docked_ids = seg_ids[np.logical_and(pd_distances < docked_pd_distance, mem_distances < docked_mem_distance)] - - # Create a dictionary to map vesicle ids to their corresponding pool. - # (RA-V get's over-written by MP-V, which is correct). 
- pool_assignments = {vid: "RA-V" for vid in rav_ids} - pool_assignments.update({vid: "MP-V" for vid in mpv_ids}) - pool_assignments.update({vid: "Docked-V" for vid in docked_ids}) - - pool_assignments = pd.DataFrame({ - "vesicle_id": list(pool_assignments.keys()), - "pool": list(pool_assignments.values()), - }) - pool_assignments.to_csv(assignment_path, index=False) - - def process_tomogram(mrc_path): output_path = get_seg_path(mrc_path) output_folder = os.path.split(output_path)[0] @@ -238,13 +169,6 @@ def process_tomogram(mrc_path): process_ribbon_structures(mrc_path, output_path, process_center_crop) postprocess_vesicles(mrc_path, output_path, process_center_crop) - # We don't need to do the analysis of the auto semgentation, it only - # makes sense to do this after segmentation. I am leaving this here for - # now, to move it to the files for analysis later. - - # measure_distances(mrc_path, output_path, output_folder) - # assign_vesicle_pools(output_folder) - def main(): tomograms = get_all_tomograms() diff --git a/scripts/otoferlin/check_automatic_result.py b/scripts/otoferlin/check_automatic_result.py index bf1f4c2..4c4c46c 100644 --- a/scripts/otoferlin/check_automatic_result.py +++ b/scripts/otoferlin/check_automatic_result.py @@ -3,7 +3,6 @@ import h5py import napari import numpy as np -import pandas as pd from synapse_net.file_utils import read_mrc from skimage.exposure import equalize_adapthist @@ -12,22 +11,6 @@ from common import get_all_tomograms, get_seg_path, get_colormaps -def _get_vesicle_pools(seg, assignment_path): - assignments = pd.read_csv(assignment_path) - pool_names = pd.unique(assignments.pool).tolist() - pools = np.zeros_like(seg) - - pool_colors = get_colormaps()["pools"] - colormap = {} - for pool_id, pool_name in enumerate(pool_names, 1): - pool_vesicle_ids = assignments[assignments.pool == pool_name].vesicle_id - pool_mask = np.isin(seg, pool_vesicle_ids) - pools[pool_mask] = pool_id - colormap[pool_id] = pool_colors[pool_name] - - 
return pools, colormap - - def check_automatic_result(mrc_path, version, use_clahe=False, center_crop=True, segmentation_group="segmentation"): tomogram, _ = read_mrc(mrc_path) if center_crop: @@ -53,11 +36,6 @@ def check_automatic_result(mrc_path, version, use_clahe=False, center_crop=True, segmentations[name] = ds[bb] colormaps[name] = get_colormaps().get(name, None) - output_folder = os.path.split(seg_path)[0] - assignment_path = os.path.join(output_folder, "vesicle_pools.csv") - if os.path.exists(assignment_path) and "vesicles" in segmentations: - segmentations["pools"], colormaps["pools"] = _get_vesicle_pools(segmentations["vesicles"], assignment_path) - v = napari.Viewer() v.add_image(tomogram) for name, seg in segmentations.items(): diff --git a/scripts/otoferlin/common.py b/scripts/otoferlin/common.py index 17d7617..1cea584 100644 --- a/scripts/otoferlin/common.py +++ b/scripts/otoferlin/common.py @@ -1,6 +1,8 @@ import os from glob import glob +import imageio.v3 as imageio +import h5py from synapse_net.tools.util import load_custom_model @@ -62,16 +64,29 @@ def get_colormaps(): "Docked-V": (1, 1, 0), None: "gray", } + ribbon_map = {1: "red", None: "gray"} membrane_map = {1: "purple", None: "gray"} pd_map = {1: "magenta", None: "gray"} - return {"pools": pool_map, "membrane": membrane_map, "PD": pd_map} - - -# TODO: sync the ukon folder with the tomograms. -# UKON Path: -# /run/user/1000/gvfs/smb-share:server=wfs-medizin.top.gwdg.de,share=ukon-all$/UKON100/archiv/EM/For Segmentation -def sync_tomograms(): - pass + return {"pools": pool_map, "membrane": membrane_map, "PD": pd_map, "ribbon": ribbon_map} + + +def load_segmentations(seg_path): + # Keep the typo in the name, as these are the hdf5 keys! 
+ seg_names = {"vesicles": "veiscles_postprocessed"} + seg_names.update({name: name for name in STRUCTURE_NAMES}) + + segmentations = {} + correction_folder = os.path.join(os.path.split(seg_path)[0], "correction") + with h5py.File(seg_path, "r") as f: + g = f["segmentation"] + for out_name, name in seg_names.items(): + correction_path = os.path.join(correction_folder, f"{name}.tif") + if os.path.exists(correction_path): + print("Loading corrected", name, "segmentation from", correction_path) + segmentations[out_name] = imageio.imread(correction_path) + else: + segmentations[out_name] = g[f"{name}"][:] + return segmentations if __name__ == "__main__": diff --git a/scripts/otoferlin/correct_structure_segmentation.py b/scripts/otoferlin/correct_structure_segmentation.py index 267d863..5c2ff15 100644 --- a/scripts/otoferlin/correct_structure_segmentation.py +++ b/scripts/otoferlin/correct_structure_segmentation.py @@ -1,41 +1,25 @@ -import os from pathlib import Path -import imageio.v3 as imageio -import h5py import napari from synapse_net.file_utils import read_mrc -from common import get_all_tomograms, get_seg_path +from common import get_all_tomograms, get_seg_path, load_segmentations, get_colormaps def correct_structure_segmentation(mrc_path): seg_path = get_seg_path(mrc_path) data, _ = read_mrc(mrc_path) - correction_folder = os.path.join(os.path.split(seg_path)[0], "correction") - fname = Path(mrc_path).stem - - names = ("ribbon", "PD", "membrane", "veiscles_postprocessed") - segmentations = {} - with h5py.File(seg_path, "r") as f: - for name in names: - correction_path = os.path.join(correction_folder, f"{name}.tif") - if os.path.exists(correction_path): - print("Loading segmentation for", name, "from", correction_path) - segmentations[name] = imageio.imread(correction_path) - else: - segmentations[name] = f[f"segmentation/{name}"][:] - color_maps = { - "ribbon": {1: "red", None: "gray"}, - "PD": {1: "purple", None: "gray"}, - "membrane": {1: "magenta", None: 
"gray"}, - } + segmentations = load_segmentations(seg_path) + color_maps = get_colormaps() v = napari.Viewer() v.add_image(data) for name, seg in segmentations.items(): + if name == "vesicles": + name = "veiscles_postprocessed" v.add_labels(seg, name=name, colormap=color_maps.get(name, None)) + fname = Path(mrc_path).stem v.title = fname napari.run() diff --git a/scripts/otoferlin/correct_vesicle_pools.py b/scripts/otoferlin/correct_vesicle_pools.py new file mode 100644 index 0000000..5ff8e7e --- /dev/null +++ b/scripts/otoferlin/correct_vesicle_pools.py @@ -0,0 +1,100 @@ +import os + +import imageio.v3 as imageio +import napari +import numpy as np +import pandas as pd +from magicgui import magicgui + +from synapse_net.file_utils import read_mrc +from skimage.measure import regionprops +from common import load_segmentations, get_seg_path, get_all_tomograms, get_colormaps, STRUCTURE_NAMES + + +def _create_pool_layer(seg, assignment_path): + assignments = pd.read_csv(assignment_path) + pool_names = pd.unique(assignments.pool).tolist() + pools = np.zeros_like(seg) + + pool_colors = get_colormaps()["pools"] + colormap = {} + for pool_id, pool_name in enumerate(pool_names, 1): + pool_vesicle_ids = assignments[assignments.pool == pool_name].vesicle_id + pool_mask = np.isin(seg, pool_vesicle_ids) + pools[pool_mask] = pool_id + colormap[pool_id] = pool_colors[pool_name] + + return pools, colormap + + +def _update_assignments(vesicles, pool_correction, assignment_path): + old_assignments = pd.read_csv(assignment_path) + props = regionprops(vesicles, pool_correction) + + new_assignments = old_assignments.copy() + val_to_pool = {1: "RA-V", 2: "MP-V", 3: "Docked-V", 4: None} + for prop in props: + correction_val = prop.max_intensity + if correction_val == 0: + continue + new_assignments[new_assignments.vesicle_id == prop.label] = val_to_pool[correction_val] + + new_assignments.to_csv(assignment_path, index=False) + + +# TODO: also enable correcting vesicle segmentation??? 
+def correct_vesicle_pools(mrc_path): + seg_path = get_seg_path(mrc_path) + + output_folder = os.path.split(seg_path)[0] + assignment_path = os.path.join(output_folder, "vesicle_pools.csv") + + data, _ = read_mrc(mrc_path) + segmentations = load_segmentations(seg_path) + vesicles = segmentations["vesicles"] + + colormaps = get_colormaps() + pool_colors = colormaps["pools"] + correction_colors = { + 1: pool_colors["RA-V"], 2: pool_colors["MP-V"], 3: pool_colors["Docked-V"], 4: "Gray", None: "Gray" + } + + vesicle_pools, pool_colors = _create_pool_layer(vesicles, assignment_path) + + pool_correction_path = os.path.join(output_folder, "correction", "pool_correction.tif") + if os.path.exists(pool_correction_path): + pool_correction = imageio.imread(pool_correction_path) + else: + pool_correction = np.zeros_like(vesicles) + + v = napari.Viewer() + v.add_image(data) + v.add_labels(vesicle_pools, colormap=pool_colors) + v.add_labels(pool_correction, colormap=correction_colors) + v.add_labels(vesicles, visible=False) + for name in STRUCTURE_NAMES: + v.add_labels(segmentations[name], name=name, visible=False, colormap=colormaps[name]) + + @magicgui(call_button="Update Pools") + def update_pools(viewer: napari.Viewer): + pool_data = viewer.layers["vesicle_pools"].data + vesicles = viewer.layers["vesicles"].data + pool_correction = viewer.layers["pool_correction"].data + _update_assignments(vesicles, pool_correction, assignment_path) + imageio.imwrite(pool_correction_path, pool_correction, compression="zlib") + pool_data, pool_colors = _create_pool_layer(vesicles, assignment_path) + viewer.layers["vesicle_pools"].data = pool_data + + v.window.add_dock_widget(update_pools) + + napari.run() + + +def main(): + tomograms = get_all_tomograms() + for tomo in tomograms: + correct_vesicle_pools(tomo) + + +if __name__ == "__main__": + main() diff --git a/scripts/otoferlin/export_to_imod.py b/scripts/otoferlin/export_to_imod.py new file mode 100644 index 0000000..b3943ea --- /dev/null 
+++ b/scripts/otoferlin/export_to_imod.py @@ -0,0 +1,70 @@ +import os + +import numpy as np +import pandas as pd + +from synapse_net.imod.to_imod import write_segmentation_to_imod, write_segmentation_to_imod_as_points +from common import STRUCTURE_NAMES, get_all_tomograms, get_seg_path, load_segmentations +from tqdm import tqdm + + +# TODO check if we need to remove offset from mrc +def export_tomogram(mrc_path, force): + seg_path = get_seg_path(mrc_path) + output_folder = os.path.split(seg_path)[0] + assert os.path.exists(output_folder) + + export_folder = os.path.join(output_folder, "imod") + if os.path.exists(export_folder) and not force: + return + + segmentations = load_segmentations(seg_path) + vesicles = segmentations["vesicles"] + + os.makedirs(export_folder, exist_ok=True) + + # Export the structures to IMOD. + for name in STRUCTURE_NAMES: + export_path = os.path.join(export_folder, f"{name}.mod") + write_segmentation_to_imod(mrc_path, segmentations[name], export_path) + + # Load the pool assignments and export the pools to IMOD. + assignment_path = os.path.join(output_folder, "vesicle_pools.csv") + assignments = pd.read_csv(assignment_path) + + pools = pd.unique(assignments.pool) + radius_factor = 1.0 # TODO! + for pool in pools: + export_path = os.path.join(export_folder, f"{pool}.mod") + pool_ids = assignments[assignments.pool == pool].vesicle_id + pool_seg = vesicles.copy() + pool_seg[~np.isin(pool_seg, pool_ids)] = 0 + write_segmentation_to_imod_as_points( + mrc_path, pool_seg, export_path, min_radius=5, radius_factor=radius_factor + ) + + # TODO: read measurements for ribbon and PD volume / surface from IMOD. 
+ # - convert to meshes + # - smooth the meshes + # - run imodinfo to get the measurements + measures = pd.DataFrame({ + }) + return measures + + +def main(): + force = False + tomograms = get_all_tomograms() + + measurements = [] + for tomogram in tqdm(tomograms, desc="Process tomograms"): + measures = export_tomogram(tomogram, force) + measurements.append(measures) + + save_path = "./data/structure_measurements.xlsx" + measurements = pd.concat(measurements) + measurements.to_excel(save_path, index=False) + + +if __name__ == "__main__": + main() diff --git a/scripts/otoferlin/pool_assignments_and_measurements.py b/scripts/otoferlin/pool_assignments_and_measurements.py new file mode 100644 index 0000000..bab0483 --- /dev/null +++ b/scripts/otoferlin/pool_assignments_and_measurements.py @@ -0,0 +1,118 @@ +import os + +import numpy as np +import pandas as pd + +from synapse_net.distance_measurements import measure_segmentation_to_object_distances, load_distances +from synapse_net.file_utils import read_mrc +from synapse_net.imod.to_imod import convert_segmentation_to_spheres +from tqdm import tqdm + +from common import STRUCTURE_NAMES, get_all_tomograms, get_seg_path, load_segmentations + + +def measure_distances(mrc_path, seg_path, output_folder, force): + result_folder = os.path.join(output_folder, "distances") + if os.path.exists(result_folder) and not force: + return + + # Get the voxel size. + _, voxel_size = read_mrc(mrc_path) + resolution = tuple(voxel_size[ax] for ax in "zyx") + + # Load the segmentations. + segmentations = load_segmentations(seg_path) + vesicles = segmentations["vesicles"] + structures = {name: segmentations[name] for name in STRUCTURE_NAMES} + + # Measure all the object distances. 
+ os.makedirs(result_folder, exist_ok=True) + for name, seg in structures.items(): + if seg.sum() == 0: + print(name, "was not found, skipping the distance computation.") + continue + print("Compute vesicle distances to", name) + save_path = os.path.join(result_folder, f"{name}.npz") + measure_segmentation_to_object_distances(vesicles, seg, save_path=save_path, resolution=resolution) + + +def _measure_radii(seg_path): + segmentations = load_segmentations(seg_path) + vesicles = segmentations["vesicles"] + # TODO should we update this to account for elongated vesicles? + # TODO should we update this with different radius factors? (check IMOD export) + _, radii = convert_segmentation_to_spheres(vesicles, radius_factor=1.0) + return np.array(radii) + + +def assign_vesicle_pools_and_measure_radii(seg_path, output_folder, force): + assignment_path = os.path.join(output_folder, "vesicle_pools.csv") + if os.path.exists(assignment_path) and not force: + return + + distance_folder = os.path.join(output_folder, "distances") + distance_paths = {name: os.path.join(distance_folder, f"{name}.npz") for name in STRUCTURE_NAMES} + if not all(os.path.exists(path) for path in distance_paths.values()): + print("Skip vesicle pool assignment, because some distances are missing.") + print("This is probably due to the fact that the corresponding structures were not found.") + return + distances = {name: load_distances(path) for name, path in distance_paths.items()} + + # The distance criteria. 
+ rav_ribbon_distance = 80 # nm + mpv_pd_distance = 100 # nm + mpv_mem_distance = 50 # nm + docked_pd_distance = 100 # nm + docked_mem_distance = 2 # nm + + rav_distances, seg_ids = distances["ribbon"][0], np.array(distances["ribbon"][-1]) + rav_ids = seg_ids[rav_distances < rav_ribbon_distance] + + pd_distances, mem_distances = distances["PD"][0], distances["membrane"][0] + assert len(pd_distances) == len(mem_distances) == len(rav_distances) + + mpv_ids = seg_ids[np.logical_and(pd_distances < mpv_pd_distance, mem_distances < mpv_mem_distance)] + docked_ids = seg_ids[np.logical_and(pd_distances < docked_pd_distance, mem_distances < docked_mem_distance)] + + # Create a dictionary to map vesicle ids to their corresponding pool. + # (RA-V get's over-written by MP-V, which is correct). + pool_assignments = {vid: "RA-V" for vid in rav_ids} + pool_assignments.update({vid: "MP-V" for vid in mpv_ids}) + pool_assignments.update({vid: "Docked-V" for vid in docked_ids}) + + pool_values = [pool_assignments.get(vid, None) for vid in seg_ids] + radii = _measure_radii(seg_path) + assert len(radii) == len(pool_values) + + pool_assignments = pd.DataFrame({ + "vesicle_id": seg_ids, + "pool": pool_values, + "radius": radii, + "diameter": 2 * radii, + }) + pool_assignments.to_csv(assignment_path, index=False) + + +def process_tomogram(mrc_path, force): + seg_path = get_seg_path(mrc_path) + output_folder = os.path.split(seg_path)[0] + assert os.path.exists(output_folder) + + # Measure the distances. + measure_distances(mrc_path, seg_path, output_folder, force) + + # Assign the vesicle pools. + assign_vesicle_pools_and_measure_radii(seg_path, output_folder, force) + + # The surface area / volume for ribbon and PD will be done in a separate script. 
+ + +def main(): + force = True + tomograms = get_all_tomograms() + for tomogram in tqdm(tomograms, desc="Process tomograms"): + process_tomogram(tomogram, force) + + +if __name__ == "__main__": + main() From f630ee1300a853abb200e676ced7b60a70cd1688 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Tue, 10 Dec 2024 21:35:31 +0100 Subject: [PATCH 15/30] Update otoferlin analysis --- scripts/otoferlin/.gitignore | 3 + scripts/otoferlin/common.py | 9 +- scripts/otoferlin/correct_vesicle_pools.py | 15 ++- scripts/otoferlin/export_results.py | 114 +++++++++++++++ 4 files changed, 135 insertions(+), 6 deletions(-) create mode 100644 scripts/otoferlin/export_results.py diff --git a/scripts/otoferlin/.gitignore b/scripts/otoferlin/.gitignore index 8fce603..59d5ea8 100644 --- a/scripts/otoferlin/.gitignore +++ b/scripts/otoferlin/.gitignore @@ -1 +1,4 @@ data/ +sync_segmentation.sh +segmentation/ +results/ diff --git a/scripts/otoferlin/common.py b/scripts/otoferlin/common.py index 1cea584..956e852 100644 --- a/scripts/otoferlin/common.py +++ b/scripts/otoferlin/common.py @@ -36,8 +36,7 @@ def get_folders(): return INPUT_ROOT, OUTPUT_ROOT root_in = "./data/tomograms" assert os.path.exists(root_in) - root_out = "./data/segmentation" - return root_in, root_out + return root_in, OUTPUT_ROOT def get_all_tomograms(): @@ -89,6 +88,12 @@ def load_segmentations(seg_path): return segmentations +def to_condition(mrc_path): + fname = os.path.basename(mrc_path) + # TODO: Is this correct, or is it the otherway round? 
+ return "MUT" if fname.startswith("Otof") else "WT" + + if __name__ == "__main__": tomos = get_all_tomograms() print("We have", len(tomos), "tomograms") diff --git a/scripts/otoferlin/correct_vesicle_pools.py b/scripts/otoferlin/correct_vesicle_pools.py index 5ff8e7e..48c2500 100644 --- a/scripts/otoferlin/correct_vesicle_pools.py +++ b/scripts/otoferlin/correct_vesicle_pools.py @@ -18,8 +18,10 @@ def _create_pool_layer(seg, assignment_path): pool_colors = get_colormaps()["pools"] colormap = {} - for pool_id, pool_name in enumerate(pool_names, 1): - pool_vesicle_ids = assignments[assignments.pool == pool_name].vesicle_id + for pool_id, pool_name in enumerate(pool_names): + if not isinstance(pool_name, str) and np.isnan(pool_name): + continue + pool_vesicle_ids = assignments[assignments.pool == pool_name].vesicle_id.values pool_mask = np.isin(seg, pool_vesicle_ids) pools[pool_mask] = pool_id colormap[pool_id] = pool_colors[pool_name] @@ -37,7 +39,7 @@ def _update_assignments(vesicles, pool_correction, assignment_path): correction_val = prop.max_intensity if correction_val == 0: continue - new_assignments[new_assignments.vesicle_id == prop.label] = val_to_pool[correction_val] + new_assignments[new_assignments.vesicle_id == prop.label].pool = val_to_pool[correction_val] new_assignments.to_csv(assignment_path, index=False) @@ -48,6 +50,9 @@ def correct_vesicle_pools(mrc_path): output_folder = os.path.split(seg_path)[0] assignment_path = os.path.join(output_folder, "vesicle_pools.csv") + if not os.path.exists(assignment_path): + print("Skip", seg_path, "due to missing assignments") + return data, _ = read_mrc(mrc_path) segmentations = load_segmentations(seg_path) @@ -62,6 +67,7 @@ def correct_vesicle_pools(mrc_path): vesicle_pools, pool_colors = _create_pool_layer(vesicles, assignment_path) pool_correction_path = os.path.join(output_folder, "correction", "pool_correction.tif") + os.makedirs(os.path.join(output_folder, "correction"), exist_ok=True) if 
os.path.exists(pool_correction_path): pool_correction = imageio.imread(pool_correction_path) else: @@ -81,9 +87,10 @@ def update_pools(viewer: napari.Viewer): vesicles = viewer.layers["vesicles"].data pool_correction = viewer.layers["pool_correction"].data _update_assignments(vesicles, pool_correction, assignment_path) - imageio.imwrite(pool_correction_path, pool_correction, compression="zlib") + # imageio.imwrite(pool_correction_path, pool_correction, compression="zlib") pool_data, pool_colors = _create_pool_layer(vesicles, assignment_path) viewer.layers["vesicle_pools"].data = pool_data + viewer.layers["vesicle_pools"].colormap = pool_colors v.window.add_dock_widget(update_pools) diff --git a/scripts/otoferlin/export_results.py b/scripts/otoferlin/export_results.py new file mode 100644 index 0000000..bdb1e3f --- /dev/null +++ b/scripts/otoferlin/export_results.py @@ -0,0 +1,114 @@ +import os +from datetime import datetime + +import numpy as np +import pandas as pd +from common import get_all_tomograms, get_seg_path, to_condition + +from synapse_net.distance_measurements import load_distances + + +def get_output_folder(): + output_root = "./results" + date = datetime.now().strftime("%Y%m%d") + + version = 1 + output_folder = os.path.join(output_root, f"{date}_{version}") + while os.path.exists(output_folder): + version += 1 + output_folder = os.path.join(output_root, f"{date}_{version}") + + os.makedirs(output_folder) + return output_folder + + +def _export_results(tomograms, result_path, result_extraction): + results = {} + for tomo in tomograms: + condition = to_condition(tomo) + res = result_extraction(tomo) + if condition in results: + results[condition].append(res) + else: + results[condition] = [res] + + for condition, res in results.items(): + res = pd.concat(res) + if os.path.exists(result_path): + with pd.ExcelWriter(result_path, engine="openpyxl", mode="a") as writer: + res.to_excel(writer, sheet_name=condition, index=False) + else: + 
res.to_excel(result_path, sheet_name=condition, index=False) + + +def export_vesicle_pools(tomograms, result_path): + + def result_extraction(tomo): + folder = os.path.split(get_seg_path(tomo))[0] + measure_path = os.path.join(folder, "vesicle_pools.csv") + measures = pd.read_csv(measure_path).dropna() + pool_names, counts = np.unique(measures.pool.values, return_counts=True) + res = {"tomogram": [os.path.basename(tomo)]} + res.update({k: v for k, v in zip(pool_names, counts)}) + res = pd.DataFrame(res) + return res + + _export_results(tomograms, result_path, result_extraction) + + +def export_distances(tomograms, result_path): + def result_extraction(tomo): + folder = os.path.split(get_seg_path(tomo))[0] + measure_path = os.path.join(folder, "vesicle_pools.csv") + measures = pd.read_csv(measure_path).dropna() + + measures = measures[measures.pool.isin(["MP-V", "Docked-V"])][["vesicle_id", "pool"]] + + # Load the distances to PD. + pd_distances, _, _, seg_ids = load_distances(os.path.join(folder, "distances", "PD.npz")) + pd_distances = {sid: dist for sid, dist in zip(seg_ids, pd_distances)} + measures["distance-to-pd"] = [pd_distances[vid] for vid in measures.vesicle_id.values] + + # Load the distances to membrane. 
+ mem_distances, _, _, seg_ids = load_distances(os.path.join(folder, "distances", "membrane.npz")) + mem_distances = {sid: dist for sid, dist in zip(seg_ids, mem_distances)} + measures["distance-to-membrane"] = [mem_distances[vid] for vid in measures.vesicle_id.values] + + measures = measures.drop(columns=["vesicle_id"]) + measures.insert(0, "tomogram", len(measures) * [os.path.basename(tomo)]) + + return measures + + _export_results(tomograms, result_path, result_extraction) + + +def export_diameter(tomograms, result_path): + def result_extraction(tomo): + folder = os.path.split(get_seg_path(tomo))[0] + measure_path = os.path.join(folder, "vesicle_pools.csv") + measures = pd.read_csv(measure_path).dropna() + + measures = measures[measures.pool.isin(["MP-V", "Docked-V"])][["pool", "diameter"]] + measures.insert(0, "tomogram", len(measures) * [os.path.basename(tomo)]) + + return measures + + _export_results(tomograms, result_path, result_extraction) + + +def main(): + tomograms = get_all_tomograms() + result_folder = get_output_folder() + + result_path = os.path.join(result_folder, "vesicle_pools.xlsx") + export_vesicle_pools(tomograms, result_path) + + result_path = os.path.join(result_folder, "distances.xlsx") + export_distances(tomograms, result_path) + + result_path = os.path.join(result_folder, "diameter.xlsx") + export_diameter(tomograms, result_path) + + +if __name__ == "__main__": + main() From ea84a3d7e627e3843bffd0b0fb54480068d89f1b Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 11 Dec 2024 04:32:41 +0100 Subject: [PATCH 16/30] Add vesicle postprocessing scripts --- scripts/otoferlin/automatic_processing.py | 18 +++++--- scripts/otoferlin/postprocess_vesicles.py | 54 +++++++++++++++++++++++ 2 files changed, 66 insertions(+), 6 deletions(-) create mode 100644 scripts/otoferlin/postprocess_vesicles.py diff --git a/scripts/otoferlin/automatic_processing.py b/scripts/otoferlin/automatic_processing.py index fbe1b00..5710573 100644 --- 
a/scripts/otoferlin/automatic_processing.py +++ b/scripts/otoferlin/automatic_processing.py @@ -12,7 +12,7 @@ from synapse_net.tools.util import get_model, compute_scale_from_voxel_size, _segment_ribbon_AZ from tqdm import tqdm -from common import get_all_tomograms, get_seg_path, get_adapted_model +from common import get_all_tomograms, get_seg_path, get_adapted_model, load_segmentations # These are tomograms for which the sophisticated membrane processing fails. # In this case, we just select the largest boundary piece. @@ -115,10 +115,12 @@ def process_ribbon_structures(mrc_path, output_path, process_center_crop): f.create_dataset(f"prediction/{name}", data=predictions[name], compression="gzip") -def postprocess_vesicles(mrc_path, output_path, process_center_crop): +def postprocess_vesicles( + mrc_path, output_path, process_center_crop, force=False +): key = "segmentation/veiscles_postprocessed" with h5py.File(output_path, "r") as f: - if key in f: + if key in f and not force: return vesicles = f["segmentation/vesicles"][:] if process_center_crop: @@ -127,8 +129,9 @@ def postprocess_vesicles(mrc_path, output_path, process_center_crop): else: bb = np.s_[:] - ribbon = f["segmentation/ribbon"][bb] - membrane = f["segmentation/membrane"][bb] + segs = load_segmentations(output_path) + ribbon = segs["ribbon"][bb] + membrane = segs["membrane"][bb] # Filter out small vesicle fragments. 
min_size = 5000 @@ -155,7 +158,10 @@ def postprocess_vesicles(mrc_path, output_path, process_center_crop): full_seg[bb] = vesicles vesicles = full_seg with h5py.File(output_path, "a") as f: - f.create_dataset(key, data=vesicles, compression="gzip") + if key in f: + f[key][:] = vesicles + else: + f.create_dataset(key, data=vesicles, compression="gzip") def process_tomogram(mrc_path): diff --git a/scripts/otoferlin/postprocess_vesicles.py b/scripts/otoferlin/postprocess_vesicles.py new file mode 100644 index 0000000..da2c43f --- /dev/null +++ b/scripts/otoferlin/postprocess_vesicles.py @@ -0,0 +1,54 @@ +from pathlib import Path +from shutil import copyfile + +import napari +import h5py + +from tqdm import tqdm +from common import get_all_tomograms, get_seg_path +from automatic_processing import postprocess_vesicles + +TOMOS = [ + "Otof_TDAKO2blockC_GridE2_1", + "Otof_TDAKO1blockA_GridN5_3", + "Otof_TDAKO1blockA_GridN5_5", + "Bl6_NtoTDAWT1_blockH_GridG2_3", +] + + +def postprocess(mrc_path, process_center_crop): + output_path = get_seg_path(mrc_path) + copyfile(output_path, output_path + ".bkp") + postprocess_vesicles( + mrc_path, output_path, process_center_crop=process_center_crop, force=False + ) + + with h5py.File(output_path, "r") as f: + ves = f["segmentation/veiscles_postprocessed"][:] + + v = napari.Viewer() + v.add_labels(ves) + napari.run() + + +# Postprocess vesicles in specific tomograms, where this initially +# failed due to wrong structure segmentations. 
+def redo_initial_postprocessing(): + tomograms = get_all_tomograms() + for tomogram in tqdm(tomograms, desc="Process tomograms"): + fname = Path(tomogram).stem + if fname not in TOMOS: + continue + print("Postprocessing", fname) + postprocess(tomogram, process_center_crop=True) + + +def main(): + redo_initial_postprocessing() + # TODO TODO TODO + # Label all vesicle corrections to make sure everyone has its own id + # label_all_vesicles() + + +if __name__: + main() From 9b8be053db72f3ef3e0480db5cc8b6d7937fdd57 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 11 Dec 2024 12:45:16 +0100 Subject: [PATCH 17/30] Update vesicle pool correction script --- scripts/otoferlin/automatic_processing.py | 2 +- scripts/otoferlin/common.py | 13 ++++++++++--- scripts/otoferlin/correct_vesicle_pools.py | 22 ++++++++++++---------- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/scripts/otoferlin/automatic_processing.py b/scripts/otoferlin/automatic_processing.py index 5710573..7928c83 100644 --- a/scripts/otoferlin/automatic_processing.py +++ b/scripts/otoferlin/automatic_processing.py @@ -193,5 +193,5 @@ def main(): # f["segmentation/membrane"][:] = seg -if __name__: +if __name__ == "__main__": main() diff --git a/scripts/otoferlin/common.py b/scripts/otoferlin/common.py index 956e852..20b936e 100644 --- a/scripts/otoferlin/common.py +++ b/scripts/otoferlin/common.py @@ -3,6 +3,7 @@ import imageio.v3 as imageio import h5py +import pandas as pd from synapse_net.tools.util import load_custom_model @@ -39,11 +40,18 @@ def get_folders(): return root_in, OUTPUT_ROOT -def get_all_tomograms(): +def get_all_tomograms(restrict_to_good_tomos=False): root, _ = get_folders() tomograms = glob(os.path.join(root, "**", "*.mrc"), recursive=True) tomograms += glob(os.path.join(root, "**", "*.rec"), recursive=True) tomograms = sorted(tomograms) + if restrict_to_good_tomos: + # TODO update path to table for the workstation + table_path = "overview Otoferlin samples.xlsx" + 
table = pd.read_excel(table_path) + table = table[table["Einschluss? "] == "ja"] + fnames = [os.path.basename(row["File name"]) for _, row in table.iterrows()] + tomograms = [tomo for tomo in tomograms if os.path.basename(tomo) in fnames] return tomograms @@ -90,8 +98,7 @@ def load_segmentations(seg_path): def to_condition(mrc_path): fname = os.path.basename(mrc_path) - # TODO: Is this correct, or is it the otherway round? - return "MUT" if fname.startswith("Otof") else "WT" + return "TDA KO" if fname.startswith("Otof") else "TDA WT" if __name__ == "__main__": diff --git a/scripts/otoferlin/correct_vesicle_pools.py b/scripts/otoferlin/correct_vesicle_pools.py index 48c2500..977185d 100644 --- a/scripts/otoferlin/correct_vesicle_pools.py +++ b/scripts/otoferlin/correct_vesicle_pools.py @@ -33,14 +33,17 @@ def _update_assignments(vesicles, pool_correction, assignment_path): old_assignments = pd.read_csv(assignment_path) props = regionprops(vesicles, pool_correction) - new_assignments = old_assignments.copy() - val_to_pool = {1: "RA-V", 2: "MP-V", 3: "Docked-V", 4: None} - for prop in props: - correction_val = prop.max_intensity - if correction_val == 0: - continue - new_assignments[new_assignments.vesicle_id == prop.label].pool = val_to_pool[correction_val] - + val_to_pool = {0: 0, 1: "RA-V", 2: "MP-V", 3: "Docked-V", 4: None} + corrected_pools = {prop.label: val_to_pool[int(prop.max_intensity)] for prop in props} + + new_assignments = [] + for _, row in old_assignments.iterrows(): + vesicle_id = row.vesicle_id + corrected_pool = corrected_pools[vesicle_id] + if corrected_pool != 0: + row.pool = corrected_pool + new_assignments.append(row) + new_assignments = pd.DataFrame(new_assignments) new_assignments.to_csv(assignment_path, index=False) @@ -87,7 +90,6 @@ def update_pools(viewer: napari.Viewer): vesicles = viewer.layers["vesicles"].data pool_correction = viewer.layers["pool_correction"].data _update_assignments(vesicles, pool_correction, assignment_path) - # 
imageio.imwrite(pool_correction_path, pool_correction, compression="zlib") pool_data, pool_colors = _create_pool_layer(vesicles, assignment_path) viewer.layers["vesicle_pools"].data = pool_data viewer.layers["vesicle_pools"].colormap = pool_colors @@ -98,7 +100,7 @@ def update_pools(viewer: napari.Viewer): def main(): - tomograms = get_all_tomograms() + tomograms = get_all_tomograms(restrict_to_good_tomos=True) for tomo in tomograms: correct_vesicle_pools(tomo) From 6eb5d127bc2735afb5f8b2fd9f282fa0b83aa221 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 11 Dec 2024 16:19:59 +0100 Subject: [PATCH 18/30] Implement vesicle labeling --- scripts/otoferlin/common.py | 3 +-- scripts/otoferlin/postprocess_vesicles.py | 22 +++++++++++++++++++--- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/scripts/otoferlin/common.py b/scripts/otoferlin/common.py index 20b936e..bb6ba99 100644 --- a/scripts/otoferlin/common.py +++ b/scripts/otoferlin/common.py @@ -46,7 +46,6 @@ def get_all_tomograms(restrict_to_good_tomos=False): tomograms += glob(os.path.join(root, "**", "*.rec"), recursive=True) tomograms = sorted(tomograms) if restrict_to_good_tomos: - # TODO update path to table for the workstation table_path = "overview Otoferlin samples.xlsx" table = pd.read_excel(table_path) table = table[table["Einschluss? 
"] == "ja"] @@ -102,5 +101,5 @@ def to_condition(mrc_path): if __name__ == "__main__": - tomos = get_all_tomograms() + tomos = get_all_tomograms(restrict_to_good_tomos=True) print("We have", len(tomos), "tomograms") diff --git a/scripts/otoferlin/postprocess_vesicles.py b/scripts/otoferlin/postprocess_vesicles.py index da2c43f..0d93217 100644 --- a/scripts/otoferlin/postprocess_vesicles.py +++ b/scripts/otoferlin/postprocess_vesicles.py @@ -1,10 +1,14 @@ +import os from pathlib import Path from shutil import copyfile +import imageio.v3 as imageio import napari import h5py +from skimage.measure import label from tqdm import tqdm + from common import get_all_tomograms, get_seg_path from automatic_processing import postprocess_vesicles @@ -43,11 +47,23 @@ def redo_initial_postprocessing(): postprocess(tomogram, process_center_crop=True) +def label_all_vesicles(): + tomograms = get_all_tomograms(restrict_to_good_tomos=True) + for mrc_path in tqdm(tomograms, desc="Process tomograms"): + output_path = get_seg_path(mrc_path) + output_folder = os.path.split(output_path)[0] + vesicle_path = os.path.join(output_folder, "correction", "veiscles_postprocessed.tif") + assert os.path.exists(vesicle_path), vesicle_path + copyfile(vesicle_path, vesicle_path + ".bkp") + vesicles = imageio.imread(vesicle_path) + vesicles = label(vesicles) + imageio.imwrite(vesicle_path, vesicles, compression="zlib") + + def main(): - redo_initial_postprocessing() - # TODO TODO TODO + # redo_initial_postprocessing() # Label all vesicle corrections to make sure everyone has its own id - # label_all_vesicles() + label_all_vesicles() if __name__: From 504144ced60764d18499a9843a9d23164e08eaea Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 11 Dec 2024 16:22:51 +0100 Subject: [PATCH 19/30] merge on WS --- scripts/otoferlin/automatic_processing.py | 30 +++++++++++++---------- scripts/otoferlin/postprocess_vesicles.py | 13 ++++++---- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git 
a/scripts/otoferlin/automatic_processing.py b/scripts/otoferlin/automatic_processing.py index 7928c83..531a38c 100644 --- a/scripts/otoferlin/automatic_processing.py +++ b/scripts/otoferlin/automatic_processing.py @@ -17,7 +17,10 @@ # These are tomograms for which the sophisticated membrane processing fails. # In this case, we just select the largest boundary piece. SIMPLE_MEM_POSTPROCESSING = [ - "Otof_TDAKO1blockA_GridN5_2_rec.mrc", "Otof_TDAKO2blockC_GridF5_1_rec.mrc", "Otof_TDAKO2blockC_GridF5_2_rec.mrc" + "Otof_TDAKO1blockA_GridN5_2_rec.mrc", "Otof_TDAKO2blockC_GridF5_1_rec.mrc", "Otof_TDAKO2blockC_GridF5_2_rec.mrc", + "Bl6_NtoTDAWT1_blockH_GridF3_1_rec.mrc", "Bl6_NtoTDAWT1_blockH_GridG2_3_rec.mrc", "Otof_TDAKO1blockA_GridN5_5_rec.mrc", + "Otof_TDAKO2blockC_GridE2_1_rec.mrc", "Otof_TDAKO2blockC_GridE2_2_rec.mrc", + ] @@ -31,7 +34,8 @@ def _get_center_crop(input_): def _get_tiling(): - tile = {"x": 768, "y": 768, "z": 48} + # tile = {"x": 768, "y": 768, "z": 48} + tile = {"x": 512, "y": 512, "z": 48} halo = {"x": 128, "y": 128, "z": 8} return {"tile": tile, "halo": halo} @@ -178,19 +182,19 @@ def process_tomogram(mrc_path): def main(): tomograms = get_all_tomograms() - for tomogram in tqdm(tomograms, desc="Process tomograms"): - process_tomogram(tomogram) + # for tomogram in tqdm(tomograms, desc="Process tomograms"): + # process_tomogram(tomogram) # Update the membrane postprocessing for the tomograms where this went wrong. 
- # for tomo in tqdm(tomograms, desc="Fix membrame postprocesing"): - # if os.path.basename(tomo) not in SIMPLE_MEM_POSTPROCESSING: - # continue - # seg_path = get_seg_path(tomo) - # with h5py.File(seg_path, "r") as f: - # pred = f["prediction/membrane"][:] - # seg = _simple_membrane_postprocessing(pred) - # with h5py.File(seg_path, "a") as f: - # f["segmentation/membrane"][:] = seg + for tomo in tqdm(tomograms, desc="Fix membrame postprocesing"): + if os.path.basename(tomo) not in SIMPLE_MEM_POSTPROCESSING: + continue + seg_path = get_seg_path(tomo) + with h5py.File(seg_path, "r") as f: + pred = f["prediction/membrane"][:] + seg = _simple_membrane_postprocessing(pred) + with h5py.File(seg_path, "a") as f: + f["segmentation/membrane"][:] = seg if __name__ == "__main__": diff --git a/scripts/otoferlin/postprocess_vesicles.py b/scripts/otoferlin/postprocess_vesicles.py index 0d93217..370d38c 100644 --- a/scripts/otoferlin/postprocess_vesicles.py +++ b/scripts/otoferlin/postprocess_vesicles.py @@ -10,13 +10,14 @@ from tqdm import tqdm from common import get_all_tomograms, get_seg_path +from synapse_net.file_utils import read_mrc from automatic_processing import postprocess_vesicles TOMOS = [ - "Otof_TDAKO2blockC_GridE2_1", - "Otof_TDAKO1blockA_GridN5_3", - "Otof_TDAKO1blockA_GridN5_5", - "Bl6_NtoTDAWT1_blockH_GridG2_3", + "Otof_TDAKO2blockC_GridE2_1_rec", + "Otof_TDAKO1blockA_GridN5_3_rec", + "Otof_TDAKO1blockA_GridN5_5_rec", + "Bl6_NtoTDAWT1_blockH_GridG2_3_rec", ] @@ -24,13 +25,15 @@ def postprocess(mrc_path, process_center_crop): output_path = get_seg_path(mrc_path) copyfile(output_path, output_path + ".bkp") postprocess_vesicles( - mrc_path, output_path, process_center_crop=process_center_crop, force=False + mrc_path, output_path, process_center_crop=process_center_crop, force=True ) + tomo, _ = read_mrc(mrc_path) with h5py.File(output_path, "r") as f: ves = f["segmentation/veiscles_postprocessed"][:] v = napari.Viewer() + v.add_image(tomo) v.add_labels(ves) 
napari.run() From e99ab19b527b5f5ab7ac8e9309765ce0654e8a75 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 11 Dec 2024 16:23:15 +0100 Subject: [PATCH 20/30] Add overview table --- .../otoferlin/overview Otoferlin samples.xlsx | Bin 0 -> 8293 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 scripts/otoferlin/overview Otoferlin samples.xlsx diff --git a/scripts/otoferlin/overview Otoferlin samples.xlsx b/scripts/otoferlin/overview Otoferlin samples.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..b4f7972cf148bbb5e729a88063a0ffe98770f83e GIT binary patch literal 8293 zcmbVxbyU>t);2?TgEZ1ecY~C4BOom>Lw5;|ARygcA}NeRx3qMZbV#Ql-RU=Y&ieHD zzUTSl-LrlZYwf-7+H+m^y0(Tg0wN(CDk>_RyjO%i+#iMk`)=wAvUlU;cz7>~Z&T~w z!VEqDhqGMpFG!;16f~+RG}7=a}NyN4AlP-iSJ^in>+Isu_;j1!H(Zik(%PQ1n;UIZ32yFuaZjM z8;DR7J{Bk#-Fp$_Q$MhDkcdr+$)%u*mqN3--%C9HIhy7!ao~GuDnUd2rmM60De_Pg zejt~XJ6M&@M?qyji-?lfR4KwB^LAj9KtmY?H7K1geIK?7!|-r$8vk{ha9|!@_MESr zTIiYoo*`K4?bJ?I8fSh0N*hs#R));$-d(6+ z&T7>ZUdMK34y5t&GL1KCGc+TRuvUlb&tRpZu#DGh%%}SJn0UwyR`u!(=j-BD@xJgB z1PhAWRw>skdMB3I4=Q!OUcC`u%~`E2veQ_9QVkUvo3!^aBwwy)2b9PA5+cfcS(UE9 zXz#qm3FbhLYm>IRK_@hk#8imxTujcbwHIIy3`|j$p(@)|Z=#(;OCde6a;pA9!nrZ) zXj;ZBVu_SO{OPL}^;5{wW0{>$<-Vp2y1=#UrQloSoH$x2CMkJGgkRp>hEpl_kqP-x zA(3nSaAY!%rSdp{wm^@;D1c42pBPyD3i` zb3^k%xfLOEs^e2A>GMHkKt56X3-@Of=nyq*$+L8F{`DpIb&s3Xddrb&S z4Tkkh{;VC}^#R}upG{!}JhpdU38ZFkIMDIe8o$CJQi>z1+a6Yo^#;?KpWoeakum0( zO@wWll@SCFNNvfKJ>4!&E+WDb)2CmX#EH+7`7oMbbZN+U>Le~zA(23@#%-9%6YJK~ z2Pq$z*E}(NGUN;YP(&HXpQun_1q4C(ry|1oR}s0nd)tHD9!h9W-#F&I5TVaOnPig_ z4NXTNQBFFj)7Dg1$BISA;H(WGQ%puq3Q@h@o$d}Sn;BOt(Bj3H#k(}g0>{K~BBvcL zg|2_srGWVk;nXF*sXLn>l=mSPbNUYJOS%F4mA1 z9x8YqeX^t810APile$aRjYU;g(?_ebu55?0B1`EiYlMBs-@R5t!>!1R{W0)yy(Vc) zd?i56s&e9cmI}#Tk^|J4dk8V<3ynYK%lR;A2o`RTFoO@pl5Q|_@fSl=0I?KtC0L0Y zM9f-CE~@x#Gu9Gb=~F`&Tpqj3Sn50cl{_9!@0_X-l@jaL^2nMY357(`=bF__$Mh%>65uVnP<@R$6?ds!#l?N$D0^c41GT2?xG&_int_I{7gCD zlPgtXM71)CGH5H9(en`$&RFHjO@-eVXaxg(LxGgdzELLT2=M 
zMD>lh-?c-J-Wk1?*q{u_XAPh_KFJT@!>Y(t+~3O&pAeALS+S4?#TKk5t&|lE^ty*= zN+}6_zs%Ic)?O^mh!RF`gXY!bVfsAE0l5(GDKR`Uh}N>vTB~8l#<@kuc-Gr_2o`c|xZqAU0VamvobaRxd+ zA5M};Qrj*~RYo64uhAQ)A)qA>Z(7=*$|gP!>p^;EVo{e)g}yP-`wjjIG+$m)tIO+B zlTDO^qx<}_qmbK-tJo0?=$G18*KzhmTYcxdjJG;P0?f4=#dAo7LpA5)g$t*J-c`1b zJ_$_E$?`*d%CTz)%qr)TjMz)Upb3#}$T> ztjVDIcHh75PQ6_f%5u=A+8E#Bo>co81=@+@&+qZd%+Mha9H+EDNA&Nbv(q%qYbn!% z{FL^Pcv{f-@uydQ$2O@~W*^;wTvbn}q$`(q)28r-%9PVqb*bpx#oqSY2V7v80oyOY zFoB->pKyWXH@Uc3gFxU27B4;n@}5A9be5a1Qn z0c%S}>i{~@XVs2(_3%?AmANHU-Y90DZ96tYlBLA2jx!jisHy@+?QOy0)3wgTUsHiG z+Bef98<02$A_@?W?c_U>=^*p=qCPQoKkMb+>eT*AYthFlos z1QPQ@BMi^C357eDb#d~;)*AO)74iChjCELfM&mTh#-egsS1Y@8D>Gp-g+`o4u=hpe zecrlcd&@0@!aZpr5k>fDLt$?Oga7T5FcS9d;0xMxUFi2J3emMGvGMJUPOZ*N&nu=C zp+^be$7m~@Eb?Fh&EdCPb~5Xmk1sb9;GM0X6^uRg~a zVdTamRGMWE7Sz5ey144bpqHh&r@7SND=jfs#!-%2=8)?8n#EH}66T~B9F$aL47zOfi!qE_WnVm;W01$W5NgBgaXG1Pq*RxUy-7LsXRLWg;xx*! z!h?htU76>@$&)#kw$6*kVZ$La=n~)SJ*P`wA=v)&adnBF()bN3Ljc2w2l>U)rEMRTX=?si3Qt6y!i-wy?(Y{=Mc&%bN+7d+qSh3Ine1 zb>2RSmeAzL(FSwr^27I?Z$xW)#GP;(5WbjJ(evqB^wlXCyf6*I@3{#F8^phfxDdj# zPEr|XY)Wx+Gib9Skh0FXSF$BYe5BRXZ&S@Gq*_$0k<-_ zY_&B)TzHOR{G1cZr8V8eLqwlEBT~{VBe41uowHc)lNGYEOvq(9A13Y#GPBPnW)PEL zq1L{_@xD1O(5God1?95)ptGG_OuwO(emDRFETbeE$MwSNGycLHk`JLZnPH6GFt5nA zpe~Yaz5s_En3w3_H#h(x8}foVrbySeZ1I^2AGeycfjK58Tek)TZyzW^?RN%{<_l4!h)KqnC3&TcO?^c_r3lWv3Od0CCl^Ky}u=3rg373KEc>w4Nr zD6o=*i;g)0z#ClYW!1atF_0Uw)2KmAnKzJYv6!?mmUW+bU-IoDn+-V<2gLmno_T8HsBz>g+T?dnP%ZZUrZPVCO@GY$?wg~SW}WqYdy+(D!pT-gb^?Pb zJHe`ur-f%KY+E@G))~la*yPjGTSfWd?^^LV`jz3NVB9O85pwU$T@-|+S29O~!CH<~ zt~;9}9G_hiEj-mtDqc@MA=|^6BrH^mYTl`J#F$BTcjvN+6L_j_rvth3UZ(i=#k}i&1S}C%lbO#K4$^O=;Qt zwoZqI{fs-D2XZ(Vu~D8zf`j9s{6CNb&o6R#q3X8!iyWM3 zh35q`@XssQ%jh5Xl`SRPQy6MJ4#9j>ZZL9}d94qBvAye-E(9JF%9)+PGHBUdX3OR7 z6R^0L?zkOj^{A87kHk-3skjNKzwtPp-j4OMXe6Tjax6v#qXw^tO`&&b z{GccJTtM=eEzGHjdDh9oI^)@6QpmFXoMspk;gZ!zSgef2T0p zejL4Q6LZu)@MHW>;BY!UeANLTvgs7_3dyExbsH<2?}n|3*k~56kQz84PH$0+vE3h%o9mhEVDg=yE{ZWi*f^H(;-SdFhlZzYm`U 
z5Tom}YKqAqicB(VFSSZ(62X!$8N-^ftgu`s<@SXDt-RP|Og{ZNsdirW2Pa79zv!tqffiC_+Sjg=IGrft@KjbM9DPxrLw5EpXSd#v29bn&Joo~ zan*GUkypwBDr}zRtJ;!QwhG4^LY}XPpyq;&KIxzCuC3~Z{DiRi*Y|U$)%p6IT9#&# zay1qsK3@?G>+<}8Z75Z?b}_e5sPwWQe<1#yb!bZfQICthwE^K3_JIq$HA4Z_yB}S1 zO~_Y3M5K)!?PLKd;j;Qidv)11(86*n#c#oKoU#$;s5Z0B`Sq^;kLlM66|4K45?|j z9g(G)wpiSs53PGvR_){SpM~wiIHRL%G3ah5?MvvjqNV>fQyNlk7Lc*?u;`Ve-5|Tz zoUUnxYGpO^*E zPk0-$?7QqW?lq73h&mz)Nm}w~4>?(6#$2x4EVhNNb-rttVN+GiYa^4ceBCf(H@9 z2GPp4Kl;eq%lHXtQnGxR<<2NBGi;J+85_~1`9dxLC?Ecr6;$D z6lzYCDl@!*>(*Qy?G%VVhd4criDD1Z@?;hz?K{7?k+hvUIQeK2=M~u|sgsz^RW}o5 z#&y^Pu4{Pb(P&HMQW>9BPEAmB%Tx~FjNh-Z+;XWk>I|e=I*qert^X*+w0rI;X`HfA zQow*PM^`ZCiEKTulkFK2_|~kag__ba7o5i38-_aMni|Xdbsa(`6^DNlx@QA9hgo$Et(0$(eHY^>v%b$m2$zm%{ z1haY*SCf~(X>_x!@Q7phW<{E2BEDoxKpRNvtXig0<)U~qE!YhYRdOQ+PI0KcS0%7w zPPO@jq;w4`t$vw&KWQ`0##h-14Aos9DxcipM_S|YdqFf7Kd&a}Cw&})6iuZY9rI3V z=}_wJ-o^ym3zgx%NU7$|mt32|0YLa0Rncw159@j#%lOsnfx?3y4BGj-lNVFIgE`N_ zEXM{l4?f)+wz)dq0pE^_g(vcvw9_#lD(y18apOLXZ(NU09mKE*Ue3p5-GdJZ%eu2G z+N|p9x|-khOf0(Ge{(K(vR)`^H`1#jvpq8~7&883+@(8ZeBjlo;CtpAts&ysy9sgW z8T6O}x`N3GVMJGjKE@P6jvk}?y6bbYxx+ac{}bpte5g8Z%g*T>u7mCXjT4SXF<(>9 z#8Edz;&-y`Dhrh)r6xoM*Lvy^vpX+ryVfddx!2aM`BQ<>Z_t7Gx`VAT(7Pbpr?Npv zH@IeidaYN%wwb4_m2b=Mj&rrYG;L2j=Dg+VmqysLcQ0v0uZc?a1+W-E*+QP`^{(ia zd{HicE|TX6cfkfPe@1un2^n}}#-?*H6_ezM_2Q`GMd4x*HET0j`DzGvQ$5$x4S#z~ zW_u0f>)bf#GV+5%9a_is$+F5o4pnqrczJV76_GsIrKB^)frcz!^H=Az)*+OX1}~oDJoq#r{L=s>i*n zn2AH%aSfyz6IZu{kIuKENCq)J2-nM&0N29P>7wc5NBj4>qX#(pKlS)R0WEZ%ee>=+ z+RP$L&N+u&j9e>0hhnSD$B49uMZ%1HG=L*n#g`r{*b;vE_PfwajKLZx$_J-9s!Q{T z7prd!h9XTD-p(yu3geX2Vhmkohv*QQvFel!Y?4x+GDB(Y%T5-5^t13BnvG?au-{&n z-$i3LQD#aXUzRd4^^`C@yGJl&CsNOyr&fms9jPtc_;<9B_wKasM?t=|t2K}2beMQ| z)PiPwjuH*Lv_tHduz((B7u{={G_LX|ad2c2-xA$$ipGn-!VbH#?t0_Eo~eez7FbEO=nLpaj>6DH!p z{_#L33k_E%XE#m@Cs)ufnOnhaQ<9KVF&4B`42i zt@UkP1ynqB_|Xn&4xP)NAfC~}Hd0`!5YwVVKtq#2IM{u;aEhpAc+#$zd47z4nLb+E z$Ui^BAl$G`<1Boetxum5>e+A#pJB$F18;-Hh}X7cgwepGMlEraYt}l>-FY5FIc*^$lNdd_M1XCp9Y4QGyE*nD5ME{A2J}!5-&s=Opmta 
zt~ka<1#-F0Tr<5YS?(+K;C6f8j76nXXu~xPFJxU2BmjX_yJH92-=PhUW1kGI5CO-awWGV#0k zKB|mIa(dPo8_a+qRo2s5>8vwD5t%R?mCq_K`*yR zz%2+}Z;0nbJVn+BF*F<41&o=5*sufU!xt-GMyC$fe0;U3 z?bkK6-c`mUWg{+)%^@<{<@hCoicN{UYjx3wrKtkXk`I{Iy?UyC2p)bLP+%a zMeJfNz|E;wHNpo>7qA~4LM@>-E`BFdj5~{GThGb*1dbgGn~B}&E?-Eot#A1yRbci8 zGFzkPv=JRrXdfVg?XjIOgwQsBW_TGZMc9Ia?wONTri&I|^l*FHk5xc|$n#0f*&u=| z+Bd>iM?3s?%GkiAgUdxH8n8p2uhsmchw50Fg7Zs(&H4!T{PVgDWt<${L5}XG&%K>N zZpIJm>e3Na@8H5+ykVs+WA>Z}&?HtP#Ne@LS^|G4o0B~SHs;@5%%D9d3Y45gWn^u+ zF`4=vKlHg$P2lB+Q5JauF*!8mf_e>5G*!yy??~w*at(m{eGC`XLgo+4`PXNXF9=^|Qgdx`M5mu9N4_pcI-G zwr!tqK=?%;39NEF#ap9Rv%cjY3_D8-h?nAbfAA@L`TR;zvNx4&@fDp3rn^kKbnCZ} zt=a&Y%Iv8nO;WO6Z58RmR_}(yT7K_I&$Hq)^oyt9kaQ08Hb1-CWI(`&3d#B+xTazk zH*>|HYpJCxV=PDOv*6Q5edK<0L)@e@otZxssK?(Pe~n&P5xf10+7NEAjI$Ul&U;Bu z;d*g}dKom?9fSp1@gktErxW4#M=gGIM^~qZIl%-8u5%MQw5XY#^JZ61cCu*+kKq}+ zWw&&CI4OcH3|1Rh^%)^QoD=|_5bk#?_RsF)Lo4<_?T^On-+_O2oF1B!zl{kt)L*U2 zzk~mjr5~D7zs(*-60qRkEvmmm{}e4AbnD+1i1Po({U0j!-&y{=qyJ}?9N2vRJ^Mei z{E|EV&hqCy_TfJN+b;2cW%)0+`@i%2$&L@V-rq(B`}wdPf8T-s&hRHxKd|U;!-OsG ze{kyG;eTS(1D^l3Hqt-B|Bdc{NB@Zg4> Date: Wed, 11 Dec 2024 16:27:15 +0100 Subject: [PATCH 21/30] Fix table loading --- scripts/otoferlin/common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/otoferlin/common.py b/scripts/otoferlin/common.py index bb6ba99..afc76e7 100644 --- a/scripts/otoferlin/common.py +++ b/scripts/otoferlin/common.py @@ -47,7 +47,9 @@ def get_all_tomograms(restrict_to_good_tomos=False): tomograms = sorted(tomograms) if restrict_to_good_tomos: table_path = "overview Otoferlin samples.xlsx" - table = pd.read_excel(table_path) + table_mut = pd.read_excel(table_path, sheet_name="Mut") + table_wt = pd.read_excel(table_path, sheet_name="Wt") + table = pd.concat([table_mut, table_wt]) table = table[table["Einschluss? 
"] == "ja"] fnames = [os.path.basename(row["File name"]) for _, row in table.iterrows()] tomograms = [tomo for tomo in tomograms if os.path.basename(tomo) in fnames] From 079444ffce73391945fc282c939f6625434f96fa Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 11 Dec 2024 16:34:14 +0100 Subject: [PATCH 22/30] Fix table --- scripts/otoferlin/common.py | 1 + .../otoferlin/overview Otoferlin samples.xlsx | Bin 8293 -> 8416 bytes 2 files changed, 1 insertion(+) diff --git a/scripts/otoferlin/common.py b/scripts/otoferlin/common.py index afc76e7..d09466c 100644 --- a/scripts/otoferlin/common.py +++ b/scripts/otoferlin/common.py @@ -53,6 +53,7 @@ def get_all_tomograms(restrict_to_good_tomos=False): table = table[table["Einschluss? "] == "ja"] fnames = [os.path.basename(row["File name"]) for _, row in table.iterrows()] tomograms = [tomo for tomo in tomograms if os.path.basename(tomo) in fnames] + assert len(tomograms) == len(table), f"{len(tomograms), len(table)}" return tomograms diff --git a/scripts/otoferlin/overview Otoferlin samples.xlsx b/scripts/otoferlin/overview Otoferlin samples.xlsx index b4f7972cf148bbb5e729a88063a0ffe98770f83e..fcfad905167521c22b6a298b9d8e14f715eb0547 100644 GIT binary patch delta 5794 zcmZ9QbyQSw)5qyrx?H+;>244Za0LXUQ@TMKrDN$_Wnt-(PC-~ikd{zd0qO1rY3T<& z=Y1c2?qB!Z`#E#&Z|=-|XJ$=yQ4U{21s#JN1qTNQWyrq7N!k3ke}exU!SnFZ4O|8fziCiSZ}KU)=@!RTKab)p-h{6iKWEH~kO@?4m5VKm zR_V=xvHa3@os77~%1dA_!>4lWj0;BGti69vu-qcLtjo3PfuQEG>G=+&ts0AFk3DhC zc{W#Smoqu!Gmn?NrQKr>NzFVilW=(wiAmrlm4^RtMmpLggemjaXs|>NqNGuiiN-f6 z1z=UfF_P=1<$+PI2&_{+SI`uy-QOuK&Wp!>=bO01RH3~MmP0Z`P{e>5ZCQIP=#jaR zP0!}$ZL#ipXoVq-+drU1Nncor@w*i-bE7|*sR+OyZNnH-x>mNKnOmo=XfatyXVTvs zF*>LoJ$GuIAV|s>f;#1q$AowtWKdWSeU63)LLU)+8MdZ92`zfGd2#FaV2+OU-34Ed|JD{J4OUdm4;(Ak^#uX!O-$3~@XLptG z?I00={(T6p!SmA)D2IlZFwWVB2=aqX1wCG{Z$Pd90r)kw#lE*-Bs6G-6us&`Y})%#0# zC*AP*1pU_0=A?phT6zhn9_w0th4sj&28JCZSuH(chq!v*Az^T@A;?T>d-cxYsrfrC zRf#GyhC0J75p>H`RTRJ@Lnx?T$a~~a@zzJUR0-|bxQ+m!MA#`olK6b8)N$y_!^RSU 
zTu)w48gahjbrH*NCGB=qcU-nsiEAQ<9VWcKTCBRIwKkd%ZASw-AtpXJO8`;_x_+cv zR^Y=si?K3fUf6Td!sKt$lLNNq`^CZ(CTMN`c8wugoSTJkkL1da2>KK6NnAEuN$@RR zkXyK30giFBS+`^rXOet~bj`r4S5_YUb`+|~FRH9MIwcIwkN1DfRGcVxCCB{CjE;!e z6d_R|BM~N3vp#sI%!ac#nWbnuY;(MEI#>Gdu51D!W;PVkPC(cJGV|je^Gb`r3}rVUoaqLBj2(_O$V>-XW`zrP0Zj~G2~Sz(dsUT!y#)an z2i65%0t%H6)4_z}{jWk#1+#&E+2O(`)Mh6o$CFu)=D%_iH^&kvTI~FVU05EW7C;?`FxVUv^@n}Z1y!5ugT?CjiW6f0?-pGz}XXep^JOL7LRt@ zeje}IdUK{|Knbvg+0>Y`8c^Qh8(w=E4rqk&LQ}fC(vWFsW_=!8`p|^{efnSbTyfMr zhsNDcb#NDSg&GV_dU|o8h;m!>4JF{%(>s_5)d?7YXWqGJg>nabiPCmTcUw${{Ag_z zV3+lUyxgZwyI-|hXWp(+dewHq01|b4w6BeyhxcH8DT=;4zQyks0lNt*2E<>RD)%!>n?erao5Hrgd5UJK zT*384pkC>S1XwKgnZl`hcJuh$B!7a+Sm&=s75X~1dVBI@w%#CLqdGie#ZN5{BnzS$ zMh=WJ<~s*0y792cCI;)%Gu@dmEl8oPWQd;^FR5>41r1MgvXtP@GuaB=n-^oYm)FWQ z5uwhhqZC8?22Yc*`eB2!vV*Hiw@u(xs)9=(We>ZOu|sQCw6iFq2B&B6dDfftEB40u zwg;O5eFdePrl0-FO*(CsYWVdhPR-qnJQX+Y(0|K<3IoN~8YT*g1>LpJyw;VK>N)PtesDtN1KrYQ6JoTj()2#o#S ziByo+S(>~2XxUEvbXC}Ba3m-&qp^)X)l8wP6I1yy`6#*ioc_Sv``R{E?7o}}N~;Nk+;gh?xdSusUk zrdYYorrD7Ogd2oT9`jzI#R#R%Ms`7(Ip6F%``AA|VB5UxamqehH zZGs}a^jqpUNuBdf%M?W4Sea09i}naRhkx9xo6TLA;A%!gab$i@*LBvPRAYoT0T1u% z*SsLkn6c;Lq`Jx>qK>pHvWwNXvz=;P z8rC-%PWHxN9QSaq8aR0_t&L3p5e!=5!g&f~0uNfpNW8I@p`Bfb&I3Ni% z@HsE`y!LLR{EXw3r4NR=<~R&vH9ygaCdu@}2WC=u%M6tQg+ShFpF2E>A?FR|Lgfn^ z@sihVvkAk){OJurCeLeiRIr+PlTavv5E@P;`weSEH^bFtF&HWDs>x}^A45+FVC)Bg zEr+T@>G>7l_gEEatlr}?-`CT%xSPFW&@v~OAa4s-BGyA*do6h1+*BfeS6SV=^7PyX zi#TsVx^In}mAwmxd^7e-#~75|W$LX<*r#fec3<5o^}1*x9l55iSism%dP7Vmb0pE5 zXGg9<5VS#77(H$AZVzB%AF5yX1G)WC0V{?!(6XrP^2>?&I5KOt(pWblN&<3Td)76h z5{kJz4f*w~YZme<`zekDAo3%Q8}8f!CmHD2Zu8KQhs~{xqM^RV_vU0YDbSS-b8Zj>BrBlZ#+3U!E#;!sa6eH`4`Ql=RnO=F{=L1VA2|QU0d9|DkU0 zsZ7BaQ9tjgZ0mapVQwLhFLW9CgWA%etbi`6-VX7OTlH0&nmlz70q^bvwEbG)hq@pD z@~D@a_y#1z6loUr(#Yzbbv~V`eX2*v+GxhcM9>!b;LOpUGdJ6Mf->4Gagc}DIh9v6 zdTHY4u7x*c4KitXP#o7GO+eB{;SN!00E=Z%&iP^)ph>ynZSZR^ZUny8aNQ{2lNC^K z5kP3^Z@O0`WCfxiO5fg>FoIbd9Zd%MNqXg+^iaob z7o`avDQJf*OQ>Pm*6D9b+lFGS2dAD7x3Zi;a^2otYVVC#ZyHccgxYT72~TODGADT( 
zjh!WW2QDzwU5&lnfh6JGm(kPx`!bq`Un}C<>3df&o*j#`A`d;w4cbk7-M6|P*qTqW z_@G|5;;X0iG`QtT(!0U5618BG3Re9gg_x*1R1Nk7n3qLeCG z_d&c!uLbB^B=KZvn6Rs zuEjP^$L&w~pfQP}$OnVfW80z<7Rbu zn>Zbj~<4Xg6!gYFZ{1F-+)uonUAksEE$-bBCrumnjV&P zXp&vlWB~kBo2mx0KyOTEAFG7+tuFbPwFfxR_(tbwu?pxIygo`Y5l?1h;7EZxsTXgv zz-pjSH5&ud#!@Wj@tqH$^x_l&i;VE1F$W|{MgokM0_i*+X#O%yHcQQ3zOEfM)T>C_p^LCZ4+vv z6^Z!I_+2PEQIA)rLD~FaeDWLYcMTTrbPg`RDo>#6&GJW1*j?(^EBu<*Za5M)_LU@J zsHr6ovjECoo;<&ZWKDL@b~HGcR)?1N3yNtW}Pu9&pyLio3S5 zmK(>+9D}Yj0%^&d^&Vx$v0uQVim-g8SS~S|hoVjV1^ZV;SPgF`DO|}ewAu67jMB{{_ zyHMAYv?#rq2-^=6o#h$Ty5(0$Uj=_@<#ODa%>0Kf!lVQN)b%A?0N06`1M_b)OH82i z1HV_VCjB&4X(s*3e2C}veK>hw)sN4ZPYR~L!uI43U?tP?t16q0uCpKUTexzR0S{WO zZ@S6){p_T?MCHB3x!l`Hdex-7b0s)pv(?f8C8Q;;W}6ikqb(wVJDZ7 z9rk@{uUIw`_}+~LDAtNNE-X2`aK5}l`<>ddA{2O)_Y-p{)4!>W00P0#XXD5zd&!ZQ%v%$$*- z#b%nfR<%Jg&GsPMS;B7u%3HQ%*_|p|kq`V%30HZOh1D?&Qt-%4V%E&4b7l@9C#SZT zD=4seU-;GlW2W&Dz;Ydc)NK&@$##;ax>>1>+c4$iml+g(d7*MC+|&}U^|Gk59D`U? z3SSEVpp~=D$(^r%XiI;kkGM#xE-xKALiWXdZl?Xw{lo-6C+b8I8pt?1lWj8*R9iCj ztdr+kv0ey9g&y$>Qcycs+#*>HYDkG#K$~}MFs-_wN(&G0pkv}7C!BSQ61u|!EG&C` zt!iW^N*>y8IJ^mZoVa^Nd=<5i#BaQMVv-FV4g^Jq8TjZg$xZn>Wy~^mRGQ7pE}B|+ zG{459nT(2Ge_vul$ME|PU zo^~t5FnU(GDmwyekobGX-1pSazQFuYI5?DmK=R9`J`md-qw{fsQ)bpYLF89Z6uMX7 zdkm*6Ae3M-x&jFT7USrJ4%|>h2gOuqU-Rim9tN8EINH7SfqgV@UMB%|ry@NQcJ!#s z!wYW>hy4%gNTrIECe;G*e_q$3Bl3CoV_(#0du27AgSV8jO$c6XMX3{$Osp+i=saJi z=$_!e+=Oj-+0NY9svN$}|CPvk^(S<(qDv^GF76$Zc<-IRH$Dh58Zfjd;h}2>KXLH+ zEna_<6&pKGOd_LFa2Fsv-5zswm9Vj~r(@^%su$OXU`E7Z*}wSt@(sfR)Ba_mJSN zMoF%#%Jp!e=V&1}xL2~`>A*5Yuk93>#mg^=x>~NtwM#M47c|D%lR~D?jr~3iLZEb@1K?b z@lWjzP5B?$PNfsW9rBMe{IfbkOwbAcUXu~f)1o>eIO%B_f4};Bd;;Fv`tKEnjmX8O zMg-I2{{jE*5&i|M2<|}&0YP3LXD1C6R5Wsw|J&ro8M>`iZ%yCZMBXp%sQ+vG{*HnS zaYWC~@cSG7u2JOwKw=0929`g}-&sQP55tP^XJGup+y~76#|!a4%o?JRf#DDHx6jD_ cVQ3M*7?^)Mh?bES?Kv$%k`auBLH`^15Abu6?*IS* delta 5693 zcmY+Ibx@RF)W>(}Zjf9$q?az0Sn2K(q+29K7g#`g31N{Ek@x}9jdV9lNQbzTh#=kR zyE^YXuRb$(=K13@=iGblx!*JAtSEewC(_lx#G(S=;o$*P0;-8PffyY*Cmj 
zXx{`99A()^MP~CD)7fOZVW#Fu_IrCpOGeq6s3qgub8Vsp{rZxtH@(>-8KR=xQ!NIZ zZJ4xtb+Kmi#Ob)aQ%$-{=|MqOe#*$&L8Gw(W3pONQ-4XQbt!#_`X6xLf;miSy%GNIAW*KeEh7>{HO@U zqO={YG1QvH9=@J~ioC(eO=2r1qGRZRhveUW^Q<5_v|>0cq4sVXi%S)8ggKwhxk9KO zpGl54(@l%&c(}=Vgb$yB9qaOrM=!S$7?SMN2pP|mZ)g2UZ7pn(%BxbgpreywIzuE5 zuz%!+$pT!8e?%#(_v~I*>Q-4kXO}5gwnun(rv&O^@7$ z9_EX8%}t|mcS#FAki~5=SV7z@7a36>_p8e2m-BVDIRVqs3dop&nO_Z4J3&AS>C;)_ zFc$actKrN7&HF~7`cs#r)aps}jay@CZ(c&#ZO?9Rh3UESZKh*3Z7L}vM-;v*RX*A( zOD&})mNVm6pCL`oS9(8zu)MGkJMny=Q2h|Wp$)dk7J1_{FjQ1E!m*@xY;k`y7(-VB z7cUFvBM1)z04Tx)0RGOG+83BKSich{5tkpP<&!T(*0x`n)0lX9GMBPj9K*C!JgiwL zAu4MGHIPp*0`?LtXdmsEP^8OifFEiGs^y$+J!>mURd{fDl*Kg*stucPcZEKfYj{rc zIo&?d;CgQSTTzk+HKPMq-8r)8j8O2WP;Cbn?r?UjK>rsCk{VlAPwDYTvfAVxUa_$- z(5M%evP4QjG~D8Bi%PnO$C$JrX1!&vQ;mG+=VXtQe*$Uq!W%px=Q<6qehqFS?r8W4 zTwn;E|LOY$KMbsd3!bri7*BQYo9ga3p*ZY*46VRcnO z`!`GZIWZH%W{WW?Lr zCk{97K$~J_gj~Iot|T{bYzGxg_C$-MHG!`Oi<~N?=Tcqd1762XElfI)H`i&flTfbu z*R-A!f~z9f%J6vhZX%BnOQHRP%8DGF%E@!ZU{=B)GqsS8s5TyMzYykc#b@np?ZQ`fx)Ysy<5VznElG5w*=Bi+?x|`x)W`~;FZs=EZ$8{Fe7qVWKZxa z|B*mSC8q?A;txDhXK6`!SV%gYVoj1&esrfW4Oz!#%*o~~0}QJqF2-^!d`&3Iw3QKB zAK(xY(kCS;-|1?JwQPv{#88OXt25KePsWTPD^A`fE4=Q6K&Z^*qZ5vXQq)D27!jE% zz0D^pn712k#B^KtfjmBh=UjRTVew<}nBALD|NZF~T7+71;+?Gg)zhAeAe9vBn zBSba>i1DJv<`Zqt*n1=y?skD}=&K*vVr^*dRlMNUw%p`*(sM8-)}9>>-j@O8W!HF_ z8IfLKM7sz~UOuKLSTEM-EXd&z z6ZhsW;A+lOD!hhPn4Jd>6pgG2a2j0m8_A2>hPCL@Fy)Wr*?oQcZSuoi_Fegx^BjJh zI8q1jQw*LFI@B+#b6^N-iuUED3gBzOI6&^!=MyO@P?I)*;AwZK9biAs2T*y;5IjR{ z#!&M)SXA3i-6*TS*kMeyf8TosRMS8^)IMu*y>?jc2NXC@pDQX!NixawNED0|xW+and1dTJi%-+Gp zM7>ZtL`LdA19@+6jthSBPOz4F)A=n>x(U`*q07RJnEd+2g03sQe4S^!2fX(*@7KRk# zW2qWBiqzZtmz)*OV;EKoRC2HkRiaYe87=f#qKNKQnUCLQUzuT?Z|(SGNg%wkYrmtpE%^mXlr2A_>YKyr7HYPRBRLRS z`UBvbQm-?_9qv;I+cPA>z7yq}E~ju=EuBNHVpTV3=qlz_yeCdGApy$yPhv&&IBcX8 zFgA__hAzQNJ~z36XyrHE*3;2iD@!jv!e(N(R%cU|egZd!OzCGf4d+U_mx;zdWaEGQ z(NHLxeBKCf+%UcFPzcL$HgmQNf!zBw0U8> z!z-v0;esIwUSOhe(3&+7rz{Tbg1f>RlNFq|Kt7Q#YejXXQNgE!62GdwOF`EIR~h?V=O=JKq#PuSZxVAt@@c2Y$@^?7^up4*vF;Q 
zkNjIquX1IL(!7nIMX9KNuvgtYEzokMtLcXKyN z3&WYS5cK+I-(oAyr9Bq*w{3$|2?goOrhB`MIWEN|RZbdfv`)t6eHnR96L=^N1C(W} zD%3s~F1DI8V>EMVIqa{t(4gDU^GJ~{H6=H`=2(RX%IPp}HJ>1Z-P%35#t*^JQjP;L z7XIXkCOn7n37(opl!vnB$$to#r0S@aGp+v*vGkVDxG_V?_tBxsS1SU3@;NRxXSPDF zyk!Vd6EY9^k{+#136>;Z=CajexuMUSgYauXI$|2d$zKz!hN`^P+a+X}kqGw_-<_iw zl&>AepmVjNep9vg$??0^0}V$g2aOZChJjj6Oxpt(zDqi})qLDU;N*#iUFghBrn+uo%P}il47p`FAD3XJyzAT4AYZrc;2P@ueFPa!>*zc74}~^Vfu_jX z(Gw4YkoCk>=(!SuKOHy^tgg8y7d(#HL)V0!%C8Z(+ZnKq=qokH&@Ju^?7R>5 zT)hXSFP^y}1>_cut+TXh>Ucii%HX?P%yd*)2i0OMs`-0_I&&{fV|@1ifp#B!Iu7zj zAB8af8M=J*#C6>lZT{fi=dOFFcO*vL123U%&u2Rj;#HdTy7aNzlDbqrzLW_&_szQL zxx18E+`sfbf^}?v-@)@^vf3>Wb>g5rZ`$6MwurO0J1YNG_j2dKk0ffE84a+*##=`W zPJ=9bq-*5_6p0ms#IM}?FG$5*DMU(-iML}$2x|PTKBuWUEK^P&_*g2Tq2!QLWtoK* zD-=Zn-EBaHLT6J5u2W3>_Whd>`HZ#HFZ&+<%Ad{FhdpUKDtN>bWn?knXCDhKoVR;5 z2x^dOi82afkLzSJD)RlgnSvh~4OqpG&2D!%+!r7>%Abg#D>>a@%i=J|98Tp+ds1Jf zH*Kcf`SAX(>F6g_h))=L%ViZGhb^f)V}@Q4Q;+v%3EwFjMeMI%IQ}|I(NY#mfCsb5 zvDCXY^74z&tZp}JN;X+ z>NlhWL9pOv)S<7+S=0vziNYk?F&A3eQi`DenM;mX9R?TdSW3P*Z zm{LzBFKJ7LC>|wZCxDC-5??Ez4isR!-=_IZHOGeH6xw>93U5k>*<)O5$!F zQe3CWUK;t{yj#3K^=sM>sL*MQ<0MjV|KpuSm-n+JpwH;kPVOMQns&-Bace%9VpHh0U7kH+8yip?SHa@7Sv^771jGDSX z<<*}yHS*#_@*PjE{wU4D?eqi1d4j}3_QIKIX)JHmA+9tk!2!#=#;Y^>#e+rG(BtCQ z6vaAZ?b|1dWFE#NtT0bfzcR7bf$773#*`oQTeWtYZ`E6|IK7*g$8Bx{NZdMWnU!BT zcNWZsCj=7MQy3#V6N_&nTpvLqu&>E%fKB=@B3-jj_-bHPw?}yf9j#l_EJ8QJ!-|-@ z?!M)n1oiRh!9ZT~V*aQ{CWEWS)lWDSEA0ot4`MzaiTS=0 z_9@{S*n*0!ihv0 zYt@C`4)@oe=J?d8ceM)L7(ZkE_jMygXgOg|bh?G9g3e9&gZy`71{G7m6xdcNy^y&6 zPm5MLHGWzav^UU}ZZ2NZ_GRdJ)A!&_gpv^sbV7OBJ5H42+f zGM=SL@fD5)1Iy5YEb~~4F5T1EE&d@hp<@5$6O1eyo?HwUVlMJ7lqF%ahzX^fcUM{X z1KvjU-OlVF;XG~$ElP1r9lu-3n}&eEv;0WKhEm&**qbk^{bIU=a?ddqiY=pBN%o3H zqQNu*0| zS&RE>7c=mCnqkF@JtFL==Q77B52rzm|Md%g;VZL@X~~~zq}aJC8>@y9sYf%54GkXH zPH1|bQeceCtYCIWC5#qeh`zsjOJMi-1 zWqtX;nFh`44~XjLR1K^_U zK*G+b7bBj2r-im-_65=`$@t5xQxWPl3rIZSmf|Pghf!~Zz~zL%L_l9c57$b0U#_w9 z)sBhTgY}?b17`OPJ^j};$=EqqsL4evF1Or}REIKa>X!}11Y?SI;T3&}ug|AQF(eo7~tWqs{zEI0{q_`6mIr+qeF`} 
z|Nmb&VgS532w7 zgZ`sO)G@RFVg4lZ|K>9JKMWD#jG5&R^Y=B9{lj!2#95gCFn>pk@*f5l;lslA=U9~A c3He;mzASM7fPWbj1EQXV6DJ1U`~d*}1JKl32><{9 From 5a22bc435b27d4045886725d591307339f696e52 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 11 Dec 2024 16:40:50 +0100 Subject: [PATCH 23/30] More name fixes --- .../otoferlin/overview Otoferlin samples.xlsx | Bin 8416 -> 8421 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/scripts/otoferlin/overview Otoferlin samples.xlsx b/scripts/otoferlin/overview Otoferlin samples.xlsx index fcfad905167521c22b6a298b9d8e14f715eb0547..e825e34d9729c0cc6e9d412b0c30fdcaac805da3 100644 GIT binary patch delta 4377 zcmZ9QcQjmU_s4Zc3q~0cj5hj2?@9D1qmvLaYD5`5N}`-#2r(fsF*?zMFnW(RMD!LB ziB2TZ1(B#P_pbN%-upZM?6aQpdG=ZR*=v9I*=Jtus~WAbJ}DV95fv2`5o2g2tpG6z zV`#NBu{z0x4J2hHyRZqAa1x-W!;T3L4>M$NAL#Z#RJFit8~O!|`f8{{>%^YAxY3n5!j{Q>(MGt{VVhQwQ0DQd zbh~MG4D+=%c_%#WR5f~?9N>2{j;39&J|<;+a<41rnuF~Y2{Ol}5@2J}T`}vp6hGp| z98R-SYT08EWhzu5h)a$4M9{Cii70j$^hX3KP_Bi|W%`(fy~0=0r!(sNm@b%(5O{%4 zoQ;)=$f+Z>rD_aO%?4`+uMAEq$|#~^#j zXxKBC^rrJS(^prhGLZM%H$w=wB=0b~OM^8q*yquZJeoL3Dc3ILR1j#}&^dfQ6ZHZ9 z?zO*QbyvruRKp5nAy@-{&LNlLP9Jck@Ae#}kVhOy(vWRAE(O%SD|542e5Kx2scM{q zH+YW+!fFlRCEZ;JPVqcZ8Zws2PV}CMkGSB*9mJj5imr~fSJs(t3Q#Cxd`+nSK>a@Rj>ciA{&;YT z;v35jyTzsxIX`K5J`~aE zh*}!ex;Kcau&%5&anX8~o_;?DjP)t0ZU_WwuaFZFIdT#a{jCrbe@g^5fl>fFM+XKR zubZ`hAXC?|aF(id3MlynLn*#?utw^tCfc@%PeH6rp-*TP$(r}4Pxi9-y%XA6^JA+C z7;u__P1{5OXGW9^@6>qL$$rCcsRx{JW7ZtBbI@X|!)93KsqC!TuK{@o4JDIP$f76u zJzgc@2}cI+Y_8&v9e<&i7K!H+wg>QwHp%DG1LMa%58g7r7!a4TsZ3aY<%3XHDO!;0 zP8CX&$bb^>{fS;ae!d}aXxti;a}BZIUZm}V#F+m%E`M=VfG02GT`tR5O}_Jx@A9+D zy9h!8gFa*M?20YahaPK-{1pSC}@1H+&^)H|0tf|1PuzoR$VRK3%oVeg9~>M@!J z`~(ITlr_a!;Ssw5?iHC>*XE^u zOfi$Dg7z0p9cttFDvc0I3?S$U+ebhJ2JgkP8!{;xO@ah|(f>SXF5lI5ZK5_I5&5$b zuyOk2kxhc$wXIRn84;^KlSYn@FR%AMANYcU7hCCs`tb)E_ICO+Xa+VoNxzw6OHev) z>wK{=Dv{a{7Fyw)gQE?)D?%)C414hjF>TV%P4U)5;=6@ZxUjyM044B|A(T{<{-EG@ zjqx9@EpHN+?n17}y^990{{zDOb+zq-oSI_tW|A&JoJ~gDmE{zr!@MwX=hg&bl*Suv 
zlvr#8M;6>L{lRM>f!e1q%^DV;64nn86HxBp7G{4bKx*B)$t^r}RsYvUXTXH;JPVHX zJ`ag{vInkyib_zHPo1ELaYf_)taSy&fzZK`lrzl=Lf_8 zqM9q%ubop2t>+XLbxtu27{u6X4?n-7AtvTuv$XvSl~@oJyE&<=L%^R#odB!pb1{dy z&f(t$Q^|9)onT$trGTgV1zQ#tEY~~Msd=!*pT96U-x)@SAg{r$ej$2~jeGYr{G;N_Km#eNM+sacUV7+9r zhY3JJPcQd+_Hf>A7I0M%C=I)wp!#iL0;%MuEw%H!^farwLq`sla8gv{xTTx$rs6g7 zsW6|H3|J_20bzG5IBM1&rsF~%q$Rka>|jJjEm)#4;Yrt*W${qlrMz4fE6W1yT=2}h zZ2PnP+YuoEf9RK)VrqXbqnk5wfxa&t>v+8yz5^c)%_Qw@fcNkZ2_8+iQm1prZ_AM! z|EzP{Z-_@6wKM0%Y*zQs4RP76%ro&x_)J{OY*4%x!NY4O1)|PwyEnQ;$7r!T5(_EW z|Dm42FP+vWCzf1RN9$l6&!*H&_*|@#@{Md7C#@I*V9+vxmTxs8AyF|j1$- z>f$zPFqHGeL7G`;KWj->`!1>c&l9^8c=OwF9$&g#q%8O&GMPOFWkG)GH0 z@Lu1(9v^-6KxHI4CMMr>gmf#-4L=wBVd66poaL}xACtNDFDdsHYC`#`;T!F9$jRc>xZ;!CaY&6q0ueg3Zb z@7iXvMM$srrqmJMpUcvb^`7Ee71zzgoFnojYWsN$U0k*w#x@1qxo)cYUD)ER@mNFK z)Oi|}EyPM@>LR!l4lHL-FFA*VeF&^lb|y(hNS7VW*FazP!O+^Cr^A>GL+zVSwuaKw zL|O6en2ua`tWf8cV`W-Hn{d!liM?lohAK@B336$T%7 z^?tKTN&V(7o#HG~8Sug;=Cd&UO-!yXIuQOb4CQaHH8Y?u>z(vT(P+HHFwf=My!OJ= zXX7#VrhVf0^!ccSAl)u#YS42*F_Ju~<{e^w7)R+MAaT2pyN1jAhIcpX^Y`y$p1Rbx z0>Lc{p@8Qd4_w;9E%{QtPF!<~$kk!*QfplS`(SJUtVlaI(kO4m>Bqvx>i(ChpKIP6 ziRt%ezI^eRcKgYJYBZmITCR{~bBW+V^NjZgHx4c6>{kd*Bo_0+a9qeVXt|y zci-EzVD-jByNNAK&niFdlcreGK7rD(ay7ri!YJdZT|uU#pd=x&KW43xQKhs^oTQZs zlBG+i;F!&zvEbX=8(Fi=PUXvf>CaM+NR95gj~|g=6!*Pc1uZ-&5fP5(-^GoUf=B{v zR6HC)vD9p`l8kvfx4saUHqR7ts-C{}2!Gny@g=0?p|qE?HWptv^wBQUZ9nB@W`xr# zEzu-oeD8P&?vn1xbNAl0TZ9x;#|uz11Y{Qbu`|+x&eD8d$|(P?z(7h1U)obs!P%UJ z@+HR4f^20T3Xdk?#S4qrtKk804yqi0DqPX`GL;_|(wk-aRMYiVT|Ako@#`GoXYshq z#8W#))Y1@Z*8pRcNrS96>mwc5Ua}deVG5;`jzk~+(LeMB4j=_9dQU7sP*e?fJ`<}# z#DI!gt$W~mm4kAx73t0Q-cn7GI_OQPy<(OqVY;fQA>u@{&vivxeZjSeSnsUnlzG7Rvyl%be*-zsi z>){-8Tg-`0@<*R;JV?GbGIO;Ad8Ts^4yAXnFYAn~vPY#&Lp#f6udB`*+WWRVqjvuk zb!nw{yvpk$DOZ?}_8t=xF%l7-XXc-iEp~^UALutENHAy}glIYWrz>ch+_8ukeU+Q8 zS3#9<*J)kXeK{Fbq<6|in!AxUGu@acEuD;ll~sX%L(5!8Jxp?GY3tR~G~$p=Jt0w5 z45^F=D^l?^o>z2ld4^||woTOEGj*U@IUOIkV@PjAIOpip=?>Ywb(Yix+5;f`Ps+aL z0Tf^#3mjdva0DaNH;^LNcJYw;jD`gV%&{Sxrv$hJI9-Y?$@I1h)=^F+*vl@}h?8pG 
zz#-vYE5wVDx5wMtd)1`!T6e|b+$xiuGY$_d#^wD3WJij)uZQ2XNsc=*5qWW&%$D#z zaw0x8b#t~(IpU1DBU$DL+r`O%n{Kiw{vjeFN+kaG$y5t*sQfkbJ6|dJt0^&Y$zSv) zeJ5cV?NtlWL`+osz z#C?SX0>(;$uV7=jX}K@l{Iiaj|L5cUP6=DX%|~^?T%;8{$IVJCiv7h6{cGtw9z`q{ zl$CfDD+}fR%TtvvcNx}{q}LPF312?0ffp}_zV z32EtXaJ}FAKF|HnKKH$@v)9^ZpZ(ispEadbB~o2YJbYRJ5fKq!BBYrVhJ!m1(kh0d zjC*4P@aXYxYzmrmSKdLaAJ#Ery8oeWRMPK*2@)Zxc4< z!>7F*A&$dEFbAPOTpZD&4mQPWYzfl0xyZMv&Vwm_liW)#Ohu@R7L$ZOjxjDHvPiHV zlx^h8P>#IQJm%|b?<4F?r2|6rgxu~UNT8_AbQsjEMcRat9em=~?d8<7x#WUlzBW#AGhZ`-QS{&5;wsg7Grlxn zeW};mGN9ykRyjQ~|J4CW@(o)4+~oRg@X`3#W7nKih#(mVKU=lW@HRW#_AgfOFGJ{s#%kW3kjhPrMg@t*z-Umih`1|! z;WHNV&xr*_@tBe?41(K}CX-C_bHR26G2#PYOH*XpeXivZE%k^1Q4s#FLz%ybYJ*%Z zjC|(3Z`6@!Avm}&M*NV;`mpL?w&3pa&~3_|6f^cfHs+2x2~VT<)P!_rPk~V+VIIoi zM|B7fA3XtXr%e||OEdQBPI~V0lq5G9SB%}K+E^I*6>w?O!9!Mr?*}&6td-s^b7zbL zU^37VU2^ttR=B!93sz}8R6^42`KQ^<2^0$&juPP;jyg3WKE0KKd)0rE;`RFSSk06k z z1T5qJ>8Tpvs^SA6^uRA5f&#}K97_eRk-qF?uwi&$F zYBF4Q?z;2comVfc;pBL5D&!ZgIi$K?+vt^zmwYDF>Z75)o?P^j0f(6jc_88**E)zw zhZJ|H>&@ggax_aVNFb>4#VTkkP1BTesJme&zq&heH>Epz^Mjvcf#!Lti3Gww7nO!g zB0N?-(kbkjS(+72)0{^C>C%L>-Rf|m&Ac@d9%SA|Vxjh`mqE5Hf$h_-dEPR5%yy83 zfCe+(k(=+$0koy&ra}EaeS%=RTog4sDk@Mw*LiNp@X6zlf89XY1!YUCVt88=<3R)f-lC zf@>|&j$fLEO)y6`kIntmeq7_}Y7!D@veErq0|EfHtN_4&1OoV1Ay6rVFw`m;4dUPd zNlu<3QO&4UW!9MX^m2>$+lK)L<2QcXs2j(mQ2Q$UbOac6OI<0(%XMgjLOhiYM`9U z{IO?~Ofz6Ej^;(eGhpid0wMc>hV0f!}n~*y)^yWK^txtITyebB}A1Z6W2_7=ZgeAqEA#p5b{%p zCgvsun9k^8@z{=uVqo{WfZN7 zVUs1*MLuL*+$xf~>E$NgwOwjGsqp?wVA6>+wUI`oq#MgO#nq;(MYQ&SuES-XyNd;k z$UDZrIhD4A#nCa80$GLd8U7AZ59;j)nugxuCSS2ck8Wmgg zXatl`1m*2iPqS%wD|9$i@ys=6Z24ZRk9g{}xbe@ObHNDjjYL|`7RloEEPlFx!g0sB zkr0;3tzpoQ9TP;`=a$WORf0s8FuRJHvv-F!Gc5&+4HibR@iOp}*5iQ%%_!j4dHA0P z1B-Cqyqi8CNhQTyih zR&O1}WpuMzOZ8mx8lT7y&idRA_58qT%uQp>6%v4;{v=!$Vk_3Qt})f|6j-{;41%vn za}Qt-XOOK4nH5OSclXuN$mQ7aja<=3Rqxw?rL00vKyW#0`Wq_XU(^+r$`^eU{~Jpc zI$$Z3jjb}N*je0XCOg-fGS>JGXH@-dE#E$EWu|a4!7WTb;zISajwlFzcjPwZB~*+f z&N}*uxjmNkI9h1EZ%ohKWqpf-tS^q?*v*Bnq|gCFpWvT9Et?y!MsN 
zbtt6pBvb}*YJPMOmF<~oL#Jgz@8*q&+HFr~vpIc<`N&*|cDW(1f|AEyU|t+2J7 zPZ$4!XOLi3^k@gGqR&ZNC4$gF*JU$>Nkre2j}ax|Tj@_nzpsbAE~Q#!=rF1q@^hFc ztOUm%rYd-Pp8>=Ov%!&xb<00LkMq!wQsJNy^?qLnkRiMhs~^T|59>B)GOS7$O~EB? zU%9`mpj`0o`8=2DR<&=FM)lZyqMY^`OzfiC6dX?=;zO0KA#@9>hfh$UOegKbho-O% zY?ZkyI64;-XS*C4b7a^Z_g>x}(%ATGaJBuB=Z7o88wnMqnyKT!N+>(Se%2usfeY0B^wgRxr0N#Mh4^=8xqR^}R-D#c0YgWio~oal6y#HZ@P_eVX0 zBkuG=ymeYs7F@cLt4rL5i6iBC4lhFUyrlCuLZXFwB7jDD_r0lKGB*IK)g4CR{?r#D zkX~65MP^QYeRUPyvhtBq z^i={KE&i9681YN1DUB<|GmT~>v3~zpZH3n4!0I`*-;Z(yWB3N6(cPM>=TM-SXUhlL zWP9G12-I&~AMIFgn%4@<(j$dpky`W>GkA~n)yrFOp4a4G@^d*)?alq{uJgAT{(kHm z{MSY@*d#67UYd)esw1JM!A-y8>h>jO8R%o z76OlA*dJ|Lv%Hs*`HZl>)3)Al_iRW5gJ--b9EWi}GwD$Mvux10FK!VeOTpIMN+x9s zzLJtgJ*qzEYUFljLxqm;kcal?MxJC0N%wwvocs2O@!|lpVxuAzkw0Ak?2zthLW(*T zAhV+~#I_RjFBEpNXi_jhFV>Sg{(Ta*2#~eEjhHE3E@Q!W-uphO+(T+xPKtIv4mf$o zy_iLEGWj}uYAll8!6em?rRquiQp|Fa-NI!%=M@B<&t~boXig1yN^e>4STuX*wWP~I zyff~RhycsL54@FezZ0YSt3N?P0VBgR27A}SAqEs!l)|EU5Zb2jlykme=k6B5>o6zY zcp=v5B?eua*JMpL5) z`<;0Z41%JRJFw$!g*ZXvEafvx9U zKhlKir$g0RCEQlNxIXeYyT-jKZAA$>k_J2gAd2JPrHzsTiy#6-TqJd=?+I$yajJf6 zXvZ`5?|vXhbP=+Edvvhsp|ISOD!22+ zPO;y&4s*0N8Vra#=iGzy!(+})HP6JmdsFnERG@3|DJA7e^+6yOC8wg2zML9 zey4@B>-?T&A!0HNnh}b*RgP>oNh#_H zyNJL6Pyqng%=~kxMIAFj5#!ouVak|Y`FjpWdD8H-8onVq-APTDH2$c7=y9dQez9GZ z#xex2bUSxpVq$Y)>G`jbjV*i?-3iH!b{;(fW9gc$=SnZO4PL{|5*9TY3u6h!DL>{d zeawCn1TKwAg+-B($-dh#0TX)TqgB$}^9z>A;)bB{c>ZB8@ZAf*2(sCPx)vz7lE^q} z?2<7dJh9H;LdZ~dFU&g7&AB)b*>2ObP6Zy!Zt+X|Wz1+3Q+{PO8M50(EnlfVs~txA z`=S*ORVuiX^r+duzo6>`_EWvklFavKybdK5X6>u3;lp*hL5%R(2J(l$)54{b=3a5> zpLFi?zppDnLKU6-DF6US$NBe@X%%3W|2yxyvDE%;NemFFo6e*Qk-ITZAu?nWA<1U{ z9R^TS5E=HHlYjO$FxK(^RwEgzn2-)TAprSxHF0oh0ssGNA=&KlTGP*YEZKq!05JXU z4(t^j%A1Xs?Z)vxD~tADj1=k>8`odvKZ*XEXKc3rvA9v^Z0vs-Y{>tOnDQS40I-IV hWoNsI&YYbEhXobL&Uv#bz-}I*Ua@l%;6rZk{{s|a^V9$U From adbfdc9e0cf4d880c7208440643a0d2b2fd52ac9 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 11 Dec 2024 21:04:28 +0100 Subject: [PATCH 24/30] Update otoferlin analysis --- 
scripts/otoferlin/common.py | 12 +++++++----- scripts/otoferlin/correct_vesicle_pools.py | 12 +++++++++--- .../otoferlin/overview Otoferlin samples.xlsx | Bin 8421 -> 8357 bytes .../pool_assignments_and_measurements.py | 10 +++++++++- 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/scripts/otoferlin/common.py b/scripts/otoferlin/common.py index d09466c..a7d5906 100644 --- a/scripts/otoferlin/common.py +++ b/scripts/otoferlin/common.py @@ -40,7 +40,7 @@ def get_folders(): return root_in, OUTPUT_ROOT -def get_all_tomograms(restrict_to_good_tomos=False): +def get_all_tomograms(restrict_to_good_tomos=False, restrict_to_nachgeb=False): root, _ = get_folders() tomograms = glob(os.path.join(root, "**", "*.mrc"), recursive=True) tomograms += glob(os.path.join(root, "**", "*.rec"), recursive=True) @@ -51,6 +51,8 @@ def get_all_tomograms(restrict_to_good_tomos=False): table_wt = pd.read_excel(table_path, sheet_name="Wt") table = pd.concat([table_mut, table_wt]) table = table[table["Einschluss? 
"] == "ja"] + if restrict_to_nachgeb: + table = table[table["nachgebessert"] == "ja"] fnames = [os.path.basename(row["File name"]) for _, row in table.iterrows()] tomograms = [tomo for tomo in tomograms if os.path.basename(tomo) in fnames] assert len(tomograms) == len(table), f"{len(tomograms), len(table)}" @@ -73,9 +75,9 @@ def get_colormaps(): "Docked-V": (1, 1, 0), None: "gray", } - ribbon_map = {1: "red", None: "gray"} - membrane_map = {1: "purple", None: "gray"} - pd_map = {1: "magenta", None: "gray"} + ribbon_map = {1: "red", None: (0, 0, 0, 0)} + membrane_map = {1: "purple", None: (0, 0, 0, 0)} + pd_map = {1: "magenta", None: (0, 0, 0, 0)} return {"pools": pool_map, "membrane": membrane_map, "PD": pd_map, "ribbon": ribbon_map} @@ -104,5 +106,5 @@ def to_condition(mrc_path): if __name__ == "__main__": - tomos = get_all_tomograms(restrict_to_good_tomos=True) + tomos = get_all_tomograms(restrict_to_good_tomos=True, restrict_to_nachgeb=True) print("We have", len(tomos), "tomograms") diff --git a/scripts/otoferlin/correct_vesicle_pools.py b/scripts/otoferlin/correct_vesicle_pools.py index 977185d..6fd9798 100644 --- a/scripts/otoferlin/correct_vesicle_pools.py +++ b/scripts/otoferlin/correct_vesicle_pools.py @@ -11,14 +11,19 @@ from common import load_segmentations, get_seg_path, get_all_tomograms, get_colormaps, STRUCTURE_NAMES +# FIXME: adding vesicles to pool doesn't work / messes with color map def _create_pool_layer(seg, assignment_path): assignments = pd.read_csv(assignment_path) - pool_names = pd.unique(assignments.pool).tolist() pools = np.zeros_like(seg) pool_colors = get_colormaps()["pools"] - colormap = {} - for pool_id, pool_name in enumerate(pool_names): + colormap = {None: "gray", 0: (0, 0, 0, 0)} + + # Sorting of floats and ints by np.unique is weird. We better don't trust unique here + # It should not matter if one of the pools is empty. 
+ pool_names = ["RA-V", "MP-V", "Docked-V"] + + for pool_id, pool_name in enumerate(pool_names, 1): if not isinstance(pool_name, str) and np.isnan(pool_name): continue pool_vesicle_ids = assignments[assignments.pool == pool_name].vesicle_id.values @@ -95,6 +100,7 @@ def update_pools(viewer: napari.Viewer): viewer.layers["vesicle_pools"].colormap = pool_colors v.window.add_dock_widget(update_pools) + v.title = os.path.basename(mrc_path) napari.run() diff --git a/scripts/otoferlin/overview Otoferlin samples.xlsx b/scripts/otoferlin/overview Otoferlin samples.xlsx index e825e34d9729c0cc6e9d412b0c30fdcaac805da3..afe5097c5d0ca23b70977cbb6574d34e0ce56e60 100644 GIT binary patch delta 6773 zcmZXZ1yGdl7w-wByQFK0MFfE*rMtVOq`Ny85b0iO0TJmGSVB@_kyr`o6p${ZyDt9z zbLZ;Zcjmk^&&+4eoTt8L&UrQ@izRV1lu@6MAYou&APJ|{;V>hk2&X|n$jsn*E`s0# zeVLFb<7N^9b6He7{oST1m`UDyD=A}YVOZ3H-ra>7&YV_b#dUIT-Zv-+#5&QY&D?=X z&fXAfIE$Nw{(PcEV<9Uz*wkBYsD40qq*#xj9%SGn5GWw(P_Nv$k4!fuK7C({`E2Ue?_6Cc@R02mK zv3Dt>pvj4!BO)SGS(>(HPraRS9y62T#MZT;ll(1P~(w46d`2o1TI zw8y|pmKv*64Nu}Mn~HCJ*=yZrIz)5>?Yg-?l9hti0 zH>b5_IQ~9m&QoT7mls?dKg~jb^OZHACfa|t!6M&pQb=+rrhn?8X<{!JnMmkt8aJHY z$?azZ9Y^bdZkX1@6+Wp_3T5;5h+?vTAi(1M?v9I+rOV2QOPO zv@%)@6eOflR3xOo&kMvCl^*=sBq>LbBzV~@%+Hm9LAobaW0g-gx8ES>3jq>>%bNW1IJE?Y)6-WFx!+J)v#BO!RR{QmY`!3iEUsm5D$$%j27Te}pNU!63F9E;EP>`9 zlMxLdHN6LV%_C`MX^;0CE+7croeAt=cQFn~`KYs%3|V}2ab%5I2r7bx5>>cxciHtP z^KRzLTow&|Lu*H`A_FH?uky|5CMuRF4g^M$%$Q4D^sNYQtn4Ed9|)A4->Rth2D40U zGCJazFHX%O^df;(NL$G%GQ3si_eVN0Qp^^cWn8MAbf=u;f*cvX;`n83a!@PHUg;mx zzwh7<9Ps@0Pr~fqK=8NT=e)+qX@dv?AQ1Ud zyRp$T)X~nj1pPWaH$gMnd_U&=A3)NlCL;r5sXLqsj2zgK z&(0s=u&Q`PNmjCBoAqAtw6(%3&jUC0SvzIG3hyOPqp33zR=J6Yx zbyf`ueABqTw^My*O6$H7=GKPVLrpm4!HLYxq3rg%vzZE61kiOob610U1xT)WN<;pZ zNfhjzb%s$NfCWM{Qv95LwTM2T#f?V8L@qg5d1N|t-Y#Lh@tww;?6a!gzQvtl&t#!k zKoaI!>#_PlikLPnF)Fd#{8?VYk-xF8<*C94v24tGZi^z5gBX)5zKt-tDEnWd)hP-r`eOGQapHrDswk}FlzbFY=&)*&-EE^zkR<{mJLB8E3Glx5?D%%XJ)C+! 
zpdK{_IuwD(kI2>c_eywh`L}QSBA`{vBmO47*AM*BUr_%DLS0lS_U@w?!03^1|5bz} zbapV*#B*MdB=X=w-!I7Qc2FOEo0lM8rFF+?{@2hFTN6h1Y=%Br^7_m{iW9Yw7WXqF z7b2qouaPl-X69*VP=F%wUZ|m@9e-m-o}-aqoSh1+wES-E<`_QEgp~**XXb@#-;h2# zFP)=<0IWy9;#sAj~?ta$lYAK+23n8E?^|Cj* zTC9RiCYcya+3EYQ3>iPk!r)ktDW$ofBz4bAAEuXT)26F&`(JTr%qvR$C>b~rdGmSC z9L}|fxL<$gIFRM7mYT2?^69JgFBxMFD~=XDIKh7-7&R!G8gZykQ7D?Du(q7l$PC8p z-)B#d%;ZqQ!U+}%=)uo=7_b(9Q0tV0moGq7a~%$yh=>(FCjYST6K)g_q!TcuCs4dq zJ*=}6G5VTFn@O-DCi!(R&&imZXgLK(n%(xM;^<{S?t~RCG_-hX{}Npqp@thT@{7J# zxP7BhF_D(eEgX=zw%L3ms@^pScLZN6K;|XlKihq9BSdK~+@A~yjV|?JIfjNRW@A6| z%w^KPnXa!^s5R@lr*EhhP@0IsU`}Qp^`<%iBYrwm4s6i`6bC{xD)jdw<&{3jLTdN0 z($`PSleZ;J5vf8}8sY=i~@L7aJ)DzEtm+Q_uF8g;v72J6p&vK%8tjIunP}pO0#k2F zL#p`r^p1%Hrx8XDML4@Mr9;Zq?r(1T{~2Z*Gp;Z*-=$`J)vJUmZ$Ng@kAck>YFAAi#+ zW_LM4UtpA$qJGwFKeiRh_Pbk%bqKf`tFfXdX>CuHp1}(GjfL7J!!~ zvy??{oiwZ(4;v&Sop9wraJD(maaY=S6uU5@7XD&HJ*jkFH@QodeyG5guXurGQ-uH* zfP(eYzqUz`)fkbzWj`aH@7jiK552T*Ie+)A6ufc~tV@PUTm~c0(n#M3SH`ZnneJIp zQoq``AX{pD$tiM&+Fr9{oJD@N+tXX-`)uiH<}=f+ukZ>gL#l$Ydk5n#rx8%GhuYc* zzb(_+MN3tJp>Vv6=0S(g$*JjQO(R+`SIt2h8^D!!4G3L}x14Sc6MNIASmY~cO}fi& zq5g!L71p|T9;w?=8nDTmsqRK3s)ZM~dD;Y#1sqLZw5M#fgm4`O3q)`mDYdkGD`z*m zSwv_i&Y}9lkIf1SG#64x>vl&LD^2$bqMb2`FH{1OcDQf8z~@AVi1fGsT#8^r>)d%E z>s*E&hQlYaqaW%ow-OIq2W-CL@a-BBs-+64%C^7=n9HluCuP;%V8(t|n+tM{u~VyU z*N&V^H#9^%G2otaVOXOuBhqVctH?Q6yF5Sq!fs1U*>BB(N2ahwL8}~R?RmH8g;wKT z?R!r5CnIdXc7tv{MM8Q>`~PPIX2=RICb&?LpyQw>zc~qcZ8Ysy`E25vyEVV|-jLeV z5($AEsYF%sFQ(3r*C@ov(Sr0;k6kBG)orup!B`lu=~(1`?+k@s@0!!y{z;f zYk#~E%*vh_nd)95KA_V&K(q)XBnq z5^rmu+Y-WjfX1!ZKC#ijN$H28_N>382>b_HvOP`Vk3I2HYe8Tvra8MUvJ~PAtM{A3! 
z-XYzVSOWwBzxza}R3GcTB(}!O3EWtU=)G_sH_)4o9|O5XJTRp^fbJRF37>D-FKU*G zlZ*$s47zB-7Bpac_bI6^n&ZyT=WYPZ=(2;7^|hjNC{>H&?SzbpHeh18Bs7T6`a92y zZ?BIHTHdF~G0J}svWIi+zRQxF9et-}j{6PC4yQJQG59qmF>H%aJ1 z<6Z~|@a|QTKi!L7pf=Yhc>Btt8W8E-D?r0t0pNdSVa)i#t*!wUXwKe7S@)$UcnOJu z-DWpA%!4sQs7POwumb#k>V@LO#};SVDsPrVsdFRYN9oi$`PNZu_UTgtV|P1pLHp}C zv}lVytth*YmgJI@vM@x z&IMR_?MK@u3O`k*gptJq=huwg#w9Qct_jFnL%FrgBA4yQa_ICxEevKsItmJ z8m0Md8TgdZpEY^q#}%E!(W2+^spuWVCmFJf-!^@=ozjrsLHk^-IE|~KqdY3*5Q6)1 ze?~)J!rtcuEqZ*P43}U-)^wL(xW?R3+)1{I3Q=a$@Sn?0xvkq>FwE$i=279WmO=bW zgvBi$c0c$F5Lc6vm_}{Q{rqO{#9#2>Nbu(9@Jgih;L77%{%GsOO_x@0gILI#>E}f` zwJarroJHg{|MP{8gu%%HvXf)tJf34E;>3cdYJxWD`1hzbxqjE->G*eF08Ic5!BjYJ z8|X!!<5#}Y@^%zeS~he-HQ$f9V=--C*5axAf473)y3G9U1|b)>`wu=xPq>*>(2SkS zXkQI_tqm<2M7H8CaEo2=z_6xd{3;5W3Jy@AmC9X&b9NKTy@7a?3G-_6^zRcfHr0>ob{2a5;STMOpw=(j zp)o%SYTmPdYwmg`V*3A@G;$J)k+^69zbCa@W zIOJ(E%8SuO%340-sHaGx7V0qRh1mvK9S!z@7$k9pW4FzTAb^J$OuT)Kt*UHXC6>76 zhn>HKED*r5N_=qVb-a~=K2JcHqJtH%`a}B{E_&x_BY&v$JFz1o%Z&bEa0wnUBF1Ha zne=j0>bV-uQblulW^AegYZ_%9|D9gt%z6YSz7!Pmt$AA%tyOsN(6-&p`r!c8DIe9T zfM()ZY6Lm;54#GzNh%csB}oFc8&%M@K=p=tRSiiTl%DOyer+@xC1sFV!SDBx>ZF7L zl2Z@V_r7vkHj7SR?H3qD;O}aEfI%VBzTwLN5igjqB??ppR2{X#OP_{diI8)-e~Jmo zLep96aJL*C_!{&|tfSchy&T6?eQPULHWU5ioElZ28SDLGkYFAmDT@=Ia9~{Lm5v7m zJ51%b?K#nH#`61w=u{9D0T+EnseySo$IK3VB97e-P8Ih=&6Z3FOn=Y?p4w?-m=zt4 zGN#^@dG04~`HiB|0)M{4!_K@u1JyvdJ;^m<6%!ORG1gX=>At#DYZ39{Jr6igo zwCK=Ym11#VSP6|CTu!u4XE&&rZzK+D*X4Z;0{Q6gRisI~eD>789xuD!T!EVstv_u^I*{$*c@@86i+71A+2@ zW?J~ns99Hu3x)udHQHyOuYn4*&~Na`AlWRTY_nKKWlSk}FC2R-IO&J0?CfnOdM#vC zf()%uT=oanIFnz@4;+R4pb&h}N&g5A8C0ptglVS0J!Y7&(2nRQBt=4WieM6~syyEcxY!1s=iRL)z|bqJdT>|I0bnOB{BH>%@h{Grwdsg!m2Z3<;T+dA$#U9*N>VkcMwFeT-~Yg+Tca) z!FjdLHM{EyR>?A{`lqOqSGo^Zpvn+i5j|FOL6b>UY7I`qGf(_%2rtKQo_J5tj`o)!e|!m7ES0Y=d(YKoGxvP>BIKN zVtRn#-FTMyfe`h^2>ov?= z(EAWd#nE0wBw?%R$=U{ zO1LF3|H;M+7Mk$R7CaUE8kWg{r}b&S%YHs@L^e9_Mr1TE9|+%=s-E2u-!}OLeUeWo z#;RNmg&nmJB5=9T8IMs;bv{X&23|~TP+R{fT1Zk9P(xGZ$sbH+d3(WqpHBBg-NzaR zUEX&-)!+id3tCd>y9RJ0ax;jwQC=QRY~x~q&X 
z^$o6weC2E0nWs+;^DH08(~_Af#!QpWqn^FxNAb2s6{Zm5p_-C467X{VO!uMdhkeoX zQ4{1-sCF~=Ly-OX(?7f6_q6OWUhWTOmGsXowom+ck?&Au>G`w(M!sjZlS)XZvsPrp5S9t*@O+p4fp!^A9j^!PbA{x@31A^fFV z66?-(Be64$?UQAA;`*;S$5$ar;{>H)$_efE_F0tMyN)f)jF$x%Us_Zj9+&1ZMd2^p zV?Hhw?9u*d3v*C0gWr$TVfAp4y!-j16k~2d2R&uHAx=j~>s_&+V)x+68`_Bc4YU_+J~PIEr_qDRaWCi{ zg-S^rif2bwa5IH0IRUb+T6ykTyon<{(EU|p$Sa*G&AE5PSGj_to7|5g){e;6f53mxD;=I=93_z$B1xuK)~ xUx0sYF2O$x5+y{Kp7uXSe=j8QKaf1ckDl?*TT(7PH8LHfg`Nd%kme8Ze*hVfTTlQ1 delta 6821 zcmZ9RbyO8!w8!c0y!53T=|)08y1QFSQc7YZ1!=Ba;u6vla_PE+lz?=JAYIa+fJi@d zz4hMj_d9>hto1o(&g|LyyZ6~sBEKb%r=yOFMuvoig@r^MT8qbuj6xg=5kh7KVEja( z$Hw3;b_*XDlISWuk_K+SZ?5%A(lnq58hhqopKlfS%g8;VhLshBuVgZWzK`IYB78Y> znL*qQI9V$fnW$W|SvrW6(9on{(ruGXt_?kIta)@Egn2tGzhR!;?3nJev8V&Oortd` zj6%of+Z;K^&&`Fmw7ahU)gSPHlMxCe6E(4u_F8kF0+V=tq5H^!Q;Xu7soes!Xx>=l zq13P66bzue+pQ@lQ#d|>hN0`MlSwTaS%|=*+kSCu?Tgo3r8_BG7G~gr=*OX2o$_yr zW7UUp5}1GMdwz|+!6-;&D#xR8>y3W}x!r=p@z*;<*A2NheIV4lj)M~bZS`y{$D1?H zJk6!|&+@*G1uo;H@9GaaD`vn>%OyONV$kuuq%-lPzRAQ|hBLx`Prs5Fgp{@EGSURS zh2q-Ra!$ST((^&9cnWG%yHM5@sz2I)Us{liSrwGF##pJp$0}dP5KSJ((&ofAXiH!B zE(W2wwCsd&KS(RA*tRzVl)U}KMugY@^y*z~tF^j7itJtj7}~e72f%jEJ5l0t-u{vq zcSGx;k~MShoWp;cJqEZJki~`jA7_(0jCIGRgaWbzpC%n>&mu}>cP{T<&Rp|@l3xCh zOe$ZSr6*8LnWg-yi!;I>|_5NjW$}Rtvi=MZ*_-h*&ZU83%30$8-8Q!sAFQe z$8kb2QIL?zP?3=S=`N5!R4`y_nG6#r3q6L*wR=N}z(l&+28*VEeo)emP1+*6w#rWQsWxO1aptq|-g^f%0j9jz|6$YLtSd zt!0Yc;#}8L&W#~&q^=W}Te5$pg-ujjlyWd!yxEvOC9@Wx{H$(fH(vvEoBq7qOJEc& zy^Ap%&*od4LK!|$v;=D??TS$hM^%@(H8O=fMP0C(fFlKW6OEMW;&zG?irV|OexxGa zCsUb`Bx^;2k7!N^YRvEe^M1FwN?M2pBqackv%2@yQF?dt=k9rWwG~gD=Ra^2MA=Po zu{NqHjG6=$ZPCSFC$JRSHqIpvsF9HolUMf$--b;fY*t%LmT#orm4Y18l!yt_+x?D@ zjGF~V{0~mr!6&zDci?T@de)&(nio=jEUHAHdJ*=5U1*&~6-6%~Wtdfk7al?KO4f)* z3bQ4~|5;nv%HR+v8nkUuZ2SJ}kWg8=!xM?Jk`{4W7Gm;N1ktX3?2kx`myfsTUYBr3 z(EmIwi4exGEmS=^EO|2{DrdN6ui%(cx|zNPFC89y@mg12O?>xvo-Tp@a#c>E1WtE( zQDYHaC}n}8H^reEz)ESBr01fy(a1|cc!Rnnr$n67DWD^RW6#dSWC}6)y+`+CbC_9E z=R1X4d!Y_Z027KEb&Z8GAE7b8tk23DYIhyZ0IEbA~{J79pOa 
z4o-qXSy>|*+a!YA$Ju#rEBg&=O)be`TOzgCVM0{TeqU8v0o$0zKkiE4KVIMDT&9fU zmUlG;T&22Kejoa^&vs*2Daz5bRkeU#G1~ZRx?<_9BB5yb@z4B5 z>~v1%^wv$L2UyzkI%ZVwr+txPgQg2>@5nr~Y7rkUm-@)#0jv7@>B{nF`BS==$=Dlh zKj9ZD`JBmd;n>WFFUJ?e>E|qIWTwiJoqDPgK`wK_cE)!aL1&qAv{$JZ;or+ceR2!E zs~m}7FHS?}M3jWQAz1qKS-U}IuxH;a7`5lZU9|Emh=6I8(R63(a#Oe6$^9?Dx_JI` zefq8G?*g-0_lb^ssUjrkevf5K5ykg`CA~XT zfq6sB$IA5seR4khLG9ZTTN+ua$4(nI{SF#JaYaxu9dG{AR3 zk|?yRt7qfFF5&V3DHlUwP-EeR$HKR)>(Z}G&UWUVb+1^u?y4O&zH*bFPNcoIfhPqR ze|JXzq9xnGQd?if^h`RpHLPr3QW!s|Z>lWodgFSp_p6_e>Qro>bcxVK`u0)TrS&u_ z<@hC3Z}(NCaDJ;fXMJ*B#%uFzAf>%mD7$AcCO%4~gkX3Tnnq?5V^yNqybiEE#esdYxYu%&J z?c14}o5Jqsy3NDBTr4ooBF$bK75m-t7c%WL{??}yLYEc3FTU#^-oU&>0UoG9$QtoZ z9iz9FEf2HyS@>s$Ec%RDKg*Lo~6uH z=Y|Xua;tbzLT+}nX*|*5k4)6Mx%#Vx??wr6>EYqnCT8d10H=<>FxIbzXtLuSWg|Y; zppbV0_^gT4wnqViF&u<&V2C-Hn~-=Z#gfO6fG9b>D3p+Me9?fII#N5Y9E3|f;96Ks z%5}ht2Kr_BJ@Istkwsu)GXT+e4ZresH;h_NHSU~le_K>ROD_Q!v28V1I!sOLpgmJe z*UF0CC#pGeNgX|G3A2{o+q`#CwW-q6kf^q1Xf)jwLAA@!K*E&;Oj(+R0;YZpblZ?5 zV32FR`<%@+#^_el8BQ^9o#tXy6Y}&z7^7jV)C2XRf3scr?2-&LdJgnI*Qje}vu+t? z)&AUo6WYY{#@Q9W(w=P--6u*Lx;Jjo$rVaA9SyTtq)H%@=@N5*thtF0_R#v64h<}Zo;GqSF&*2AC)EphS7Q)HSEx=Bo7UO?W%}! 
zyHP)lKyl3du;tvB!Rc0UErP^hq_zCS9c*?kOc8AB3yNp3*z`D%FtHeVq4^U$SZre) zNQAhV&CY^RTIYRQd4O8!JE6NJh4X|yHWd!iHD+E)3M>-9*HrGMAC}%}+YxKyhgcX2 z@k}nbdk$)VA~oF|okcJ*GGW$UCpTedVFGp*;fwUIh!m|bhKf6Dt~5xRn&%|w+U7(F zpY2(jz&o?3`0z=mdP{7>SEqKG_h&t4heTBeP(1&dipk5;Z zGh#O((kBU!lo(JEZoGh?OP^F0Hui~@Q}}AG`wZWw_kQg4ecdm!v}&wMNuR(UIIE5L zyBGAZ(}C+u^oR=qT(o2R4gbbZA6!k>-zjv}0&+S9aLqw@SfeIiMSq_L9d8jgE*kk< zPt=*NH!u64nTAJZ;YVsCyMsdTR`b0HR+~F}R~nvtfx0IgCroE5ki-35v#_C`uQkMCb9h-R>d%)y z2EO#PvKdBApDpL2JV-1{>%+}f<8lR_)dX{~;Ep20o%=%=*W=_@O`65qa&_m=IN+?~ zXXGK_GCtV2>w(R)xukw#C%4`KI=8K3F}^MgKP!HX^Z5#<>Hhe*{Qj;SGIvEo!{$@o z2C@zqD*Cla@uBc1%lsqHN~7?7j^+!6XJkK~T!pGn2WLuDnRMH%x7~{R3h@;KV2vIJ zJCetMR#Cn=Tjddi6kE^jEJ`?uQd&2(-J;P|ujbIFt!JK&yL`$ft2T?~eHQHy&>SUg zZ@|!tLWLg3J(@vUJBop(Rzh%+H$54`pfq=Vfs(5oL`HtgZKB=i*1{uAvAx$ukLOW& zw{!D1A;_`@C~u=9A=%R*;s3iT%*AAd9N}X>eNT9j72%03dyG$ik-%MHWR5&HF}eY)QN+ax2?h6 z1Q2sbRIG9TWNwn-&-{@%RVMyxE#?z|RT+DLA6mrT-m`98{i&b%7CXGRMvN|at9RO4 zQBJ`H(d#Ac-F0ak5C&$%`9atnY=!Xu9dkq}=LG|Y4VFbTCn#R9^b(P~n`7LUUQ=m8 z-=Xn*xcb-yVv#}v1uY=;2h+xLmB{UrYk6^a830GN+$0=zFE!_c%SjZ)mA?UsDmXex zbx%St6WBf2&NaZSqmFS@S|k$MbJ(oE2!385s!x?&+HiDHxGoR<5AR;Tv`AKyIiBQR z;xhfL+e-80?c?E;k#A7Ga#NL1UzPx^p&maX#efzEp{fE)# zC|;l?7c$okTk2PhMcrWrj1FCZf5%WToKu~L74r*GC@MF>Mal05olE*-FBIqgQu@fV zKdIn+7euzT*EJ(5FOhMSuG-5>Da`9ae&?=2wl-pBG^;U*>lLM)R<6xwRU)gm%cRNg zevYA+H_kiHsqW9iD%H)vN%fW$)qLoPfph+m`me(t|5?seawx?!MiRhT(&@!9zvENg z7O$>~5~$$>H8l&)SzLPQ9?f*5>9CJamyaPi{Wj5(<~Ms!c5G;@8|j)Du@LE`3)40m zlQWZthGKJ^NoK4Sskb?JK}4}D&Llflp49spQw(_zYBE=mB&BGDJ<-0)BAhp+z$qPF zaH^dPEWhMAItF_Q9RZqHJjR)zY^2uA!2u*w{LC#iRN<{tXV2}!cdkF7Oo}C83yffm zWqVW9HoTo26~}hTob0HfveZ{Te9_t@*RbK}6z=?ecrgm(^kV#96rzSo=7A#We4rri z4;0CWW~{C9c#7E*9v;7jjgw#QX(iD>+|it>N-z8_`gRYt5__fU6u!frgNd6tB&#_JrYUpf^Ac7EmU&>K@p`sa9-(8|oaHgE1y3RIrVT zoe^Isrfg&N?sCYcY&uh^KJ=crO4X4-7FJH@7KNl@GIZk zOg-BNK|NX$4e)tu-^2uXw(I~iSNa0B7K3~<$ZIN({U}Ulp#Ky_*a-2NICpJ8thyzue?<(Z52xWg3<;nQa zglA`-L@9~rT_~5%{EoH=6_SVr5~76JO*)=Lf}&$_OEO4P#W9p31q*O!CFptxbzjs= 
z@qOOSRMwpgi#j(z#0)dp6IWO7_pdg&dTQTe{;990)WMC+4|2*x`S1F=UYHd8wzAUkEbdAupt3+6**8bFUr?k zKyG-Gq{O`VHnrqC;?adA-^KjWopD}I4%cY0_(^tS45mSppdHiWlgCLhk1nJqVq#;9 z^(Ih{vt8jU!85Z9NPOg1AJr4G4(IkIH8=qFHnwL5tB>@{#RY!D-|W9E<%oNsP`>7} zcVEDfO#*Q*WSx1ZUfls)do6LPeapT0gG%a-Trt?V6&vzv4Yp2~R# zqc$#pz_hRJ3wxqQ2L_IRcnYRje0npIwqM(U*0pw=fT=iCi;o2&iq{+wu!OdrZ66_o zoO@{E(Fg`8)WBU4_Z71)3N@WD!XUu+{gd+5v`+}w4i3`(z+aUE z_14IFOZ@9f_(|ysFF+7|CL4KS>BLdDgIgj8j1jJ0T0Ks<@Gt4LFAlO0JHN>aE6WBh z9ZC<7QYq4kyX(_%bVD}q9{QjSBZRDzmm?Cl^@gw8q5`c9*{lZpz##b`7Y9f=j1dS3 zLcFYM;QsiA=F{R&{H`+bDuXwRx_k{qyn=QVBtIm)<2^&~5B13%iRo~Ee)6A^zZbF$(T($nG z7O@MQ1WywRRigs$vD0&#caj#R2Mf zj>5rCVdp_95=8t!W?`eLTnG4#OBovIy=1-mDN+VM3BPe}?gWC{*Fpggv**z4H6yV~ zwH_#qq@n2$D52_hirdh> zQE#SucPcglgsmUqurhbM_lO_T_@L>J8!rcFGUW#b3_xm9MQs z9)#t@hdw8|>b|P)tH09x);<-{=-FD1A}wxh>Sw zrqrCDvD6M48J0K1tF$|jEClvY*Ty_}VOOJ8PR)Qwvoy>Uouuk2N(TZ)=Ih3;#Jd`< z%e)~!2^&54*&6@IvMDx=rCQ*%nl06S0k1jXt!_GKOZ=&`dk{{ua!i)yAa?y#_C$6wL zNW@4;|7~mHQL_NUTD|;4N*5tY4t_9kMO`z)B<^>GFtr-2WJ`wwRkzIy_cFCRdep+h z?4`xlA|as+ceW38q`$!*Tqd9E7UJPGD`Y>3w`v;*iLN8cBGxcP<;`Y%hu^_CzASm7 zt1w8?9YsNB|H3T+RW`z@K&44_%%;OhKo$7t4`O-!VXFwhAoVtc;>U1C5QBXJFbb{L zugLCk$)Ugsr4?eHm7bN(xy%Gj?WE+u-Jt?P?Od;MThA0Q#yMoFkvh?Fb8>Q4k1D2Q z$rJBdo8gpmbzwLy=I1XmQO58%{HaAo{IxDusxg{%?_lI?QdZW{a+6fVJy~~#@Gj*a z%aFD`C6~BFLPAPI{*S-v)35?1sZyRj{6w!V?r???_Us(IacN+cSBF!~0#UmY>2Lsjl;XS>5u+PSS>{HK8Tk$tp&(Vot6Nko|{_UJYDCvb6 z{@neuhlc+!{r`s>#Fd^F{D=9+u?h+F|G>jlBqrnoCN<K8N{#h{m8-zpl>Dm7> zURyQ03lCH49-8;m|5^+L5n$l_%luQD$o|DhLV_5W|1$p@Dt{y8AzuD5xFHAz@ZXI; yIL-h04ADObB&0nm$Tb7~--Z5ZIMRPXj1X}!)1QA&7GPRrYe*274FjA05Ai>5HfxXo diff --git a/scripts/otoferlin/pool_assignments_and_measurements.py b/scripts/otoferlin/pool_assignments_and_measurements.py index bab0483..f4681b5 100644 --- a/scripts/otoferlin/pool_assignments_and_measurements.py +++ b/scripts/otoferlin/pool_assignments_and_measurements.py @@ -6,11 +6,18 @@ from synapse_net.distance_measurements import measure_segmentation_to_object_distances, load_distances from 
synapse_net.file_utils import read_mrc from synapse_net.imod.to_imod import convert_segmentation_to_spheres +from skimage.measure import label from tqdm import tqdm from common import STRUCTURE_NAMES, get_all_tomograms, get_seg_path, load_segmentations +def ensure_labeled(vesicles): + n_ids = len(np.unique(vesicles)) + n_ids_labeled = len(np.unique(label(vesicles))) + assert n_ids == n_ids_labeled, f"{n_ids}, {n_ids_labeled}" + + def measure_distances(mrc_path, seg_path, output_folder, force): result_folder = os.path.join(output_folder, "distances") if os.path.exists(result_folder) and not force: @@ -23,6 +30,7 @@ def measure_distances(mrc_path, seg_path, output_folder, force): # Load the segmentations. segmentations = load_segmentations(seg_path) vesicles = segmentations["vesicles"] + ensure_labeled(vesicles) structures = {name: segmentations[name] for name in STRUCTURE_NAMES} # Measure all the object distances. @@ -109,7 +117,7 @@ def process_tomogram(mrc_path, force): def main(): force = True - tomograms = get_all_tomograms() + tomograms = get_all_tomograms(restrict_to_good_tomos=True, restrict_to_nachgeb=True) for tomogram in tqdm(tomograms, desc="Process tomograms"): process_tomogram(tomogram, force) From 791f635ab1e571cfe208a87ef08c46b6cdacddb5 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 11 Dec 2024 21:15:51 +0100 Subject: [PATCH 25/30] Implement IMOD export --- scripts/otoferlin/export_results.py | 1 + scripts/otoferlin/export_to_imod.py | 35 ++++++++++++----------------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/scripts/otoferlin/export_results.py b/scripts/otoferlin/export_results.py index bdb1e3f..2883c97 100644 --- a/scripts/otoferlin/export_results.py +++ b/scripts/otoferlin/export_results.py @@ -96,6 +96,7 @@ def result_extraction(tomo): _export_results(tomograms, result_path, result_extraction) +# FIXME: update the counting and analysis of MP-V vesicles (include Docked-V, see Caro's mail for details) def main(): 
tomograms = get_all_tomograms() result_folder = get_output_folder() diff --git a/scripts/otoferlin/export_to_imod.py b/scripts/otoferlin/export_to_imod.py index b3943ea..a810eb5 100644 --- a/scripts/otoferlin/export_to_imod.py +++ b/scripts/otoferlin/export_to_imod.py @@ -1,14 +1,18 @@ import os +from subprocess import run import numpy as np import pandas as pd +from tqdm import tqdm from synapse_net.imod.to_imod import write_segmentation_to_imod, write_segmentation_to_imod_as_points from common import STRUCTURE_NAMES, get_all_tomograms, get_seg_path, load_segmentations -from tqdm import tqdm -# TODO check if we need to remove offset from mrc +def check_imod(mrc_path, mod_path): + run(["imod", mrc_path, mod_path]) + + def export_tomogram(mrc_path, force): seg_path = get_seg_path(mrc_path) output_folder = os.path.split(seg_path)[0] @@ -27,43 +31,32 @@ def export_tomogram(mrc_path, force): for name in STRUCTURE_NAMES: export_path = os.path.join(export_folder, f"{name}.mod") write_segmentation_to_imod(mrc_path, segmentations[name], export_path) + # check_imod(mrc_path, export_path) # Load the pool assignments and export the pools to IMOD. assignment_path = os.path.join(output_folder, "vesicle_pools.csv") assignments = pd.read_csv(assignment_path) pools = pd.unique(assignments.pool) - radius_factor = 1.0 # TODO! + # TODO: discuss this with Clara, not sure how to handle this with the irregular vesicles. + radius_factor = 1.0 for pool in pools: export_path = os.path.join(export_folder, f"{pool}.mod") - pool_ids = assignments[assignments.pool == pool].vesicle_ids + pool_ids = assignments[assignments.pool == pool].vesicle_id pool_seg = vesicles.copy() pool_seg[~np.isin(pool_seg, pool_ids)] = 0 write_segmentation_to_imod_as_points( mrc_path, pool_seg, export_path, min_radius=5, radius_factor=radius_factor ) - - # TODO: read measurements for ribbon and PD volume / surface from IMOD. 
- # - convert to meshes - # - smooth the meshes - # - run imodinfo to get the measurements - measures = pd.DataFrame({ - }) - return measures + # check_imod(mrc_path, export_path) def main(): - force = False - tomograms = get_all_tomograms() + force = True + tomograms = get_all_tomograms(restrict_to_good_tomos=True) - measurements = [] for tomogram in tqdm(tomograms, desc="Process tomograms"): - measures = export_tomogram(tomogram, force) - measurements.append(measures) - - save_path = "./data/structure_measurements.xlsx" - measurements = pd.concat(measurements) - measurements.to_excel(save_path, index=False) + export_tomogram(tomogram, force) if __name__ == "__main__": From c4fe694e9ad41eed7b714890de44cb65f3ad2ee9 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 12 Dec 2024 21:55:31 +0100 Subject: [PATCH 26/30] Update otoferlin analyse --- scripts/otoferlin/common.py | 24 ++++-- scripts/otoferlin/correct_vesicle_pools.py | 65 ++++++++++++-- .../otoferlin/ensure_labeled_all_vesicles.py | 20 +++++ scripts/otoferlin/export_results.py | 18 ++-- .../otoferlin/filter_objects_and_measure.py | 81 ++++++++++++++++++ .../otoferlin/overview Otoferlin samples.xlsx | Bin 8357 -> 8629 bytes .../pool_assignments_and_measurements.py | 7 +- .../otoferlin/update_radius_measurements.py | 31 +++++++ 8 files changed, 219 insertions(+), 27 deletions(-) create mode 100644 scripts/otoferlin/ensure_labeled_all_vesicles.py create mode 100644 scripts/otoferlin/filter_objects_and_measure.py create mode 100644 scripts/otoferlin/update_radius_measurements.py diff --git a/scripts/otoferlin/common.py b/scripts/otoferlin/common.py index a7d5906..b6d178d 100644 --- a/scripts/otoferlin/common.py +++ b/scripts/otoferlin/common.py @@ -40,17 +40,22 @@ def get_folders(): return root_in, OUTPUT_ROOT +def load_table(): + table_path = "overview Otoferlin samples.xlsx" + table_mut = pd.read_excel(table_path, sheet_name="Mut") + table_wt = pd.read_excel(table_path, sheet_name="Wt") + table = 
pd.concat([table_mut, table_wt]) + table = table[table["Einschluss? "] == "ja"] + return table + + def get_all_tomograms(restrict_to_good_tomos=False, restrict_to_nachgeb=False): root, _ = get_folders() tomograms = glob(os.path.join(root, "**", "*.mrc"), recursive=True) tomograms += glob(os.path.join(root, "**", "*.rec"), recursive=True) tomograms = sorted(tomograms) if restrict_to_good_tomos: - table_path = "overview Otoferlin samples.xlsx" - table_mut = pd.read_excel(table_path, sheet_name="Mut") - table_wt = pd.read_excel(table_path, sheet_name="Wt") - table = pd.concat([table_mut, table_wt]) - table = table[table["Einschluss? "] == "ja"] + table = load_table() if restrict_to_nachgeb: table = table[table["nachgebessert"] == "ja"] fnames = [os.path.basename(row["File name"]) for _, row in table.iterrows()] @@ -75,13 +80,13 @@ def get_colormaps(): "Docked-V": (1, 1, 0), None: "gray", } - ribbon_map = {1: "red", None: (0, 0, 0, 0)} + ribbon_map = {1: "red", 2: "red", None: (0, 0, 0, 0), 0: (0, 0, 0, 0)} membrane_map = {1: "purple", None: (0, 0, 0, 0)} - pd_map = {1: "magenta", None: (0, 0, 0, 0)} + pd_map = {1: "magenta", 2: "magenta", None: (0, 0, 0, 0)} return {"pools": pool_map, "membrane": membrane_map, "PD": pd_map, "ribbon": ribbon_map} -def load_segmentations(seg_path): +def load_segmentations(seg_path, verbose=True): # Keep the typo in the name, as these are the hdf5 keys! 
seg_names = {"vesicles": "veiscles_postprocessed"} seg_names.update({name: name for name in STRUCTURE_NAMES}) @@ -93,7 +98,8 @@ def load_segmentations(seg_path): for out_name, name in seg_names.items(): correction_path = os.path.join(correction_folder, f"{name}.tif") if os.path.exists(correction_path): - print("Loading corrected", name, "segmentation from", correction_path) + if verbose: + print("Loading corrected", name, "segmentation from", correction_path) segmentations[out_name] = imageio.imread(correction_path) else: segmentations[out_name] = g[f"{name}"][:] diff --git a/scripts/otoferlin/correct_vesicle_pools.py b/scripts/otoferlin/correct_vesicle_pools.py index 6fd9798..e99d7a2 100644 --- a/scripts/otoferlin/correct_vesicle_pools.py +++ b/scripts/otoferlin/correct_vesicle_pools.py @@ -7,8 +7,13 @@ from magicgui import magicgui from synapse_net.file_utils import read_mrc +from synapse_net.distance_measurements import load_distances from skimage.measure import regionprops from common import load_segmentations, get_seg_path, get_all_tomograms, get_colormaps, STRUCTURE_NAMES +from tqdm import tqdm + +import warnings +warnings.filterwarnings("ignore") # FIXME: adding vesicles to pool doesn't work / messes with color map @@ -31,7 +36,7 @@ def _create_pool_layer(seg, assignment_path): pools[pool_mask] = pool_id colormap[pool_id] = pool_colors[pool_name] - return pools, colormap + return pools, colormap, assignments def _update_assignments(vesicles, pool_correction, assignment_path): @@ -52,8 +57,39 @@ def _update_assignments(vesicles, pool_correction, assignment_path): new_assignments.to_csv(assignment_path, index=False) -# TODO: also enable correcting vesicle segmentation??? 
-def correct_vesicle_pools(mrc_path): +def _create_outlier_mask(assignments, vesicles, output_folder): + distances = {} + for name in STRUCTURE_NAMES: + dist, _, _, ids = load_distances(os.path.join(output_folder, "distances", f"{name}.npz")) + distances[name] = {vid: dist for vid, dist in zip(ids, dist)} + + pool_criteria = { + "RA-V": {"ribbon": 80}, + "MP-V": {"PD": 100, "membrane": 50}, + "Docked-V": {"PD": 100, "membrane": 2}, + } + + vesicle_ids = assignments.vesicle_id.values + outlier_ids = [] + for pool in ("RA-V", "MP-V", "Docked-V"): + pool_ids = assignments[assignments.pool == pool].vesicle_id.values + for name in STRUCTURE_NAMES: + min_dist = pool_criteria[pool].get(name) + if min_dist is None: + continue + dist = distances[name] + assert len(dist) == len(vesicle_ids) + pool_outliers = [vid for vid in pool_ids if dist[vid] > min_dist] + if pool_outliers: + print("Pool:", pool, ":", name, ":", len(pool_outliers)) + outlier_ids.extend(pool_outliers) + + outlier_ids = np.unique(outlier_ids) + outlier_mask = np.isin(vesicles, outlier_ids).astype("uint8") + return outlier_mask + + +def correct_vesicle_pools(mrc_path, show_outliers, skip_if_no_outlier=False): seg_path = get_seg_path(mrc_path) output_folder = os.path.split(seg_path)[0] @@ -63,7 +99,7 @@ def correct_vesicle_pools(mrc_path): return data, _ = read_mrc(mrc_path) - segmentations = load_segmentations(seg_path) + segmentations = load_segmentations(seg_path, verbose=False) vesicles = segmentations["vesicles"] colormaps = get_colormaps() @@ -72,7 +108,14 @@ def correct_vesicle_pools(mrc_path): 1: pool_colors["RA-V"], 2: pool_colors["MP-V"], 3: pool_colors["Docked-V"], 4: "Gray", None: "Gray" } - vesicle_pools, pool_colors = _create_pool_layer(vesicles, assignment_path) + vesicle_pools, pool_colors, assignments = _create_pool_layer(vesicles, assignment_path) + if show_outliers: + outlier_mask = _create_outlier_mask(assignments, vesicles, output_folder) + else: + outlier_mask = None + + if 
skip_if_no_outlier and outlier_mask.sum() == 0: + return pool_correction_path = os.path.join(output_folder, "correction", "pool_correction.tif") os.makedirs(os.path.join(output_folder, "correction"), exist_ok=True) @@ -87,7 +130,11 @@ def correct_vesicle_pools(mrc_path): v.add_labels(pool_correction, colormap=correction_colors) v.add_labels(vesicles, visible=False) for name in STRUCTURE_NAMES: - v.add_labels(segmentations[name], name=name, visible=False, colormap=colormaps[name]) + # v.add_labels(segmentations[name], name=name, visible=False, colormap=colormaps[name]) + v.add_labels(segmentations[name], name=name, visible=False) + + if outlier_mask is not None: + v.add_labels(outlier_mask) @magicgui(call_button="Update Pools") def update_pools(viewer: napari.Viewer): @@ -95,7 +142,7 @@ def update_pools(viewer: napari.Viewer): vesicles = viewer.layers["vesicles"].data pool_correction = viewer.layers["pool_correction"].data _update_assignments(vesicles, pool_correction, assignment_path) - pool_data, pool_colors = _create_pool_layer(vesicles, assignment_path) + pool_data, pool_colors, _ = _create_pool_layer(vesicles, assignment_path) viewer.layers["vesicle_pools"].data = pool_data viewer.layers["vesicle_pools"].colormap = pool_colors @@ -107,8 +154,8 @@ def update_pools(viewer: napari.Viewer): def main(): tomograms = get_all_tomograms(restrict_to_good_tomos=True) - for tomo in tomograms: - correct_vesicle_pools(tomo) + for tomo in tqdm(tomograms): + correct_vesicle_pools(tomo, show_outliers=True, skip_if_no_outlier=True) if __name__ == "__main__": diff --git a/scripts/otoferlin/ensure_labeled_all_vesicles.py b/scripts/otoferlin/ensure_labeled_all_vesicles.py new file mode 100644 index 0000000..c32f8b9 --- /dev/null +++ b/scripts/otoferlin/ensure_labeled_all_vesicles.py @@ -0,0 +1,20 @@ +from common import get_all_tomograms, get_seg_path, load_segmentations +from tqdm import tqdm +from skimage.measure import label +import numpy as np + + +def ensure_labeled(vesicles): 
+ n_ids = len(np.unique(vesicles)) + n_ids_labeled = len(np.unique(label(vesicles))) + assert n_ids == n_ids_labeled, f"{n_ids}, {n_ids_labeled}" + + +def main(): + tomograms = get_all_tomograms(restrict_to_good_tomos=True) + for tomogram in tqdm(tomograms, desc="Process tomograms"): + segmentations = load_segmentations(get_seg_path(tomogram)) + ensure_labeled(segmentations["vesicles"]) + + +main() diff --git a/scripts/otoferlin/export_results.py b/scripts/otoferlin/export_results.py index 2883c97..70f3a15 100644 --- a/scripts/otoferlin/export_results.py +++ b/scripts/otoferlin/export_results.py @@ -41,13 +41,22 @@ def _export_results(tomograms, result_path, result_extraction): res.to_excel(result_path, sheet_name=condition, index=False) +def load_measures(measure_path, min_radius=5): + measures = pd.read_csv(measure_path).dropna() + measures = measures[measures.radius > min_radius] + return measures + + def export_vesicle_pools(tomograms, result_path): def result_extraction(tomo): folder = os.path.split(get_seg_path(tomo))[0] measure_path = os.path.join(folder, "vesicle_pools.csv") - measures = pd.read_csv(measure_path).dropna() + measures = load_measures(measure_path) pool_names, counts = np.unique(measures.pool.values, return_counts=True) + pool_names, counts = pool_names.tolist(), counts.tolist() + pool_names.append("MP-V_all") + counts.append(counts[pool_names.index("MP-V")] + counts[pool_names.index("Docked-V")]) res = {"tomogram": [os.path.basename(tomo)]} res.update({k: v for k, v in zip(pool_names, counts)}) res = pd.DataFrame(res) @@ -60,7 +69,7 @@ def export_distances(tomograms, result_path): def result_extraction(tomo): folder = os.path.split(get_seg_path(tomo))[0] measure_path = os.path.join(folder, "vesicle_pools.csv") - measures = pd.read_csv(measure_path).dropna() + measures = load_measures(measure_path) measures = measures[measures.pool.isin(["MP-V", "Docked-V"])][["vesicle_id", "pool"]] @@ -86,7 +95,7 @@ def export_diameter(tomograms, 
result_path): def result_extraction(tomo): folder = os.path.split(get_seg_path(tomo))[0] measure_path = os.path.join(folder, "vesicle_pools.csv") - measures = pd.read_csv(measure_path).dropna() + measures = load_measures(measure_path) measures = measures[measures.pool.isin(["MP-V", "Docked-V"])][["pool", "diameter"]] measures.insert(0, "tomogram", len(measures) * [os.path.basename(tomo)]) @@ -96,9 +105,8 @@ def result_extraction(tomo): _export_results(tomograms, result_path, result_extraction) -# FIXME: update the counting and analysis of MP-V vesicles (include Docked-V, see Caro's mail for details) def main(): - tomograms = get_all_tomograms() + tomograms = get_all_tomograms(restrict_to_good_tomos=True) result_folder = get_output_folder() result_path = os.path.join(result_folder, "vesicle_pools.xlsx") diff --git a/scripts/otoferlin/filter_objects_and_measure.py b/scripts/otoferlin/filter_objects_and_measure.py new file mode 100644 index 0000000..d51b897 --- /dev/null +++ b/scripts/otoferlin/filter_objects_and_measure.py @@ -0,0 +1,81 @@ +import os +from tqdm import tqdm + +import numpy as np +from skimage.measure import label +from skimage.segmentation import relabel_sequential +from common import get_all_tomograms, get_seg_path, load_table, load_segmentations, STRUCTURE_NAMES +from synapse_net.distance_measurements import measure_segmentation_to_object_distances, load_distances +from synapse_net.file_utils import read_mrc + + +def _filter_n_objects(segmentation, num_objects): + # Create individual objects for all disconnected pieces. + segmentation = label(segmentation) + # Find object ids and sizes, excluding background. + ids, sizes = np.unique(segmentation, return_counts=True) + ids, sizes = ids[1:], sizes[1:] + # Only keep the biggest 'num_objects' objects. + keep_ids = ids[np.argsort(sizes)[::-1]][:num_objects] + segmentation[~np.isin(segmentation, keep_ids)] = 0 + # Relabel the segmentation sequentially. 
+ segmentation, _, _ = relabel_sequential(segmentation) + # Ensure that we have the correct number of objects. + n_ids = int(segmentation.max()) + assert n_ids == num_objects + return segmentation + + +def filter_and_measure(mrc_path, seg_path, output_folder, force): + result_folder = os.path.join(output_folder, "distances") + if os.path.exists(result_folder) and not force: + return + + # Load the table to find out how many ribbons / PDs we have here. + table = load_table() + table = table[table["File name"] == os.path.basename(mrc_path)] + assert len(table) == 1 + + num_ribbon = int(table["#ribbons"].values[0]) + num_pd = int(table["PD?"].values[0]) + + segmentations = load_segmentations(seg_path) + vesicles = segmentations["vesicles"] + structures = {name: segmentations[name] for name in STRUCTURE_NAMES} + + # Filter the ribbon and the PD. + print("Filtering number of ribbons:", num_ribbon) + structures["ribbon"] = _filter_n_objects(structures["ribbon"], num_ribbon) + print("Filtering number of PDs:", num_pd) + structures["PD"] = _filter_n_objects(structures["PD"], num_pd) + + _, resolution = read_mrc(mrc_path) + resolution = [resolution[ax] for ax in "zyx"] + + # Measure all the object distances. + for name in ("ribbon", "PD"): + seg = structures[name] + assert seg.sum() != 0, name + print("Compute vesicle distances to", name) + save_path = os.path.join(result_folder, f"{name}.npz") + measure_segmentation_to_object_distances(vesicles, seg, save_path=save_path, resolution=resolution) + + +def process_tomogram(mrc_path, force): + seg_path = get_seg_path(mrc_path) + output_folder = os.path.split(seg_path)[0] + assert os.path.exists(output_folder) + + # Measure the distances. 
+ filter_and_measure(mrc_path, seg_path, output_folder, force) + + +def main(): + force = True + tomograms = get_all_tomograms(restrict_to_good_tomos=True) + for tomogram in tqdm(tomograms, desc="Process tomograms"): + process_tomogram(tomogram, force) + + +if __name__: + main() diff --git a/scripts/otoferlin/overview Otoferlin samples.xlsx b/scripts/otoferlin/overview Otoferlin samples.xlsx index afe5097c5d0ca23b70977cbb6574d34e0ce56e60..c9e10d52f5c8659e82e68791744bc42baef49537 100644 GIT binary patch delta 5804 zcmY*d1yEI8*QP^2K~RAUm$-BzjdV##N?)YAQ~J=|mky;H=?3X;5V^E;Nk}90q2K)f z_r7c9%&a}<*?X_^tbNuqd*zGPiDN6tA)^o=U|?V%q}EnpQzIg!*1|XtseyDhT<;?d z@^)INqs8-}GL`4Gj4lh7HA?~@LVu7E#M5!18|715+;Z=Fc`+3)Ll_EnVmG1M+&-U?qu!}CJr z1~VcdSE_dKpj));ILcydBCGBQU)bGdMFq}U8~2(T^BR7p}Kw)=~vUfd;_q;W@QSHdqNVH zQ5F4Emc5E2O37QoI@wDpWsaJ|{i1^G81ypt_*IHh)jev_YOa5_F zcT3BrXb%Gw zT|~m#)?a6F64J&1>uiEBKi8wQ7sg|qp|Re8@YB9YW0EufeBqtzdyj87Y=qGszxbnz zS0@?L`h?9%LEnf3m4{byvVncflhGjcHInLcKGY`bbOnQh7D)D%YiaEjrji^whIJTy z5;_tBLLo8&!at8ojTiN~YG$DgH^mD2Ur>O^R`9KL}o0mV2EjJ$z3139(su#T3 zJ2&s-71x&Hi=V#7_em=B^hCyH{DA&Kg#ZuwgY-axw_HrsjL$i^+`V9u+ArT%fb>Lh z#D0wC()YQ9od~B#S{c}jp$f{->)rO>wbp#Ux7MRCmZX-1%;S3eM3+P%sdV4Wt3VaiHvg-NJ=`} zk1c{>FNRSV%eMjma=AHo->iN?EC;NQwcIkD-GcJ^5(bNoP9L){PaT1Wc%dJ8^|;-n zyi^JO9k&LKVBzZ6e%C(yG`4i!Z6Q-OnZApWy7JH(z0`n8Go1>;!b?19&9p_^28V}2 zH(l^1=92#_FI@NHPbZtlzQ6Fv)?8blH0(H^evlmJt4bQ^_(p*Zh(=A}t=kpV$KgN_ z)#^L*s_i<U(iJx6u2;Y75vaL zrj*o5#rxfLzM9H1dKI8Bv0Z!(=z1i|DXveQ0ugfUj=TG2*C0S3II&;psEXDVvG9HE zA{HVl46lii{xbxI#W=G_B>PYv$F5xnWe0ph7cbdjPpC zv{<&$#<1Y|hxC-vz!e%>wVYX+mkfBdVJ0%pAPG2)I>V$LzMh)l1kHEqRjyEjba)Z> zVnZIk{EoKG(GXziX=xmvqKZRV%r{LQpK<#9S9)mPw#~F`L20Rp%vqWg)u2Y25|Vex z9b@6V1_V~(P$HV=m|=li4Br0AH}jQ#1}loJxFlH$!Z;I3YeSiJL%n!= z4=eZMX4g(}ryQ2`<1|$6$1V0wKQ*y=^mR>K#Wm>W0&$6bp-@6oV;d=M2FfEsov1`5 zJ;KgJEQk^WRv%D|7FQpTiWaASBEa@aD$AT-F`edcZ(?1F^(?8_sBau#`+xk8G4g+_%WG(Sa@TJZg)Fq4*0m_B?q-uFzVnQO?ja z7q+8xfcgN`U>8MZp{P|NWjvz>3E%r03mGNz47!H8ch;8jyjvl$#Jrms!xT(~jVwit 
zj8JX%Dz3epE0n1vy!FyXu(g_4oXgEZJi`)~K86E_l6m@iqO5PCD7Y`C_WOT{sYd&6 z5XB2^u8^G40-5{UN%oV=m}nqQ*q4FE{Nj1u`=$DyJ3%vzPc~c%u-<+!C*EpH-1^Wa zoX5?9biML|6!OrXW3&NiGHe-XL2HJ zZ6&m$j8&ie6`PK(t3WYQhSY)pEnV)%Cf@AY3*6(}nH=@YUk7LM;HHpT+kGSd4-QhI zuLFpJ@*FVi0|RrUy?-Q_`K=(hON=;Bj=`HVeO&g`byYJW80&Kd^@qZbyc%A;{dmh_ zzvQ>?Dkyx`Y8%s5)13nBk@1~huTwNQ#)><}e8|4~%|MK9y#}r3ueUE<_zfMZZwz}w z+1h`Va0l=H89f?bj^Yoe>eV`ekFjT?kPjg#ikzKMM*s(SiYMXifUliLp5^P?&vmmid0oT%wj%D2gWg{DEd4N3(=To|}`}vz6Xo2fu7w z6s35hhw+uw$)l`XwX~T+_V)`oTy;J-D&8ai5tMHJaC|Tt0zw_xzXjz{NT^|lIB0;D zvQrK}flqt8|Ejm7lJc89h0;1(7rT#NmRdM2nSo7 zbn`~6q%gOFRoVO}yBuazf5DPvI$E^g9?b0xHy||Djx7(_q`Od-HWJ|0bFlQuyyfCo zy2wBybV7dKvkS-ORs6!lc#cGpT?5gV*Mrl=@e<*C&|Lbk;nC%aNm2H;!fUWV=!65Z{&x{31gej7F5IM^u8Y;;^bXp4IkI zT<~5D?}J?h0x9=rcfx`tkpMEg36>3iq=ej`ge%DFRYX{V;?gqs%7)?HxEH%A!L*et z+PG4TnuN>Y>j4E^+wH_ja$x{R-nXky}it#*iJh6udNLx zd1O)N7$XVWOv_yfKR~SzGEb2eRq5iZ6D1=y*UCA7DKf;8QVyL87If z&7%~&1G=&&*&dpfUwg5J>Dh1p{690m0L!=mEUC0!dTFftFvkN2oD#oQ6!ina}QEU>dAAIkppT_izOaGD}5KT}E$goo7@ zX==f-@s+4r0IOJYzO6D}_by3=b(1*MgK<{mo0V3Gd6FxL*+zot>~&iHXzZqzbZ^CS z(B0myEF-XN3s{nePfZhtI{q4S4&BmFbP{Lx{-FA{sqKgu#&kyYXU2``zzI{{TC=CY ztgi0c^R;rnqE~h}5^ZiSpGHy=o=DcyHRyPTk>0k51I?sXOn{wc~}+{r2fjkwBTsKDhza*0V6{Nn%8)oCYsT z3R}ekajxpVR&yIAsb%vKW!_%){<3cvb4Gn(g^HS59A|F~b&!I@=ZvizfZy&5?7$Xh zoMP4>{w8^s1Hr&@KH<5wr~ZOX;X1!U3zijk!pU#?QuB_(=MYg$Q>&BU8W#29UOG^X z`+nb8h@{gPr$jh9P3g@Ut}q&}kFTyjB(j6Xq{h>lCcz;X1AUF+91paRJeot0O~-5WfAYAwJY-6VvHfKX5P?^+w7 zh42p}e)Qd}S@j%Etesdt)+phl{JjXO6VL+Ms|pYv-20Wn6x;l8SYx2I*i&=q9z3zq zX++fd8+`hcLj_k|*Mn~t&v%f|5Pu_GzPxP)++jj8{c`hAMissWS$*oj=lvI6sn_36 zM|m@sb(6Z78AkJY&xq?tpD6{tUZWH`^JxaRE}x2kXeg6p{X&)nEh}d4N^|{a?zM@%|6#YKucM3`qv_{3RO?S$}_dRlACHX};wX`oky3U_RI;@J)IT;LCw;YJF zTvhOag`YaGYDYr?C{zu)>_TXdcI}**Ifxj!We!6wsIT7mShppoIa`Ye_kV znDH!9E1BZR%i2`-oV7|w?&$YEHA}wwLd_&6drIgkoiF;5z$0QrFGW6+8Znt{^gZ9m zfo9z8_0Cm@WGt~wMG$AuN$w1o2FrIOSX^;)uPBH6k@M(-1v5k$Hlqr1)fzK5+u8cp z56@BCy9w$qbP}{F;db51UtRzwb8p|bX>1P6l!2SlUXUk)q~aK}))C9CQ&og8Qs&W! 
z{K`G=_5#PLLX{O`VfD&r^-AI+NCPtx zrE|1+g=dJyR7_rtJ9;)SZjph;Dn?MREqg%?`cPp3)vERk1X_v|=md1r(G8*X{Omk# zgc5!%ebu-0BybXEeQNVyPcmQEuc*=BjP>_#s|HykgF#j%(pO0ew`SkY?iX?TA6&=m z`M!;!+P;H-=`xn(i|>~#@Ql$C*>s?&q6@gcz9G}soedbyiOo#586X|5hgN#YYqRC# zeTc>%FMQCrxIuGyNUH>{7h2ekpa>wHgTVJ&0i0CnzEp(L@m4wpwjV|dJ9#z2GG7fA z7iGrftV^p>5;tWW8O=#XYKOKGH(szMd8M1WJPqH%5~71vnGpUGaQaEHX@P?1uS^0d z!SP#@pkiZa$fVY1*J&axefo>2m?NXys>>4U9;?1N|&3+PN?oZO6?~;rpId?~pGTm?`^u z1lS^6Y7`L}qA^&hhFs#tchTD4jB5pjdUNw}VREFBz(@$2Kz^{?Wb0Li;+N7nTyDGS}UGT;#L`-?T|E)I}ET&2H^HpTXG$k6!a{ePJ@E|b^6x)lFso7 zIWSI;4kwa|K3x{d0cn9?czYArdY6TUbAYJk{k{GV$~x3H7t6)bjSB}JEk5EncdEDC zo?f<6QHftPs1Y{1RUz`4KfuNWDDWz1MC-*-|0(?te*9t&XP69sAw2|HYU+~WK2Z+~ z03Qr(!7AtWcd~yZy;Ut0@mI4da^jbLm;578tuJJF3+D=gd+WcyNs~d5j2lCXlaDon z&eC{VbNnPiY3&e~lci@~Xjt_pS%3<^;_tH0)jE#c=8O~X>nqjH-)<#vs=aBqU9b@n z61`fN&&0Q`@Ivgjx4L(qzQy{JnY_H0apUfY2nZ*SU7~+>h+x>n96*E;aV<_1)|BfG z+*U2AIkxy(DL&_7h1K_pcbvAp{v7Tz=oqW(jIbZ5PMtmsgi*&hX?Ff{TzNx4 z6=%QJ|9RApiFl#dXQeortR0emS0@HOi)J}bJNsJYV;QUq* zE&!Uw9P_FSwC#xrx}1IrrK-u3t{#*s@+oTHcFz=+`iSguwfd^DzsC7wD&k8RhyRhm z+mnl*T$w`D7NOG}`BES1IOU_9@9MPiy_8W_77Kts%!I9desPHL$V~O)^DkypLEj%+ zXElr@C4ZH6mXrrGwKhWQZ?6sZ3e5SZul5d1f@lS^1e1{edG*g*)1#RG!(!24<>~r=$yR7}xD-|ga>uF-+P-EcZ<6}sr)DVL(v7}PqU`!Bf zPLMq4z(hGX(!7zH+zNv2V6xjVg?yU(?zy5llN2&?-uU`Lhj`YYzU)_GckY)=FqnIy z#SqknP0wE!V>UyQfyXz|q&J@t6lCF}GF03Dc%;afycTTgEB;Dc#;I1be#t+j%4JBS zJ7D!nj6Z+1q1;(-owu%3VtmRa2nv9$G(EtqP7bEVQEFdRtRd>^{!JF~kT9uJ(f*2% z+CrXKEuniU4bk8t_8>ewT~mprYEQS7V~!x5;l$p(uAN?BW7f^GiszBty>!}=F#}e) z;$=3a-6+k0)*N8?THbQRZ`}MOj#6R<04g@5;CjRTBk7R^;HZqovuPwQP1p|Ra6av% zObs~~A8lrs1naswOS^^-o(b92Ax?&WZYKbeY}ARE&Q-3_zGT)=>mikI64oror=<)= zL%5hlG+n0N5GKN69a8zTEP&|xviG_#I#^}{_g7XVPY=`lTz{w zO%lfD^5R=K>x1Tl$DsxjKgnn`k{BDeN7NJjUjePpudf9exeBZ%qc^Rps3N{7Y$;XA zZC9j~(~wA;ajs2~B^M~Y`wBO|go>QH%P7>y!Z~$>pt-_{UVQ__)n7R0^-rL@!@*d3 znt1p*xFsz3SQr?^*ccf9_Lth1*g%{+he;*{!Gs?Axw^DvKJ}WDppH1WF!2kt{5@cT zw=F`Rr`5dUGWUIGiMIhiYbMQvHgSFWAjyRZY9K@hb)$p^c#n+vgFxuazyNisy$~}6 
z2eJCLTxY06tb-P^xb%AM>KN7EK$w7}2Z^8zuW0Dbi)UHkK-(29&XsgV=d z+XO>psT>4C)hkC~G!04Z;b*I>lLU^lmgLc9TK2)yiBYplr7@VCoX^9}I7l*}i!9@|-&XEc zr(FS6I-jYX<8ZE9VU2?{v?qZjoqR=Bp=Ti1#axJT zIf+<_-~OuXNFX3*;yFoXNYT{(C7#lt4oRH!ceZY+){T1g1Qym8VZem7&BiO3jBdvO z%K4WXd`>>@lf(NL6j+S~`;);TQN_Mo$C)AOSwwVRIh=-9=-NuPYRirrwz^7jjfqHn zP$Fp52XFuz{N_~Nzr`F-^eQv0%w#`8RpUJbUcE<{x_)0d*o)IwhvtndyMxS%aF)PL z$*f*NeGIIK6H%L{mEk7)zS|5o2*GwTX$q14%_+33{oP{}Kyu}iET1e&OAwine?}56 z(PmOwY@2X!h{-MgMK8y(lD`(KK}TCVXgoO{I86Yk$wQVBaWrX8_I*MPD(JATY z?hhfh{|^in^r;!%8s4=}56v48Xacyb-gR{bgfmYf3I>W1`QlL z-Bl5@PfD{u@$ZRFau7`d@AOgqfT{m>$m){?Y6Lj3qADZo11u|SrqCXDd+tT+sQ#!K zSqPL---RVP%KsO?Sppr`FD|^(DYQnI?e2$8vb@fg{ETF@k@2(TUEEeH+3#$^)TeBs ze_1Z{!w7XJ6fE^eqd8ou*t*l>1!tIcO8{w#@=^x9?b~7PIOG5=&4jz~;KLTzS)p<} z&muPt?1Jwc*eB(#>rbzfm2S((RAFWJaqTJw5#S(`)Sec3+A1jRYyLB;xsGk*_K<*W z)A^e>#jur&pvSb>R3%9I487EiFioPWD|FY2hVIXe3)-c6frrx9*sWDd<{9*7yItKS zFX@)puKR*g#(*?- z^RxjD0Un_*T9dY#f&~wQ#KVQ48cj`KO8G6X76$bbX0iQI$Ce0${(KTm&F;uTxy2qL z%9ViXLM!0yj?mR-)U3=9jM7*TD5!2`n=>bAo5SA4eux7ZeP4UIm2lYHZ`VUCx@$(E zlPswXX+n~NN-I()Av(_pV!rCk2D(Q(=v22FM$D#~nGNEY3eCE)uQ6Cs8n?ETWgo0v zo*#bZx2Iz4vwc8HtG33#q8V%Jb-mz?Tjf*v@|^Wgb=ZFLB6E}K9tMU0%irZ8nUo)9 zs_j)ELEd&ymDl(db8R%`d+AKVnuo2J;ogwW)Dkth3XOb4;&;yW;3rsAiBSXWYnoV< zsuvTl5MNH$_q>=G@P>pH;iWs?E)yNmszJ&7A2vrLr_z%gpEH4EV;sVYH;G1us%qQ0 zR0ngB{~mjwzd{gHYq+ZhZrhT0-jikNN7VXGR12BOXO<*?%a7t42AlT zG>Ya*jg%3m8>PAX-6rQyngB(yDlbQ*p7jnXbHzHi1@;cF%)K{H8OY?Nl=D**e|4fSn83mlx5BtCp2 z*cay!+|KOfFFHaiQS`6R_-@>||7>KW30umiYff*bE?@QWD?C*XSGIu8aDHOP(Mj_2 zA*B5f$`;TenH_J~Hp(iBye$G%8$aTrG^x^Es#wp&tR(`;zp?Dz`Zmb~dzmvus0tmG-_jfmSopN9`bA~ zg?C?gjGG#x4~7B-l}@yv2q4uD|iR`10h~wCP=v3Ww@@Nk`=82AP?9o|g~y z(;t8PMvREel&K*Ndls6phhLmvx*g@lTr)}G#_Ul54DjjJP(9s?nrE^yA%FeIx)K=S z(=E;{R0b4#WNprI|3yt5@|6{T3uDdauAn6h27bHU#86L;G|55}m<&bPyQ%x?6ZuW9 zkP08J1jTcx)Lr5Dx*SSZuxU>~Zn}rvTLg!_E&RBHI z=e0N{Sl^H4*KX54I>DGlt^Naic=1=0YWgv=X|1b)PYg2)2QZ%tAzsAHd*a(Na`ec+ zr-A~sSQK*>P!BsPRGz^-OQb{%2PKrkv5)Zmb*H0m`8iv292sSmQjhW@3mg<|gqTOV 
zF$7Dao3#i6)d-xpMVC2Pv0VOXdJ4OhpZANK;Fs|82h$S(7in8WHRH3GU?>P1;hK#i?QfYps z4q~Lo-ye-3r*`~eb0QGvDT|bAUE{4N8P|%Ts#j%+Xjwm7BsZ&aJ7Ee4>%PCoQmrwoF#|TGc7ul8BYSVOf?k9b*{9* z+v1fQx)oK_v6*bV_xG!#co`W3EfGK7Md;E{1Sm}1GTppXF|b>3ff?S%hZTO+=>-l* z()12r21t7&rEIV=wZN5;Dx;*=dJssfqpzHZCkB0KfPzQ7nd-4^JDzK3SH9czC@xcvW9AJz4iRY!MR%U_U z0L}pI3-w;Vm=G>AL82Rm{;x)gLB6=%7b3l3hCfswvt_xGDFC_teL zxNbu$(_&P|Qv2Y0R!H}JeXeWxV3A7AsnpuQMfJgXrO_{b_Z7kq%QU(lBTpVZzWoU< z54M*!=C-nsnAB#{duVp%MV18@c`%$SmtNAnr)<@u>2F2&iNRY6*V@Cn0dM7eX7R}7 z(!TTJRe)h~Q_QFc`b=b!q9Q}JHer$xub)*wA>L`!t*Mhx!%3_#8=gFzjr&~8 zVeL(%T8>p*ll}K2FK!?Oo2|fw;j~%`rn+A<2xA_38yn8)NxbN+fL@xI{-k`VTIB&k zpPJ>`+|s8cf*8gu-kwL&y%xjrvBj2RkQD|@L7?K^uAf-ncPu&C~K z&+q+t8D~D)jZuG042+{Y+T-7p2b>uIhFS5D%tiFxbCjZPfTk8beC{NhQdn|*LBu9I zaxzfmD$`(z+_nWUcRb2}9NMcM!=YzBt@xd=j(2&DAtuMZ%&yW{FAtOH$ydB%$9l}Y zdyPEgDg0lTUV-~&j%3s}lfN40PdM;rl8yhUc4O?cD)r}9@Xsl7Ovt~;b1Jd%x(7pm zJ%SH!-=MQt54`3*1w&QU3C8P&utAq;X>+v=)RYc4OuHr!i#W%7_Ohmm)2n@5eU5m! zT=3^v&kO4Ho>~Tt9`nKCe&CiJq6A*N<=)w@r*dVs$5}=ttbd<%eiW=RPF@_U8Q<#Y zn8CQc>)ga~iNOEdq Date: Thu, 12 Dec 2024 21:56:40 +0100 Subject: [PATCH 27/30] Finish imodexport and implement napari vis --- scripts/otoferlin/common.py | 2 +- scripts/otoferlin/export_to_imod.py | 55 ++++++++++++++----- scripts/otoferlin/make_figure_napari.py | 73 +++++++++++++++++++++++++ synapse_net/imod/to_imod.py | 16 +++++- 4 files changed, 131 insertions(+), 15 deletions(-) create mode 100644 scripts/otoferlin/make_figure_napari.py diff --git a/scripts/otoferlin/common.py b/scripts/otoferlin/common.py index b6d178d..9dd0ca7 100644 --- a/scripts/otoferlin/common.py +++ b/scripts/otoferlin/common.py @@ -60,7 +60,7 @@ def get_all_tomograms(restrict_to_good_tomos=False, restrict_to_nachgeb=False): table = table[table["nachgebessert"] == "ja"] fnames = [os.path.basename(row["File name"]) for _, row in table.iterrows()] tomograms = [tomo for tomo in tomograms if os.path.basename(tomo) in fnames] - assert len(tomograms) == len(table), f"{len(tomograms), 
len(table)}" + # assert len(tomograms) == len(table), f"{len(tomograms), len(table)}" return tomograms diff --git a/scripts/otoferlin/export_to_imod.py b/scripts/otoferlin/export_to_imod.py index a810eb5..35e5a72 100644 --- a/scripts/otoferlin/export_to_imod.py +++ b/scripts/otoferlin/export_to_imod.py @@ -1,11 +1,14 @@ import os +from glob import glob + +from pathlib import Path from subprocess import run import numpy as np import pandas as pd from tqdm import tqdm -from synapse_net.imod.to_imod import write_segmentation_to_imod, write_segmentation_to_imod_as_points +from synapse_net.imod.to_imod import write_segmentation_to_imod, write_segmentation_to_imod_as_points from common import STRUCTURE_NAMES, get_all_tomograms, get_seg_path, load_segmentations @@ -18,7 +21,9 @@ def export_tomogram(mrc_path, force): output_folder = os.path.split(seg_path)[0] assert os.path.exists(output_folder) - export_folder = os.path.join(output_folder, "imod") + # export_folder = os.path.join(output_folder, "imod") + tomo_name = Path(mrc_path).stem + export_folder = os.path.join(f"./imod/{tomo_name}") if os.path.exists(export_folder) and not force: return @@ -27,35 +32,59 @@ def export_tomogram(mrc_path, force): os.makedirs(export_folder, exist_ok=True) - # Export the structures to IMOD. - for name in STRUCTURE_NAMES: - export_path = os.path.join(export_folder, f"{name}.mod") - write_segmentation_to_imod(mrc_path, segmentations[name], export_path) - # check_imod(mrc_path, export_path) - # Load the pool assignments and export the pools to IMOD. assignment_path = os.path.join(output_folder, "vesicle_pools.csv") assignments = pd.read_csv(assignment_path) - pools = pd.unique(assignments.pool) - # TODO: discuss this with Clara, not sure how to handle this with the irregular vesicles. 
- radius_factor = 1.0 + colors = { + "Docked-V": (255, 170, 127), # (1, 0.666667, 0.498039) + "RA-V": (0, 85, 0), # (0, 0.333333, 0) + "MP-V": (255, 170, 0), # (1, 0.666667, 0) + "ribbon": (255, 0, 0), + "PD": (255, 0, 255), # (1, 0, 1) + "membrane": (255, 170, 255), # 1, 0.666667, 1 + } + + pools = ['Docked-V', 'RA-V', 'MP-V'] + radius_factor = 0.85 for pool in pools: export_path = os.path.join(export_folder, f"{pool}.mod") pool_ids = assignments[assignments.pool == pool].vesicle_id pool_seg = vesicles.copy() pool_seg[~np.isin(pool_seg, pool_ids)] = 0 write_segmentation_to_imod_as_points( - mrc_path, pool_seg, export_path, min_radius=5, radius_factor=radius_factor + mrc_path, pool_seg, export_path, min_radius=5, radius_factor=radius_factor, + color=colors.get(pool), name=pool, ) # check_imod(mrc_path, export_path) + # Export the structures to IMOD. + for name in STRUCTURE_NAMES: + export_path = os.path.join(export_folder, f"{name}.mod") + color = colors.get(name) + write_segmentation_to_imod(mrc_path, segmentations[name], export_path, color=color) + # check_imod(mrc_path, export_path) + + # Join the model + all_mod_files = sorted(glob(os.path.join(export_folder, "*.mod"))) + export_path = os.path.join(export_folder, f"{tomo_name}.mod") + join_cmd = ["imodjoin"] + all_mod_files + [export_path] + run(join_cmd) + check_imod(mrc_path, export_path) + def main(): force = True tomograms = get_all_tomograms(restrict_to_good_tomos=True) - + tomograms_for_vis = [ + "Bl6_NtoTDAWT1_blockH_GridE4_1_rec.mrc", + "Otof_TDAKO1blockA_GridN5_6_rec.mrc", + ] for tomogram in tqdm(tomograms, desc="Process tomograms"): + fname = os.path.basename(tomogram) + if fname not in tomograms_for_vis: + continue + print("Exporting:", fname) export_tomogram(tomogram, force) diff --git a/scripts/otoferlin/make_figure_napari.py b/scripts/otoferlin/make_figure_napari.py new file mode 100644 index 0000000..6763212 --- /dev/null +++ b/scripts/otoferlin/make_figure_napari.py @@ -0,0 +1,73 @@ +import os 
+ +import napari +import numpy as np +import pandas as pd + +from synapse_net.file_utils import read_mrc +from common import get_all_tomograms, get_seg_path, load_segmentations, STRUCTURE_NAMES + + +colors = { + "Docked-V": (255, 170, 127), # (1, 0.666667, 0.498039) + "RA-V": (0, 85, 0), # (0, 0.333333, 0) + "MP-V": (255, 170, 0), # (1, 0.666667, 0) + "ribbon": (255, 0, 0), + "PD": (255, 0, 255), # (1, 0, 1) + "membrane": (255, 170, 255), # 1, 0.666667, 1 +} + + +def plot_napari(mrc_path): + data, voxel_size = read_mrc(mrc_path) + voxel_size = tuple(voxel_size[ax] for ax in "zyx") + + seg_path = get_seg_path(mrc_path) + output_folder = os.path.split(seg_path)[0] + segmentations = load_segmentations(seg_path) + vesicles = segmentations["vesicles"] + + assignment_path = os.path.join(output_folder, "vesicle_pools.csv") + assignments = pd.read_csv(assignment_path) + + pools = np.zeros_like(vesicles) + pool_names = ["RA-V", "MP-V", "Docked-V"] + + pool_colors = {None: (0, 0, 0)} + for pool_id, pool_name in enumerate(pool_names, 1): + pool_vesicle_ids = assignments[assignments.pool == pool_name].vesicle_id.values + pool_mask = np.isin(vesicles, pool_vesicle_ids) + pools[pool_mask] = pool_id + color = colors.get(pool_name) + color = tuple(c / float(255) for c in color) + pool_colors[pool_id] = color + + v = napari.Viewer() + v.add_image(data, scale=voxel_size) + v.add_labels(pools, colormap=pool_colors, scale=voxel_size) + for name in STRUCTURE_NAMES: + color = colors[name] + color = tuple(c / float(255) for c in color) + cmap = {1: color, None: (0, 0, 0)} + v.add_labels(segmentations[name], colormap=cmap, scale=voxel_size, name=name) + v.scale_bar.visible = True + v.scale_bar.unit = "nm" + v.scale_bar.font_size = 18 + v.title = os.path.basename(mrc_path) + napari.run() + + +def main(): + tomograms = get_all_tomograms(restrict_to_good_tomos=True) + tomograms_for_vis = [ + "Bl6_NtoTDAWT1_blockH_GridE4_1_rec.mrc", + "Otof_TDAKO1blockA_GridN5_6_rec.mrc", + ] + for tomogram 
in tomograms: + fname = os.path.basename(tomogram) + if fname not in tomograms_for_vis: + continue + plot_napari(tomogram) + + +main() diff --git a/synapse_net/imod/to_imod.py b/synapse_net/imod/to_imod.py index 5832213..99f2407 100644 --- a/synapse_net/imod/to_imod.py +++ b/synapse_net/imod/to_imod.py @@ -37,6 +37,7 @@ def write_segmentation_to_imod( segmentation: Union[str, np.ndarray], output_path: str, segmentation_key: Optional[str] = None, + color: Optional[Tuple[int, int, int]] = None, ) -> None: """Write a segmentation to a mod file as closed contour object(s). @@ -45,6 +46,7 @@ def write_segmentation_to_imod( segmentation: The segmentation (either as numpy array or filepath to a .tif file). output_path: The output path where the mod file will be saved. segmentation_key: The key to the segmentation data in case the segmentation is stored in hdf5 files. + color: Optional color for the exported model. """ cmd = "imodauto" cmd_path = shutil.which(cmd) @@ -83,6 +85,10 @@ def write_segmentation_to_imod( # Run the command. cmd_list = [cmd, "-E", "1", "-u", tmp_path, output_path] + if color is not None: + assert len(color) == 3 + r, g, b = [str(co) for co in color] + cmd_list += ["-co", f"{r} {g} {b}"] run(cmd_list) @@ -172,6 +178,7 @@ def write_points_to_imod( min_radius: Union[float, int], output_path: str, color: Optional[Tuple[int, int, int]] = None, + name: Optional[str] = None, ) -> None: """Write point annotations to a .mod file for IMOD. @@ -182,6 +189,7 @@ def write_points_to_imod( min_radius: Minimum radius for export. output_path: Where to save the .mod file. color: Optional color for writing out the points. + name: Optional name for the exported model. 
""" cmd = "point2model" cmd_path = shutil.which(cmd) @@ -210,6 +218,8 @@ def _pad(inp, n=3): assert len(color) == 3 r, g, b = [str(co) for co in color] cmd += ["-co", f"{r} {g} {b}"] + if name is not None: + cmd += ["-name", name] run(cmd) @@ -222,6 +232,8 @@ def write_segmentation_to_imod_as_points( radius_factor: float = 1.0, estimate_radius_2d: bool = True, segmentation_key: Optional[str] = None, + color: Optional[Tuple[int, int, int]] = None, + name: Optional[str] = None, ) -> None: """Write segmentation results to .mod file with imod point annotations. @@ -237,6 +249,8 @@ def write_segmentation_to_imod_as_points( the radius will be computed only in 2d rather than in 3d. This can lead to better results in case of deformation across the depth axis. segmentation_key: The key to the segmentation data in case the segmentation is stored in hdf5 files. + color: Optional color for writing out the points. + name: Optional name for the exported model. """ # Read the resolution information from the mrcfile. @@ -254,7 +268,7 @@ def write_segmentation_to_imod_as_points( ) # Write the point annotations to imod. 
- write_points_to_imod(coordinates, radii, segmentation.shape, min_radius, output_path) + write_points_to_imod(coordinates, radii, segmentation.shape, min_radius, output_path, color=color, name=name) def _get_file_paths(input_path, ext=(".mrc", ".rec")): From a44ab059397f31754b9847826f79772a061905c7 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 12 Dec 2024 22:02:33 +0100 Subject: [PATCH 28/30] Update figure script --- scripts/otoferlin/make_figure_napari.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/otoferlin/make_figure_napari.py b/scripts/otoferlin/make_figure_napari.py index 6763212..c684d69 100644 --- a/scripts/otoferlin/make_figure_napari.py +++ b/scripts/otoferlin/make_figure_napari.py @@ -49,7 +49,8 @@ def plot_napari(mrc_path): color = colors[name] color = tuple(c / float(255) for c in color) cmap = {1: color, None: (0, 0, 0)} - v.add_labels(segmentations[name], colormap=cmap, scale=voxel_size, name=name) + seg = (segmentations[name] > 0).astype("uint8") + v.add_labels(seg, colormap=cmap, scale=voxel_size, name=name) v.scale_bar.visible = True v.scale_bar.unit = "nm" v.scale_bar.font_size = 18 From 5d4f7cb6329f439efb49f5c7ae30c74b5785df00 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sat, 14 Dec 2024 09:21:22 +0100 Subject: [PATCH 29/30] Update otoferlin analysis --- scripts/otoferlin/export_results.py | 26 +++++--- .../otoferlin/filter_objects_and_measure.py | 2 +- .../otoferlin/handle_ribbon_assignments.py | 57 ++++++++++++++++++ scripts/otoferlin/make_figure_napari.py | 9 ++- .../otoferlin/overview Otoferlin samples.xlsx | Bin 8629 -> 8612 bytes synapse_net/distance_measurements.py | 16 ++++- 6 files changed, 96 insertions(+), 14 deletions(-) create mode 100644 scripts/otoferlin/handle_ribbon_assignments.py diff --git a/scripts/otoferlin/export_results.py b/scripts/otoferlin/export_results.py index 70f3a15..f83ff3c 100644 --- a/scripts/otoferlin/export_results.py +++ 
b/scripts/otoferlin/export_results.py @@ -47,20 +47,30 @@ def load_measures(measure_path, min_radius=5): return measures +def count_vesicle_pools(measures, ribbon_id, tomo): + ribbon_measures = measures[measures.ribbon_id == ribbon_id] + pool_names, counts = np.unique(ribbon_measures.pool.values, return_counts=True) + pool_names, counts = pool_names.tolist(), counts.tolist() + pool_names.append("MP-V_all") + counts.append(counts[pool_names.index("MP-V")] + counts[pool_names.index("Docked-V")]) + res = {"tomogram": [os.path.basename(tomo)], "ribbon": ribbon_id} + res.update({k: v for k, v in zip(pool_names, counts)}) + return pd.DataFrame(res) + + def export_vesicle_pools(tomograms, result_path): def result_extraction(tomo): folder = os.path.split(get_seg_path(tomo))[0] measure_path = os.path.join(folder, "vesicle_pools.csv") measures = load_measures(measure_path) - pool_names, counts = np.unique(measures.pool.values, return_counts=True) - pool_names, counts = pool_names.tolist(), counts.tolist() - pool_names.append("MP-V_all") - counts.append(counts[pool_names.index("MP-V")] + counts[pool_names.index("Docked-V")]) - res = {"tomogram": [os.path.basename(tomo)]} - res.update({k: v for k, v in zip(pool_names, counts)}) - res = pd.DataFrame(res) - return res + ribbon_ids = pd.unique(measures.ribbon_id) + + results = [] + for ribbon_id in ribbon_ids: + res = count_vesicle_pools(measures, ribbon_id, tomo) + results.append(res) + return pd.concat(results) _export_results(tomograms, result_path, result_extraction) diff --git a/scripts/otoferlin/filter_objects_and_measure.py b/scripts/otoferlin/filter_objects_and_measure.py index d51b897..1479bbe 100644 --- a/scripts/otoferlin/filter_objects_and_measure.py +++ b/scripts/otoferlin/filter_objects_and_measure.py @@ -77,5 +77,5 @@ def main(): process_tomogram(tomogram, force) -if __name__: +if __name__ == "__main__": main() diff --git a/scripts/otoferlin/handle_ribbon_assignments.py 
b/scripts/otoferlin/handle_ribbon_assignments.py new file mode 100644 index 0000000..8ac3586 --- /dev/null +++ b/scripts/otoferlin/handle_ribbon_assignments.py @@ -0,0 +1,57 @@ +import os +import pandas as pd +from synapse_net.distance_measurements import load_distances + +from common import get_all_tomograms, get_seg_path, load_table + + +def _add_one_to_assignment(mrc_path): + seg_path = get_seg_path(mrc_path) + output_folder = os.path.split(seg_path)[0] + assert os.path.exists(output_folder) + assignment_path = os.path.join(output_folder, "vesicle_pools.csv") + + assignments = pd.read_csv(assignment_path) + assignments["ribbon_id"] = len(assignments) * [1] + assignments.to_csv(assignment_path, index=False) + + +def _update_assignments(mrc_path, num_ribbon): + print(mrc_path) + seg_path = get_seg_path(mrc_path) + output_folder = os.path.split(seg_path)[0] + assert os.path.exists(output_folder) + assignment_path = os.path.join(output_folder, "vesicle_pools.csv") + distance_path = os.path.join(output_folder, "distances", "ribbon.npz") + + _, _, _, seg_ids, object_ids = load_distances(distance_path, return_object_ids=True) + assert all(obj in range(1, num_ribbon + 1) for obj in object_ids) + + assignments = pd.read_csv(assignment_path) + assert len(assignments) == len(object_ids) + assert (seg_ids == assignments.vesicle_id.values).all() + assignments["ribbon_id"] = object_ids + assignments.to_csv(assignment_path, index=False) + + +def process_tomogram(mrc_path): + table = load_table() + table = table[table["File name"] == os.path.basename(mrc_path)] + assert len(table) == 1 + num_ribbon = int(table["#ribbons"].values[0]) + assert num_ribbon in (1, 2) + + if num_ribbon == 1: + _add_one_to_assignment(mrc_path) + else: + _update_assignments(mrc_path, num_ribbon) + + +def main(): + tomograms = get_all_tomograms(restrict_to_good_tomos=True) + for tomogram in tomograms: + process_tomogram(tomogram) + + +if __name__ == "__main__": + main() diff --git 
a/scripts/otoferlin/make_figure_napari.py b/scripts/otoferlin/make_figure_napari.py index c684d69..ef515d1 100644 --- a/scripts/otoferlin/make_figure_napari.py +++ b/scripts/otoferlin/make_figure_napari.py @@ -18,7 +18,7 @@ } -def plot_napari(mrc_path): +def plot_napari(mrc_path, rotate=False): data, voxel_size = read_mrc(mrc_path) voxel_size = tuple(voxel_size[ax] for ax in "zyx") @@ -42,6 +42,11 @@ def plot_napari(mrc_path): color = tuple(c / float(255) for c in color) pool_colors[pool_id] = color + if rotate: + data = np.rot90(data, k=3, axes=(1, 2)) + pools = np.rot90(pools, k=3, axes=(1, 2)) + segmentations = {name: np.rot90(segmentations[name], k=3, axes=(1, 2)) for name in STRUCTURE_NAMES} + v = napari.Viewer() v.add_image(data, scale=voxel_size) v.add_labels(pools, colormap=pool_colors, scale=voxel_size) @@ -68,7 +73,7 @@ def main(): fname = os.path.basename(tomogram) if fname not in tomograms_for_vis: continue - plot_napari(tomogram) + plot_napari(tomogram, rotate=fname.startswith("Otof")) main() diff --git a/scripts/otoferlin/overview Otoferlin samples.xlsx b/scripts/otoferlin/overview Otoferlin samples.xlsx index c9e10d52f5c8659e82e68791744bc42baef49537..6380dfbf3215d2357f3f081fd79f0e65dfe800fb 100644 GIT binary patch delta 5048 zcmZu#2T;>Z(+@>@NhmS2KmaLHq!W4-A|)WbNiQN@1ZnygL5egXQUgc_L0ae_MFfK& zC^ZTQi1aQ9QUpHqo%!D9`R2Qsn=A9%y}jGrx!rBhDAJ%c)FmNh0#Q&BD*kcoq-m86;_Z5ov=u7$K1)wP87@2`&s{MN|sk?R_E=404NIe-%h z!Ce+q9vJmyL1eFpvD}5dy}A>8O9^{A6_mo-N(`UwDu5Z zL|ZA7v1_ekTP>!?>*0u=Dy3-?%)Z_Q#p-X!9cHrRLNY^S7UxhAq#u~-l;X~?n~3NQ zfPHG0I5d+(PHC)$5{*MYf^jWQ;o{c* zkSuS(fgW#ho0t90E)-19LBC&4ewOQ!u2&dUIz^-33G|39bJJ?{qHX1XN?x{pIg#E$ zN}kxVzOM>yVC=!#%30o9f931+RQ3%eKw^Hc;L=Ulox(#oc+$~%bNad5Zotoe2Ixl7~6L>s& zpRAnRVM9}s*MouDuH6ZU344%7xi+m{QeNobkoEA%m)He57h?I9RrcLjBk$_bd7~Vw zT;u$xrF^4Oi6ABCG)Wec%s9e?o-3auCiDFVXHi7mZq@3I7AqTO?6_&Ir7a_N?#X-P zTf+T)36-FvU)LTas4Oghhz@&*Ira!G<$T{5lI8hP_FJ`813z>Mcr|I#u=r}Sx;${S 
zVrj7aR^H@mb^da>&4>VB8<~*4Kr>x*hU*qWG3=b2z{Sn5yTKjf7QLF+lR%o*h*)oeh0a;#M%8MZ7iIHE}?DhQE-D`^e zO62M{<=iVX@gl_lP+BYfl_6FobiNN?m0T1T7jr1OwQb3U|K93RsD1o%STCai!m}A- zu+gt#3|Ha`8$dIzN`@^TSbGR@>s0U%IHl8=(X3wNYK^=Np>F?f$$<00qsK=euLC@`CNMsBD} zP9a!ndvk^a1mficf&S(G7$Q1eKwLD)%d^nt^HYmk2{cW(nITweSJ=NIr9 zrYW6&pNJ#%^BaJ-D>}ihuW8)axU?BKs|&vIwYyt8FqXD;Dj9t3b`Z+9pPGjT9}~B; zr}TceuEB2j`T=(o+Od2&=ng{kDSxU`XT=v?RNrm%{nyMRMKt;^7W>$wmog*MNz=Fv z?)52Cyv@v*CW)m4%*0!P(OQYK1UUr;{|_A_s1=-sUt(_^jnaJR2f%qv?i}428@}h{ zBY)6OZ-9qD1fSy+Xtw#b+|IvGY@;+G0ApCc2LattT3Sr811|jSX+T>Ai*4lXb$ni@ zA$NFLY0&*OG!tBG3hdBB1r_MUcAGCFyniH`^T~xY54EjmT0?bZtnIjcF8 z`?W@N#sDl*rGMzVHe|O)-$mWY0pmJ?XUErOm#s&poN}}OY^&N+cfZS7U{Unmr^(|N zP@eSmX67f6kP`CL^xc;>bo9@a-y__cL|lwUpHU7yvypdUs*31Yfus+U?w`yTqR)H~ zH+IT1q-;8sJ%BnBX`UWYNd#^OqZy{+r!4{?S4;&Np1!L6&=g}4-$B7K^vfm9J^7o6 z^1Q{Tt0~7BVz=}@cm(ApUYGITH(!$0vb4pBDlTO6xuBlNgfd)5O1GLSGyc@=5jew` zoPIp28`E-a5gTP`3?a$PIw!aow$jjuJ~(e`-gqvmumo71ktLmuIL&UnFCyil0n>J`PQT?eS70m3AECm1(6t zmwuTGZUu}KnsJlia2Bw?tLmdHe-h#a8zxj~B1p++&CJO7Rwc2>sPeNgFN$I}bii`| z%dM5<@H^QoV8g@<=$hYfZE35M?*7OexygS29p_3m$}ZR4-o)w2Dml!9uyT**jr%_{oKdgeN73SE`(s_JDD=MY!bE>S62uDMNnK)gm* zzQ%i&OvT>7a#bsDtremlpm5hiTV|V^c0AhL>_OQw#PhWz39-yfOX58>DyT-=nnuMM znaa-B$w#VR0v0OouMek=!&cu!(E7Qms=}8qZaLc9SM{#1Jn9X-#kx>Ma)Y;~bwr@^ zp6JN4F(11&$neA{sU4t46Lu~Ri0@n~b8g+YB}R?wzfH8Xh^T)}(5FH{S*LUU{VfOK`rR z_7C<__PR+#7eo@Rej09`P4e*iMluDyR^HB|iEJ^}I(X2%q^gI0jJ*H)kDTp;wa@68 z{V-tVWHWD+{&Aprn5(uRAgi)lUWeCMau#^Ik^9M;Oa=4)*+(-Zb9wd>zGj zUoSXmMewu(znYU#Y(W_xbEDjUCKh}19SgS&DZFrJ)t~irn$ecSUml{-z}O4M|H$Us z?LwxzJ$C1L85bX8l$M`UcO=g+5^p~~ak)+ihclt{<;G3L#+Qa&zhQB~1O(RX9?Q4coxbkFag*vHx?52=bs_|f;Uclu2J%p)joBhyD53gwMC z>YJfTwvnUz)r3@kt;PtjzwOJyQBXF!utoFJ6sqb!RmXeCw4ozWo~0~RpSACP+Rr)+ z(mA~q>6FapfYc;j*%P^OC_bgTlOIN;L2j{@Y!E&##qoZx#f!&IGfYeFR$P=pVSH9g zf4HPTrR1)Py0H32aC%sH52f%{Y^3JvtBD6eWhv+O6IqeOb=y(^$ZkSl0rrle{*93E zL%usUiX)t0B%Q+de2hMvo`je|XG$cBz;l8PZMbL6Pyjw>2%CH25$Ch&Kt_5WF^>~a;T4Z6vu_hVq*v0rjU>Nf@4-xd_Ll35cvqh*HB)+!$nJAurAptJ2jLO(ucLD) 
z^841LLkb*nxw<@ndg>=dphW?NcfY!Vobg5>RhbfF>_%pXXXa zoKNynraYlr=Vqh(Wn=!awyWs(DGB|h-(pAHSW<$qBUc)zUiv&>U*^~VWl1D(B=EA6j}%x{i(oF=DKzH+W8cRopazTtYMZM?g2 zoMo;kt%M%|Fx*yj{%J4nqHTo?P^m6%G_4qxMHQ_`D(H{Prjb{qEiR1qo0?{PK(uno zMi1FCd;k;pxS__~F7O85#)wWc#-0$Ic*AvqHBGS{W3W4u|!Wg+bn^Fx|H2Vz1Q z#RYhv%!=rNfUcEkukCK{P(DfhrFE*9lD#700!k%7A7Se^di#m@Q_K1C2LaY`8CX3< zualW^jjeS=T-~#j;!0}kNr}Nv2R{QYX-PN-{TuWf`7#%`(v z32`02u_hX(w`Fu}o~aBab8}7U;M@1|N@>aL@ok6W%A$pZKDUln{Sw^>`RH7t#71BS zZz3~HRHbw;#P)4B50xQsFS)Y39SP}spyUkrL5*<3QAVepyIj_lLS^CLwodHT{%@+eqruSP*g{REWf zi;eO=Shn;mA8zT8+$1Bw<0AzX$ey#XH@h9VA6I&2jR;&H6 z_(1W!)F}86LE(K+dP@dA!i5U$u2G{R_-4B5y*&!3g-tJ=`@_!3{zUBFo>fB1mq4H< z*#C*x>z*)j{wHHEWqM8eH)D5Y5eH1JR@}I9J2YGe#T4Gm5{k{@OLS?g?!@1~8s?FE zw<*-seH;$5gAPMq8KX*|A|d5+Y7`SkSH54MG(%Kue>bL* z#-h6Gx*AG>Pw%PO3|mw!qmV$$`lTqkIUMfFuQv^HGB&S*>5vHLN4h&UcY%+7A}}l( zlEYiP!aMj5n(&ATMkSyY8ttAm?^8u?-5N(pd-t7istT+b<@F}9{eC&mR$$vq)vTe$ zIaB8#=L_sjr{Z-}O4TnbV-$?gW^$HGM1jHt?&{N%_l{AxNM2cSeMbq4PeeQ#T4dWi zzlA!Xij%2>B@T6jVU0H;Nr07_dPJ zZ87yYxoGp=-VW7(;rtd@O0;KEdBAvsGh*cIF~dY@{B$nuB$ZUEh^OgkGE1@k_)zs! 
zAU4SWe1k^;N0%oL6i2vy4#Y}T$FcAo}OZ-1ddKv!ASw_yFpsZw)B<o) zW9rN5b!AU&l4C-j2l+~B8{id4+LOw0Tu~}xUHfbkzM^Nt{u{_|Si+x9H?r%^-86bE zC9OvE)6$xXQVLsS@TTR?uBV<5axb5$6`}&2F)-c{X5p{n<@AbNJ6;P_t}MZ< zh#Eez`%Qe|x{0>(!xMh!9itay{{T=v)HhNAli!Fxq9!b6VXc$okWQeu}Fv#i55gz zEuyc^ljOYf|Nnb)&OLX&d#2r)Z@&3vij*3a$aOXG38-+0iHUJC8)9n7*>Lf%-u1$` z3V6Q}loviD@EDPhEl%Y>XBOo3oS9jvJ6d<}LBYBv3|Ts=QQ_~@399IY^xC2>PNp)p zZ~IPJ8`wcuMu`2YKetCL+M~NE-$|rRoZ!LZI47rKzP&~&B#)9MS=a+UTBb;c3QV9S z4iv8$OY#+)y_!&ixlbndi+L$6QhvRveWmSW%z2&A?WU$~M+xdK!95`20hc$jcwhca z-bf~|yLUQCVMiU`ZT^zXyj|*BeK<`oINhdj^fI8MX!!I1DC|INgS3;SPtik%tK^f# z2S@w+QPX}!nSr-nIf6TF(22sBJmsUJPag@}h$^WQsun~hVe#rE>V3Z9RNu_y4}0gO zmgj%s6mN-@Vb8R5di(*7aSV$+nfc84#DGCKXC=-`Vv@3OCH7+HQIsOP1*S~!VF%+r zKaDtFny1{&GA>A_rCyKH_piUs-`nxM|Ji%vGb!HO^M#uhvO>nm(hUlpP}O9DAh5A3 zMbK8z z5agC#QY(FuY#0rYqB$k!jW%D4&O@KMkgz;ds!U1t^{rrOB-e!S{Fd-$R#5W?CZ_}{EGqr)|w4#Rc!2{LJYk^eB}k-eaRt_)@WB`U%@6 zZ?ZfL+%u7co|aeoCBAO&35s9ynqZSV8++=QY>jUR#vVw%00A5UYvf~vDDsJFvcmi z4qbb`hh7-#Y`T8Ej)qM<j5-cpHz@wChAAMk0 z{xCV^>X+1>5lET6E|Cj~+}l-0e%|E5>Qhme%-`Eo7Ozr-K9g_@uktP#VGk+}l)ksC z^T}z5<1p}+vYB!$){-q2xc~JAz)qF?T->_lr@X#AmX|1Dn_=|jTQ6;3e#>G0Y$7u@ zXMMS!6T%y2;t|vNed1za)&cR7HTa0W3p(8n3cB;m+=W4|E)ng~OPM8@ExsaaEvVj${nS_$S#OpZQuEBXnx^!SLd_^^#LdQyF7Pyc{^~2l5xSK)r12~o;!bcyBMlsZX6iLU`{(|J<+yOUAG+# zgt7g2Xj8k=XgA}vcT)KoQ2atp*)~A)ITp`yL6BhUG0pHLe zti(BUnNPYqbi|LP3T)kylQXs}OEy?2^fKBltSf!^ zm67EdZ;%!xw@kA$DnjQt9AO3DIHQ8^EQUG0vi}f?G|!$W z6P;-;lGIv#jNBRk?0oGklQW>?EaehoOvyQWw|254i&l_h8YLAKR_gm%5Vn59EL}YR zSLZyXQ--XlSI(~#id=GRLFEi9?Ttz&P4r5|`d;o^L7(34;iIHxL?Mur+8b#?V{M6qh;p-R(HO_2 z>YCAXrINDhvZ9(o$_bO2LNW=H40fdj??CcwxW`vV)hdOP`d(!xHPTL&MP3eVA-$~B zl|&vK9~y-8bu`l{U$m1CCWk}R<9amWl#{;1Ru1-Il#1AGF9(vbFmREUj!0w!>7a1r zzPsR7Ho(3~-~2p|(N+ik3Sv19V~t+_V+$h--#C$>fqx!GZ_2BbPI=1f8TTkkUyv~a z#z+JEc9hgnB@)To8r^n?lxeG@QxfsARm!mgaU}?mv+#`_4_EdMmxcExG<^N*m^#A0 zMwCytd$1N%l&D{vjevI#6XJm+xj=3X8|J9Mo7D5t8ERChSv`Ua`{x{5zV%>5!{`}<%<9v{R2&*x>4oA zvl^4mN`WV$cLa?Q9@6D_Ign}T`<#WIZ^iQ)u%PY2@dAUxoy~nMhPM$7jvG&dO`RbM 
z4?<|eikyj^LPHDG{O41i1UqZR z^gU?9Lq=}D%`u_9{(T0p&M0wkyvW?_lBnd8V48n)!c9lyuwYUr+2m9)ePS}u^d?G* zr44+Xwj8#1J^2fN20vM}B{wJqFmMs7jEm$_VT&w;(`ShC>5v zOtxLSDpS9B#~X3&4G&HF(MLJO=h$E% z9f)}RM@l^drZ{^K_+#FPjg1l^5960_bT0l>lJ}C)lnjJ8IE{?|zml*eGhGAP;d-tG zl2id59l<~RRdn^F*0n1d9o?UMzW&}SbjTORnThw~s+s`jxdh5;W&8OXnwr}0a`u)_ z6!vsN3(@thv~+>>|RwZ<24=f~s4pUqE^l?88HqXPcbVe<`kJGwooOc(w2q z+!9xlduQVU2pOojwh4-?;oKgk!< z4PT`{a=N=RFs8L|dv*~toU9>1SYH1Eo;rY{=*t{M#xF3i!l)|U5U zL^oPl4HCEGs2)AEIF?$j?!L?HnpPrYb8w=V^_g#NiTy=Zvi3?s%FiWsDR=f(pr|N2 z-EAXmbAYFFfEO6#SL_+F>{;!4G8$j$gbC2oWT?=#&r9kXex{vOS&h8u!8d>)J+*=v zp_Iuq=Ou9*A7#hb1!7a z(>$U$l(aBX;kEb5X|fL*37Z}Y`|jap9sNm61f;TJOAaVg_@pVcEGrdKweUTQ+87!~ zuv+8`Ij1b1R?)i5k->Ar9`SCPOpL}mhP#wJw__T}&;2T&0+ktK&MJNZ(WIv!ZI89K z6A*;(L#%_`8NbY z^_10cCQkf`>kC5dg8GO8^aRaesHgWK7NXuL7 zO2>e^U{w%j+HV~`9XpPW|7#8KSGdL6d`@w@j;NbxpIrIl>oGr6;AC^==FI{cN-3Hj zV6S$>PuutRyeGDU}ijuBjFU&_K5FHC9D(gA2b@ty_)7~v$Hynf1M zrfz4dzS&}$0|(zH)5y|AN$plrd(o8L6RrxI&R@4!6-cb@cf;%X4az@o(${-$^bSR+ zxDE*`M-y>WobH3<2*m>e5y7mnog7y6z78BI&f&yF3(UK@)lV-C<-JBWMEt@vkg%H( zKT;qC_PNnRHh>9*dm8A-j;e%0`;+!*Zwy5DO7C)f0}zJEf|R8u25&U@eR^YqY8RM>pPvr5pUI6-GNrm z*3BbwY?Z37|NO(ojCzE+5}G;$+N7WgqE3C}lbN4>EXQ@$P)u;5hgJqzRht;u)B6UxF5gJW?sckIY5oeaPoHqA#b|btFmv&Dx%LAx z=*A}wk}7V=DFrBDdShWc{U@Ax1e^CtWTHFXGB~X9a|rj*)<3#1)T+NEOX>A zr-KsilNXbY6Y;;IE|E*RXAGIdO_(>?T3qYvcK>iJ0txaxuT+^&)&E=IJ>8Qu*gd;4iC9(WiH@N^iClGif{`r1K)4@bQze^&* z4QC_NfIW+YD_Y+9J8>h=$g>0L&w5MJQ1Xq2;9GI?!^yIVH!H}E1UglM@=T(zv)R{r z$U@0aKi;XZx+2G1biOER#?`_PpvglJrzGMfkVA}S{2QlMev%26tk~QwGki=7!N+_tZrB7~wC~(bDu|Btw2Rp^iNXVTWTkOxWs+)av>nWdUU! 
zE8Z{UAfEW{M?dboLD#$Oj>f!>5(?fjSKY<_6nP=bZW}q)Sqw346xND!J8yiyJGqO!T-}C@&_~KOFy#J;gudR~Q`C!o>7P`@er0^dA}udejLM!ymEq
b%=Ea1C^2T1ze=uF0w^zL4#G)>-yHt~_4{nN

diff --git a/synapse_net/distance_measurements.py b/synapse_net/distance_measurements.py
index 4cf3181..8fa7ee8 100644
--- a/synapse_net/distance_measurements.py
+++ b/synapse_net/distance_measurements.py
@@ -226,6 +226,7 @@ def measure_segmentation_to_object_distances(
     resolution: Optional[Tuple[int, int, int]] = None,
     save_path: Optional[os.PathLike] = None,
     verbose: bool = False,
+    return_object_ids: bool = False,
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
     """Compute the distance betwen all objects in a segmentation and another object.
 
@@ -238,6 +239,7 @@ def measure_segmentation_to_object_distances(
         resolution: The resolution / pixel size of the data.
         save_path: Path for saving the measurement results in numpy zipped format.
         verbose: Whether to print the progress of the distance computation.
+        return_object_ids: Whether to also return the object ids.
 
     Returns:
         The segmentation to object distances.
@@ -262,7 +264,10 @@ def measure_segmentation_to_object_distances(
             seg_ids=seg_ids,
             object_ids=object_ids,
         )
-    return distances, endpoints1, endpoints2, seg_ids
+    if return_object_ids:
+        return distances, endpoints1, endpoints2, seg_ids, object_ids
+    else:
+        return distances, endpoints1, endpoints2, seg_ids
 
 
 def _extract_nearest_neighbors(pairwise_distances, seg_ids, n_neighbors, remove_duplicates=True):
@@ -292,12 +297,13 @@ def _extract_nearest_neighbors(pairwise_distances, seg_ids, n_neighbors, remove_
 
 
 def load_distances(
-    measurement_path: os.PathLike
+    measurement_path: os.PathLike, return_object_ids: bool = False,
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
     """Load the saved distacnes from a zipped numpy file.
 
     Args:
         measurement_path: The path where the distances where saved.
+        return_object_ids: Whether to also return the object ids.
 
     Returns:
         The segmentation to object distances.
@@ -308,7 +314,11 @@ def load_distances( auto_dists = np.load(measurement_path) distances, seg_ids = auto_dists["distances"], list(auto_dists["seg_ids"]) endpoints1, endpoints2 = auto_dists["endpoints1"], auto_dists["endpoints2"] - return distances, endpoints1, endpoints2, seg_ids + if return_object_ids: + object_ids = auto_dists["object_ids"] + return distances, endpoints1, endpoints2, seg_ids, object_ids + else: + return distances, endpoints1, endpoints2, seg_ids def create_pairwise_distance_lines( From 6c3c431590c0ec419a1e99e613e5d8d39b0f556d Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 30 Jan 2025 13:27:38 +0100 Subject: [PATCH 30/30] Add object filter logic --- .../inner_ear/processing/filter_objects.py | 134 ++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 scripts/inner_ear/processing/filter_objects.py diff --git a/scripts/inner_ear/processing/filter_objects.py b/scripts/inner_ear/processing/filter_objects.py new file mode 100644 index 0000000..258ec58 --- /dev/null +++ b/scripts/inner_ear/processing/filter_objects.py @@ -0,0 +1,134 @@ +import os +from pathlib import Path +from tqdm import tqdm + +import h5py +import imageio.v3 as imageio +import numpy as np +from skimage.measure import label +from skimage.segmentation import relabel_sequential + +from synapse_net.file_utils import get_data_path +from parse_table import parse_table, get_data_root, _match_correction_folder, _match_correction_file + + +def _load_segmentation(seg_path): + ext = Path(seg_path).suffix + assert ext in (".h5", ".tif"), ext + if ext == ".tif": + seg = imageio.imread(seg_path) + else: + with h5py.File(seg_path, "r") as f: + seg = f["segmentation"][:] + return seg + + +def _save_segmentation(seg_path, seg): + ext = Path(seg_path).suffix + assert ext in (".h5", ".tif"), ext + if ext == ".tif": + imageio.imwrite(seg_path, seg, compression="zlib") + else: + with h5py.File(seg_path, "a") as f: + f.create_dataset("segmentation", data=seg, 
compression="gzip") + return seg + + +def _filter_n_objects(segmentation, num_objects): + # Create individual objects for all disconnected pieces. + segmentation = label(segmentation) + # Find object ids and sizes, excluding background. + ids, sizes = np.unique(segmentation, return_counts=True) + ids, sizes = ids[1:], sizes[1:] + # Only keep the biggest 'num_objects' objects. + keep_ids = ids[np.argsort(sizes)[::-1]][:num_objects] + segmentation[~np.isin(segmentation, keep_ids)] = 0 + # Relabel the segmentation sequentially. + segmentation, _, _ = relabel_sequential(segmentation) + # Ensure that we have the correct number of objects. + n_ids = int(segmentation.max()) + assert n_ids == num_objects + return segmentation + + +def process_tomogram(folder, num_ribbon, num_pd): + data_path = get_data_path(folder) + output_folder = os.path.join(folder, "automatisch", "v2") + fname = Path(data_path).stem + + correction_folder = _match_correction_folder(folder) + + ribbon_path = _match_correction_file(correction_folder, "ribbon") + if not os.path.exists(ribbon_path): + ribbon_path = os.path.join(output_folder, f"{fname}_ribbon.h5") + assert os.path.exists(ribbon_path), ribbon_path + ribbon = _load_segmentation(ribbon_path) + + pd_path = _match_correction_file(correction_folder, "PD") + if not os.path.exists(pd_path): + pd_path = os.path.join(output_folder, f"{fname}_pd.h5") + assert os.path.exists(pd_path), pd_path + PD = _load_segmentation(pd_path) + + # Filter the ribbon and the PD. 
+ print("Filtering number of ribbons:", num_ribbon) + ribbon = _filter_n_objects(ribbon, num_ribbon) + bkp_path_ribbon = ribbon_path + ".bkp" + os.rename(ribbon_path, bkp_path_ribbon) + _save_segmentation(ribbon_path, ribbon) + + print("Filtering number of PDs:", num_pd) + PD = _filter_n_objects(PD, num_pd) + bkp_path_pd = pd_path + ".bkp" + os.rename(pd_path, bkp_path_pd) + _save_segmentation(pd_path, PD) + + +def filter_objects(table, version): + for i, row in tqdm(table.iterrows(), total=len(table)): + folder = row["Local Path"] + if folder == "": + continue + + # We have to handle the segmentation without ribbon separately. + if row["PD vorhanden? "] == "nein": + continue + + n_pds = row["Anzahl PDs"] + if n_pds == "unklar": + n_pds = 1 + + n_pds = int(n_pds) + n_ribbons = int(row["Anzahl Ribbons"]) + if (n_ribbons == 2 and n_pds == 1): + print(f"The tomogram {folder} has {n_ribbons} ribbons and {n_pds} PDs.") + print("The structure post-processing for this case is not yet implemented and will be skipped.") + continue + + micro = row["EM alt vs. Neu"] + if micro == "beides": + process_tomogram(folder, n_ribbons, n_pds) + + folder_new = os.path.join(folder, "Tomo neues EM") + if not os.path.exists(folder_new): + folder_new = os.path.join(folder, "neues EM") + assert os.path.exists(folder_new), folder_new + process_tomogram(folder_new, n_ribbons, n_pds) + + elif micro == "alt": + process_tomogram(folder, n_ribbons, n_pds) + + elif micro == "neu": + process_tomogram(folder, n_ribbons, n_pds) + + +def main(): + data_root = get_data_root() + table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Übersicht.xlsx") + table = parse_table(table_path, data_root) + version = 2 + filter_objects(table, version) + + +if __name__ == "__main__": + main()