-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
826 lines (645 loc) · 32.1 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
import os
import shutil
import pandas as pd
import numpy as np
import math
import tifffile
import pyclesperanto_prototype as cle # version 0.24.1
import napari_segment_blobs_and_things_with_membranes as nsbatwm # version 0.3.6
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from pathlib import Path
from tqdm import tqdm
from skimage import measure, io
from skimage.measure import regionprops
from skimage.color import rgb2gray
from apoc import ObjectSegmenter, ObjectClassifier
from ultralytics import YOLO
def read_images(directory_path):
"""Reads all the images in the input path and organizes them according to the well_id"""
# Define the directory containing your files
directory_path = Path(directory_path)
# Initialize a dictionary to store the grouped (per well) files
images_per_well = {}
# Iterate through the files in the directory
for file_path in directory_path.glob("*"):
# Check if the path is a file and ends with ".TIF"
if file_path.is_file() and file_path.suffix.lower() == ".tif":
# Get the filename without the extension
filename = file_path.stem
# Remove unwanted files (Plate_R files)
if "Plate_R" in filename:
pass
# Remove maximum projections
elif "_z" not in filename:
pass
else:
# Extract the last part of the filename (e.g., A06f00d0)
last_part = filename.split("_")[-1]
# Get the first three letters to create the group name (well_id)
well_id = last_part[:3]
# Check if the well_id exists in the dictionary, if not, create a new list
if well_id not in images_per_well:
images_per_well[well_id] = []
# Append the file to the corresponding group
images_per_well[well_id].append(str(file_path))
return images_per_well
def min_intensity_projection(image_paths):
"""Takes a collection of image paths containing one z-stack per file and performs minimum intensity projection"""
# Load images from the specified paths
image_collection = io.ImageCollection(image_paths)
# Stack images into a single 3D numpy array
stack = io.concatenate_images(image_collection)
# Perform minimum intensity projection along the z-axis (axis=0)
min_proj = np.min(stack, axis=0)
return min_proj
def save_min_projection_imgs(images_per_well, output_dir="./output/MIN_projections"):
"""Takes a images_per_well from read_images as input, performs minimum intensity projection and saves the resulting image on a per well basis"""
for well_id, files in images_per_well.items():
# Perform minimum intensity projection of the stack stored under well_id key
min_proj = min_intensity_projection(images_per_well[well_id])
# Create a directory to store the tif files if it doesn't exist
Path(output_dir).mkdir(parents=True, exist_ok=True)
# Construct the output file path
output_path = os.path.join(output_dir, f"{well_id}.tif")
# Save the resulting minimum projection
tifffile.imwrite(output_path, min_proj)
def predict_masks(
input_folder,
model_dir="./models/GPU_1px_cle_eroded_labels.pt",
conf_threshold=0.6,
output_dir="./output/predictions",
):
"""Takes a directory containing minimum intensity projections as input and outputs the predicted masks burnt-in on top of the input images"""
# Define the directory containing your files
directory_path = Path(input_folder)
# Load the pre-trained model you want to apply
model = YOLO(model_dir)
# Loop through all input images (minimum intensity projections)
for image_path in directory_path.glob("*.tif"):
# Get the filename without the extension
filename = image_path.stem
results = model.predict(
image_path, conf=conf_threshold
) # Adjust confidence (conf) threshold
im_array = results[0].plot(conf=False, labels=False, boxes=True, line_width=2)
# Create a directory to store the tif files if it doesn't exist
Path(output_dir).mkdir(parents=True, exist_ok=True)
# Construct the output file path
output_path = os.path.join(output_dir, f"{filename}.tif")
# Save the resulting minimum projection
tifffile.imwrite(output_path, im_array)
def extract_stats(
input_folder,
model_dir="./models/GPU_1px_cle_eroded_labels.pt",
conf_threshold=0.6,
output_dir="./output/predictions",
):
"""Takes a directory containing minimum intensity projections, performs inference and returns a df containing regionprops per object per class per well_id"""
# Define the directory containing your files
directory_path = Path(input_folder)
# Load the pre-trained model you want to apply
model = YOLO(model_dir)
# Initialize a list to store the properties for all images
all_props_list = []
# Loop through all input images (minimum intensity projections)
for image_path in directory_path.glob("*.tif"):
# Get the filename without the extension
filename = image_path.stem
# Perform inference on the image stored in image_path
results = model.predict(
image_path, conf=conf_threshold
) # Adjust confidence (conf) threshold
# Access the first position in the resulting YOLO results list
result = results[0]
extracted_masks = result.masks.data
# Extract the boxes, which contain class IDs
detected_boxes = result.boxes.data
# Extract class IDs from the detected boxes
class_labels = detected_boxes[:, -1].int().tolist()
# Initialize a dictionary to hold masks by class
masks_by_class = {name: [] for name in result.names.values()}
# Iterate through the masks and class labels
for mask, class_id in zip(extracted_masks, class_labels):
class_name = result.names[class_id] # Map class ID to class name
masks_by_class[class_name].append(mask.cpu().numpy())
# for class_name, masks in masks_by_class.items():
# print(f"Class Name: {class_name}, Number of Masks: {len(masks)}")
# Initialize a list to store the properties
props_list = []
# Iterate through all classes
for class_name, masks in masks_by_class.items():
# Iterate through the masks for this class
for mask in masks:
# Convert the mask to an integer type if it's not already
mask = mask.astype(int)
# Apply regionprops to the mask
props = regionprops(mask)
# Extract the properties you want (e.g., area, perimeter) and add them to the list
for prop in props:
area = prop.area
area_filled = prop.area_filled
perimeter = prop.perimeter
circularity = min(1, (4 * math.pi * prop.area) / prop.perimeter**2)
eccentricity = prop.eccentricity
label = prop.label
solidity = prop.solidity
# Add other properties as needed
# Append the properties and class name to the list
props_list.append(
{
"well_id": filename,
"Class Name": class_name,
"Area": area,
"Area_filled": area_filled,
"Perimeter": perimeter,
"Circularity": circularity,
"Eccentricity": eccentricity,
"Solidity": solidity,
}
)
# Extend all_props_list with the properties from this image
all_props_list.extend(props_list)
# Convert the aggregated list of dictionaries to a DataFrame
all_props_df = pd.DataFrame(all_props_list)
# Now props_df contains the properties and class names for all regions
return all_props_df
def copy_csv_results(results_directory):
"""Copy all .csv files from each plate folder into a per_organoid_stats folder under results_directory"""
# Create an empty list to store the subdirectories within results_directory
subdirectories = []
# Iterate over subfolders in the results_directory and add them to subdirectories list
for subfolder in results_directory.iterdir():
if subfolder.is_dir():
subdirectories.append(subfolder.name)
# Create the destination folder to copy the .csv files contained in each subdir
try:
csv_results_path = os.path.join(results_directory, "per_organoid_stats")
os.makedirs(csv_results_path)
except FileExistsError:
print(f"Directory already exists: {csv_results_path}")
# Iterate over each subdirectory to scan and copy .csv files contained within
for subdir in subdirectories:
# Build the path to each of the subdirectories
subdirectory_path = Path(results_directory) / subdir
# Check if the subdirectory exists
if subdirectory_path.exists():
# Scan for .csv files in each subdirectory
for file_path in subdirectory_path.glob("*.csv"):
# Copy the file to the destination subfolder
shutil.copy2(file_path, csv_results_path)
def extract_summary_stats(csv_path):
"""Processes a per_organoid .csv results file counting the number of occurrences and calculate the average of each property returning a summary_stats_df"""
# Read the .csv into a pandas DataFrame
df = pd.read_csv(csv_path)
# Grouping and counting occurrences
grouped_counts = df.groupby(["well_id", "Class Name"]).size().unstack(fill_value=0)
# Specifying the columns to calculate the average for
columns_to_average = [
"Area",
"Area_filled",
"Perimeter",
"Circularity",
"Eccentricity",
"Solidity",
]
# Grouping by 'well_id' and 'Class Name' and calculating the mean
average_values = (
df.groupby(["well_id", "Class Name"])[columns_to_average].mean().reset_index()
)
# Joining the counts back to the original dataframe
df_merged = average_values.merge(grouped_counts, on="well_id", how="left")
# Optionally, if you want to create a specific column for each count based on the class in each row
for class_name in grouped_counts.columns:
df_merged[class_name] = df_merged[class_name].where(
df_merged["Class Name"] == class_name, 0
)
# List of columns to update with their maximum value per well_id
columns_to_maximize = ["dead", "differentiated", "undifferentiated"]
# Apply transform to update each specified column with its max value per well_id
for column in columns_to_maximize:
df_merged[column] = df_merged.groupby("well_id")[column].transform("max")
# Renaming the columns
df_merged = df_merged.rename(
columns={
"dead": "nr_dead",
"differentiated": "nr_organoids",
"undifferentiated": "nr_spheroids",
}
)
# Calculate the dead to total ratio
df_merged["dead_ratio"] = df_merged["nr_dead"] / (
df_merged["nr_dead"] + df_merged["nr_organoids"] + df_merged["nr_spheroids"]
)
# Calculate the organoid to spheroid ratio
df_merged["organoid_ratio"] = df_merged["nr_organoids"] / (
df_merged["nr_organoids"] + df_merged["nr_spheroids"]
)
# Calculate the organoid to spheroid ratio
df_merged["spheroid_ratio"] = df_merged["nr_spheroids"] / (
df_merged["nr_organoids"] + df_merged["nr_spheroids"]
)
# Extract the plate_name from the csv_path
csv_path = Path(csv_path)
plate_name = csv_path.stem
# Adding the new column 'plate_name' to the left of df_merged
df_merged.insert(0, "plate_name", plate_name)
return df_merged
def save_summary_stats(results_directory):
"""Extracts summary stats from per object results and concatenates all df into a final_df (+save to disk as .csv) containing all plates data"""
# Define the directory containing the copied per_organoid_stats
csv_results_path = Path(os.path.join(results_directory, "per_organoid_stats"))
# Initialize an empty DataFrame to collect all summary dataframes
final_df = pd.DataFrame()
# Scan for .csv files in each subdirectory
for csv_path in csv_results_path.glob("*.csv"):
summary_df = extract_summary_stats(csv_path)
# Append the summary_df to the final_df
final_df = pd.concat([final_df, summary_df], ignore_index=True)
# Create the summary_stats directory if it doesn't exist
summary_stats_directory = Path(results_directory, "summary_stats")
summary_stats_directory.mkdir(parents=True, exist_ok=True)
# Save the final_df as a .csv file under the summary_stats directory
final_csv_path = summary_stats_directory / "summary_stats.csv"
final_df.to_csv(final_csv_path, index=False)
def find_focus(images_per_well):
"""Processes all the images and extract the number of organoids in focus from each image"""
nr_infocus_organoids = {}
for well_id in tqdm(images_per_well):
imgs_to_process = images_per_well[well_id]
for input_img in imgs_to_process:
# Check if the well_id exists in the dictionary, if not, create a new list
if well_id not in nr_infocus_organoids:
nr_infocus_organoids[well_id] = []
# Load one RGB image and transform it into grayscale (if needed) for APOC
rgb_img = tifffile.imread(input_img, is_ome=False)
if len(rgb_img.shape) < 3:
img = rgb_img
elif rgb_img.shape[2] == 3:
img = rgb2gray(rgb_img)
else:
print(
"Modify the loader to accommodate different file formats",
rgb_img.shape,
)
# Apply object segmenter from APOC
try:
segmenter = ObjectSegmenter(opencl_filename="./ObjectSegmenter.cl")
result = segmenter.predict(image=img)
except IndexError:
segmenter = ObjectSegmenter(
opencl_filename="./pretrained_APOC/ObjectSegmenter.cl"
)
result = segmenter.predict(image=img)
# Closing some holes in the organoid labels
closed_labels = cle.closing_labels(result, None, radius=4.0)
# Exclude small labels, cutout in pixel area seems to be below 1000px
exclude_small = cle.exclude_small_labels(closed_labels, None, 1000.0)
exclude_small = np.array(
exclude_small, dtype=np.int32
) # Change dtype of closed labels to feed array into nsbatwm.split
# Splitting organoids into a binary mask
split_organoids = nsbatwm.split_touching_objects(exclude_small, sigma=10.0)
# Connected component (cc) labeling
cc_split_organoids = nsbatwm.connected_component_labeling(
split_organoids, False
)
# Apply object classifier from APOC
try:
classifier = ObjectClassifier(opencl_filename="./ObjectClassifier.cl")
result = classifier.predict(labels=cc_split_organoids, image=img)
except AttributeError:
classifier = ObjectClassifier(
opencl_filename="./pretrained_APOC/ObjectClassifier.cl"
)
result = classifier.predict(labels=cc_split_organoids, image=img)
# Convert the resulting .cle image into a np.array to count objects within each class
image_array = np.array(result, dtype=np.int8)
# Create masks for each class
background_mask = image_array == 0
out_of_focus_mask = image_array == 1
in_focus_mask = image_array == 2
# Label connected components in each mask
background_labels = measure.label(background_mask, connectivity=2)
out_of_focus_labels = measure.label(out_of_focus_mask, connectivity=2)
in_focus_labels = measure.label(in_focus_mask, connectivity=2)
# Calculate the number of objects in each class
num_background_objects = np.max(background_labels)
num_out_of_focus_objects = np.max(out_of_focus_labels)
num_in_focus_objects = np.max(in_focus_labels)
# print(f"Number of Background Objects: {num_background_objects}")
# print(f"Number of Out-of-Focus Objects: {num_out_of_focus_objects}")
# print(f"Number of In-Focus Objects: {num_in_focus_objects}")
try:
in_focus_percentage = (
num_in_focus_objects
/ (num_in_focus_objects + num_out_of_focus_objects)
) * 100
# print(f"Percentage of In-Focus Objects: {in_focus_percentage}")
except ZeroDivisionError:
in_focus_percentage = 0
nr_infocus_organoids[well_id].append(in_focus_percentage)
return nr_infocus_organoids
def find_highest_infocus(nr_infocus_organoids):
"""Finds the z-stack with the most organoids in-focus in the dictionary containing said data"""
max_index_dict = (
{}
) # Create a new dictionary to store the maximum index for each key
for key, values in nr_infocus_organoids.items():
max_value = max(values) # Find the maximum value within the list
max_index = values.index(max_value) # Get the index of the maximum value
max_index_dict[key] = max_index # Store the index in the new dictionary
return max_index_dict
def store_imgs(
images_per_well, max_index_dict, output_dir="./output/in_focus_organoids"
):
"""Stores images in focus"""
# Create a directory to store the tif files if it doesn't exist
Path(output_dir).mkdir(parents=True, exist_ok=True)
# Iterate through keys in max_index_dict
for key, index in max_index_dict.items():
if key in images_per_well:
file_paths = images_per_well[key]
if 0 <= index < len(file_paths):
file_path = file_paths[index]
# Extract the file name (without the path), same as key
file_name = Path(file_path).name
# Construct the output file path
output_path = os.path.join(output_dir, f"{key}.tif")
# Copy the file using shutil.copy
shutil.copy(file_path, output_path)
# print(f"Saved {key}.tif to {output_path}")
# else:
# print(f"Invalid index for key {key}: {index}")
# else:
# print(f"Key {key} not found in images_per_well dictionary")
def plot_plate(
resolution, output_path, img_folder_path, show_fig=True, colormap="gray"
):
"""Plot images in a grid-like fashion according to the well_id position in the plate"""
# Initialize a dictionary to store images by rows (letters)
image_dict = {}
# Iterate through the image files in the folder
for filename in os.listdir(img_folder_path):
if filename.endswith(".tif"):
# Extract the first letter and the number from the filename
first_letter = filename[0]
number = int(filename[1:3])
# Create a dictionary entry for the first letter if it doesn't exist
if first_letter not in image_dict:
image_dict[first_letter] = {}
# Create a dictionary entry for the number if it doesn't exist
if number not in image_dict[first_letter]:
image_dict[first_letter][number] = []
# Append the image filename to the corresponding number
image_dict[first_letter][number].append(filename)
# Sort the dictionary by keys (letters) and nested dictionary by keys (numbers)
sorted_image_dict = {
letter: dict(sorted(images.items()))
for letter, images in sorted(image_dict.items())
}
# Calculate the number of rows based on the number of letters
num_rows = len(sorted_image_dict)
# Calculate the number of columns based on the maximum number
num_cols = max(max(images.keys()) for images in sorted_image_dict.values())
# Calculate the figsize based on the number of columns and rows
fig_width = num_cols * 3 # Adjust the multiplier as needed
fig_height = num_rows * 2.5 # Adjust the multiplier as needed
# Create a subplot for each image, using None for empty subplots
if num_rows == 1:
fig, axes = plt.subplots(
1, num_cols, figsize=(fig_width, fig_height), sharex=True, sharey=True
)
elif num_cols == 1:
fig, axes = plt.subplots(
num_rows, 1, figsize=(fig_width, fig_height), sharex=True, sharey=True
)
else:
fig, axes = plt.subplots(
num_rows,
num_cols,
figsize=(fig_width, fig_height),
sharex=True,
sharey=True,
)
for i, (letter, images) in enumerate(sorted_image_dict.items()):
for j, (number, filenames) in enumerate(images.items()):
if filenames:
image_filename = filenames[0] # Use the first filename in the list
image_path = os.path.join(img_folder_path, image_filename)
image = tifffile.imread(image_path, is_ome=False)
if num_rows == 1:
axes[j].imshow(image, cmap=colormap)
axes[j].set_title(f"{letter}{number:02d}")
axes[j].axis("off")
elif num_cols == 1:
axes[i].imshow(image, cmap=colormap)
axes[i].set_title(f"{letter}{number:02d}")
axes[i].axis("off")
else:
axes[i, j].imshow(image, cmap=colormap)
axes[i, j].set_title(f"{letter}{number:02d}")
axes[i, j].axis("off")
else:
# If there are no images for a specific letter-number combination, remove the empty subplot
if num_rows == 1:
fig.delaxes(axes[j])
elif num_cols == 1:
fig.delaxes(axes[i])
else:
fig.delaxes(axes[i, j])
# Adjust the spacing and set aspect ratio to be equal
plt.subplots_adjust(wspace=0.1, hspace=0.1)
# Save the plot at a higher resolution
plt.savefig(output_path, format="tif", dpi=resolution, bbox_inches="tight")
# If False plt.show() is not run to avoid loop stop upon display
if show_fig:
# Show the plot (optional)
plt.show()
def segment_organoids(in_focus_organoids):
"""Processes individual z-stacks inside a folder and returns a dictionary containing organoid object masks stored as StackViewNDArray arrays"""
segmented_organoids = {}
images = list(in_focus_organoids.glob("*.TIF"))
# Iterate through the files in the directory
for input_img in tqdm(images):
# Get the filename without the extension
filename = input_img.stem
# Load one RGB image and transform it into grayscale (if needed) for APOC
rgb_img = tifffile.imread(input_img, is_ome=False)
if len(rgb_img.shape) < 3:
img = rgb_img
elif rgb_img.shape[2] == 3:
img = rgb2gray(rgb_img)
else:
print(
"Modify the loader to accommodate different file formats",
rgb_img.shape,
)
# Apply object segmenter from APOC
try:
segmenter = ObjectSegmenter(opencl_filename="./ObjectSegmenter.cl")
result = segmenter.predict(image=img)
except IndexError:
segmenter = ObjectSegmenter(
opencl_filename="./pretrained_APOC/ObjectSegmenter.cl"
)
result = segmenter.predict(image=img)
# Closing some holes in the organoid labels
closed_labels = cle.closing_labels(result, None, radius=4.0)
# Exclude small labels, cutout in pixel area seems to be below 1000px
exclude_small = cle.exclude_small_labels(closed_labels, None, 1000.0)
exclude_small = np.array(
exclude_small, dtype=np.int32
) # Change dtype of closed labels to feed array into nsbatwm.split
# Splitting organoids into a binary mask
split_organoids = nsbatwm.split_touching_objects(exclude_small, sigma=10.0)
# Connected component (cc) labeling
cc_split_organoids = nsbatwm.connected_component_labeling(
split_organoids, False
)
# Remove labels on edges
edge_removed = nsbatwm.remove_labels_on_edges(cc_split_organoids)
segmented_organoids[filename] = edge_removed
return segmented_organoids
def segment_in_focus_organoids(in_focus_organoids):
"""Processes individual z-stacks inside a folder and returns a dictionary containing in-focus/out-of-focus organoid object masks stored as StackViewNDArray arrays"""
focus_masks = {}
images = list(in_focus_organoids.glob("*.TIF"))
# Iterate through the files in the directory
for input_img in tqdm(images):
# Get the filename without the extension
filename = input_img.stem
# Load one RGB image and transform it into grayscale (if needed) for APOC
rgb_img = tifffile.imread(input_img, is_ome=False)
if len(rgb_img.shape) < 3:
img = rgb_img
elif rgb_img.shape[2] == 3:
img = rgb2gray(rgb_img)
else:
print(
"Modify the loader to accommodate different file formats",
rgb_img.shape,
)
# Apply object segmenter from APOC
try:
segmenter = ObjectSegmenter(opencl_filename="./ObjectSegmenter.cl")
result = segmenter.predict(image=img)
except IndexError:
segmenter = ObjectSegmenter(
opencl_filename="./pretrained_APOC/ObjectSegmenter.cl"
)
result = segmenter.predict(image=img)
# Closing some holes in the organoid labels
closed_labels = cle.closing_labels(result, None, radius=4.0)
# Exclude small labels, cutout in pixel area seems to be below 1000px
exclude_small = cle.exclude_small_labels(closed_labels, None, 1000.0)
exclude_small = np.array(
exclude_small, dtype=np.int32
) # Change dtype of closed labels to feed array into nsbatwm.split
# Splitting organoids into a binary mask
split_organoids = nsbatwm.split_touching_objects(exclude_small, sigma=10.0)
# Connected component (cc) labeling
cc_split_organoids = nsbatwm.connected_component_labeling(
split_organoids, False
)
# Remove labels on edges
edge_removed = nsbatwm.remove_labels_on_edges(cc_split_organoids)
# Apply object classifier from APOC
try:
classifier = ObjectClassifier(opencl_filename="./ObjectClassifier.cl")
result = classifier.predict(labels=edge_removed, image=img)
except AttributeError:
classifier = ObjectClassifier(
opencl_filename="./pretrained_APOC/ObjectClassifier.cl"
)
result = classifier.predict(labels=edge_removed, image=img)
focus_masks[filename] = result
return focus_masks
def save_object_mask(segmented_organoids, output_directory):
# Iterate through the dictionary and save each array as a .tif file
for filename, object_mask in segmented_organoids.items():
# Create a filename for the .tif file within the output_directory
save_filename = os.path.join(output_directory, f"{filename}.tif")
# Ensure the output directory exists
os.makedirs(os.path.dirname(save_filename), exist_ok=True)
# Save the cc_split_organoid array as a grayscale .tif file
tifffile.imwrite(
save_filename,
object_mask,
photometric="minisblack",
dtype=object_mask.dtype,
)
def random_cmap():
np.random.seed(42)
cmap = ListedColormap(np.random.rand(256, 4))
# value 0 should just be transparent
cmap.colors[:, 3] = 0.5
cmap.colors[0, :] = 1
cmap.colors[0, 3] = 0
# if image is a mask, color (last value) should be red
cmap.colors[-1, 0] = 1
cmap.colors[-1, 1:3] = 0
return cmap
# --------------- PARALLELIZATION FUNCTIONS ---------------- #
def process_folder(folder, parent_folder, username):
"""Reads a folder, scans all z-stacks per well and stores a copy of the in-focus stack"""
directory_path = parent_folder.joinpath(folder)
print(directory_path)
# The following function will read all the images contained within the directory_path above
# and store them grouped by well_id.
images_per_well = read_images(directory_path)
# Compute the nr of organoids in focus per well
nr_infocus_organoids = find_focus(images_per_well)
# Store a .csv copy of the max_index_dict containing the percentages of organoids in focus
# Create a Pandas DataFrame
df = pd.DataFrame(nr_infocus_organoids)
# Specify the output directory path
directory = Path(f"./output/{username}")
output_directory = directory.joinpath(folder)
# Check if the output directory already exists and create it if it does not
try:
os.makedirs(output_directory)
print(f"Directory '{output_directory}' created successfully.")
except FileExistsError:
print(f"Directory '{output_directory}' already exists.")
# Save the DataFrame as a .csv file
df.to_csv(
f"./{str(output_directory)}/Percentage_in_focus_per_well_{str(folder)}.csv",
index=False,
)
# Finding the z-stack with the most organoids in-focus
max_index_dict = find_highest_infocus(nr_infocus_organoids)
# In case one of the wells has no detectable organoids in focus, this will substitute the focal plane
# with an average of all focal planes in the plate
# Calculate the average of all values in the dictionary
average_value = round(sum(max_index_dict.values()) / len(max_index_dict))
# Substitute the 0 values for the average focal plane
for well, in_focus_stack in max_index_dict.items():
if in_focus_stack == 0:
max_index_dict[well] = average_value
# Storing a copy of each z-stack with the most organoids in focus
store_imgs(
images_per_well,
max_index_dict,
output_dir=f"{output_directory}/in_focus_organoids",
)
def save_organoid_segmentation(in_focus_organoids):
"""Reads a folder containing grayscale images, segments the organoids and saves the resulting masks in a new folder"""
# segment_organoids() returns a dictionary where the organoid labels are stored under each well_id key
segmented_organoids = segment_organoids(Path(in_focus_organoids))
# Define the directory path where you want to save the segmented organoid masks
# Split the in_focus_organoids path to obtain the folder that is one level up (head)
head, tail = os.path.split(in_focus_organoids)
output_directory = os.path.join(head, "segmented_organoids")
# Save the segmented organoid masks contained in segmented_organoids in the above defined output directory
save_object_mask(segmented_organoids, output_directory)
def save_focus_segmentation(in_focus_organoids):
"""Reads a folder containing grayscale images, segments the organoids and saves the resulting masks in a new folder"""
# segment_in_focus_organoids() returns a dictionary where the organoid labels are stored under each well_id key
focus_classified_organoids = segment_in_focus_organoids(Path(in_focus_organoids))
# Define the directory path where you want to save the segmented organoid masks
# Split the in_focus_organoids path to obtain the folder that is one level up (head)
head, tail = os.path.split(in_focus_organoids)
output_directory = os.path.join(head, "in_out_focus_masks")
# Save the segmented organoid masks contained in segmented_organoids in the above defined output directory
save_object_mask(focus_classified_organoids, output_directory)