DiamondLightSource
diff --git a/‎httomo_backends/methods_database/packages/backends/httomolib/httomolib.yaml
Lines changed: 8 additions & 0 deletions b/‎httomo_backends/methods_database/packages/backends/httomolib/httomolib.yaml
Lines changed: 8 additions & 0 deletions
diff --git a/‎httomo_backends/methods_database/packages/backends/httomolib/httomolib_modules.yaml
Lines changed: 1 addition & 0 deletions b/‎httomo_backends/methods_database/packages/backends/httomolib/httomolib_modules.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/‎httomo_backends/methods_database/packages/backends/httomolibgpu/httomolibgpu.yaml
Lines changed: 9 additions & 0 deletions b/‎httomo_backends/methods_database/packages/backends/httomolibgpu/httomolibgpu.yaml
Lines changed: 9 additions & 0 deletions
diff --git a/‎httomo_backends/methods_database/packages/backends/httomolibgpu/supporting_funcs/misc/rescale.py
Lines changed: 5 additions & 2 deletions b/‎httomo_backends/methods_database/packages/backends/httomolibgpu/supporting_funcs/misc/rescale.py
Lines changed: 5 additions & 2 deletions
diff --git a/‎httomo_backends/methods_database/packages/backends/httomolibgpu/supporting_funcs/recon/algorithm.py
Lines changed: 230 additions & 17 deletions b/‎httomo_backends/methods_database/packages/backends/httomolibgpu/supporting_funcs/recon/algorithm.py
Lines changed: 230 additions & 17 deletions
@@ -1,4 +1,12 @@
 misc:
+  rescale:
+    rescale_to_int:
+      pattern: all
+      output_dims_change: False
+      implementation: cpu
+      memory_gpu: None
+      save_result_default: False
+      padding: False
   morph:
     data_reducer:
       pattern: all
 
@@ -1,3 +1,4 @@
+- httomolib.misc.rescale
 - httomolib.misc.morph
 - httomolib.misc.images
 - httomolib.misc.segm
 
@@ -154,6 +154,15 @@ recon:
       memory_gpu:
         multiplier: None
         method: module
+    LPRec3d_tomobar:
+      pattern: sinogram
+      output_dims_change: True
+      implementation: gpu_cupy
+      save_result_default: True
+      padding: False
+      memory_gpu:
+        multiplier: None
+        method: module
     SIRT3d_tomobar:
       pattern: sinogram
       output_dims_change: True
 
@@ -18,8 +18,11 @@ def _calc_memory_bytes_rescale_to_int(
         itemsize = 2
     else:
         itemsize = 4
-    safety = 128
+    safety_multiplier = 1.1
     return (
-        int(np.prod(non_slice_dims_shape)) * (dtype.itemsize + itemsize) + safety,
+        int(
+            safety_multiplier
+            * ((np.prod(non_slice_dims_shape)) * (dtype.itemsize + itemsize))
+        ),
         0,
     )
@@ -23,14 +23,16 @@
 import math
 from typing import Tuple
 import numpy as np
-from httomo_backends.cufft import CufftType, cufft_estimate_1d
+from httomo_backends.cufft import CufftType, cufft_estimate_1d, cufft_estimate_2d
 
 __all__ = [
     "_calc_memory_bytes_FBP3d_tomobar",
+    "_calc_memory_bytes_LPRec3d_tomobar",
     "_calc_memory_bytes_SIRT3d_tomobar",
     "_calc_memory_bytes_CGLS3d_tomobar",
     "_calc_output_dim_FBP2d_astra",
     "_calc_output_dim_FBP3d_tomobar",
+    "_calc_output_dim_LPRec3d_tomobar",
     "_calc_output_dim_SIRT3d_tomobar",
     "_calc_output_dim_CGLS3d_tomobar",
 ]
@@ -58,6 +60,10 @@ def _calc_output_dim_FBP3d_tomobar(non_slice_dims_shape, **kwargs):
     return __calc_output_dim_recon(non_slice_dims_shape, **kwargs)
 
 
+def _calc_output_dim_LPRec3d_tomobar(non_slice_dims_shape, **kwargs):
+    """Return the output dimensions for ``LPRec3d_tomobar``.
+
+    Thin wrapper: ``non_slice_dims_shape`` and all keyword arguments are
+    forwarded unchanged to the shared ``__calc_output_dim_recon`` helper, and
+    its result is returned as-is.
+    """
+    return __calc_output_dim_recon(non_slice_dims_shape, **kwargs)
+
+
 def _calc_output_dim_SIRT3d_tomobar(non_slice_dims_shape, **kwargs):
     return __calc_output_dim_recon(non_slice_dims_shape, **kwargs)
 
@@ -71,12 +77,17 @@ def _calc_memory_bytes_FBP3d_tomobar(
     dtype: np.dtype,
     **kwargs,
 ) -> Tuple[int, int]:
-    det_height = non_slice_dims_shape[0]
-    det_width = non_slice_dims_shape[1]
+    if "detector_pad" in kwargs:
+        detector_pad = kwargs["detector_pad"]
+    else:
+        detector_pad = 0
+
+    angles_tot = non_slice_dims_shape[0]
+    det_width = non_slice_dims_shape[1] + 2 * detector_pad
     SLICES = 200  # dummy multiplier+divisor to pass large batch size threshold
 
     # 1. input
-    input_slice_size = np.prod(non_slice_dims_shape) * dtype.itemsize
+    input_slice_size = (angles_tot * det_width) * dtype.itemsize
 
     ########## FFT / filter / IFFT (filtersync_cupy)
 
@@ -85,13 +96,13 @@ def _calc_memory_bytes_FBP3d_tomobar(
         cufft_estimate_1d(
             nx=det_width,
             fft_type=CufftType.CUFFT_R2C,
-            batch=det_height * SLICES,
+            batch=angles_tot * SLICES,
         )
         / SLICES
     )
 
     # 3. RFFT output size (proj_f in code)
-    proj_f_slice = det_height * (det_width // 2 + 1) * np.complex64().itemsize
+    proj_f_slice = angles_tot * (det_width // 2 + 1) * np.complex64().itemsize
 
     # 4. Filter size (independent of number of slices)
     filter_size = (det_width // 2 + 1) * np.float32().itemsize
@@ -101,7 +112,7 @@ def _calc_memory_bytes_FBP3d_tomobar(
         cufft_estimate_1d(
             nx=det_width,
             fft_type=CufftType.CUFFT_C2R,
-            batch=det_height * SLICES,
+            batch=angles_tot * SLICES,
         )
         / SLICES
     )
@@ -117,9 +128,7 @@ def _calc_memory_bytes_FBP3d_tomobar(
 
     # 6. we swap the axes before passing data to Astra in ToMoBAR
     # https://github.com/dkazanc/ToMoBAR/blob/54137829b6326406e09f6ef9c95eb35c213838a7/tomobar/methodsDIR_CuPy.py#L135
-    pre_astra_input_swapaxis_slice = (
-        np.prod(non_slice_dims_shape) * np.float32().itemsize
-    )
+    pre_astra_input_swapaxis_slice = (angles_tot * det_width) * np.float32().itemsize
 
     # 7. astra backprojection will generate an output array
     # https://github.com/dkazanc/ToMoBAR/blob/54137829b6326406e09f6ef9c95eb35c213838a7/tomobar/astra_wrappers/astra_base.py#L524
@@ -145,29 +154,227 @@ def _calc_memory_bytes_FBP3d_tomobar(
     #      so it does not add to the memory overall
 
     # We assume for safety here that one FFT plan is not freed and one is freed
-    tot_memory_bytes = (
+    tot_memory_bytes = int(
         projection_mem_size + filtersync_size - ifftplan_slice_size + recon_output_size
     )
 
     # this account for the memory used for filtration AND backprojection.
     return (tot_memory_bytes, fixed_amount)
 
 
+def _calc_memory_bytes_LPRec3d_tomobar(
+    non_slice_dims_shape: Tuple[int, int],
+    dtype: np.dtype,
+    **kwargs,
+) -> Tuple[int, int]:
+    """Estimate the GPU memory needed by ``LPRec3d_tomobar``.
+
+    The sizes of the arrays and FFT plans used by the ToMoBAR implementation
+    (see the link below) are computed first. They are then "replayed" in
+    allocation/release order through two running counters, one for memory
+    that scales with the number of slices and one for memory that does not,
+    and the peak of each counter is reported.
+
+    Args:
+        non_slice_dims_shape: ``(angles_tot, detector_width)``; index 0 is
+            used as the number of projection angles and index 1 as the
+            horizontal detector size before padding.
+        dtype: dtype of the input data; only ``dtype.itemsize`` is used.
+        **kwargs: Optional ``detector_pad`` (added on both sides of the
+            detector width, defaults to 0) and optional ``angles`` (used to
+            derive the angular range; a 2 * PI range is assumed when absent).
+            All keyword arguments are also forwarded to
+            ``__calc_output_dim_recon``.
+
+    Returns:
+        ``(per_slice_bytes, fixed_bytes)`` as integers: the per-slice peak
+        scaled by 1.05, and the slice-independent peak plus 250 MiB.
+    """
+    # Based on: https://github.com/dkazanc/ToMoBAR/pull/112/commits/4704ecdc6ded3dd5ec0583c2008aa104f30a8a39
+
+    detector_pad = kwargs.get("detector_pad", 0)
+
+    angles_tot = non_slice_dims_shape[0]
+    DetectorsLengthH_prepad = non_slice_dims_shape[1]
+    DetectorsLengthH = non_slice_dims_shape[1] + 2 * detector_pad
+    SLICES = 200  # dummy multiplier+divisor to pass large batch size threshold
+    _CENTER_SIZE_MIN = 192  # must be divisible by 8
+
+    n = DetectorsLengthH
+
+    odd_horiz = False
+    if (n % 2) != 0:
+        n = n + 1  # dealing with the odd horizontal detector size
+        odd_horiz = True
+
+    eps = 1e-4  # accuracy of usfft
+    mu = -np.log(eps) / (2 * n * n)
+    m = int(
+        np.ceil(
+            2 * n * 1 / np.pi * np.sqrt(-mu * np.log(eps) + (mu * n) * (mu * n) / 4)
+        )
+    )
+
+    center_size = 6144
+    center_size = min(center_size, n * 2 + m * 2)
+
+    oversampling_level = 2  # at least 2 or larger required
+    ne = oversampling_level * n
+    padding_m = ne // 2 - n // 2
+
+    if "angles" in kwargs:
+        angles = kwargs["angles"]
+        sorted_theta_cpu = np.sort(angles)
+        theta_full_range = abs(sorted_theta_cpu[angles_tot - 1] - sorted_theta_cpu[0])
+        angle_range_pi_count = 1 + int(np.ceil(theta_full_range / math.pi))
+        angle_range_pi_count += 1  # account for difference from actual algorithm
+    else:
+        angle_range_pi_count = 1 + int(
+            np.ceil(2)
+        )  # assume a 2 * PI projection angle range
+
+    chunk_count = 4
+
+    output_dims = __calc_output_dim_recon(non_slice_dims_shape, **kwargs)
+    if odd_horiz:
+        output_dims = tuple(x + 1 for x in output_dims)
+
+    # ---- sizes of every buffer / plan that takes part in the replay below
+    in_slice_size = (angles_tot * DetectorsLengthH) * dtype.itemsize
+    padded_in_slice_size = angles_tot * n * np.float32().itemsize
+
+    theta_size = angles_tot * np.float32().itemsize
+    filter_size = (n // 2 + 1) * np.float32().itemsize
+    rfftfreq_size = filter_size
+    scaled_filter_size = filter_size
+
+    tmp_p_input_slice = angles_tot * n * np.float32().itemsize
+
+    padded_tmp_p_input_slice = angles_tot * (n + padding_m * 2) * np.float32().itemsize
+    rfft_plan_slice_size = (
+        cufft_estimate_1d(
+            nx=n + padding_m * 2,
+            fft_type=CufftType.CUFFT_R2C,
+            batch=angles_tot * SLICES,
+        )
+        / SLICES
+    )
+    rfft_result_size = angles_tot * (n + padding_m * 2) * np.complex64().itemsize
+    filtered_rfft_result_size = rfft_result_size
+    irfft_plan_slice_size = (
+        cufft_estimate_1d(
+            nx=(n + padding_m * 2),
+            fft_type=CufftType.CUFFT_C2R,
+            batch=angles_tot * SLICES,
+        )
+        / SLICES
+    )
+    irfft_scratch_memory_size = filtered_rfft_result_size * 2
+    irfft_result_size = angles_tot * (n + padding_m * 2) * np.float32().itemsize
+
+    datac_size = angles_tot * n * np.complex64().itemsize / 2
+    fde_size = (2 * m + 2 * n) * (2 * m + 2 * n) * np.complex64().itemsize / 2
+    fft_plan_slice_size = (
+        cufft_estimate_1d(nx=n, fft_type=CufftType.CUFFT_C2C, batch=angles_tot * SLICES)
+        / SLICES
+    )
+    fft_result_size = datac_size
+
+    sorted_theta_indices_size = angles_tot * np.int64().itemsize
+    sorted_theta_size = angles_tot * np.float32().itemsize
+    angle_range_size = (
+        center_size * center_size * (1 + angle_range_pi_count * 2) * np.int16().itemsize
+    )
+
+    recon_output_size = (
+        DetectorsLengthH_prepad * DetectorsLengthH_prepad * np.float32().itemsize
+    )
+    ifft2_plan_slice_size = (
+        cufft_estimate_2d(
+            nx=(2 * m + 2 * n), ny=(2 * m + 2 * n), fft_type=CufftType.CUFFT_C2C
+        )
+        / 2
+    )
+    circular_mask_size = np.prod(output_dims) / 2 * np.int64().itemsize * 4
+    after_recon_swapaxis_slice = recon_output_size
+
+    # ---- running ("current") and peak counters for both memory categories
+    tot_memory_bytes = 0
+    current_tot_memory_bytes = 0
+
+    fixed_amount = 0
+    current_fixed_amount = 0
+
+    def add_to_memory_counters(amount, per_slice: bool):
+        """Apply an allocation (positive) or release (negative) and track the peak."""
+        nonlocal tot_memory_bytes
+        nonlocal current_tot_memory_bytes
+        nonlocal fixed_amount
+        nonlocal current_fixed_amount
+
+        if per_slice:
+            current_tot_memory_bytes += amount
+            tot_memory_bytes = max(tot_memory_bytes, current_tot_memory_bytes)
+        else:
+            current_fixed_amount += amount
+            fixed_amount = max(fixed_amount, current_fixed_amount)
+
+    add_to_memory_counters(in_slice_size, True)
+    add_to_memory_counters(padded_in_slice_size, True)
+
+    add_to_memory_counters(theta_size, False)
+    if center_size >= _CENTER_SIZE_MIN:
+        add_to_memory_counters(sorted_theta_indices_size, False)
+        add_to_memory_counters(sorted_theta_size, False)
+        add_to_memory_counters(angle_range_size, False)
+    add_to_memory_counters(filter_size, False)
+    add_to_memory_counters(rfftfreq_size, False)
+    add_to_memory_counters(scaled_filter_size, False)
+
+    add_to_memory_counters(tmp_p_input_slice, True)
+
+    add_to_memory_counters(rfft_plan_slice_size / chunk_count * 2, True)
+    add_to_memory_counters(irfft_plan_slice_size / chunk_count * 2, True)
+    # add_to_memory_counters(irfft_scratch_memory_size / chunk_count, True)
+    for _ in range(0, chunk_count):
+        add_to_memory_counters(padded_tmp_p_input_slice / chunk_count, True)
+
+        add_to_memory_counters(rfft_result_size / chunk_count, True)
+        add_to_memory_counters(filtered_rfft_result_size / chunk_count, True)
+        add_to_memory_counters(-rfft_result_size / chunk_count, True)
+        add_to_memory_counters(-padded_tmp_p_input_slice / chunk_count, True)
+
+        add_to_memory_counters(irfft_scratch_memory_size / chunk_count, True)
+        add_to_memory_counters(-irfft_scratch_memory_size / chunk_count, True)
+        add_to_memory_counters(irfft_result_size / chunk_count, True)
+        add_to_memory_counters(-filtered_rfft_result_size / chunk_count, True)
+
+        add_to_memory_counters(-irfft_result_size / chunk_count, True)
+
+    add_to_memory_counters(-padded_in_slice_size, True)
+    add_to_memory_counters(-filter_size, False)
+    add_to_memory_counters(-rfftfreq_size, False)
+    add_to_memory_counters(-scaled_filter_size, False)
+
+    add_to_memory_counters(datac_size, True)
+    add_to_memory_counters(fde_size, True)
+    add_to_memory_counters(-tmp_p_input_slice, True)
+    add_to_memory_counters(fft_plan_slice_size, True)
+    add_to_memory_counters(fft_result_size, True)
+    add_to_memory_counters(-datac_size, True)
+
+    add_to_memory_counters(-fft_result_size, True)
+
+    add_to_memory_counters(ifft2_plan_slice_size / chunk_count * 2, True)
+    for _ in range(0, chunk_count):
+        add_to_memory_counters(fde_size / chunk_count, True)
+        add_to_memory_counters(-fde_size / chunk_count, True)
+
+    add_to_memory_counters(recon_output_size, True)
+    add_to_memory_counters(-fde_size, True)
+    add_to_memory_counters(circular_mask_size, False)
+    add_to_memory_counters(after_recon_swapaxis_slice, True)
+
+    # Both peaks are floats at this point (divisions by chunk_count, SLICES
+    # and 2 above). Cast to int so the result honours the declared
+    # Tuple[int, int] contract, as the FBP/SIRT/CGLS estimators in this module
+    # do. The 1.05 factor and the 250 MiB constant are extra headroom added
+    # on top of the modelled peaks.
+    return (
+        int(tot_memory_bytes * 1.05),
+        int(fixed_amount + 250 * 1024 * 1024),
+    )
+
+
 def _calc_memory_bytes_SIRT3d_tomobar(
     non_slice_dims_shape: Tuple[int, int],
     dtype: np.dtype,
     **kwargs,
 ) -> Tuple[int, int]:
-    DetectorsLengthH = non_slice_dims_shape[1]
+
+    if "detector_pad" in kwargs:
+        detector_pad = kwargs["detector_pad"]
+    else:
+        detector_pad = 0
+    anglesnum = non_slice_dims_shape[0]
+    DetectorsLengthH = non_slice_dims_shape[1] + 2 * detector_pad
     # calculate the output shape
     output_dims = _calc_output_dim_SIRT3d_tomobar(non_slice_dims_shape, **kwargs)
 
-    in_data_size = np.prod(non_slice_dims_shape) * dtype.itemsize
+    in_data_size = (anglesnum * DetectorsLengthH) * dtype.itemsize
     out_data_size = np.prod(output_dims) * dtype.itemsize
 
     astra_projection = 2.5 * (in_data_size + out_data_size)
 
-    tot_memory_bytes = 2 * in_data_size + 2 * out_data_size + astra_projection
+    tot_memory_bytes = int(2 * in_data_size + 2 * out_data_size + astra_projection)
     return (tot_memory_bytes, 0)
 
 
@@ -176,14 +383,20 @@ def _calc_memory_bytes_CGLS3d_tomobar(
     dtype: np.dtype,
     **kwargs,
 ) -> Tuple[int, int]:
-    DetectorsLengthH = non_slice_dims_shape[1]
+    if "detector_pad" in kwargs:
+        detector_pad = kwargs["detector_pad"]
+    else:
+        detector_pad = 0
+
+    anglesnum = non_slice_dims_shape[0]
+    DetectorsLengthH = non_slice_dims_shape[1] + 2 * detector_pad
     # calculate the output shape
     output_dims = _calc_output_dim_CGLS3d_tomobar(non_slice_dims_shape, **kwargs)
 
-    in_data_size = np.prod(non_slice_dims_shape) * dtype.itemsize
+    in_data_size = (anglesnum * DetectorsLengthH) * dtype.itemsize
     out_data_size = np.prod(output_dims) * dtype.itemsize
 
     astra_projection = 2.5 * (in_data_size + out_data_size)
 
-    tot_memory_bytes = 2 * in_data_size + 2 * out_data_size + astra_projection
+    tot_memory_bytes = int(2 * in_data_size + 2 * out_data_size + astra_projection)
     return (tot_memory_bytes, 0)
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+- httomolib.misc.rescale`
`1`	`2`	`- httomolib.misc.morph`
`2`	`3`	`- httomolib.misc.images`
`3`	`4`	`- httomolib.misc.segm`