@@ -71,7 +71,9 @@ def linear_regression(
71
71
# what are effectively OLS residuals rather than matrix inverse
72
72
# to avoid need for MxM array; additionally, dask.lstsq fails
73
73
# with numpy arrays
74
- XLP = XL - XC @ da.linalg.lstsq(XC, XL)[0]
74
+ LS = XC @ da.linalg.lstsq(XC, XL)[0]
75
+ assert XL.chunksize == LS.chunksize
76
+ XLP = XL - LS
75
77
assert XLP.shape == (n_obs, n_loop_covar)
76
78
YP = Y - XC @ da.linalg.lstsq(XC, Y)[0]
77
79
assert YP.shape == (n_obs, n_outcome)
@@ -213,8 +215,10 @@ def gwas_linear_regression(
213
215
# Note: dask qr decomp (used by lstsq) requires no chunking in one
214
216
# dimension, and because dim 0 will be far greater than the number
215
217
# of covariates for the large majority of use cases, chunking
216
- # should be removed from dim 1
217
- X = X.rechunk((None, -1))
218
+ # should be removed from dim 1. Also, dim 0 should have the same chunking
219
+ # as G dim 1, so that when XLP is computed in linear_regression() the
220
+ # two arrays have the same chunking.
221
+ X = X.rechunk((G.chunksize[1], -1))
218
222
219
223
Y = da.asarray(concat_2d(ds[list(traits)], dims=("samples", "traits")))
220
224
# Like covariates, traits must also be tall-skinny arrays
0 commit comments