diff --git a/benchmarks/test_pyts.py b/benchmarks/test_pyts.py
index 2da3c57..09a3d96 100644
--- a/benchmarks/test_pyts.py
+++ b/benchmarks/test_pyts.py
@@ -34,9 +34,7 @@ def prepare(data: SequentialDataset, length: int) -> DataSplit:
     return X_pad[:, 0], data.y
 
 
-def multivariate(
-    *, train_data: DataSplit, test_data: DataSplit, n_jobs: int
-) -> None:
+def run(*, train_data: DataSplit, test_data: DataSplit, n_jobs: int) -> None:
     """Fit and predict the classifier."""
     # initialize model
     clf = KNeighborsClassifier(
@@ -70,7 +68,7 @@ def multivariate(
 )
 
 benchmark = timeit.timeit(
-    "func(train_data=train_data, test_data=test_data, n_jobs=args.n_jobs)",
+    "run(train_data=train_data, test_data=test_data, n_jobs=args.n_jobs)",
     globals=locals(),
     number=args.number,
 )
diff --git a/benchmarks/test_sequentia.py b/benchmarks/test_sequentia.py
index 8ba7e45..521d222 100644
--- a/benchmarks/test_sequentia.py
+++ b/benchmarks/test_sequentia.py
@@ -21,7 +21,7 @@
 random_state: np.random.RandomState = np.random.RandomState(0)
 
 
-def multivariate(
+def run(
     *, train_data: SequentialDataset, test_data: SequentialDataset, n_jobs: int
 ) -> None:
     """Fit and predict the classifier."""
@@ -52,7 +52,7 @@ def multivariate(
 train_data, test_data = load_dataset(multivariate=False)
 
 benchmark = timeit.timeit(
-    "func(train_data=train_data, test_data=test_data, n_jobs=args.n_jobs)",
+    "run(train_data=train_data, test_data=test_data, n_jobs=args.n_jobs)",
     globals=locals(),
     number=args.number,
 )
diff --git a/benchmarks/test_sktime.py b/benchmarks/test_sktime.py
index e335a13..7fc5297 100644
--- a/benchmarks/test_sktime.py
+++ b/benchmarks/test_sktime.py
@@ -56,9 +56,7 @@ def prepare(data: SequentialDataset) -> DataSplit:
     return X_pd, data.y
 
 
-def multivariate(
-    *, train_data: DataSplit, test_data: DataSplit, n_jobs: int
-) -> None:
+def run(*, train_data: DataSplit, test_data: DataSplit, n_jobs: int) -> None:
     """Fit and predict the classifier."""
     # initialize model
     clf = KNeighborsTimeSeriesClassifier(
@@ -89,7 +87,7 @@ def multivariate(
 train_data, test_data = prepare(train_data), prepare(test_data)
 
 benchmark = timeit.timeit(
-    "func(train_data=train_data, test_data=test_data, n_jobs=args.n_jobs)",
+    "run(train_data=train_data, test_data=test_data, n_jobs=args.n_jobs)",
     globals=locals(),
     number=args.number,
 )
diff --git a/sequentia/models/hmm/classifier.py b/sequentia/models/hmm/classifier.py
index 5cfe488..a6bd06d 100644
--- a/sequentia/models/hmm/classifier.py
+++ b/sequentia/models/hmm/classifier.py
@@ -366,7 +366,7 @@ def fit(
         self.models = dict(
             zip(
                 self.classes_,
-                joblib.Parallel(n_jobs=n_jobs, max_nbytes=None)(
+                joblib.Parallel(n_jobs=n_jobs, mmap_mode="r+")(
                     joblib.delayed(self.models[c].fit)(
                         X_c, lengths=lengths_c
                     )
@@ -537,7 +537,7 @@ def predict_scores(
         n_jobs = _multiprocessing.effective_n_jobs(self.n_jobs, x=lengths)
         chunk_idxs = np.array_split(_data.get_idxs(lengths), n_jobs)
         return np.concatenate(
-            joblib.Parallel(n_jobs=n_jobs, max_nbytes=None)(
+            joblib.Parallel(n_jobs=n_jobs, mmap_mode="r+")(
                 joblib.delayed(self._compute_scores_chunk)(X, idxs=idxs)
                 for idxs in chunk_idxs
             )
diff --git a/sequentia/models/knn/base.py b/sequentia/models/knn/base.py
index f09dda9..b1b34ce 100644
--- a/sequentia/models/knn/base.py
+++ b/sequentia/models/knn/base.py
@@ -143,7 +143,7 @@ def compute_distance_matrix(
 
         # multiprocessed DTW calculation
        return np.vstack(
-            joblib.Parallel(n_jobs=n_jobs, max_nbytes=None)(
+            joblib.Parallel(n_jobs=n_jobs, mmap_mode="r+")(
                 joblib.delayed(self._distance_matrix_row_chunk)(
                     row_idxs, col_chunk_idxs, X, n_jobs, dtw
                 )
@@ -245,7 +245,7 @@ def _distance_matrix_row_chunk(
             columns.
         """
         return np.hstack(
-            joblib.Parallel(n_jobs=n_jobs, max_nbytes=None)(
+            joblib.Parallel(n_jobs=n_jobs, mmap_mode="r+")(
                 joblib.delayed(self._distance_matrix_row_col_chunk)(
                     col_idxs, row_idxs, X, dtw
                 )
diff --git a/sequentia/models/knn/classifier.py b/sequentia/models/knn/classifier.py
index e67e721..5774cd8 100644
--- a/sequentia/models/knn/classifier.py
+++ b/sequentia/models/knn/classifier.py
@@ -398,7 +398,7 @@ def _find_max_labels(
         n_jobs = _multiprocessing.effective_n_jobs(self.n_jobs, x=scores)
         score_chunks = np.array_split(scores, n_jobs)
         return np.concatenate(
-            joblib.Parallel(n_jobs=n_jobs, max_nbytes=None)(
+            joblib.Parallel(n_jobs=n_jobs, mmap_mode="r+")(
                 joblib.delayed(self._find_max_labels_chunk)(score_chunk)
                 for score_chunk in score_chunks
             )
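
Note on the joblib.Parallel change above: per joblib's documented semantics,
max_nbytes=None disables the automatic memory mapping of large numpy arrays,
so each worker receives its own pickled copy of the inputs, while
mmap_mode="r+" keeps memmapping enabled (under the default max_nbytes="1M"
threshold) and opens the shared arrays read-write in the workers. The sketch
below contrasts the two settings; the row_sum helper and the array size are
illustrative assumptions, not part of this diff:

    import joblib
    import numpy as np

    def row_sum(x: np.ndarray, i: int) -> float:
        # illustrative worker task: with memmapping enabled, each worker
        # reads its row from a shared memory-mapped file rather than from
        # a private pickled copy
        return float(x[i].sum())

    x = np.random.rand(1_000, 1_000)  # ~8 MB, above the default "1M" threshold

    # old setting: memmapping disabled, x is serialized to every worker
    joblib.Parallel(n_jobs=2, max_nbytes=None)(
        joblib.delayed(row_sum)(x, i) for i in range(4)
    )

    # new setting: x is dumped to a temporary memmap once and opened
    # read-write ("r+") by each worker
    joblib.Parallel(n_jobs=2, mmap_mode="r+")(
        joblib.delayed(row_sum)(x, i) for i in range(4)
    )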