Skip to content

Commit 919ff4f

Browse files
committed
avoid materialization of dependencies
1 parent 9974bb0 commit 919ff4f

File tree

1 file changed

+5
-21
lines changed

1 file changed

+5
-21
lines changed

distributed/scheduler.py

Lines changed: 5 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -4619,7 +4619,7 @@ async def add_nanny(self, comm: Comm, address: str) -> None:
46194619
def _match_graph_with_tasks(
46204620
self,
46214621
dsk: dict[Key, T_runspec],
4622-
dependencies: dict[Key, set[Key]],
4622+
dependencies: DependenciesMapping,
46234623
keys: set[Key],
46244624
) -> set[Key]:
46254625
n = -1
@@ -4634,7 +4634,6 @@ def _match_graph_with_tasks(
46344634
lost_keys.add(k)
46354635
logger.info("User asked for computation on lost data, %s", k)
46364636
dsk.pop(k, None)
4637-
del dependencies[k]
46384637
if k in keys:
46394638
keys.remove(k)
46404639
del deps
@@ -4668,15 +4667,14 @@ def _match_graph_with_tasks(
46684667
stack.append(dep)
46694668
for anc in done:
46704669
dsk.pop(anc, None)
4671-
dependencies.pop(anc, None)
46724670
return lost_keys
46734671

46744672
def _create_taskstate_from_graph(
46754673
self,
46764674
*,
46774675
start: float,
46784676
dsk: dict[Key, T_runspec],
4679-
dependencies: dict,
4677+
dependencies: DependenciesMapping,
46804678
keys: set[Key],
46814679
ordered: dict[Key, int],
46824680
client: str,
@@ -4878,19 +4876,7 @@ async def update_graph(
48784876
logger.debug("Materialization done. Got %i tasks.", len(dsk))
48794877
del graph
48804878
if not internal_priority:
4881-
# Removing all non-local keys before calling order()
4882-
dsk_keys = set(
4883-
dsk
4884-
) # intersection() of sets is much faster than dict_keys
4885-
stripped_deps = {
4886-
k: v.intersection(dsk_keys)
4887-
for k, v in dependencies.items()
4888-
if k in dsk_keys
4889-
}
4890-
4891-
internal_priority = await offload(
4892-
dask.order.order, dsk=dsk, dependencies=stripped_deps
4893-
)
4879+
internal_priority = await offload(dask.order.order, dsk=dsk)
48944880
ordering_done = time()
48954881
logger.debug("Ordering done.")
48964882

@@ -9381,7 +9367,7 @@ def _materialize_graph(
93819367
global_annotations: dict[str, Any],
93829368
validate: bool,
93839369
keys: set[Key],
9384-
) -> tuple[dict[Key, T_runspec], dict[Key, set[Key]], dict[str, dict[Key, Any]]]:
9370+
) -> tuple[dict[Key, T_runspec], DependenciesMapping, dict[str, dict[Key, Any]]]:
93859371
dsk: dict = ensure_dict(graph)
93869372
if validate:
93879373
for k in dsk:
@@ -9410,7 +9396,5 @@ def _materialize_graph(
94109396
logger.debug(
94119397
"Removing aliases. Started with %i and got %i left", len(dsk2), len(dsk3)
94129398
)
9413-
# FIXME: There should be no need to fully materialize and copy this but some
9414-
# sections in the scheduler are mutating it.
9415-
dependencies = {k: set(v) for k, v in DependenciesMapping(dsk3).items()}
9399+
dependencies = DependenciesMapping(dsk3)
94169400
return dsk3, dependencies, annotations_by_type

0 commit comments

Comments
 (0)