Skip to content

Commit

Permalink
Remove ambiguity of calling ops.replicate with a ShardedTensor
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex-Vasile committed Feb 25, 2025
1 parent 0a12c71 commit b549558
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 7 deletions.
8 changes: 4 additions & 4 deletions sharktank/sharktank/ops/sharded_impls.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,21 +955,21 @@ def repeat_replicated(input: ReplicatedTensor, *sizes: List[int]) -> ReplicatedT


@replicate.override(ReplicatedTensor)
def replicate_replicated(
    input: ReplicatedTensor,
    *,
    count: int,
    devices: None,
    devices_pinned: None,
) -> ReplicatedTensor:
    """Replicate an already-replicated tensor (a no-op after validation).

    Args:
        input: The replicated tensor to return.
        count: Required number of shards; must equal ``input.shard_count``.
        devices: Unused here. Annotated ``None`` because the dispatching
            trampoline asserts it is ``None`` for inputs that are not a
            plain ``torch.Tensor``.
        devices_pinned: Unused here; same expectation as ``devices``.

    Returns:
        ``input`` itself, unchanged.

    Raises:
        ValueError: If ``input.shard_count`` differs from ``count``.
    """
    if input.shard_count != count:
        raise ValueError(f"Number of shards not equal ({input.shard_count} != {count})")
    # Already replicated with the requested shard count: nothing to do.
    return input


@replicate.override(SplitPrimitiveTensor)
def replicate_split(
    input: SplitPrimitiveTensor,
    *,
    count: int,
    devices: None,
    devices_pinned: None,
) -> ReplicatedTensor:
    """Replicate a split tensor by gathering its shards.

    Args:
        input: The split tensor to replicate.
        count: Required number of shards; must equal ``input.shard_count``.
        devices: Unused here. Annotated ``None`` because the dispatching
            trampoline asserts it is ``None`` for inputs that are not a
            plain ``torch.Tensor``.
        devices_pinned: Unused here; same expectation as ``devices``.

    Returns:
        The result of ``all_gather(input)``.

    Raises:
        ValueError: If ``input.shard_count`` differs from ``count``.
    """
    if input.shard_count != count:
        raise ValueError(f"Number of shards not equal ({input.shard_count} != {count})")
    # No device arguments are forwarded; presumably all_gather takes the
    # placement from `input` itself -- confirm against all_gather.
    return all_gather(input)


@replicate.override(UnreducedTensor)
def replicate_unreduced(
    input: UnreducedTensor,
    *,
    count: int,
    devices: None,
    devices_pinned: None,
) -> ReplicatedTensor:
    """Replicate an unreduced tensor by reducing across its shards.

    Args:
        input: The unreduced tensor to replicate.
        count: Required number of shards; must equal ``input.shard_count``.
        devices: Unused here. Annotated ``None`` because the dispatching
            trampoline asserts it is ``None`` for inputs that are not a
            plain ``torch.Tensor``.
        devices_pinned: Unused here; same expectation as ``devices``.

    Returns:
        The result of ``all_reduce(input)``.

    Raises:
        ValueError: If ``input.shard_count`` differs from ``count``.
    """
    if input.shard_count != count:
        raise ValueError(f"Number of shards not equal ({input.shard_count} != {count})")
    # No device arguments are forwarded; presumably all_reduce takes the
    # placement from `input` itself -- confirm against all_reduce.
    return all_reduce(input)
Expand Down
13 changes: 10 additions & 3 deletions sharktank/sharktank/ops/signatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -815,7 +815,7 @@ def _repeat_trampoline(


@overridable
def replicate(input: AnyTensor, count: int,) -> ShardedTensor:
def replicate(input: AnyTensor, count: int, devices: Tuple[int] | None, devices_pinned: bool | None) -> ShardedTensor:
"""Replicate across devices.
Possibly reshards if required."""
Expand All @@ -824,10 +824,17 @@ def replicate(input: AnyTensor, count: int,) -> ShardedTensor:

@replicate.trampoline
def _replicate_trampoline(
d: SignatureDispatcher, input: AnyTensor, count: int, devices: Tuple[int] | None = None, devices_pinned: bool = False
d: SignatureDispatcher, input: AnyTensor, count: int, devices: Tuple[int] | None = None, devices_pinned: bool | None = None
) -> ShardedTensor:
tensors = (input,)
devices = devices if devices is not None else tuple(range(count))
if isinstance(input, torch.Tensor):
devices = devices if devices is not None else tuple(range(count))
devices_pinned = devices_pinned if devices_pinned is not None else False
else:
# TODO: Is this correct? Will use data on `input`.
assert devices is None
assert devices_pinned is None

for override in d.find_overrides(tensors):
result = override(input, count=count, devices=devices, devices_pinned=devices_pinned)
if result is not NotImplemented:
Expand Down

0 comments on commit b549558

Please sign in to comment.