Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .meta/mast/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,6 @@ async def main(cfg: DictConfig, mode: str = "detached", extra_args: list = None)
extra_args=extra_args or [],
)
await launcher.launch_mast_job()
print(f"MAST job {launcher.job_name} launched successfully with client role.")
print("The client is running inside MAST and will execute the training.")
else:
# In remote mode, we're already running inside MAST, so mount directory, init provisioner and run training
mount_mnt_directory("/mnt/wsfuse")
Expand Down Expand Up @@ -97,7 +95,6 @@ def _main(cfg):
# Override job name from CLI
if args.job_name:
cfg[JOB_NAME_KEY] = args.job_name
print(f"Using job name: {args.job_name}")
asyncio.run(main(cfg, mode=args.mode, extra_args=remaining))

_main() # @parse grabs the cfg from CLI
11 changes: 7 additions & 4 deletions src/forge/controller/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@
from monarch._src.actor.allocator import RemoteAllocator, TorchXRemoteAllocInitializer
from monarch.actor import Actor, endpoint, ProcMesh
from monarch.tools import commands
from monarch.tools.commands import info
from monarch.tools.components import hyperactor
from monarch.tools.commands import create, info
from monarch.tools.config import Config, Workspace

from forge.types import Launcher, LauncherConfig
Expand Down Expand Up @@ -259,8 +258,12 @@ async def launch_mast_job(self):
),
)

await commands.get_or_create(self.job_name, config)
return server_spec
job_handle = create(config, name=self.job_name)
print(
f"MAST job launched successfully:\n"
f"\033[34mhttps://www.internalfb.com/mlhub/pipelines/runs/mast/{self.job_name}\033[0m"
)
return job_handle

def add_additional_packages(self, packages: "Packages") -> "Packages":
packages.add_package("oil.oilfs:stable")
Expand Down
Loading