Skip to content

Commit

Permalink
Merge pull request #157 from mit-han-lab/dev
Browse files Browse the repository at this point in the history
Fix missing third_party & merge main branch of dev repo
  • Loading branch information
sxtyzhangzk authored Mar 8, 2025
2 parents e1c5f3e + 75dad57 commit 6772359
Show file tree
Hide file tree
Showing 12 changed files with 46 additions and 1 deletion.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def cond(s) -> list:
for target in sm_targets:
NVCC_FLAGS += ["-gencode", f"arch=compute_{target},code=sm_{target}"]

NVCC_MSVC_FLAGS = ["-Xcompiler", "/Zc:__cplusplus", "-Xcompiler", "/FS"]
NVCC_MSVC_FLAGS = ["-Xcompiler", "/Zc:__cplusplus", "-Xcompiler", "/FS", "-Xcompiler", "/bigobj"]

nunchaku_extension = CUDAExtension(
name="nunchaku._C",
Expand Down
40 changes: 40 additions & 0 deletions src/Module.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,11 @@ struct LayerOffloadHelper {
if (offload) {
streamCompute = std::make_unique<CUDAStreamWrapper>();
streamLoad = std::make_unique<CUDAStreamWrapper>();

needWorkaround = checkWorkaround();
if (needWorkaround) {
spdlog::debug("Offloading helper: use WDDM workaround");
}
}
}

Expand All @@ -240,6 +245,7 @@ struct LayerOffloadHelper {
funcCompute(layer);
nextComputeDone = std::make_unique<CUDAEventWrapper>();
checkCUDA(cudaEventRecord(nextComputeDone->event, getCurrentCUDAStream()));
workaroundFlush();
}

{
Expand All @@ -253,10 +259,13 @@ struct LayerOffloadHelper {
}
nextLoadDone = std::make_unique<CUDAEventWrapper>();
checkCUDA(cudaEventRecord(nextLoadDone->event, getCurrentCUDAStream()));
workaroundFlush();
}

eventComputeDone = std::move(nextComputeDone);
eventLoadDone = std::move(nextLoadDone);

workaroundSynchronize();
}
}

Expand All @@ -266,4 +275,35 @@ struct LayerOffloadHelper {
}
checkCUDA(cudaStreamWaitEvent(getCurrentCUDAStream(), event->event));
}

// WDDM prevents multiple streams from running concurrently; explicit flush
// and synchronize calls are used to work around this.
// Assigned from checkWorkaround() only when offloading is enabled, so it
// defaults to false to guarantee it is never read while uninitialized.
bool needWorkaround = false;
// Decides whether the WDDM workaround should be enabled.
//
// The environment variable NUNCHAKU_OFFLOAD_WDDM_WORKAROUND takes precedence:
// "1" forces the workaround on and "0" forces it off. If the variable is
// unset or holds any other value, the platform default applies: enabled when
// compiled with _WIN32 defined, disabled otherwise.
static bool checkWorkaround() {
    const char *overrideValue = getenv("NUNCHAKU_OFFLOAD_WDDM_WORKAROUND");
    if (overrideValue != nullptr) {
        const std::string setting(overrideValue);
        if (setting == "1") {
            return true;
        }
        if (setting == "0") {
            return false;
        }
        // Unrecognized value: fall through to the platform default.
    }

#ifdef _WIN32
    const bool platformDefault = true;
#else
    const bool platformDefault = false;
#endif
    return platformDefault;
}
// Issues a stream query on the current CUDA stream when the WDDM workaround
// is active; does nothing otherwise.
void workaroundFlush() {
    if (needWorkaround) {
        // The result is intentionally not routed through checkCUDA: the query
        // is made only for its side effect, and a "not ready" status is
        // presumably the normal outcome while work is still queued.
        cudaStreamQuery(getCurrentCUDAStream());
    }
}
// When the WDDM workaround is active, waits on the event currently held in
// eventComputeDone; does nothing otherwise.
// NOTE(review): assumes eventComputeDone is non-null at this point, i.e. an
// event has already been recorded and stored by the caller; confirm for any
// new call sites.
void workaroundSynchronize() {
    if (needWorkaround) {
        checkCUDA(cudaEventSynchronize(eventComputeDone->event));
    }
}
};
1 change: 1 addition & 0 deletions third_party/Block-Sparse-Attention
Submodule Block-Sparse-Attention added at 0d23f7
Empty file.
1 change: 1 addition & 0 deletions third_party/cutlass
Submodule cutlass added at a75b4a
Empty file removed third_party/cutlass/.gitkeep
Empty file.
1 change: 1 addition & 0 deletions third_party/json
Submodule json added at 632583
Empty file removed third_party/json/.gitkeep
Empty file.
1 change: 1 addition & 0 deletions third_party/mio
Submodule mio added at 8b6b7d
Empty file removed third_party/mio/.gitkeep
Empty file.
1 change: 1 addition & 0 deletions third_party/spdlog
Submodule spdlog added at 27cb4c
Empty file removed third_party/spdlog/.gitkeep
Empty file.

0 comments on commit 6772359

Please sign in to comment.