commit fce44e5 (1 parent: c59198f)
eval.py
@@ -15,7 +15,6 @@
 torch._dynamo.config.automatic_dynamic_shapes = True
 torch._inductor.config.triton.unique_kernel_names = True
 torch._inductor.config.epilogue_fusion = False
-torch._inductor.config.triton.cudagraphs = True
 torch._dynamo.config.cache_size_limit = 100000
 
 from tokenizer import get_tokenizer
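The removed line had switched CUDA graphs on globally for every Inductor compilation in eval.py. As a point of comparison, here is a minimal sketch (the model and input are illustrative, not from this commit) of opting into CUDA graphs per call site instead, via torch.compile's "reduce-overhead" mode, which leaves other compiled functions unaffected:

import torch

# Global flag (what the removed line did): every Inductor compilation in the
# process attempts to use CUDA graphs.
# torch._inductor.config.triton.cudagraphs = True

# Per-call alternative: only this compiled module opts into CUDA graphs.
model = torch.nn.Linear(16, 16).cuda()
fast_forward = torch.compile(model, mode="reduce-overhead")

x = torch.randn(4, 16, device="cuda")
y = fast_forward(x)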
generate.py
@@ -31,6 +31,8 @@ def device_sync(device):
 
 default_device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
+create_block_mask = torch.compile(create_block_mask)
+
 # support running without installing as a package
 wd = Path(__file__).parent.parent.resolve()
 sys.path.append(str(wd))
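The added lines wrap create_block_mask in torch.compile once at module load. A sketch of how the compiled helper is typically used, assuming create_block_mask comes from torch.nn.attention.flex_attention as in recent PyTorch releases (the causal mask_mod and the sizes below are illustrative, not taken from this commit):

import torch
from torch.nn.attention.flex_attention import create_block_mask

# Compile the mask builder once so its kernels are generated ahead of time
# rather than re-traced on every call (this is what the diff adds).
create_block_mask = torch.compile(create_block_mask)

def causal(b, h, q_idx, kv_idx):
    # Standard causal mask: a query attends only to positions at or before it.
    return q_idx >= kv_idx

# Build a block-sparse mask for a 1024-token sequence; B=None and H=None
# broadcast the mask over batch and heads.
block_mask = create_block_mask(causal, B=None, H=None, Q_LEN=1024, KV_LEN=1024, device="cuda")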