commit fce44e5 (1 parent: c59198f)
eval.py
@@ -15,7 +15,6 @@
 torch._dynamo.config.automatic_dynamic_shapes = True
 torch._inductor.config.triton.unique_kernel_names = True
 torch._inductor.config.epilogue_fusion = False
-torch._inductor.config.triton.cudagraphs = True
 torch._dynamo.config.cache_size_limit = 100000
 
 from tokenizer import get_tokenizer
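The removed line had switched CUDA graphs on globally for every Inductor compilation in eval.py. As a point of comparison, here is a minimal sketch (the model and input are illustrative, not from this commit) of opting into CUDA graphs per call site instead, via torch.compile's "reduce-overhead" mode, which leaves other compiled functions unaffected:

import torch

# Global flag (what the removed line did): every Inductor compilation in the
# process attempts to use CUDA graphs.
# torch._inductor.config.triton.cudagraphs = True

# Per-call alternative: only this compiled module opts into CUDA graphs.
model = torch.nn.Linear(16, 16).cuda()
fast_forward = torch.compile(model, mode="reduce-overhead")

x = torch.randn(4, 16, device="cuda")
y = fast_forward(x)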
generate.py
@@ -31,6 +31,8 @@ def device_sync(device):
 
 default_device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
+create_block_mask = torch.compile(create_block_mask)
+
 # support running without installing as a package
 wd = Path(__file__).parent.parent.resolve()
 sys.path.append(str(wd))
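The added lines wrap create_block_mask in torch.compile once at module load. A sketch of how the compiled helper is typically used, assuming create_block_mask comes from torch.nn.attention.flex_attention as in recent PyTorch releases (the causal mask_mod and the sizes below are illustrative, not taken from this commit):

import torch
from torch.nn.attention.flex_attention import create_block_mask

# Compile the mask builder once so its kernels are generated ahead of time
# rather than re-traced on every call (this is what the diff adds).
create_block_mask = torch.compile(create_block_mask)

def causal(b, h, q_idx, kv_idx):
    # Standard causal mask: a query attends only to positions at or before it.
    return q_idx >= kv_idx

# Build a block-sparse mask for a 1024-token sequence; B=None and H=None
# broadcast the mask over batch and heads.
block_mask = create_block_mask(causal, B=None, H=None, Q_LEN=1024, KV_LEN=1024, device="cuda")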