Commit b70b51cc authored by AUTOMATIC's avatar AUTOMATIC

Allow TF32 in CUDA for increased performance #279

parent 11e648f6
import torch
# has_mps is only available in nightly pytorch (for now), `getattr` for compatibility
from modules import errors
# True when the installed torch build exposes MPS (Apple Metal) support;
# getattr keeps this working on older torch versions that lack the attribute.
has_mps = getattr(torch, 'has_mps', False)
# CPU device handle; NOTE(review): presumably used as a fallback device by
# callers of this module — confirm against usage sites.
cpu = torch.device("cpu")
......@@ -20,3 +22,12 @@ def torch_gc():
if torch.cuda.is_available():
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
def enable_tf32():
    """Opt in to TensorFloat-32 math on CUDA for faster matmul/convolutions.

    No-op when CUDA is not available.
    """
    if not torch.cuda.is_available():
        return
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
errors.run(enable_tf32, "Enabling TF32")
import sys
import traceback
def run(code, task):
    """Invoke *code* (a zero-argument callable), swallowing any exception.

    On failure, writes "<task>: <ExceptionName>" followed by the full
    traceback to stderr instead of propagating the error.

    code -- callable taking no arguments.
    task -- short human-readable label used in the error report.
    """
    try:
        code()
    except Exception as err:
        sys.stderr.write(f"{task}: {type(err).__name__}\n")
        sys.stderr.write(traceback.format_exc() + "\n")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment