Commit 558baffa authored by AUTOMATIC1111, committed by GitHub

Merge pull request #12924 from catboxanon/fix/cudnn

More accurate check for enabling cuDNN benchmark on 16XX cards
parents 4ebed495 5681bf80
@@ -60,7 +60,8 @@ def enable_tf32():

         # enabling benchmark option seems to enable a range of cards to do fp16 when they otherwise can't
         # see https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/4407
-        if any(torch.cuda.get_device_capability(devid) == (7, 5) for devid in range(0, torch.cuda.device_count())):
+        device_id = (int(shared.cmd_opts.device_id) if shared.cmd_opts.device_id is not None and shared.cmd_opts.device_id.isdigit() else 0) or torch.cuda.current_device()
+        if torch.cuda.get_device_capability(device_id) == (7, 5) and torch.cuda.get_device_name(device_id).startswith("NVIDIA GeForce GTX 16"):
             torch.backends.cudnn.benchmark = True

         torch.backends.cuda.matmul.allow_tf32 = True
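The previous check enabled the cuDNN benchmark whenever any visible GPU reported compute capability (7, 5), which also matches RTX 20xx and Tesla T4 cards and ignored which device was actually selected. The new check inspects only the active device (honoring --device-id, falling back to torch.cuda.current_device()) and additionally requires the device name to begin with "NVIDIA GeForce GTX 16". Below is a minimal standalone sketch of that check; the helper name is_gtx_16xx is hypothetical and not part of the commit, and it assumes a CUDA-capable PyTorch build.

import torch

def is_gtx_16xx(device_id=None):
    # Turing cards report compute capability (7, 5); the name prefix narrows
    # the match to the GTX 16 series, excluding RTX 20xx / Tesla T4 cards
    # that share the same capability.
    if device_id is None:
        device_id = torch.cuda.current_device()
    return (torch.cuda.get_device_capability(device_id) == (7, 5)
            and torch.cuda.get_device_name(device_id).startswith("NVIDIA GeForce GTX 16"))

if torch.cuda.is_available():
    print("cuDNN benchmark would be enabled:", is_gtx_16xx())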