Commit b7f95869 authored by Elias Oenal

Refactored Metal/mps fixes.

parent 5dc05c0d
# Metal backend fixes written and placed
# into the public domain by Elias Oenal <sd@eliasoenal.com>
import os import os
import sys import sys
import traceback import traceback
...@@ -50,25 +47,21 @@ def setup_codeformer(): ...@@ -50,25 +47,21 @@ def setup_codeformer():
def __init__(self): def __init__(self):
self.net = None self.net = None
self.face_helper = None self.face_helper = None
if shared.device.type == 'mps': # CodeFormer currently does not support mps backend
shared.device_codeformer = torch.device('cpu')
def create_models(self): def create_models(self):
if self.net is not None and self.face_helper is not None: if self.net is not None and self.face_helper is not None:
return self.net, self.face_helper return self.net, self.face_helper
if shared.device.type == 'mps': # CodeFormer currently does not support mps backend net = net_class(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9, connect_list=['32', '64', '128', '256']).to(shared.device_codeformer)
net = net_class(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9, connect_list=['32', '64', '128', '256']).to(torch.device('cpu'))
else:
net = net_class(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9, connect_list=['32', '64', '128', '256']).to(shared.device)
ckpt_path = load_file_from_url(url=pretrain_model_url, model_dir=os.path.join(path, 'weights/CodeFormer'), progress=True) ckpt_path = load_file_from_url(url=pretrain_model_url, model_dir=os.path.join(path, 'weights/CodeFormer'), progress=True)
checkpoint = torch.load(ckpt_path)['params_ema'] checkpoint = torch.load(ckpt_path)['params_ema']
net.load_state_dict(checkpoint) net.load_state_dict(checkpoint)
net.eval() net.eval()
if shared.device.type == 'mps': # CodeFormer currently does not support mps backend face_helper = FaceRestoreHelper(1, face_size=512, crop_ratio=(1, 1), det_model='retinaface_resnet50', save_ext='png', use_parse=True, device=shared.device_codeformer)
face_helper = FaceRestoreHelper(1, face_size=512, crop_ratio=(1, 1), det_model='retinaface_resnet50', save_ext='png', use_parse=True, device=torch.device('cpu'))
else:
face_helper = FaceRestoreHelper(1, face_size=512, crop_ratio=(1, 1), det_model='retinaface_resnet50', save_ext='png', use_parse=True, device=shared.device)
if not cmd_opts.unload_gfpgan: if not cmd_opts.unload_gfpgan:
self.net = net self.net = net
...@@ -90,10 +83,8 @@ def setup_codeformer(): ...@@ -90,10 +83,8 @@ def setup_codeformer():
for idx, cropped_face in enumerate(face_helper.cropped_faces): for idx, cropped_face in enumerate(face_helper.cropped_faces):
cropped_face_t = img2tensor(cropped_face / 255., bgr2rgb=True, float32=True) cropped_face_t = img2tensor(cropped_face / 255., bgr2rgb=True, float32=True)
normalize(cropped_face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True) normalize(cropped_face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
if shared.device.type == 'mps': # CodeFormer currently does not support mps backend cropped_face_t = cropped_face_t.unsqueeze(0).to(shared.device_codeformer)
cropped_face_t = cropped_face_t.unsqueeze(0).to(torch.device('cpu'))
else:
cropped_face_t = cropped_face_t.unsqueeze(0).to(shared.device)
try: try:
with torch.no_grad(): with torch.no_grad():
output = net(cropped_face_t, w=w if w is not None else shared.opts.code_former_weight, adain=True)[0] output = net(cropped_face_t, w=w if w is not None else shared.opts.code_former_weight, adain=True)[0]
......
# Metal backend fixes written and placed
# into the public domain by Elias Oenal <sd@eliasoenal.com>
import contextlib import contextlib
import json import json
import math import math
...@@ -109,17 +106,19 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see ...@@ -109,17 +106,19 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
noise_shape = shape if seed_resize_from_h <= 0 or seed_resize_from_w <= 0 else (shape[0], seed_resize_from_h//8, seed_resize_from_w//8) noise_shape = shape if seed_resize_from_h <= 0 or seed_resize_from_w <= 0 else (shape[0], seed_resize_from_h//8, seed_resize_from_w//8)
# PyTorch currently doesn't handle seeding randomness correctly when the Metal backend is used. # PyTorch currently doesn't handle seeding randomness correctly when the Metal backend is used.
generator = torch
if shared.device.type == 'mps': if shared.device.type == 'mps':
g = torch.Generator(device='cpu') shared.device_seed_type = 'cpu'
generator = torch.Generator(device=shared.device_seed_type)
subnoise = None subnoise = None
if subseeds is not None: if subseeds is not None:
subseed = 0 if i >= len(subseeds) else subseeds[i] subseed = 0 if i >= len(subseeds) else subseeds[i]
if shared.device.type == 'mps': generator.manual_seed(subseed)
g.manual_seed(subseed)
subnoise = torch.randn(noise_shape, generator=g, device='cpu').to('mps') if shared.device.type != shared.device_seed_type:
else: # cpu or cuda subnoise = torch.randn(noise_shape, generator=generator, device=shared.device_seed_type).to(shared.device)
torch.manual_seed(subseed) else:
subnoise = torch.randn(noise_shape, device=shared.device) subnoise = torch.randn(noise_shape, device=shared.device)
# randn results depend on device; gpu and cpu get different results for same seed; # randn results depend on device; gpu and cpu get different results for same seed;
...@@ -128,12 +127,11 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see ...@@ -128,12 +127,11 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
# it will break everyone's seeds. # it will break everyone's seeds.
# When using the mps backend falling back to the cpu device is needed, since mps currently # When using the mps backend falling back to the cpu device is needed, since mps currently
# does not implement seeding properly. # does not implement seeding properly.
if shared.device.type == 'mps': generator.manual_seed(seed)
g.manual_seed(seed) if shared.device.type != shared.device_seed_type:
noise = torch.randn(noise_shape, generator=g, device='cpu').to('mps') noise = torch.randn(noise_shape, generator=generator, device=shared.device_seed_type).to(shared.device)
else: # cpu or cuda else:
torch.manual_seed(seed) noise = torch.randn(noise_shape, device=shared.device)
x = torch.randn(shape, device=shared.device)
if subnoise is not None: if subnoise is not None:
#noise = subnoise * subseed_strength + noise * (1 - subseed_strength) #noise = subnoise * subseed_strength + noise * (1 - subseed_strength)
...@@ -143,12 +141,10 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see ...@@ -143,12 +141,10 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
#noise = torch.nn.functional.interpolate(noise.unsqueeze(1), size=shape[1:], mode="bilinear").squeeze() #noise = torch.nn.functional.interpolate(noise.unsqueeze(1), size=shape[1:], mode="bilinear").squeeze()
# noise_shape = (64, 80) # noise_shape = (64, 80)
# shape = (64, 72) # shape = (64, 72)
generator.manual_seed(seed)
if shared.device.type == 'mps': if shared.device.type != shared.device_seed_type:
g.manual_seed(seed) x = torch.randn(shape, generator=generator, device=shared.device_seed_type).to(shared.device)
x = torch.randn(shape, generator=g, device='cpu').to('mps')
else: else:
torch.manual_seed(seed)
x = torch.randn(shape, device=shared.device) x = torch.randn(shape, device=shared.device)
dx = (shape[2] - noise_shape[2]) // 2 # -4 dx = (shape[2] - noise_shape[2]) // 2 # -4
dy = (shape[1] - noise_shape[1]) // 2 dy = (shape[1] - noise_shape[1]) // 2
...@@ -484,10 +480,10 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): ...@@ -484,10 +480,10 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
if self.image_mask is not None: if self.image_mask is not None:
init_mask = latent_mask init_mask = latent_mask
latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2])) latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2]))
precision = np.float64
if shared.device.type == 'mps': # mps backend does not support float64 if shared.device.type == 'mps': # mps backend does not support float64
latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255 precision = np.float32
else: latmask = np.moveaxis(np.array(latmask, dtype=precision), 2, 0) / 255
latmask = np.moveaxis(np.array(latmask, dtype=np.float64), 2, 0) / 255
latmask = latmask[0] latmask = latmask[0]
latmask = np.around(latmask) latmask = np.around(latmask)
latmask = np.tile(latmask[None], (4, 1, 1)) latmask = np.tile(latmask[None], (4, 1, 1))
......
...@@ -46,6 +46,8 @@ parser.add_argument("--ui-config-file", type=str, help="filename to use for ui c ...@@ -46,6 +46,8 @@ parser.add_argument("--ui-config-file", type=str, help="filename to use for ui c
cmd_opts = parser.parse_args() cmd_opts = parser.parse_args()
device = get_optimal_device() device = get_optimal_device()
device_codeformer = device
device_seed_type = device
batch_cond_uncond = cmd_opts.always_batch_cond_uncond or not (cmd_opts.lowvram or cmd_opts.medvram) batch_cond_uncond = cmd_opts.always_batch_cond_uncond or not (cmd_opts.lowvram or cmd_opts.medvram)
parallel_processing_allowed = not cmd_opts.lowvram and not cmd_opts.medvram parallel_processing_allowed = not cmd_opts.lowvram and not cmd_opts.medvram
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment