Merge pull request #2067 from victorca25/esrgan_mod

update ESRGAN architecture and model to support all ESRGAN models

Merge pull request #2067 from victorca25/esrgan_mod
update ESRGAN architecture and model to support all ESRGAN models
6bd6154a · AUTOMATIC1111 · GitHub · 696cb33e · 53154ba1 · 696cb33e
Commit 6bd6154a authored Oct 23, 2022 by AUTOMATIC1111 Committed by GitHub Oct 23, 2022
4 changed files
--- a/modules/bsrgan_model.py
+++ b/modules/bsrgan_model.py
-import os.path
-import sys
-import traceback
-
-import PIL.Image
-import numpy as np
-import torch
-from basicsr.utils.download_util import load_file_from_url
-
-import modules.upscaler
-from modules import devices, modelloader
-from modules.bsrgan_model_arch import RRDBNet
-
-
-class UpscalerBSRGAN(modules.upscaler.Upscaler):
-    def __init__(self, dirname):
-        self.name = "BSRGAN"
-        self.model_name = "BSRGAN 4x"
-        self.model_url = "https://github.com/cszn/KAIR/releases/download/v1.0/BSRGAN.pth"
-        self.user_path = dirname
-        super().__init__()
-        model_paths = self.find_models(ext_filter=[".pt", ".pth"])
-        scalers = []
-        if len(model_paths) == 0:
-            scaler_data = modules.upscaler.UpscalerData(self.model_name, self.model_url, self, 4)
-            scalers.append(scaler_data)
-        for file in model_paths:
-            if "http" in file:
-                name = self.model_name
-            else:
-                name = modelloader.friendly_name(file)
-            try:
-                scaler_data = modules.upscaler.UpscalerData(name, file, self, 4)
-                scalers.append(scaler_data)
-            except Exception:
-                print(f"Error loading BSRGAN model: {file}", file=sys.stderr)
-                print(traceback.format_exc(), file=sys.stderr)
-        self.scalers = scalers
-
-    def do_upscale(self, img: PIL.Image, selected_file):
-        torch.cuda.empty_cache()
-        model = self.load_model(selected_file)
-        if model is None:
-            return img
-        model.to(devices.device_bsrgan)
-        torch.cuda.empty_cache()
-        img = np.array(img)
-        img = img[:, :, ::-1]
-        img = np.moveaxis(img, 2, 0) / 255
-        img = torch.from_numpy(img).float()
-        img = img.unsqueeze(0).to(devices.device_bsrgan)
-        with torch.no_grad():
-            output = model(img)
-        output = output.squeeze().float().cpu().clamp_(0, 1).numpy()
-        output = 255. * np.moveaxis(output, 0, 2)
-        output = output.astype(np.uint8)
-        output = output[:, :, ::-1]
-        torch.cuda.empty_cache()
-        return PIL.Image.fromarray(output, 'RGB')
-
-    def load_model(self, path: str):
-        if "http" in path:
-            filename = load_file_from_url(url=self.model_url, model_dir=self.model_path, file_name="%s.pth" % self.name,
-                                          progress=True)
-        else:
-            filename = path
-        if not os.path.exists(filename) or filename is None:
-            print(f"BSRGAN: Unable to load model from {filename}", file=sys.stderr)
-            return None
-        model = RRDBNet(in_nc=3, out_nc=3, nf=64, nb=23, gc=32, sf=4)  # define network
-        model.load_state_dict(torch.load(filename), strict=True)
-        model.eval()
-        for k, v in model.named_parameters():
-            v.requires_grad = False
-        return model
-
--- a/modules/bsrgan_model_arch.py
+++ b/modules/bsrgan_model_arch.py
-import functools
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import torch.nn.init as init
-
-
-def initialize_weights(net_l, scale=1):
-    if not isinstance(net_l, list):
-        net_l = [net_l]
-    for net in net_l:
-        for m in net.modules():
-            if isinstance(m, nn.Conv2d):
-                init.kaiming_normal_(m.weight, a=0, mode='fan_in')
-                m.weight.data *= scale  # for residual block
-                if m.bias is not None:
-                    m.bias.data.zero_()
-            elif isinstance(m, nn.Linear):
-                init.kaiming_normal_(m.weight, a=0, mode='fan_in')
-                m.weight.data *= scale
-                if m.bias is not None:
-                    m.bias.data.zero_()
-            elif isinstance(m, nn.BatchNorm2d):
-                init.constant_(m.weight, 1)
-                init.constant_(m.bias.data, 0.0)
-
-
-def make_layer(block, n_layers):
-    layers = []
-    for _ in range(n_layers):
-        layers.append(block())
-    return nn.Sequential(*layers)
-
-
-class ResidualDenseBlock_5C(nn.Module):
-    def __init__(self, nf=64, gc=32, bias=True):
-        super(ResidualDenseBlock_5C, self).__init__()
-        # gc: growth channel, i.e. intermediate channels
-        self.conv1 = nn.Conv2d(nf, gc, 3, 1, 1, bias=bias)
-        self.conv2 = nn.Conv2d(nf + gc, gc, 3, 1, 1, bias=bias)
-        self.conv3 = nn.Conv2d(nf + 2 * gc, gc, 3, 1, 1, bias=bias)
-        self.conv4 = nn.Conv2d(nf + 3 * gc, gc, 3, 1, 1, bias=bias)
-        self.conv5 = nn.Conv2d(nf + 4 * gc, nf, 3, 1, 1, bias=bias)
-        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
-
-        # initialization
-        initialize_weights([self.conv1, self.conv2, self.conv3, self.conv4, self.conv5], 0.1)
-
-    def forward(self, x):
-        x1 = self.lrelu(self.conv1(x))
-        x2 = self.lrelu(self.conv2(torch.cat((x, x1), 1)))
-        x3 = self.lrelu(self.conv3(torch.cat((x, x1, x2), 1)))
-        x4 = self.lrelu(self.conv4(torch.cat((x, x1, x2, x3), 1)))
-        x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1))
-        return x5 * 0.2 + x
-
-
-class RRDB(nn.Module):
-    '''Residual in Residual Dense Block'''
-
-    def __init__(self, nf, gc=32):
-        super(RRDB, self).__init__()
-        self.RDB1 = ResidualDenseBlock_5C(nf, gc)
-        self.RDB2 = ResidualDenseBlock_5C(nf, gc)
-        self.RDB3 = ResidualDenseBlock_5C(nf, gc)
-
-    def forward(self, x):
-        out = self.RDB1(x)
-        out = self.RDB2(out)
-        out = self.RDB3(out)
-        return out * 0.2 + x
-
-
-class RRDBNet(nn.Module):
-    def __init__(self, in_nc=3, out_nc=3, nf=64, nb=23, gc=32, sf=4):
-        super(RRDBNet, self).__init__()
-        RRDB_block_f = functools.partial(RRDB, nf=nf, gc=gc)
-        self.sf = sf
-
-        self.conv_first = nn.Conv2d(in_nc, nf, 3, 1, 1, bias=True)
-        self.RRDB_trunk = make_layer(RRDB_block_f, nb)
-        self.trunk_conv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
-        #### upsampling
-        self.upconv1 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
-        if self.sf==4:
-            self.upconv2 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
-        self.HRconv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
-        self.conv_last = nn.Conv2d(nf, out_nc, 3, 1, 1, bias=True)
-
-        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
-
-    def forward(self, x):
-        fea = self.conv_first(x)
-        trunk = self.trunk_conv(self.RRDB_trunk(fea))
-        fea = fea + trunk
-
-        fea = self.lrelu(self.upconv1(F.interpolate(fea, scale_factor=2, mode='nearest')))
-        if self.sf==4:
-            fea = self.lrelu(self.upconv2(F.interpolate(fea, scale_factor=2, mode='nearest')))
-        out = self.conv_last(self.lrelu(self.HRconv(fea)))
-
-        return out
\ No newline at end of file
--- a/modules/esrgan_model.py
+++ b/modules/esrgan_model.py
@@ -11,62 +11,109 @@ from modules.upscaler import Upscaler, UpscalerData
 from modules.shared import opts


-def fix_model_layers(crt_model, pretrained_net):
-    # this code is adapted from https://github.com/xinntao/ESRGAN
-    if 'conv_first.weight' in pretrained_net:
-        return pretrained_net
-
-    if 'model.0.weight' not in pretrained_net:
-        is_realesrgan = "params_ema" in pretrained_net and 'body.0.rdb1.conv1.weight' in pretrained_net["params_ema"]
-        if is_realesrgan:
-            raise Exception("The file is a RealESRGAN model, it can't be used as a ESRGAN model.")
-        else:
-            raise Exception("The file is not a ESRGAN model.")

-    crt_net = crt_model.state_dict()
-    load_net_clean = {}
-    for k, v in pretrained_net.items():
-        if k.startswith('module.'):
-            load_net_clean[k[7:]] = v
-        else:
-            load_net_clean[k] = v
-    pretrained_net = load_net_clean
-
-    tbd = []
-    for k, v in crt_net.items():
-        tbd.append(k)
-
-    # directly copy
-    for k, v in crt_net.items():
-        if k in pretrained_net and pretrained_net[k].size() == v.size():
-            crt_net[k] = pretrained_net[k]
-            tbd.remove(k)
-
-    crt_net['conv_first.weight'] = pretrained_net['model.0.weight']
-    crt_net['conv_first.bias'] = pretrained_net['model.0.bias']
-
-    for k in tbd.copy():
-        if 'RDB' in k:
-            ori_k = k.replace('RRDB_trunk.', 'model.1.sub.')
-            if '.weight' in k:
-                ori_k = ori_k.replace('.weight', '.0.weight')
-            elif '.bias' in k:
-                ori_k = ori_k.replace('.bias', '.0.bias')
-            crt_net[k] = pretrained_net[ori_k]
-            tbd.remove(k)
-
-    crt_net['trunk_conv.weight'] = pretrained_net['model.1.sub.23.weight']
-    crt_net['trunk_conv.bias'] = pretrained_net['model.1.sub.23.bias']
-    crt_net['upconv1.weight'] = pretrained_net['model.3.weight']
-    crt_net['upconv1.bias'] = pretrained_net['model.3.bias']
-    crt_net['upconv2.weight'] = pretrained_net['model.6.weight']
-    crt_net['upconv2.bias'] = pretrained_net['model.6.bias']
-    crt_net['HRconv.weight'] = pretrained_net['model.8.weight']
-    crt_net['HRconv.bias'] = pretrained_net['model.8.bias']
-    crt_net['conv_last.weight'] = pretrained_net['model.10.weight']
-    crt_net['conv_last.bias'] = pretrained_net['model.10.bias']
-
-    return crt_net
+def mod2normal(state_dict):
+    # this code is copied from https://github.com/victorca25/iNNfer
+    if 'conv_first.weight' in state_dict:
+        crt_net = {}
+        items = []
+        for k, v in state_dict.items():
+            items.append(k)
+
+        crt_net['model.0.weight'] = state_dict['conv_first.weight']
+        crt_net['model.0.bias'] = state_dict['conv_first.bias']
+
+        for k in items.copy():
+            if 'RDB' in k:
+                ori_k = k.replace('RRDB_trunk.', 'model.1.sub.')
+                if '.weight' in k:
+                    ori_k = ori_k.replace('.weight', '.0.weight')
+                elif '.bias' in k:
+                    ori_k = ori_k.replace('.bias', '.0.bias')
+                crt_net[ori_k] = state_dict[k]
+                items.remove(k)
+
+        crt_net['model.1.sub.23.weight'] = state_dict['trunk_conv.weight']
+        crt_net['model.1.sub.23.bias'] = state_dict['trunk_conv.bias']
+        crt_net['model.3.weight'] = state_dict['upconv1.weight']
+        crt_net['model.3.bias'] = state_dict['upconv1.bias']
+        crt_net['model.6.weight'] = state_dict['upconv2.weight']
+        crt_net['model.6.bias'] = state_dict['upconv2.bias']
+        crt_net['model.8.weight'] = state_dict['HRconv.weight']
+        crt_net['model.8.bias'] = state_dict['HRconv.bias']
+        crt_net['model.10.weight'] = state_dict['conv_last.weight']
+        crt_net['model.10.bias'] = state_dict['conv_last.bias']
+        state_dict = crt_net
+    return state_dict
+
+
+def resrgan2normal(state_dict, nb=23):
+    # this code is copied from https://github.com/victorca25/iNNfer
+    if "conv_first.weight" in state_dict and "body.0.rdb1.conv1.weight" in state_dict:
+        crt_net = {}
+        items = []
+        for k, v in state_dict.items():
+            items.append(k)
+
+        crt_net['model.0.weight'] = state_dict['conv_first.weight']
+        crt_net['model.0.bias'] = state_dict['conv_first.bias']
+
+        for k in items.copy():
+            if "rdb" in k:
+                ori_k = k.replace('body.', 'model.1.sub.')
+                ori_k = ori_k.replace('.rdb', '.RDB')
+                if '.weight' in k:
+                    ori_k = ori_k.replace('.weight', '.0.weight')
+                elif '.bias' in k:
+                    ori_k = ori_k.replace('.bias', '.0.bias')
+                crt_net[ori_k] = state_dict[k]
+                items.remove(k)
+
+        crt_net[f'model.1.sub.{nb}.weight'] = state_dict['conv_body.weight']
+        crt_net[f'model.1.sub.{nb}.bias'] = state_dict['conv_body.bias']
+        crt_net['model.3.weight'] = state_dict['conv_up1.weight']
+        crt_net['model.3.bias'] = state_dict['conv_up1.bias']
+        crt_net['model.6.weight'] = state_dict['conv_up2.weight']
+        crt_net['model.6.bias'] = state_dict['conv_up2.bias']
+        crt_net['model.8.weight'] = state_dict['conv_hr.weight']
+        crt_net['model.8.bias'] = state_dict['conv_hr.bias']
+        crt_net['model.10.weight'] = state_dict['conv_last.weight']
+        crt_net['model.10.bias'] = state_dict['conv_last.bias']
+        state_dict = crt_net
+    return state_dict
+
+
+def infer_params(state_dict):
+    # this code is copied from https://github.com/victorca25/iNNfer
+    scale2x = 0
+    scalemin = 6
+    n_uplayer = 0
+    plus = False
+
+    for block in list(state_dict):
+        parts = block.split(".")
+        n_parts = len(parts)
+        if n_parts == 5 and parts[2] == "sub":
+            nb = int(parts[3])
+        elif n_parts == 3:
+            part_num = int(parts[1])
+            if (part_num > scalemin
+                and parts[0] == "model"
+                and parts[2] == "weight"):
+                scale2x += 1
+            if part_num > n_uplayer:
+                n_uplayer = part_num
+                out_nc = state_dict[block].shape[0]
+        if not plus and "conv1x1" in block:
+            plus = True
+
+    nf = state_dict["model.0.weight"].shape[0]
+    in_nc = state_dict["model.0.weight"].shape[1]
+    out_nc = out_nc
+    scale = 2 ** scale2x
+
+    return in_nc, out_nc, nf, nb, plus, scale
+

 class UpscalerESRGAN(Upscaler):
    def __init__(self, dirname):
@@ -109,20 +156,39 @@ class UpscalerESRGAN(Upscaler):
            print("Unable to load %s from %s" % (self.model_path, filename))
            return None

-        pretrained_net = torch.load(filename, map_location='cpu' if devices.device_esrgan.type == 'mps' else None)
-        crt_model = arch.RRDBNet(3, 3, 64, 23, gc=32)
+        state_dict = torch.load(filename, map_location='cpu' if devices.device_esrgan.type == 'mps' else None)
+
+        if "params_ema" in state_dict:
+            state_dict = state_dict["params_ema"]
+        elif "params" in state_dict:
+            state_dict = state_dict["params"]
+            num_conv = 16 if "realesr-animevideov3" in filename else 32
+            model = arch.SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=num_conv, upscale=4, act_type='prelu')
+            model.load_state_dict(state_dict)
+            model.eval()
+            return model
+
+        if "body.0.rdb1.conv1.weight" in state_dict and "conv_first.weight" in state_dict:
+            nb = 6 if "RealESRGAN_x4plus_anime_6B" in filename else 23
+            state_dict = resrgan2normal(state_dict, nb)
+        elif "conv_first.weight" in state_dict:
+            state_dict = mod2normal(state_dict)
+        elif "model.0.weight" not in state_dict:
+            raise Exception("The file is not a recognized ESRGAN model.")
+
+        in_nc, out_nc, nf, nb, plus, mscale = infer_params(state_dict)

-        pretrained_net = fix_model_layers(crt_model, pretrained_net)
-        crt_model.load_state_dict(pretrained_net)
-        crt_model.eval()
+        model = arch.RRDBNet(in_nc=in_nc, out_nc=out_nc, nf=nf, nb=nb, upscale=mscale, plus=plus)
+        model.load_state_dict(state_dict)
+        model.eval()

-        return crt_model
+        return model


 def upscale_without_tiling(model, img):
    img = np.array(img)
    img = img[:, :, ::-1]
-    img = np.moveaxis(img, 2, 0) / 255
+    img = np.ascontiguousarray(np.transpose(img, (2, 0, 1))) / 255
    img = torch.from_numpy(img).float()
    img = img.unsqueeze(0).to(devices.device_esrgan)
    with torch.no_grad():

--- a/modules/esrgan_model_arch.py
+++ b/modules/esrgan_model_arch.py