Commit 329c8bac authored Nov 04, 2023 by v0xie
refactor: use same updown for both kohya OFT and LyCORIS diag-oft
parent 1dd25be0

Showing 1 changed file with 74 additions and 17 deletions:
extensions-builtin/Lora/network_oft.py (+74, -17)
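For orientation: in this extension, a module's "updown" is the delta that gets added back onto a layer's original weight. Both the kohya-ss OFT format and the LyCORIS diag-oft format store small rotation blocks, and after this refactor both are merged into the weight through the same block-rotation path. A minimal sketch of the updown idea, using illustrative names and shapes rather than the extension's API:

import torch

# Illustrative only: a tiny layer and a near-identity rotation of its output rows.
out_dim, in_dim = 6, 10
orig_weight = torch.randn(out_dim, in_dim)
R = torch.eye(out_dim) + 0.01 * torch.randn(out_dim, out_dim)

merged_weight = R @ orig_weight
updown = merged_weight - orig_weight   # the delta a network module hands back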
extensions-builtin/Lora/network_oft.py (view file @ 329c8bac)
@@ -2,6 +2,7 @@ import torch
 import network
 from lyco_helpers import factorization
 from einops import rearrange
+from modules import devices


 class ModuleTypeOFT(network.ModuleType):
@@ -24,12 +25,14 @@ class NetworkModuleOFT(network.NetworkModule):
         # kohya-ss
         if "oft_blocks" in weights.w.keys():
             self.is_kohya = True
-            self.oft_blocks = weights.w["oft_blocks"]
+            self.oft_blocks = weights.w["oft_blocks"] # (num_blocks, block_size, block_size)
             self.alpha = weights.w["alpha"]
-            self.dim = self.oft_blocks.shape[0]
+            self.dim = self.oft_blocks.shape[0] # lora dim
+            #self.oft_blocks = rearrange(self.oft_blocks, 'k m ... -> (k m) ...')
         elif "oft_diag" in weights.w.keys():
             self.is_kohya = False
-            self.oft_blocks = weights.w["oft_diag"]
+            self.oft_blocks = weights.w["oft_diag"] # (num_blocks, block_size, block_size)

         # alpha is rank if alpha is 0 or None
         if self.alpha is None:
             pass
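For reference, the two state-dict layouts this constructor distinguishes could be mocked up like this (the key names come from the diff above; the tensor sizes are made-up examples):

import torch

# kohya-ss OFT checkpoint: rotation blocks plus an alpha scalar.
kohya_w = {
    "oft_blocks": torch.zeros(4, 80, 80),  # (num_blocks, block_size, block_size)
    "alpha": torch.tensor(4.0),
}

# LyCORIS diag-oft checkpoint: the same block tensor under a different key.
lycoris_w = {
    "oft_diag": torch.zeros(4, 80, 80),    # (num_blocks, block_size, block_size)
}

is_kohya = "oft_blocks" in kohya_w.keys()  # True here, False for lycoris_w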
@@ -51,12 +54,57 @@ class NetworkModuleOFT(network.NetworkModule):
             raise ValueError("sd_module must be Linear or Conv")

         if self.is_kohya:
-            #self.num_blocks = self.dim
-            #self.block_size = self.out_dim // self.num_blocks
-            #self.block_size = self.dim
-            #self.num_blocks = self.out_dim // self.block_size
+            self.num_blocks = self.dim
+            self.block_size = self.out_dim // self.num_blocks
             self.constraint = self.alpha * self.out_dim
-            self.num_blocks, self.block_size = factorization(self.out_dim, self.dim)
         else:
+            self.block_size, self.num_blocks = factorization(self.out_dim, self.dim)
             self.constraint = None
-            self.block_size, self.num_blocks = factorization(self.out_dim, self.dim)
+
+        if is_other_linear:
+            self.lin_module = self.create_module(weights.w, "oft_diag", none_ok=True)
+
+    def create_module(self, weights, key, none_ok=False):
+        weight = weights.get(key)
+        if weight is None and none_ok:
+            return None
+        is_linear = type(self.sd_module) in [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear, torch.nn.MultiheadAttention]
+        is_conv = type(self.sd_module) in [torch.nn.Conv2d]
+
+        if is_linear:
+            weight = weight.reshape(weight.shape[0], -1)
+            module = torch.nn.Linear(weight.shape[1], weight.shape[0], bias=False)
+        elif is_conv and key == "lora_down.weight" or key == "dyn_up":
+            if len(weight.shape) == 2:
+                weight = weight.reshape(weight.shape[0], -1, 1, 1)
+            if weight.shape[2] != 1 or weight.shape[3] != 1:
+                module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], self.sd_module.kernel_size, self.sd_module.stride, self.sd_module.padding, bias=False)
+            else:
+                module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], (1, 1), bias=False)
+        elif is_conv and key == "lora_mid.weight":
+            module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], self.sd_module.kernel_size, self.sd_module.stride, self.sd_module.padding, bias=False)
+        elif is_conv and key == "lora_up.weight" or key == "dyn_down":
+            module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], (1, 1), bias=False)
+        else:
+            raise AssertionError(f'Lora layer {self.network_key} matched a layer with unsupported type: {type(self.sd_module).__name__}')
+
+        with torch.no_grad():
+            if weight.shape != module.weight.shape:
+                weight = weight.reshape(module.weight.shape)
+            module.weight.copy_(weight)
+
+        module.to(device=devices.cpu, dtype=devices.dtype)
+        module.weight.requires_grad_(False)
+
+        return module
+
     def merge_weight(self, R_weight, org_weight):
         R_weight = R_weight.to(org_weight.device, dtype=org_weight.dtype)
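Worked numbers for the two branches above, assuming a layer with out_dim = 320: a kohya module that stored dim = 4 gets num_blocks = 4 and block_size = 320 // 4 = 80, with constraint = alpha * out_dim, while the LyCORIS branch asks factorization for a pair whose product is out_dim. A sketch with a stand-in for lyco_helpers.factorization (the stub below is an assumption, not its actual algorithm):

# kohya branch: block count is taken directly from the stored dim.
out_dim, dim, alpha = 320, 4, 4.0
num_blocks = dim                    # 4
block_size = out_dim // num_blocks  # 80
constraint = alpha * out_dim        # 1280.0

# LyCORIS branch: factorization picks the block layout from out_dim alone.
# Stand-in only -- lyco_helpers.factorization may choose a different pair.
def factorization_stub(dimension: int, factor: int) -> tuple[int, int]:
    """Return (a, b) with a * b == dimension, using factor as a hint."""
    a = factor if dimension % factor == 0 else 1
    return a, dimension // a

block_size, num_blocks = factorization_stub(out_dim, dim)  # (4, 80) here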
@@ -77,7 +125,8 @@ class NetworkModuleOFT(network.NetworkModule):
         else:
             new_norm_Q = norm_Q
         block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8))
-        m_I = torch.eye(self.block_size, device=oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1)
+        m_I = torch.eye(self.num_blocks, device=oft_blocks.device).unsqueeze(0).repeat(self.block_size, 1, 1)
+        #m_I = torch.eye(self.block_size, device=oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1)
         block_R = torch.matmul(m_I + block_Q, (m_I - block_Q).inverse())
         block_R_weighted = multiplier * block_R + (1 - multiplier) * m_I
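The block_R line in this hunk is a Cayley transform, R = (I + Q)(I - Q)^-1, which maps a skew-symmetric Q to an orthogonal R; that orthogonality is what makes the OFT update a rotation of the weight rather than an arbitrary change. A quick numerical check, separate from the extension:

import torch

n = 8
A = torch.randn(n, n)
Q = A - A.T                               # skew-symmetric: Q.T == -Q
I = torch.eye(n)

R = (I + Q) @ torch.inverse(I - Q)        # Cayley transform
err = (R.T @ R - I).abs().max()
print(f"orthogonality error: {err:.2e}")  # small, i.e. R.T @ R == I up to float error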
@@ -97,25 +146,33 @@ class NetworkModuleOFT(network.NetworkModule):
         is_other_linear = type(self.sd_module) in [torch.nn.MultiheadAttention]

-        if is_other_linear and orig_weight.shape[0] != orig_weight.shape[1]:
-            orig_weight = orig_weight.permute(1, 0)
+        if not is_other_linear:
+            #if is_other_linear and orig_weight.shape[0] != orig_weight.shape[1]:
+            #    orig_weight=orig_weight.permute(1, 0)

-        R = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype)
+            oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype)
+
+            # without this line the results are significantly worse / less accurate
+            oft_blocks = oft_blocks - oft_blocks.transpose(1, 2)
+
+            R = oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype)
+            R = R * multiplier + torch.eye(self.block_size, device=orig_weight.device)

             merged_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size)
             merged_weight = torch.einsum(
                 'k n m, k n ... -> k m ...',
-                R * multiplier + torch.eye(self.block_size, device=orig_weight.device),
+                R,
                 merged_weight
             )
             merged_weight = rearrange(merged_weight, 'k m ... -> (k m) ...')

-        if is_other_linear and orig_weight.shape[0] != orig_weight.shape[1]:
-            orig_weight = orig_weight.permute(1, 0)
+            #if is_other_linear and orig_weight.shape[0] != orig_weight.shape[1]:
+            #    orig_weight=orig_weight.permute(1, 0)

             updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight
             output_shape = orig_weight.shape
+        else:
+            # FIXME: skip MultiheadAttention for now
+            #up = self.lin_module.weight.to(orig_weight.device, dtype=orig_weight.dtype)
+            updown = torch.zeros([orig_weight.shape[1], orig_weight.shape[1]], device=orig_weight.device, dtype=orig_weight.dtype)
+            output_shape = (orig_weight.shape[1], orig_weight.shape[1])
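Two details in this hunk carry the math. First, oft_blocks - oft_blocks.transpose(1, 2) forces every block to be skew-symmetric, which the in-line comment flags as necessary for accurate results. Second, the rearrange/einsum pair is a memory-friendly way of multiplying the weight by the block-diagonal matrix assembled from the blocks. A small equivalence check under those assumptions (illustrative shapes; requires einops):

import torch
from einops import rearrange

k, n, i = 3, 4, 10                        # num_blocks, block_size, in_dim
orig_weight = torch.randn(k * n, i)
blocks = torch.randn(k, n, n)
blocks = blocks - blocks.transpose(1, 2)  # skew-symmetric per block
R = blocks + torch.eye(n)                 # near-identity, multiplier folded in

# Path used in the diff: split rows into blocks, rotate, merge back.
w = rearrange(orig_weight, '(k n) ... -> k n ...', k=k, n=n)
merged = torch.einsum('k n m, k n ... -> k m ...', R, w)
merged = rearrange(merged, 'k m ... -> (k m) ...')

# Equivalent dense form: a block-diagonal matrix times the whole weight.
dense = torch.block_diag(*R.transpose(1, 2)) @ orig_weight
print(torch.allclose(merged, dense, atol=1e-5))  # True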
@@ -123,10 +180,10 @@ class NetworkModuleOFT(network.NetworkModule):
     def calc_updown(self, orig_weight):
         multiplier = self.multiplier() * self.calc_scale()
-        if self.is_kohya:
-            return self.calc_updown_kohya(orig_weight, multiplier)
-        else:
-            return self.calc_updown_kb(orig_weight, multiplier)
+        #if self.is_kohya:
+        #    return self.calc_updown_kohya(orig_weight, multiplier)
+        #else:
+        return self.calc_updown_kb(orig_weight, multiplier)

     # override to remove the multiplier/scale factor; it's already multiplied in get_weight
     def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None):
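With this hunk the refactor lands: calc_updown no longer branches on self.is_kohya, so both the kohya OFT and LyCORIS diag-oft formats flow through calc_updown_kb, and the old branch survives only as comments. One property of the R = R * multiplier + torch.eye(...) formulation used there is that the multiplier interpolates the rotation toward the identity, so the delta vanishes as the multiplier goes to zero. A sketch with made-up shapes:

import torch

n = 4
w = torch.randn(n, 6)
Q = torch.randn(n, n)
Q = Q - Q.T                            # skew-symmetric block

def updown(multiplier: float) -> torch.Tensor:
    R = Q * multiplier + torch.eye(n)  # identity when multiplier == 0
    return R.T @ w - w

print(updown(0.0).abs().max())         # tensor(0.) -- no change at strength 0
print(updown(1.0).abs().max())         # full-strength delta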