Stable Diffusion Webui

Commit eb667e71 authored Nov 15, 2023 by v0xie

feat: LyCORIS/kohya OFT network support

parent d6d0b22e
Showing 1 changed file with 26 additions and 82 deletions

extensions-builtin/Lora/network_oft.py  +26 -82
@@ -11,8 +11,8 @@ class ModuleTypeOFT(network.ModuleType):
 
         return None
 
-# adapted from kohya-ss' implementation https://github.com/kohya-ss/sd-scripts/blob/main/networks/oft.py
-# and KohakuBlueleaf's implementation https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/diag_oft.py
+# Supports both kohya-ss' implementation of COFT https://github.com/kohya-ss/sd-scripts/blob/main/networks/oft.py
+# and KohakuBlueleaf's implementation of OFT/COFT https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/diag_oft.py
 class NetworkModuleOFT(network.NetworkModule):
     def __init__(self, net: network.Network, weights: network.NetworkWeights):
 
@@ -25,117 +25,61 @@ class NetworkModuleOFT(network.NetworkModule):
         if "oft_blocks" in weights.w.keys():
             self.is_kohya = True
             self.oft_blocks = weights.w["oft_blocks"] # (num_blocks, block_size, block_size)
-            self.alpha = weights.w["alpha"]
+            self.alpha = weights.w["alpha"] # alpha is constraint
             self.dim = self.oft_blocks.shape[0] # lora dim
-            #self.oft_blocks = rearrange(self.oft_blocks, 'k m ... -> (k m) ...')
+        # LyCORIS
         elif "oft_diag" in weights.w.keys():
             self.is_kohya = False
-            self.oft_blocks = weights.w["oft_diag"] # (num_blocks, block_size, block_size)
-            # alpha is rank if alpha is 0 or None
-            if self.alpha is None:
-                pass
-            self.dim = self.oft_blocks.shape[1] # FIXME: almost certainly incorrect, assumes tensor is shape [*, m, n]
-        else:
-            raise ValueError("oft_blocks or oft_diag must be in weights dict")
+            self.oft_blocks = weights.w["oft_diag"]
+            # self.alpha is unused
+            self.dim = self.oft_blocks.shape[1] # (num_blocks, block_size, block_size)
 
         is_linear = type(self.sd_module) in [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear]
         is_conv = type(self.sd_module) in [torch.nn.Conv2d]
-        is_other_linear = type(self.sd_module) in [torch.nn.MultiheadAttention]
+        is_other_linear = type(self.sd_module) in [torch.nn.MultiheadAttention] # unsupported
 
         if is_linear:
             self.out_dim = self.sd_module.out_features
-        elif is_other_linear:
-            self.out_dim = self.sd_module.embed_dim
         elif is_conv:
             self.out_dim = self.sd_module.out_channels
-        else:
-            raise ValueError("sd_module must be Linear or Conv")
+        elif is_other_linear:
+            self.out_dim = self.sd_module.embed_dim
 
         if self.is_kohya:
             self.constraint = self.alpha * self.out_dim
-            self.num_blocks, self.block_size = factorization(self.out_dim, self.dim)
+            self.num_blocks = self.dim
+            self.block_size = self.out_dim // self.dim
         else:
             self.constraint = None
             self.block_size, self.num_blocks = factorization(self.out_dim, self.dim)
 
-    def merge_weight(self, R_weight, org_weight):
-        R_weight = R_weight.to(org_weight.device, dtype=org_weight.dtype)
-        if org_weight.dim() == 4:
-            weight = torch.einsum("oihw, op -> pihw", org_weight, R_weight)
-        else:
-            weight = torch.einsum("oi, op -> pi", org_weight, R_weight)
-        return weight
-
-    def get_weight(self, oft_blocks, multiplier=None):
-        if self.constraint is not None:
-            constraint = self.constraint.to(oft_blocks.device, dtype=oft_blocks.dtype)
-
-        block_Q = oft_blocks - oft_blocks.transpose(1, 2)
-        norm_Q = torch.norm(block_Q.flatten())
-        if self.constraint is not None:
-            new_norm_Q = torch.clamp(norm_Q, max=constraint)
-        else:
-            new_norm_Q = norm_Q
-        block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8))
-        m_I = torch.eye(self.num_blocks, device=oft_blocks.device).unsqueeze(0).repeat(self.block_size, 1, 1)
-        #m_I = torch.eye(self.block_size, device=oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1)
-        block_R = torch.matmul(m_I + block_Q, (m_I - block_Q).inverse())
+    def calc_updown_kb(self, orig_weight, multiplier):
+        oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype)
+        oft_blocks = oft_blocks - oft_blocks.transpose(1, 2) # ensure skew-symmetric orthogonal matrix
 
-        block_R_weighted = multiplier * block_R + (1 - multiplier) * m_I
-        R = torch.block_diag(*block_R_weighted)
-        return R
+        R = oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype)
+        R = R * multiplier + torch.eye(self.block_size, device=orig_weight.device)
 
-    def calc_updown_kohya(self, orig_weight, multiplier):
-        R = self.get_weight(self.oft_blocks, multiplier)
-        merged_weight = self.merge_weight(R, orig_weight)
+        # This errors out for MultiheadAttention, might need to be handled up-stream
+        merged_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size)
+        merged_weight = torch.einsum(
+            'k n m, k n ... -> k m ...',
+            R,
+            merged_weight
+        )
+        merged_weight = rearrange(merged_weight, 'k m ... -> (k m) ...')
 
         updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight
         output_shape = orig_weight.shape
-        orig_weight = orig_weight
         return self.finalize_updown(updown, orig_weight, output_shape)
 
-    def calc_updown_kb(self, orig_weight, multiplier):
-        is_other_linear = type(self.sd_module) in [torch.nn.MultiheadAttention]
-
-        if not is_other_linear:
-            oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype)
-
-            # ensure skew-symmetric matrix
-            oft_blocks = oft_blocks - oft_blocks.transpose(1, 2)
-
-            R = oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype)
-            R = R * multiplier + torch.eye(self.block_size, device=orig_weight.device)
-
-            merged_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size)
-            merged_weight = torch.einsum(
-                'k n m, k n ... -> k m ...',
-                R,
-                merged_weight
-            )
-            merged_weight = rearrange(merged_weight, 'k m ... -> (k m) ...')
-
-            updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight
-            output_shape = orig_weight.shape
-        else:
-            # FIXME: skip MultiheadAttention for now
-            #up = self.lin_module.weight.to(orig_weight.device, dtype=orig_weight.dtype)
-            updown = torch.zeros([orig_weight.shape[1], orig_weight.shape[1]], device=orig_weight.device, dtype=orig_weight.dtype)
-            output_shape = (orig_weight.shape[1], orig_weight.shape[1])
-
-        return self.finalize_updown(updown, orig_weight, output_shape)
-
     def calc_updown(self, orig_weight):
-        # if alpha is a very small number as in coft, calc_scale will return a almost zero number so we ignore it
-        #multiplier = self.multiplier() * self.calc_scale()
+        # if alpha is a very small number as in coft, calc_scale() will return a almost zero number so we ignore it
        multiplier = self.multiplier()
         return self.calc_updown_kb(orig_weight, multiplier)
 
     # override to remove the multiplier/scale factor; it's already multiplied in get_weight
     def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None):
         #return super().finalize_updown(updown, orig_weight, output_shape, ex_bias)
 
         if self.bias is not None:
             updown = updown.reshape(self.bias.shape)
             updown += self.bias.to(orig_weight.device, dtype=orig_weight.dtype)
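
Note on the new implementation (reader commentary, not part of the commit): the new calc_updown_kb reduces both weight formats to a single block-diagonal rotation of the original weight. The standalone sketch below reproduces the same tensor steps with toy shapes; the variable names, shapes, and multiplier value are chosen for illustration only.

import torch
from einops import rearrange

# Toy setup: a Linear weight whose out_dim rows are split into num_blocks
# groups of block_size rows each (num_blocks * block_size == out_dim).
out_dim, in_dim = 8, 4
num_blocks, block_size = 2, 4
multiplier = 1.0

orig_weight = torch.randn(out_dim, in_dim)
oft_blocks = torch.randn(num_blocks, block_size, block_size)

# Same steps as the new calc_updown_kb:
Q = oft_blocks - oft_blocks.transpose(1, 2)   # skew-symmetric blocks: Q^T = -Q
R = Q * multiplier + torch.eye(block_size)    # per-block rotation blended with identity

# Rotate each group of weight rows by its block.
w = rearrange(orig_weight, '(k n) ... -> k n ...', k=num_blocks, n=block_size)
w = torch.einsum('k n m, k n ... -> k m ...', R, w)
w = rearrange(w, 'k m ... -> (k m) ...')

updown = w - orig_weight                      # the delta added onto the model weight
print(updown.shape)                           # torch.Size([8, 4])

One design consequence visible in the diff: I + multiplier*Q is only a first-order approximation of an orthogonal matrix, whereas the deleted get_weight built the exact Cayley transform (I + Q)(I - Q)^-1 with the COFT norm constraint. The new path trades that exactness for one einsum shared by both the kohya and LyCORIS formats.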
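A second point worth illustrating (again commentary, with assumptions flagged): the two branches of __init__ slice the same out_dim into very differently shaped blocks. The factorization below is a simplified stand-in for lyco_helpers.factorization, whose real factor-picking strategy may differ; only the contrast between the two layouts is the point.

# kohya-ss OFT: `dim` is the number of blocks, so blocks are large.
out_dim, dim = 320, 4
num_blocks = dim                # 4 blocks
block_size = out_dim // dim     # ... each 80x80

# LyCORIS diag-oft: the layout instead comes from factorizing out_dim.
def factorization(dimension: int, factor: int) -> tuple[int, int]:
    # Stand-in (assumed behavior): the largest divisor of `dimension`
    # not exceeding `factor`, paired with its cofactor.
    m = max(d for d in range(1, factor + 1) if dimension % d == 0)
    return m, dimension // m

block_size, num_blocks = factorization(out_dim, dim)
print(num_blocks, block_size)   # 80 blocks, each 4x4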