novelai-storage / Basedformer / Commits / 7eebf8ad

Commit 7eebf8ad authored Feb 20, 2022 by novelailab

    add more stuff

parent 4a13de3a

Showing 4 changed files with 278 additions and 48 deletions
    hfport.py      +65   -0
    main.py        +107  -42
    test.py        +102  -3
    test_pyfra.py  +4    -3
hfport.py  0 → 100644
from main import *

state_dict = SplitCheckpoint("/home/xuser/models/j6b_ckpt_14001", device="cpu")

# ORIGINAL
'''
transformer.ln_f.weight
transformer.ln_f.bias
lm_head.weight
lm_head.bias
transformer.h.9.ln_1.weight
transformer.h.9.ln_1.bias
transformer.h.9.mlp.c_proj.weight
transformer.h.9.mlp.c_proj.bias
transformer.h.9.mlp.c_fc.weight
transformer.h.9.mlp.c_fc.bias
transformer.h.9.attn.attention.out_proj.weight
transformer.h.9.attn.attention.k_proj.weight
transformer.h.9.attn.attention.v_proj.weight
transformer.h.9.attn.attention.q_proj.weight
transformer.wte.weight
'''

new_state_dict = {}
module_map = {
    "ln_1": "ln_preattn",
    "mlp.c_proj": "ff.ff2",
    "mlp.c_fc": "ff.ff1",
    "attn.attention.out_proj": "attn.out_proj",
    "attn.attention.k_proj": "attn.k_proj",
    "attn.attention.v_proj": "attn.v_proj",
    "attn.attention.q_proj": "attn.q_proj",
    "wte": "vocab_embed",
    'ln_f': 'ln_final',
    'lm_head': 'lm_head',
}

print(type(state_dict))
for key in state_dict.keys():
    dotlist = key.split('.')
    if len(dotlist) > 3:
        layer = dotlist[2]
        for x in module_map:
            if x in key:
                new_state_dict[f"layers.{layer}.{module_map[x]}.{dotlist[-1]}"] = state_dict[key]
                print(f"{key} -> layers.{layer}.{module_map[x]}.{dotlist[-1]}")
    else:
        for x in module_map:
            if x in key:
                new_state_dict[f"{module_map[x]}.{dotlist[-1]}"] = state_dict[key]
                print(f"{key} -> {module_map[x]}.{dotlist[-1]}")

#print(new_state_dict)

def save(state_dict, path):
    try:
        os.mkdir(path)
    except:
        pass
    checkpoint = {}
    for i, x in enumerate(state_dict.items()):
        checkpoint[x[0]] = f"{path}/b{i}.pt"
        torch.save(x[1], f"{path}/b{i}.pt")
    torch.save(checkpoint, f"{path}/m.pt")

save(new_state_dict, "models/6b")
\ No newline at end of file
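A quick way to sanity-check the converted checkpoint is to read back the index that save() writes: m.pt maps each parameter name to its per-tensor file b{i}.pt. A minimal sketch, assuming only the layout produced by save() above:

import torch

meta = torch.load("models/6b/m.pt")                 # name -> "models/6b/b{i}.pt"
for name, tensor_path in list(meta.items())[:3]:    # peek at the first few entries
    t = torch.load(tensor_path, map_location="cpu")
    print(name, tuple(t.shape))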
main.py
@@ -8,6 +8,14 @@ except ImportError:
     from collections import MutableMapping
 import os
 from pathlib import Path
+import math
+
+def defaults():
+    # Easily accessible defaults
+    D_LAYER = GPTLayer
+    D_ATTN = SelfAttention
+    D_FF = FeedForward
+    D_ACT = gelu_new
 
 def no_init(loading_code):
     def dummy(self):
@@ -67,6 +75,9 @@ class SplitCheckpoint(MutableMapping):
 #TODO: Might change with non einsum functions?
+def gelu_new(x):
+    return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))
+
 def fixed_pos_embedding(dim=None, seq_len=None, x=None):
     if x is None:
         x = torch.empty(0)
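The gelu_new added here is the standard tanh approximation of GELU; newer PyTorch releases (1.12 and later) expose the same approximation directly, which makes for an easy sanity check. A sketch, not part of the commit:

import math
import torch
import torch.nn.functional as F

x = torch.randn(4, 8)
manual = 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))
builtin = F.gelu(x, approximate="tanh")              # available in PyTorch >= 1.12
print(torch.allclose(manual, builtin, atol=1e-6))    # expected: True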
@@ -84,19 +95,7 @@ def apply_rotary_pos_emb(x, sincos, offset=0):
     sin, cos = map(lambda t: repeat(t[offset:x.shape[1]+offset,:], "n d -> () n () (d j)", j=2), sincos)
     return (x * cos) + (rotate_every_two(x) * sin)
 
-class FeedForward(nn.Module):
-    def __init__(self, dim=768, hidden_dim=768*4, activation=nn.GELU):
-        self.ff1 = nn.Linear(dim, hidden_dim)
-        self.ff2 = nn.Linear(hidden_dim, dim)
-        self.activation = activation()
-
-    def forward(self, x):
-        x = self.ff1(x)
-        x = self.activation(x)
-        x = self.ff2(x)
-        return x
-
-def _split_heads(self, tensor, num_heads, attn_head_size, rotary):
+def _split_heads(tensor, num_heads, attn_head_size, rotary):
     """
     Splits hidden_size dim into attn_head_size and num_heads
     """
@@ -111,7 +110,7 @@ def _split_heads(self, tensor, num_heads, attn_head_size, rotary):
     else:
         raise ValueError(f"Input tensor rank should be one of [4, 5], but is: {len(tensor.shape)}")
 
-def _merge_heads(self, tensor, num_heads, attn_head_size):
+def _merge_heads(tensor, num_heads, attn_head_size):
     """
     Merges attn_head_size dim and num_attn_heads dim into hidden_size
     """
@@ -143,7 +142,8 @@ def _attn(query, key, value, causal_mask, masked_bias,
 class SelfAttention(nn.Module):
     # Code copied from HF, might want to sanity check later.
-    def __init__(self, hidden_dim, n_head):
+    def __init__(self, hidden_dim, n_head, device="cuda", dtype=torch.float16):
         super(SelfAttention, self).__init__()
         max_positions = 2049
         bias = torch.tril(torch.ones((max_positions, max_positions), dtype=torch.uint8, requires_grad=False)).view(1, 1, max_positions, max_positions).bool()
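The bias buffer built above is a lower-triangular causal mask; with a tiny max_positions it looks like this (illustration only, the real buffer uses 2049):

import torch

max_positions = 4
bias = torch.tril(torch.ones((max_positions, max_positions), dtype=torch.uint8)).view(1, 1, max_positions, max_positions).bool()
print(bias[0, 0])
# tensor([[ True, False, False, False],
#         [ True,  True, False, False],
#         [ True,  True,  True, False],
#         [ True,  True,  True,  True]])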
@@ -154,10 +154,10 @@ class SelfAttention(nn.Module):
         self.register_buffer("bias", bias)
         self.register_buffer("masked_bias", torch.tensor(-1e9, requires_grad=False))
         attn_bias = False
-        self.k_proj = nn.Linear(self.hidden_dim, self.hidden_dim, bias=attn_bias)
-        self.v_proj = nn.Linear(self.hidden_dim, self.hidden_dim, bias=attn_bias)
-        self.q_proj = nn.Linear(self.hidden_dim, self.hidden_dim, bias=attn_bias)
-        self.out_proj = nn.Linear(self.hidden_dim, self.hidden_dim, bias=attn_bias)
+        self.k_proj = nn.Linear(self.hidden_dim, self.hidden_dim, bias=attn_bias, device=device, dtype=dtype)
+        self.v_proj = nn.Linear(self.hidden_dim, self.hidden_dim, bias=attn_bias, device=device, dtype=dtype)
+        self.q_proj = nn.Linear(self.hidden_dim, self.hidden_dim, bias=attn_bias, device=device, dtype=dtype)
+        self.out_proj = nn.Linear(self.hidden_dim, self.hidden_dim, bias=attn_bias, device=device, dtype=dtype)
         self.rotary_dim = self.head_dim
         # TODO: handle rotary
         sin, cos = fixed_pos_embedding(dim=self.rotary_dim, seq_len=max_positions)
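The point of threading device and dtype through these constructors is that nn.Linear can allocate its parameters directly on the target device in the target precision, instead of building float32 CPU weights and converting afterwards. A sketch of the two patterns (assumes a recent PyTorch with factory kwargs and an available CUDA device):

import torch
import torch.nn as nn

# as the diff now does: allocate fp16 weights on the GPU in one step
lin = nn.Linear(4096, 4096, bias=False, device="cuda", dtype=torch.float16)

# the pre-diff pattern: float32 CPU allocation, then a copy/convert
lin_old = nn.Linear(4096, 4096, bias=False).to("cuda", torch.float16)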
@@ -175,8 +175,8 @@ class SelfAttention(nn.Module):
         offset = 0
-        key = self.apply_rotary_pos_emb(key, (self.sin, self.cos), offset=offset).to(key.dtype)
-        query = self.apply_rotary_pos_emb(query, (self.sin, self.cos), offset=offset).to(query.dtype)
+        key = apply_rotary_pos_emb(key, (self.sin, self.cos), offset=offset).to(key.dtype)
+        query = apply_rotary_pos_emb(query, (self.sin, self.cos), offset=offset).to(query.dtype)
         key = key.permute(0, 2, 1, 3)
         query = query.permute(0, 2, 1, 3)
@@ -185,21 +185,34 @@ class SelfAttention(nn.Module):
         causal_mask = self.bias[:, :, key_length - query_length:key_length, :key_length]
         x = _attn(
-            query, key, value, causal_mask, self.masked_bias, self.scale_attn
+            query, key, value, causal_mask, self.masked_bias, None, self.scale_attn
         )
-        x = _merge_heads(x, self.num_heads, self.head_dim)
+        x = _merge_heads(x, self.n_head, self.head_dim)
         x = self.out_proj(x)
         return x  # a, present, (attentions)
 
+class FeedForward(nn.Module):
+    def __init__(self, dim=768, hidden_dim=768*4, activation=nn.GELU, device="cuda", dtype=torch.float16):
+        super(FeedForward, self).__init__()
+        self.ff1 = nn.Linear(dim, hidden_dim, device=device, dtype=dtype)
+        self.ff2 = nn.Linear(hidden_dim, dim, device=device, dtype=dtype)
+        self.activation = activation
+
+    def forward(self, x):
+        x = self.ff1(x)
+        x = self.activation(x)
+        x = self.ff2(x)
+        return x
+
 class GPTLayer(nn.Module):
-    def __init__(self, attn=SelfAttention, ff=FeedForward, hidden_dim=768, n_head=4, eps=1e-6, activation=nn.GELU):
+    def __init__(self, attn=SelfAttention, ff=FeedForward, hidden_dim=768, n_head=4, eps=1e-6, activation=nn.GELU, device="cuda", dtype=torch.float16):
         super(GPTLayer, self).__init__()
         self.hidden_dim = hidden_dim
-        self.ln_preattn = nn.LayerNorm(hidden_dim, eps=eps)
         #self.ln_postattn = nn.LayerNorm(hidden_dim, eps=eps)
-        self.ff = ff(dim=hidden_dim, hidden_dim=hidden_dim*4, activation=activation)
-        self.attn = attn(hidden_dim=hidden_dim, n_head=n_head)
+        self.ln_preattn = nn.LayerNorm(hidden_dim, eps=eps, device=device, dtype=dtype)
+        self.ff = ff(dim=hidden_dim, hidden_dim=hidden_dim*4, activation=activation, device=device, dtype=dtype)
+        self.attn = attn(hidden_dim=hidden_dim, n_head=n_head, device=device, dtype=dtype)
 
     def forward(self, x, hypernetwork):
         residual = x
@@ -209,20 +222,21 @@ class GPTLayer(nn.Module):
         attn_out = self.attn(x)
         ff_out = self.ff(x)
-        x = residual + attn_out + ff_out + (hyper_out if hyper_out is not None else 0)
+        x = residual + ff_out + attn_out + (hyper_out if hypernetwork is not None else 0)
         return x
 
 # Can access and change every module from here, as both Layer class and ff and attn classes are passed from GPTModel.
 class GPTModel(nn.Module):
-    def __init__(self, hidden_dim=512, n_layer=12, n_head=4, vocab_dim=50400, eps=1e-4, activation=nn.GELU(), Layer=GPTLayer):
+    def __init__(self, hidden_dim=512, n_layer=12, n_head=4, vocab_dim=50400, eps=1e-4, activation=nn.GELU(), Layer=GPTLayer, device="cuda", dtype=torch.float16):
         super(GPTModel, self).__init__()
         self.hidden_dim = hidden_dim
-        self.vocab_embed = nn.Embedding(vocab_dim, self.hidden_dim)
-        self.ln_final = nn.LayerNorm(self.hidden_dim, eps=eps)
+        self.vocab_embed = nn.Embedding(vocab_dim, self.hidden_dim, device=device, dtype=dtype)
+        self.ln_final = nn.LayerNorm(self.hidden_dim, eps=eps, device=device, dtype=dtype)
         self.layers = nn.ModuleList([])
         for _ in range(n_layer):
-            self.layers.append(Layer(attn=SelfAttention, ff=FeedForward, hidden_dim=hidden_dim, n_head=n_head, eps=eps, activation=activation))
+            self.layers.append(Layer(attn=SelfAttention, ff=FeedForward, hidden_dim=hidden_dim, n_head=n_head, eps=eps, activation=activation, device=device, dtype=dtype))
         #TODO: Decouple more, maybe even init everything here, not sure. Not modular enough yet.
         #TODO: Do we want to pass a config object everywhere? I don't exactly like that but passing a lot of variables is a bit ugly too.
 
     def forward(self, x, hypernetwork=None):
         x = self.vocab_embed(x)
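With device and dtype now accepted by GPTModel, a tiny CPU configuration can be built for a quick smoke test. This is a sketch only; it assumes the forward signature exercised by test.py below and uses made-up small dimensions:

import torch

model = GPTModel(hidden_dim=64, n_layer=2, n_head=4, vocab_dim=1000, eps=1e-5,
                 activation=gelu_new, Layer=GPTLayer, device="cpu", dtype=torch.float32)
out = model(torch.zeros((1, 16), dtype=torch.long))
print(out.shape)   # expected: torch.Size([1, 16, 64])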
@@ -232,9 +246,19 @@ class GPTModel(nn.Module):
         x = self.ln_final(x)
         return x
 
-    def load(self, path):
-        state_dict = SplitCheckpoint(path, device="cuda")
-        self.load_state_dict(state_dict)
+    @classmethod
+    def load(cls, config, path=None, state_dict=None):
+        if path:
+            state_dict = SplitCheckpoint(path, device="cuda")
+        model = no_init(lambda: cls(**config))
+        model.load_state_dict(state_dict, strict=False)
+        return model
+
+    @classmethod
+    def init(cls, config):
+        model = no_init(lambda: cls(**config))
+        return model
 
     def save(self, path):
         try:
             os.mkdir(path)
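The reworked loader is a classmethod that constructs the module inside no_init and accepts either a checkpoint path or an already-opened state dict. Hypothetical usage; config_6b stands in for the config dict used by load_gpt_j below:

sd = SplitCheckpoint("models/6b", device="cuda")
model = GPTModel.load(config_6b, state_dict=sd)
# or, equivalently, straight from the checkpoint directory:
model = GPTModel.load(config_6b, path="models/6b")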
@@ -243,6 +267,7 @@ class GPTModel(nn.Module):
         for i, x in enumerate(self.state_dict().items()):
             checkpoint[x[0]] = f"{path}/b{i}.pt"
             torch.save(x[1], f"{path}/b{i}.pt")
         torch.save(checkpoint, f"{path}/m.pt")
 
 # TODO: Do we want to have the LM head as a seperate Class? Or just a function? I think we might be better off with a function here and maybe
 # also for the self attention, we can just write a function that gets fed in the q, k, v.
@@ -253,17 +278,57 @@ class GPTLM(nn.Module):
     def forward(self, x):
         return
 
-def load_gpt_j(path):
+def load_gpt_j(state_dict=None, path=None):
     config = {
         "n_layer": 28,
         "n_head": 16,
         "hidden_dim": 4096,
         "vocab_dim": 50400,
         "eps": 1e-5,
-        "activation": nn.GELU,
+        "activation": gelu_new,
         "Layer": GPTLayer,
     }
+    model = GPTModel.load(config, path=path)
+    return model
+
+def init_6b():
+    config = {
+        "n_layer": 28,
+        "n_head": 16,
+        "hidden_dim": 4096,
+        "vocab_dim": 50400,
+        "eps": 1e-5,
+        "activation": gelu_new,
+        "Layer": GPTLayer,
+    }
+    model = GPTModel(**config)
+    return model
+
+def init_125m():
+    config = {
+        "n_layer": 12,
+        "n_head": 12,
+        "hidden_dim": 768,
+        "vocab_dim": 50400,
+        "eps": 1e-5,
+        "activation": gelu_new,
+        "Layer": GPTLayer,
+    }
+    model = GPTModel.init(config)
+    return model
+
+def init_1_3b():
+    config = {
+        "n_layer": 24,
+        "n_head": 16,
+        "hidden_dim": 2048,
+        "vocab_dim": 50400,
+        "eps": 1e-5,
+        "activation": gelu_new,
+        "Layer": GPTLayer,
+    }
-    model = no_init(lambda: GPTModel(**config))
-    model.load(path)
-    return model
\ No newline at end of file
+    model = GPTModel(**config)
+    return model
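The names of the init_* helpers line up with a rough parameter count for each config (ignoring biases, LayerNorms, and any LM head); a back-of-the-envelope check, not part of the commit:

def approx_params(n_layer, hidden_dim, vocab_dim=50400):
    attn = 4 * hidden_dim * hidden_dim        # q, k, v, out projections
    ff = 2 * hidden_dim * (4 * hidden_dim)    # ff1 and ff2
    return n_layer * (attn + ff) + vocab_dim * hidden_dim

print(approx_params(28, 4096) / 1e9)   # ~5.8 billion  -> init_6b
print(approx_params(12, 768) / 1e6)    # ~124 million  -> init_125m
print(approx_params(24, 2048) / 1e9)   # ~1.3 billion  -> init_1_3b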
test.py
from main import *
import time

state_dict = SplitCheckpoint("'/home/xuser/models/j6b_ckpt_14001", device="cuda")
for x in state_dict:
    print(x)

from time import perf_counter, perf_counter_ns
import numpy as np
from tqdm import tqdm
from contextlib import contextmanager
#replicating timeit magic function of ipython
def timeit(func, r=1, n=5, quiet=False, function=None, do_tqdm=False, first=True):
    precision = 'ns'
    r_arr = np.empty([2, r])  # [0] = mean, [1] = std
    if function:
        func.__name__ = function.__name__
    for i in tqdm(range(r)) if do_tqdm else range(r):
        n_arr = np.empty(n)
        for k in range(n):
            start = perf_counter_ns()
            func()
            n_arr[k] = perf_counter_ns() - start
        if not first:
            # delete the first element from n_arr numpy array
            n_arr = np.delete(n_arr, 0)
        r_arr[0, i] = np.mean(n_arr)
        r_arr[1, i] = np.std(n_arr)
    best = r_arr[:, np.argmin(r_arr[0])]  # [0] = mean, [1] = std
    #check if best[0] bigger than 1ms in numpy
    if best[0] < 1e3:
        precision = 'ns'
    elif best[0] >= 1e9:
        print('b')
        best[0] = best[0] * 1e-9
        best[1] = best[1] * 1e-9
        precision = 's'
    elif best[0] >= 1e6:
        best[0] = best[0] * 1e-6
        best[1] = best[1] * 1e-6
        precision = 'ms'
    elif best[0] >= 1e3:
        precision = 'μs'
        best[0] = best[0] * 1e-3
        best[1] = best[1] * 1e-3
    if not quiet:
        if precision == 'ns':
            print(f"{func.__name__}: {best[0]:.0f}{precision} ± {best[1]:.0f}{precision} per loop (mean ± std. dev. of {str(r)} runs, {str(n)} loops each)")
        if precision == 'μs':
            print(f"{func.__name__}: {best[0]:.2f}{precision} ± {best[1]:.2f}{precision} per loop (mean ± std. dev. of {str(r)} runs, {str(n)} loops each)")
        elif precision == 'ms':
            print(f"{func.__name__}: {best[0]:.2f}{precision} ± {best[1]:.2f}{precision} per loop (mean ± std. dev. of {str(r)} runs, {str(n)} loops each)")
        elif precision == 's':
            print(f"{func.__name__}: {best[0]:.4f}{precision} ± {best[1]:.4f}{precision} per loop (mean ± std. dev. of {str(r)} runs, {str(n)} loops each)")
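The helper mirrors IPython's %timeit: r outer runs of n timed calls each, reporting the best run's mean ± std in an auto-scaled unit. Hypothetical usage on a toy workload, with illustrative output:

import numpy as np

timeit(lambda: np.random.rand(512, 512) @ np.random.rand(512, 512), r=3, n=10)
# <lambda>: 3.41ms ± 0.12ms per loop (mean ± std. dev. of 3 runs, 10 loops each)   (example output)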
with torch.no_grad():
    model = init_125m().cuda().half()
    '''
    timeit(lambda: model(torch.zeros((1, 2048)).long().cuda()), n=20, first=False)
    module = torch.jit.trace(model, torch.zeros((1, 2048)).long().cuda())
    torch.jit.optimize_for_inference(module)
    timeit(lambda: module(torch.zeros((1, 2048)).long().cuda()), n=20, first=False)
    timeit(lambda: model(torch.zeros((1, 1000)).long().cuda()), n=20, first=False)
    module = torch.jit.trace(model, torch.zeros((1, 1000)).long().cuda())
    torch.jit.optimize_for_inference(module)
    timeit(lambda: module(torch.zeros((1, 1000)).long().cuda()), n=20, first=False)
    '''
    module = torch.jit.trace(model, torch.zeros((1, 2048)).long().cuda())
    torch.jit.optimize_for_inference(module)
    static_input = torch.zeros((1, 2048), device='cuda').long()
    static_out = torch.randn((1, 2048, 2048), device='cuda').half()
    timeit(lambda: module(static_input), n=20, first=False)
    s = torch.cuda.Stream()
    s.wait_stream(torch.cuda.current_stream())
    with torch.cuda.stream(s):
        for i in range(3):
            output = module(torch.randint(0, 50000, (1, 2048), device='cuda').long())
    torch.cuda.current_stream().wait_stream(s)
    g = torch.cuda.CUDAGraph()
    with torch.cuda.graph(g, stream=s):
        static_out = module(static_input)
    real_inputs = [torch.randint(0, 50000, (1, 2048), device='cuda').long() for _ in range(100)]
    t = time.perf_counter()
    for data in real_inputs:
        #print(data[0, :20])
        static_input.copy_(data)
        #timeit(lambda: g.replay(), n=100, first=True)
        g.replay()
        #print(static_out[0, 0, :20])
    torch.cuda.synchronize()
    print(f"{perf_counter() - t}s")

#for data in real_inputs:
#    print(model(data)[0, 0, :20])
test_pyfra.py
@@ -7,14 +7,14 @@ dry = False
 config_obj = KubeConfig()
 config_obj.set_name(name)
-config_obj.set_gpu(gpu_name=GPU.A40, amount=1)
+config_obj.set_gpu(gpu_name=GPU.RTX_A5000, amount=1)
 config_obj.set_ram(16)
 config_obj.set_cpu(4)
 #config_obj.set_cpu_only()
 config_obj.dry_run(dry)
 config_obj.print_information()
-config_obj.create_deployment(overwrite=True)
-config_obj.create_service(overwrite=True)
+# config_obj.create_deployment(overwrite=True)
+# config_obj.create_service(overwrite=True)
 remote = config_obj.get_pyfra_remote()
 env1 = remote.env('noname', python_version=None)
@@ -25,5 +25,6 @@ models = {'6b': '/home/xuser/models/j6b_ckpt_14001', '20b': '/home/xuser/diffusi
 path = env1.path('/home/xuser/diffusionstorage/workspace/kuru/basedformer')
 env1.sh('pip install /home/xuser/hugessd/pytorch/torch-1.10.1+cu113-cp38-cp38-linux_x86_64.whl')
 env1.sh('pip install einops numpy')
+env1.sh('pip install tqdm')
 with always_rerun():
     path.sh(f'python3 test.py')
\ No newline at end of file