novelai-storage / Basedformer · Commits

Commit a28f0299 (parent a2b7dffb)
Authored Jun 13, 2022 by FIRST_NAME LAST_NAME

Commit message:

    push

Showing 4 changed files, with 190 additions and 138 deletions (+190, -138):
basedformer/models/base_lm.py    +1    -0
basedformer/optimizer.py         +12   -0
basedformer/utils.py             +18   -0
finetune.py                      +159  -138
basedformer/models/base_lm.py (view file @ a28f0299)
```diff
@@ -17,6 +17,7 @@ class BaseModel(nn.Module):
         self.ln_final = nn.LayerNorm(self.hidden_dim, eps=config.eps, device=config.device, dtype=config.dtype)
         self.layers = nn.ModuleList([])
         self.lm_head = nn.Linear(config.hidden_dim, config.vocab_dim, bias=True)
+        self.total_params = sum(p.numel() for p in self.parameters())
         for i in range(config.n_layer):
             config.layer_idx = i
             self.layers.append(
```
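The single added line caches the model's total parameter count at construction time. As a minimal sketch of what that expression computes (the toy two-layer module below is hypothetical, not part of basedformer):

```python
import torch.nn as nn

# numel() gives the element count of each parameter tensor, so summing
# over parameters() yields the module's total parameter count.
model = nn.Sequential(nn.Linear(16, 32), nn.Linear(32, 8))
total_params = sum(p.numel() for p in model.parameters())
print(total_params)  # 16*32 + 32 + 32*8 + 8 = 808
```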
basedformer/optimizer.py (view file @ a28f0299)
```diff
@@ -6,6 +6,7 @@ from dotmap import DotMap
 import pickle
 import os
 from pathlib import Path
+from torch.distributed.optim import ZeroRedundancyOptimizer
 
 #Based Optimizer
 def lr_schedule(step, warmup_steps, anneal_steps, lr, end_lr, cosine_warmup=False):
```
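Only the signature of lr_schedule is visible in this hunk; its body is elided. A common shape for this signature is a warmup ramp to lr over warmup_steps (cosine-shaped when cosine_warmup is set), then a cosine anneal down to end_lr over anneal_steps. The implementation below is an assumed reconstruction for illustration, not the commit's actual code:

```python
import math

# Assumed reconstruction of lr_schedule; the real body is not shown in
# this diff. Warmup ramp first, then cosine anneal from lr to end_lr.
def lr_schedule(step, warmup_steps, anneal_steps, lr, end_lr, cosine_warmup=False):
    if step < warmup_steps:
        frac = step / max(1, warmup_steps)
        if cosine_warmup:
            frac = 0.5 * (1.0 - math.cos(math.pi * frac))  # cosine-shaped ramp
        return lr * frac
    progress = min(1.0, (step - warmup_steps) / max(1, anneal_steps))
    return end_lr + 0.5 * (lr - end_lr) * (1.0 + math.cos(math.pi * progress))
```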
```diff
@@ -61,6 +62,17 @@ class BasedOptimizer:
             import bitsandbytes as bnb
             self.optimizer = bnb.optim.Adam8bit(self.parameters, lr=0, weight_decay=self.weight_decay, betas=(self.beta1, self.beta2), eps=self.eps)
+        elif self.optimizer_name == "zero1":
+            import bitsandbytes as bnb
+            self.optimizer = ZeroRedundancyOptimizer(
+                self.parameters,
+                optimizer_class=bnb.optim.Adam8bit,
+                lr=0,
+                weight_decay=self.weight_decay,
+                betas=(self.beta1, self.beta2),
+                eps=self.eps,
+            )
+
         elif self.optimizer_name == "adafactor":
             try:
                 from transformers.optimization import Adafactor
```
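The new "zero1" branch wraps bitsandbytes' 8-bit Adam in PyTorch's ZeroRedundancyOptimizer, which shards optimizer state (the Adam moments) across data-parallel ranks instead of replicating it on every GPU, i.e. ZeRO stage 1. Note the optimizer is constructed with lr=0, presumably because lr_schedule sets the learning rate per step. A hedged sketch of the usual setup, assuming a torchrun-launched process group and a DDP-wrapped model; this is illustrative, not basedformer's actual training loop:

```python
import torch
import torch.distributed as dist
import torch.nn as nn
from torch.distributed.optim import ZeroRedundancyOptimizer
from torch.nn.parallel import DistributedDataParallel as DDP

# Assumes the usual env vars set by torchrun (RANK, WORLD_SIZE, ...).
dist.init_process_group(backend="nccl")
model = DDP(nn.Linear(1024, 1024).cuda())

# Each rank materializes only its shard of the optimizer state. The
# commit passes bnb.optim.Adam8bit as optimizer_class; plain AdamW is
# used here so the sketch runs without bitsandbytes installed.
optimizer = ZeroRedundancyOptimizer(
    model.parameters(),
    optimizer_class=torch.optim.AdamW,
    lr=1e-4,
    weight_decay=0.01,
)
```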
basedformer/utils.py (view file @ a28f0299)
```diff
@@ -30,6 +30,24 @@ class FbDataset(data.Dataset):
         data = torch.tensor(self.npz[nth].astype(np.int64))
         return (data[:-1], data[1:])
 
+class ShardedDataset(data.Dataset):
+    def __init__(self, block_size, map_file, world_size=1, rank=0, skip=0):
+        self.npz = np.memmap(map_file, mode="r", dtype="uint16").reshape((-1, block_size)) #might want to pad later
+        self.npz = self.npz[:self.npz.shape[0] - (self.npz.shape[0] % world_size)] #shard
+        self.npz = self.npz[rank::world_size]
+        self.samples = self.npz.shape[0]
+        self.skip = skip
+
+    def __len__(self):
+        return self.samples
+
+    def __getitem__(self, _id):
+        nth = _id + self.skip
+        data = torch.tensor(self.npz[nth].astype(np.int64))
+        return (data[:-1], data[1:])
+
 # Make loading models faster by not letting pytorch initialize the weights.
 # Usage: no_init(lambda: load_model(...))
```
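ShardedDataset memory-maps one uint16 token file per job, truncates it so the row count divides evenly by world_size, then stripes rows across ranks with [rank::world_size]; each sample is a next-token pair (tokens[:-1], tokens[1:]). A usage sketch, assuming an initialized process group; the file path and block size are placeholder values:

```python
import torch.distributed as dist
from torch.utils import data
from basedformer.utils import ShardedDataset

dist.init_process_group(backend="nccl")
rank, world_size = dist.get_rank(), dist.get_world_size()

# map_file and block_size are placeholders for illustration.
dataset = ShardedDataset(block_size=2049, map_file="tokens.map",
                         world_size=world_size, rank=rank)
loader = data.DataLoader(dataset, batch_size=8, shuffle=False)

for inputs, targets in loader:
    # inputs/targets each have length block_size - 1, offset by one token.
    break
```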
finetune.py (view file @ a28f0299)

(This diff is collapsed on the page; the +159/-138 changes are not shown.)