Commit a2b7dffb authored by novelailab

update

parent fd387a42
from .models import gptj

MODEL_MAP = {
    "gptj": gptj.GPTJModel,
}

def get_model(model_name: str):
    return MODEL_MAP[model_name]
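The new get_model registry resolves a model class from its string name; "gptj" is the only entry registered in this commit. A minimal usage sketch (the construction step and config argument are assumptions, not shown in the diff):

# Hypothetical caller of the registry above; only "gptj" is registered in this commit.
model_cls = get_model("gptj")          # -> gptj.GPTJModel
# model = model_cls(config)            # constructor/config signature is an assumption
try:
    get_model("fairseq")               # unregistered names raise KeyError
except KeyError:
    pass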
-from basedformer import gptj
+from basedformer.models import gptj
from basedformer.utils import *
from basedformer import lm_utils
from transformers import AutoTokenizer
...
@@ -12,7 +12,7 @@ import wandb
import numpy as np
from torch.utils.checkpoint import checkpoint as ck
from math import log2, ceil
-from basedformer import gptj, optimizer, lm_utils
+from basedformer import optimizer, lm_utils
from basedformer.utils import *
import glob
from transformers import AutoTokenizer
@@ -194,9 +194,9 @@ train_config = {
    #"data_path": "/home/xuser/diffusionstorage/datasets/sigurd/map/sigurd_v7_infilling.map",
    ##"data_path": "/home/xuser/diffusionstorage/datasets/OWT2-gpt2-full.map",
    #"data_path": "/home/xuser/diffusionstorage/datasets/sigurd/map/sigurd_v5_fs_2049.map",
-    "save_path": "/home/xuser/diffusionstorage/workspace/kuru/basedformer/models/hypernetwork-gptj-2048-enwik9-again",
+    "save_path": "/home/xuser/diffusionstorage/workspace/kuru/basedformer/models/hypernetwork-fairseq-6b-2048-enwik9-again",
    "do_save": True,
-    "run_name": "gpt-j-enwik9-6b-postln-bf16-2e-4-4bsz-every5layer",
+    "run_name": "fairseq-6b-enwik9-6b-postln-bf16-2e-4-4bsz-every5layer",
    "lr": 2e-4,
    "end_lr": 2e-4,
    "warmup_steps": 50,
@@ -215,7 +215,7 @@ gas = train_config["gas"]
Path(train_config["save_path"]).mkdir(parents=True, exist_ok=True)
#model = GPTModel.gpt2_init(model_config).cuda().float()
-model = lm_utils.load_from_path("pretrained/gptj-6b").cuda().bfloat16()
+model = lm_utils.load_from_path("pretrained/fairseq_6_7b").cuda().bfloat16()
for param in model.parameters():
    param.requires_grad = False
...
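The hunk above swaps the frozen base model from GPT-J 6B to a 6.7B fairseq checkpoint while keeping the freeze-everything setup, so only an auxiliary module (the hypernetwork named in run_name) gets trained. A generic PyTorch sketch of that freeze-then-train-adapter pattern, with toy modules standing in for the real 6B model and hypernetwork:

import torch
import torch.nn as nn

base = nn.Linear(16, 16)                      # stand-in for the frozen 6B model
for param in base.parameters():
    param.requires_grad = False               # same freeze as in the hunk above

adapter = nn.Linear(16, 16)                   # stand-in for the trained hypernetwork
trainable = [p for p in adapter.parameters() if p.requires_grad]
opt = torch.optim.AdamW(trainable, lr=2e-4)   # lr matches train_config["lr"]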
@@ -28,16 +28,20 @@ env1 = remote.env('noname', python_version=None)
path = env1.path('/home/xuser/diffusionstorage/workspace/kuru/basedformer')
if True:
-    env1.sh('pip install /home/xuser/hugessd/pytorch/torch-1.10.1+cu113-cp38-cp38-linux_x86_64.whl')
    env1.sh('pip install einops numpy')
    env1.sh('pip install tqdm')
-    #env1.sh('pip install /home/xuser/diffusionstorage/workspace/finetune/pokepls/transformers-repo')
    env1.sh('pip3 install einops==0.4.1 pyyaml wandb')
    env1.sh('wandb login 21a9442d42a35e15ce421f2b702ec58508b9adc4')
    env1.sh('pip3 install dotmap icecream')
    path.sh("pip3 install --editable .")
    path.sh("pip3 install transformers")
    path.sh("pip3 install termcolor")
+    #path.sh("pip3 uninstall transformers")
+    #env1.sh('pip install /home/xuser/diffusionstorage/workspace/finetune/pokepls/transformers-repo')
+    #env1.sh('pip install git+https://github.com/facebookresearch/fairseq')
+    env1.sh('pip install /home/xuser/hugessd/pytorch/torch-1.10.1+cu113-cp38-cp38-linux_x86_64.whl')
with always_rerun():
    if False:
        #env1.sh('pip3 install transformers')
...
@@ -19,7 +19,7 @@ with torch.no_grad():
    path = "/home/xuser/diffusionstorage/workspace/kuru/basedformer/pretrained/fairseq_125m"
    based_model = lmu.load_from_path(path).cuda().half().eval()
    print("Loaded based model")
-    x = torch.randint(0, 51200, (1, 300)).cuda().long()
+    x = torch.randint(0, 50256, (1, 2048)).cuda().long()
    assert torch.allclose(hf_model.transformer.wte(x), based_model.vocab_embed(x))
    hidden = hf_model.transformer.wte(x)
...
@@ -33,7 +33,7 @@ with torch.no_grad():
    ic(hf_model.transformer.h[layer].attn(hidden)[0].abs().mean())
    ic(based_model.layers[layer].attn(hidden)[0].abs().mean())
    ic((hf_model.transformer.h[layer].attn(hidden)[0] - based_model.layers[layer].attn(hidden)[0]).abs().mean())
-    assert torch.allclose(hf_model.transformer.h[layer].attn(hidden)[0], based_model.layers[layer].attn(hidden)[0], rtol=1e-6)
+    assert torch.allclose(hf_model.transformer.h[layer].attn(hidden)[0], based_model.layers[layer].attn(hidden)[0])
    attn_out = hf_model.transformer.h[layer].attn(hidden)[0]
    hidden = residual + attn_out
    residual = hidden
...
@@ -50,4 +50,7 @@ with torch.no_grad():
    assert torch.allclose(hf_model.transformer.ln_f(hidden), based_model.ln_final(hidden))
    hidden = hf_model.transformer.ln_f(hidden)
    assert torch.allclose(hf_model.transformer(x)["last_hidden_state"], based_model.get_embeds(x)[0])
+    ic((hf_model(x)["logits"] - based_model(x)).mean())
+    print((hf_model(x)["logits"] - based_model(x)).abs().mean())
+    print((hf_model.transformer(x)["last_hidden_state"] - based_model.get_embeds(x)[0]).abs().mean())
    assert torch.allclose(hf_model(x)["logits"], based_model(x))
\ No newline at end of file
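The loosened assert (rtol=1e-6 dropped) and the new mean-absolute-difference printouts suggest the HF and basedformer fairseq ports only agree approximately in fp16. A small, self-contained illustration of reporting the difference and comparing with an explicit tolerance; the thresholds below are arbitrary examples, not values from this commit:

import torch

def report_diff(a: torch.Tensor, b: torch.Tensor, atol: float = 1e-2, rtol: float = 1e-3) -> bool:
    # Mean/max absolute difference, mirroring the print() checks added in this commit.
    diff = (a.float() - b.float()).abs()
    print(f"mean abs diff: {diff.mean().item():.6f}, max abs diff: {diff.max().item():.6f}")
    # Explicit tolerances; fp16 outputs rarely pass torch.allclose's default atol=1e-8.
    return torch.allclose(a.float(), b.float(), atol=atol, rtol=rtol)

# Synthetic half-precision tensors standing in for the two models' activations.
x = torch.randn(1, 8, 768).half()
y = x + 1e-3 * torch.randn_like(x)
print(report_diff(x, y))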
@@ -7,7 +7,7 @@ from pathlib import Path
copy_eot_to_newline = True
copy_newline_to_eot = True
-model_dir = 'pretrained/en_dense_lm_125m' # path to smol model weights to fix tokenizer shuffle
+model_dir = 'pretrained/en_dense_lm_6_7b' # path to smol model weights to fix tokenizer shuffle
checkpoint = {}
ckmap = {}
...
@@ -40,7 +40,7 @@ def no_init(loading_code):
    return result
-lm = no_init(lambda: TransformerLanguageModel.from_pretrained(model_dir, bpe='gpt2').eval().cpu())
+lm = no_init(lambda: TransformerLanguageModel.from_pretrained("pretrained/en_dense_lm_125m", bpe='gpt2').eval().cpu())
fairdict = torch.load(f"{sys.argv[1]}", map_location="cpu")
try:
...
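Only the tail of no_init is visible in this hunk. Presumably it suppresses weight initialization while the large fairseq TransformerLanguageModel is constructed, so loading does not waste time filling tensors that the checkpoint immediately overwrites. A generic sketch of such a helper, not necessarily basedformer's actual implementation:

import torch

def no_init(loading_code):
    # Temporarily replace common initializers with no-ops so constructing a huge
    # model skips random weight fills that the checkpoint overwrites anyway.
    def dummy(tensor, *args, **kwargs):
        return tensor
    originals = {}
    for name in ("uniform_", "normal_", "kaiming_uniform_", "xavier_uniform_"):
        originals[name] = getattr(torch.nn.init, name)
        setattr(torch.nn.init, name, dummy)
    try:
        result = loading_code()
    finally:
        # Always restore the real initializers, even if loading fails.
        for name, fn in originals.items():
            setattr(torch.nn.init, name, fn)
    return result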
-from main import *
-state_dict = SplitCheckpoint("j6b_vanilla", device="cpu")
+from basedformer import utils
+from pathlib import Path
+import torch
+state_dict = utils.SplitCheckpoint("/home/xuser/diffusionstorage/models/prodbigmodels/sigurd-v4", device="cpu")
# ORIGINAL
'''
...
@@ -54,12 +54,12 @@ for key in state_dict.keys():
#print(new_state_dict)
def save(state_dict, path):
-    try: os.mkdir(path)
-    except: pass
+    path = Path(path)
+    path.mkdir(parents=True, exist_ok=True)
    checkpoint = {}
    for i, x in enumerate(state_dict.items()):
        checkpoint[x[0]] = f"{path}/b{i}.pt"
        torch.save(x[1], f"{path}/b{i}.pt")
    torch.save(checkpoint, f"{path}/m.pt")
-save(new_state_dict, "models/6b_vanilla")
+save(new_state_dict, "pretrained/sigurdv4/lm")
\ No newline at end of file
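save() writes one b{i}.pt blob per tensor plus an m.pt index mapping parameter names to blob paths; SplitCheckpoint presumably reads the same layout back. A rough loader sketch for that on-disk format, independent of basedformer's actual SplitCheckpoint class:

import torch

def load_split_checkpoint(path: str, device: str = "cpu") -> dict:
    # m.pt maps each parameter name to the file that holds its tensor.
    index = torch.load(f"{path}/m.pt", map_location=device)
    return {name: torch.load(blob, map_location=device) for name, blob in index.items()}

# state_dict = load_split_checkpoint("pretrained/sigurdv4/lm")
# model.load_state_dict(state_dict)   # model construction not shown here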
@@ -25,7 +25,7 @@ def main():
    tokenizer = AutoTokenizer.from_pretrained('gpt2')
    mask = "████████"
    prompt = "You hated the elves enough that if you seen one of them in the forest you would just slice their throats."
-    prompt = """'''Kurumuz''' is the founder of tech company [["""
+    #prompt = """'''Kurumuz''' is the founder of tech company [["""
    #promptnomask = f"""The room was lit now by a dozen candles. The door had been locked, and the windows barred; but there were still some faint glimmers of moonlight on the floor outside. For a moment the figure stood motionless in its doorway to look about it with an air of keen and nervous expectancy. Then he came forward into the chamber and moved, where he remained standing for an instant upon his toes like one listening intently before starting to rummage among the books and papers. He selected a large volume from among them and turned back to the window,{mask} holding it between himself and the rest of the room until he could feel the warm breath of the night creeping through the curtains.{mask}"""
    #prompt = f"""The room was lit now by a dozen candles. The door had been locked, and the windows barred; but there were still some faint glimmers of moonlight on the floor outside. For a moment the figure stood motionless in its doorway to look about it with an air of keen and nervous expectancy. Then he came forward into the chamber and moved{mask}, where he remained standing for an instant upon his toes like one listening intently before starting to rummage among the books and papers. He selected a large volume from among them and turned back to the window, holding it between himself and the rest of the room until he could feel the warm breath of the night creeping through the curtains.{mask}"""
    tokens = tokenizer.encode(prompt)
...
@@ -39,10 +39,10 @@ def main():
    #tokens = torch.cat([tokens, tokens], dim=0)
    tokens = torch.cat(tokens, dim=0)
    t = time.perf_counter()
-    model = lmu.load_from_path('pretrained/gptj-6b').cuda().bfloat16().eval()
-    hypernetwork = hypernet.HyperNetworkSingle(model.config).cuda().float()
+    model = lmu.load_from_path('pretrained/fairseq_125m').cuda().bfloat16().eval()
+    #hypernetwork = hypernet.HyperNetworkSingle(model.config).cuda().float()
    #print("Loading from step {}".format(cp_list[-1].split("_")[-1]))
-    hypernetwork.load_state_dict(torch.load(last_cp / "hyper.pt"))
+    #hypernetwork.load_state_dict(torch.load(last_cp / "hyper.pt"))
    ic(time.perf_counter() - t)
...
@@ -57,7 +57,7 @@ def main():
    }
    ops_list = [ops] * bsz
    torch.manual_seed(69)
-    tokens_generated = sampling.generate(model.forward, tokens, gen_len, ops_list=ops_list, hypernetwork=hypernetwork, non_deterministic=False)
+    tokens_generated = sampling.generate(model.forward, tokens, gen_len, ops_list=ops_list, hypernetwork=None, non_deterministic=True)
    #tokens_generated = sampling.generate_greedy(model.forward, tokens, gen_len, hypernetwork=hypernetwork)
    #tokens_generated_batched = generate_real_batched(model.forward, tokens, gen_len, ops=ops)
    #print(tokens_generated.shape)
...