Commit 676d36ca authored by novelailab

add lm_head to test

parent 89ceb109
......@@ -83,3 +83,4 @@ with torch.no_grad():
assert torch.allclose(hf_model.transformer.ln_f(hidden), based_model.ln_final(hidden))
hidden = hf_model.transformer.ln_f(hidden)
assert torch.allclose(hf_model.lm_head(hidden), based_model.lm_head(hidden))
......@@ -66,14 +66,15 @@ def test_thing(graph, input):
with torch.no_grad():
model = init_1_3b().cuda().half()
shape = (1, 512)
shape = (1, 256)
x = torch.zeros(shape).cuda().long()
print(shape)
print("PyTorch Eager")
timeit(r=1, n=100, func=lambda: model(x), do_tqdm=False, first=False)
print("PyTorch CUDAGraph+JIT")
module = torch.jit.trace(model, torch.zeros(shape).long().cuda())
torch.jit.optimize_for_inference(module)
print("PyTorch CUDAGraph+JIT+NVFuser")
with torch.jit.fuser("fuser2"):
module = torch.jit.trace(model, torch.zeros(shape).long().cuda())
torch.jit.optimize_for_inference(module)
static_input = torch.randint(0, 50256, shape, device='cuda')
fake_inputs = [torch.randint(0, 50256, shape, device="cuda") for _ in range(100)]
real_inputs = [torch.randint(0, 50256, shape, device="cuda") for _ in range(100)]
......
......@@ -32,7 +32,6 @@ def timeit(func, r=1, n=5, quiet=False, function=None, do_tqdm=False, first=True
precision = 'ns'
elif best[0] >= 1e9:
print('b')
best[0] = best[0] * 1e-9
best[1] = best[1] * 1e-9
precision = 's'
......@@ -58,13 +57,18 @@ def timeit(func, r=1, n=5, quiet=False, function=None, do_tqdm=False, first=True
print(f"{func.__name__}: {best[0]:.4f}{precision} ± {best[1]:.4f}{precision} per loop (mean ± std. dev. of {str(r)} runs, {str(n)} loops each)")
def rndinput(shape):
return torch.randint(0, 50256, shape).long().cuda()
with torch.no_grad():
model = init_6b().cuda().half()
x = torch.zeros(50, 1).cuda().long()
shape = (1, 1)
x = torch.zeros(shape).cuda().long()
print(model(x).shape)
print("PyTorch Eager")
timeit(r=1, n=100, func=lambda: model(x), do_tqdm=False, first=False)
module = torch.jit.trace(model, torch.zeros((50, 1)).long().cuda())
torch.jit.optimize_for_inference(module)
timeit(r=1, n=1, func=lambda: model(x), do_tqdm=False, first=True)
with torch.jit.fuser("fuser2"):
module = torch.jit.trace(model, torch.zeros(shape).long().cuda())
torch.jit.optimize_for_inference(module)
print("PyTorch JIT")
timeit(r=1, n=100, func=lambda: module(x), do_tqdm=False, first=False)
\ No newline at end of file
timeit(r=1, n=1, func=lambda: module(rndinput((1, 1))), do_tqdm=False, first=True)
......@@ -7,7 +7,7 @@ dry = False
config_obj = KubeConfig()
config_obj.set_name(name)
config_obj.set_gpu(gpu_name=GPU.RTX_A6000, amount=1)
config_obj.set_gpu(gpu_name=GPU.RTX_A5000, amount=1)
config_obj.set_ram(16)
config_obj.set_cpu(4)
config_obj.dry_run(dry)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment