add lm_head to test

676d36ca · novelailab · 89ceb109 · 676d36ca · 676d36ca · 676d36ca
Commit 676d36ca authored Feb 27, 2022 by novelailab
Hide whitespace changes
Inline Side-by-side

Showing with 17 additions and 11 deletions

comparehf.py comparehf.py +1 -0

cudagraph.py cudagraph.py +5 -4

gptj.py gptj.py +10 -6

test_pyfra.py test_pyfra.py +1 -1

No files found.
--- a/comparehf.py
+++ b/comparehf.py
@@ -83,3 +83,4 @@ with torch.no_grad():

    assert torch.allclose(hf_model.transformer.ln_f(hidden), based_model.ln_final(hidden))
    hidden = hf_model.transformer.ln_f(hidden)
+    assert torch.allclose(hf_model.lm_head(hidden), based_model.lm_head(hidden))
--- a/cudagraph.py
+++ b/cudagraph.py
@@ -66,14 +66,15 @@ def test_thing(graph, input):

 with torch.no_grad():
    model = init_1_3b().cuda().half()
-    shape = (1, 512)
+    shape = (1, 256)
    x = torch.zeros(shape).cuda().long()
    print(shape)
    print("PyTorch Eager")
    timeit(r=1, n=100, func=lambda: model(x), do_tqdm=False, first=False)
-    print("PyTorch CUDAGraph+JIT")
-    module = torch.jit.trace(model, torch.zeros(shape).long().cuda())
-    torch.jit.optimize_for_inference(module)
+    print("PyTorch CUDAGraph+JIT+NVFuser")
+    with torch.jit.fuser("fuser2"):
+        module = torch.jit.trace(model, torch.zeros(shape).long().cuda())
+        torch.jit.optimize_for_inference(module)
    static_input = torch.randint(0, 50256, shape, device='cuda')
    fake_inputs = [torch.randint(0, 50256, shape, device="cuda") for _ in range(100)]
    real_inputs = [torch.randint(0, 50256, shape, device="cuda") for _ in range(100)]

--- a/gptj.py
+++ b/gptj.py
@@ -32,7 +32,6 @@ def timeit(func, r=1, n=5, quiet=False, function=None, do_tqdm=False, first=True
        precision = 'ns'

    elif best[0] >= 1e9:
-        print('b')
        best[0] = best[0] * 1e-9
        best[1] = best[1] * 1e-9
        precision = 's'
@@ -58,13 +57,18 @@ def timeit(func, r=1, n=5, quiet=False, function=None, do_tqdm=False, first=True
            print(f"{func.__name__}: {best[0]:.4f}{precision} ± {best[1]:.4f}{precision} per loop (mean ± std. dev. of {str(r)} runs, {str(n)} loops each)")


+def rndinput(shape):
+    return torch.randint(0, 50256, shape).long().cuda()
+
 with torch.no_grad():
    model = init_6b().cuda().half()
-    x = torch.zeros(50, 1).cuda().long()
+    shape = (1, 1)
+    x = torch.zeros(shape).cuda().long()
    print(model(x).shape)
    print("PyTorch Eager")
-    timeit(r=1, n=100, func=lambda: model(x), do_tqdm=False, first=False)
-    module = torch.jit.trace(model, torch.zeros((50, 1)).long().cuda())
-    torch.jit.optimize_for_inference(module)
+    timeit(r=1, n=1, func=lambda: model(x), do_tqdm=False, first=True)
+    with torch.jit.fuser("fuser2"):
+        module = torch.jit.trace(model, torch.zeros(shape).long().cuda())
+        torch.jit.optimize_for_inference(module)
    print("PyTorch JIT")
-    timeit(r=1, n=100, func=lambda: module(x), do_tqdm=False, first=False)
\ No newline at end of file
+    timeit(r=1, n=1, func=lambda: module(rndinput((1, 1))), do_tqdm=False, first=True)
--- a/test_pyfra.py
+++ b/test_pyfra.py
@@ -7,7 +7,7 @@ dry = False

 config_obj = KubeConfig()
 config_obj.set_name(name)
-config_obj.set_gpu(gpu_name=GPU.RTX_A6000, amount=1)
+config_obj.set_gpu(gpu_name=GPU.RTX_A5000, amount=1)
 config_obj.set_ram(16)
 config_obj.set_cpu(4)
 config_obj.dry_run(dry)