parser.add_argument("--opt-split-attention",action='store_true',help="force-enables cross-attention layer optimization. By default, it's on for torch.cuda and off for other torch devices.")
parser.add_argument("--opt-split-attention-v1",action='store_true',help="enable older version of split attention optimization that does not consume all the VRAM it can find")