From f3da45f65d76f8c48fd95678ecc841afb0ddd04e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 4 May 2025 20:37:15 -0700 Subject: [PATCH] ExLlamaV3_HF: Change max_chunk_size to 256 --- modules/exllamav3_hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/exllamav3_hf.py b/modules/exllamav3_hf.py index 12b22f64..417df473 100644 --- a/modules/exllamav3_hf.py +++ b/modules/exllamav3_hf.py @@ -119,7 +119,7 @@ class Exllamav3HF(PreTrainedModel, GenerationMixin): reset = True # Maximum number of tokens to process in a single forward pass - max_chunk_size = 2048 + max_chunk_size = 256 # Make the forward call if labels is None: