ExLlamaV3_HF: Change max_chunk_size to 256

This commit is contained in:
oobabooga 2025-05-04 20:37:15 -07:00
parent df7bb0db1f
commit f3da45f65d

View file

@@ -119,7 +119,7 @@ class Exllamav3HF(PreTrainedModel, GenerationMixin):
         reset = True
         # Maximum number of tokens to process in a single forward pass
-        max_chunk_size = 2048
+        max_chunk_size = 256
         # Make the forward call
         if labels is None: