From f3da45f65d76f8c48fd95678ecc841afb0ddd04e Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 4 May 2025 20:37:15 -0700
Subject: [PATCH] ExLlamaV3_HF: Change max_chunk_size to 256

---
 modules/exllamav3_hf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/exllamav3_hf.py b/modules/exllamav3_hf.py
index 12b22f64..417df473 100644
--- a/modules/exllamav3_hf.py
+++ b/modules/exllamav3_hf.py
@@ -119,7 +119,7 @@ class Exllamav3HF(PreTrainedModel, GenerationMixin):
         reset = True
 
         # Maximum number of tokens to process in a single forward pass
-        max_chunk_size = 2048
+        max_chunk_size = 256
 
         # Make the forward call
         if labels is None: