From 7c29879e795776ceb742a8ddb47fd3843069cf34 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 30 May 2025 11:17:47 -0700
Subject: [PATCH 1/4] Fix 'Start reply with' (closes #7033)

---
 modules/chat.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/modules/chat.py b/modules/chat.py
index e526a9a0..881f7330 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -806,9 +806,12 @@ def remove_last_message(history):
     return html.unescape(last[0]), history
 
 
-def send_dummy_message(textbox, state):
+def send_dummy_message(text, state):
     history = state['history']
-    text = textbox['text']
+
+    # Handle both dict and string inputs
+    if isinstance(text, dict):
+        text = text['text']
 
     # Initialize metadata if not present
     if 'metadata' not in history:
@@ -822,9 +825,12 @@ def send_dummy_message(textbox, state):
     return history
 
 
-def send_dummy_reply(textbox, state):
+def send_dummy_reply(text, state):
     history = state['history']
-    text = textbox['text']
+
+    # Handle both dict and string inputs
+    if isinstance(text, dict):
+        text = text['text']
 
     # Initialize metadata if not present
     if 'metadata' not in history:

From 298d4719c6c9545a701a9cc9e8f4efceb108599a Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 30 May 2025 11:32:24 -0700
Subject: [PATCH 2/4] Multiple small style improvements

---
 css/main.css  | 4 ++++
 modules/ui.py | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/css/main.css b/css/main.css
index 9685c863..967d94ed 100644
--- a/css/main.css
+++ b/css/main.css
@@ -1551,3 +1551,7 @@ strong {
     color: var(--body-text-color-subdued);
     margin-top: 4px;
 }
+
+button:focus {
+    outline: none;
+}
diff --git a/modules/ui.py b/modules/ui.py
index a2662e14..9f4d67cb 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -71,6 +71,7 @@ if not shared.args.old_colors:
         block_background_fill_dark='transparent',
         block_border_color_dark='transparent',
         input_border_color_dark='var(--border-color-dark)',
+        input_border_color_focus_dark='var(--border-color-dark)',
         checkbox_border_color_dark='var(--border-color-dark)',
         border_color_primary_dark='var(--border-color-dark)',
         button_secondary_border_color_dark='var(--border-color-dark)',
@@ -89,6 +90,8 @@ if not shared.args.old_colors:
         checkbox_label_shadow='none',
         block_shadow='none',
         block_shadow_dark='none',
+        input_shadow_focus='none',
+        input_shadow_focus_dark='none',
         button_large_radius='0.375rem',
         button_large_padding='6px 12px',
         input_radius='0.375rem',

From 219f0a773166deeb0326c2874b29e66e382df524 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 30 May 2025 12:05:49 -0700
Subject: [PATCH 3/4] Fix exllamav3_hf models failing to unload (closes #7031)

---
 modules/exllamav3_hf.py | 17 +++++++++++++++++
 modules/models.py       |  3 +++
 2 files changed, 20 insertions(+)

diff --git a/modules/exllamav3_hf.py b/modules/exllamav3_hf.py
index 417df473..1254ff5d 100644
--- a/modules/exllamav3_hf.py
+++ b/modules/exllamav3_hf.py
@@ -245,3 +245,20 @@ class Exllamav3HF(PreTrainedModel, GenerationMixin):
         pretrained_model_name_or_path = Path(f'{shared.args.model_dir}') / Path(pretrained_model_name_or_path)
 
         return Exllamav3HF(pretrained_model_name_or_path)
+
+    def unload(self):
+        """Unload the wrapped ExLlamaV3 model and drop this wrapper's references to its model, caches and past-sequence state."""
+        if hasattr(self, 'ex_model') and self.ex_model is not None:
+            self.ex_model.unload()
+            self.ex_model = None
+
+        if hasattr(self, 'ex_cache') and self.ex_cache is not None:
+            self.ex_cache = None
+
+        # Also clear the past-sequence and negative-cache attributes, but only those that were ever set on this instance
+        if hasattr(self, 'past_seq'):
+            self.past_seq = None
+        if hasattr(self, 'past_seq_negative'):
+            self.past_seq_negative = None
+        if hasattr(self, 'ex_cache_negative'):
+            self.ex_cache_negative = None
diff --git a/modules/models.py b/modules/models.py
index 4218d58c..d329ae3c 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -116,10 +116,13 @@ def unload_model(keep_model_name=False):
         return
 
     is_llamacpp = (shared.model.__class__.__name__ == 'LlamaServer')
+    if shared.args.loader == 'ExLlamav3_HF':
+        shared.model.unload()
 
     shared.model = shared.tokenizer = None
     shared.lora_names = []
     shared.model_dirty_from_training = False
+
     if not is_llamacpp:
         from modules.torch_utils import clear_torch_cache
         clear_torch_cache()

From 15f466ca3f8255f2566f016db8d7b8fd9ebef3f4 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 30 May 2025 15:49:57 -0700
Subject: [PATCH 4/4] Update README

---
 README.md | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index daf409d0..55df33d2 100644
--- a/README.md
+++ b/README.md
@@ -14,18 +14,18 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.
 
 - Supports multiple text generation backends in one UI/API, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), [ExLlamaV2](https://github.com/turboderp-org/exllamav2), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) (the latter via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile)).
 - Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory.
-- **File attachments**: Upload text files and PDF documents directly in conversations to talk about their contents.
-- **Web search**: Optionally search the internet with LLM-generated queries based on your input to add context to the conversation.
-- Advanced chat management: Edit messages, navigate between message versions, and branch conversations at any point.
+- 100% offline and private, with zero telemetry, external resources, or remote update requests.
 - Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats.
-- Automatic GPU layers for GGUF models (on NVIDIA GPUs).
-- UI that resembles the original ChatGPT style.
-- Three chat modes: `instruct`, `chat-instruct`, and `chat`, with automatic prompt templates in `chat-instruct`.
-- Free-form text generation in the Default/Notebook tabs without being limited to chat turns. You can send formatted conversations from the Chat tab to these.
+- **File attachments**: Upload text files and PDF documents to talk about their contents.
+- **Web search**: Optionally search the internet with LLM-generated queries to add context to the conversation.
+- Aesthetic UI with dark and light themes.
+- `instruct` mode for instruction-following (like ChatGPT), and `chat-instruct`/`chat` modes for talking to custom characters.
+- Edit messages, navigate between message versions, and branch conversations at any point.
 - Multiple sampling parameters and generation options for sophisticated text generation control.
-- Switch between different models easily in the UI without restarting, with fine control over settings.
+- Switch between different models in the UI without restarting.
+- Automatic GPU layers for GGUF models (on NVIDIA GPUs).
+- Free-form text generation in the Default/Notebook tabs without being limited to chat turns.
 - OpenAI-compatible API with Chat and Completions endpoints, including tool-calling support – see [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples).
-- 100% offline and private, with zero telemetry, external resources, or remote update requests. Web search is optional and user-controlled.
 - Extension support, with numerous built-in and user-contributed extensions available. See the [wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details.
 
 ## How to install