Compare commits

...

3 commits

Author     SHA1        Message                                                    Date
oobabooga  219f0a7731  Fix exllamav3_hf models failing to unload (closes #7031)  2025-05-30 12:05:49 -07:00
oobabooga  298d4719c6  Multiple small style improvements                          2025-05-30 11:32:24 -07:00
oobabooga  7c29879e79  Fix 'Start reply with' (closes #7033)                      2025-05-30 11:17:47 -07:00
5 changed files with 37 additions and 4 deletions

css/main.css

@@ -1551,3 +1551,7 @@ strong {
   color: var(--body-text-color-subdued);
   margin-top: 4px;
 }
+
+button:focus {
+  outline: none;
+}

modules/chat.py

@@ -806,9 +806,12 @@ def remove_last_message(history):
     return html.unescape(last[0]), history


-def send_dummy_message(textbox, state):
+def send_dummy_message(text, state):
     history = state['history']
-    text = textbox['text']
+
+    # Handle both dict and string inputs
+    if isinstance(text, dict):
+        text = text['text']

     # Initialize metadata if not present
     if 'metadata' not in history:
@@ -822,9 +825,12 @@ def send_dummy_message(textbox, state):
     return history


-def send_dummy_reply(textbox, state):
+def send_dummy_reply(text, state):
     history = state['history']
-    text = textbox['text']
+
+    # Handle both dict and string inputs
+    if isinstance(text, dict):
+        text = text['text']

     # Initialize metadata if not present
     if 'metadata' not in history:
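
Note: both handlers above apply the same normalization. A standalone sketch of the pattern, assuming the dict shape is the {'text': ..., 'files': [...]} value produced by a Gradio multimodal textbox; the helper name is illustrative, not part of the diff:

def normalize_input(value):
    # Accept either a plain string or a multimodal-textbox dict.
    if isinstance(value, dict):
        return value['text']
    return value

assert normalize_input('hello') == 'hello'
assert normalize_input({'text': 'hello', 'files': []}) == 'hello'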

modules/exllamav3_hf.py

@@ -245,3 +245,20 @@ class Exllamav3HF(PreTrainedModel, GenerationMixin):
         pretrained_model_name_or_path = Path(f'{shared.args.model_dir}') / Path(pretrained_model_name_or_path)

         return Exllamav3HF(pretrained_model_name_or_path)
+
+    def unload(self):
+        """Properly unload the ExllamaV3 model and free GPU memory."""
+        if hasattr(self, 'ex_model') and self.ex_model is not None:
+            self.ex_model.unload()
+            self.ex_model = None
+
+        if hasattr(self, 'ex_cache') and self.ex_cache is not None:
+            self.ex_cache = None
+
+        # Clean up any additional ExllamaV3 resources
+        if hasattr(self, 'past_seq'):
+            self.past_seq = None
+        if hasattr(self, 'past_seq_negative'):
+            self.past_seq_negative = None
+        if hasattr(self, 'ex_cache_negative'):
+            self.ex_cache_negative = None

modules/models.py

@@ -116,10 +116,13 @@ def unload_model(keep_model_name=False):
         return

     is_llamacpp = (shared.model.__class__.__name__ == 'LlamaServer')
+    if shared.args.loader == 'ExLlamav3_HF':
+        shared.model.unload()
+
     shared.model = shared.tokenizer = None
     shared.lora_names = []
     shared.model_dirty_from_training = False

     if not is_llamacpp:
         from modules.torch_utils import clear_torch_cache
         clear_torch_cache()
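
Note: the placement matters here. unload() has to run while shared.model still points at the wrapper; once the reference is set to None, the ExLlamaV3 resources become unreachable from Python yet can remain allocated on the GPU. A toy illustration of that ordering (all names hypothetical):

class Backend:
    def unload(self):
        print('backend memory released')

model = Backend()
model.unload()  # must run first, while the reference is alive
model = None    # only then drop the Python reference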

modules/ui.py

@@ -71,6 +71,7 @@ if not shared.args.old_colors:
         block_background_fill_dark='transparent',
         block_border_color_dark='transparent',
         input_border_color_dark='var(--border-color-dark)',
+        input_border_color_focus_dark='var(--border-color-dark)',
         checkbox_border_color_dark='var(--border-color-dark)',
         border_color_primary_dark='var(--border-color-dark)',
         button_secondary_border_color_dark='var(--border-color-dark)',
@@ -89,6 +90,8 @@ if not shared.args.old_colors:
         checkbox_label_shadow='none',
         block_shadow='none',
         block_shadow_dark='none',
+        input_shadow_focus='none',
+        input_shadow_focus_dark='none',
         button_large_radius='0.375rem',
         button_large_padding='6px 12px',
         input_radius='0.375rem',
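
Note: these keyword arguments are Gradio theme variables. A minimal sketch of how such overrides are applied, assuming the theme is built with gr.themes.Default().set() as the surrounding code suggests (var(--border-color-dark) is a CSS variable defined in this repo's stylesheets):

import gradio as gr

# Suppress the focus highlight so inputs keep the same border and shadow
# whether or not they are focused; complements the button:focus CSS rule above.
theme = gr.themes.Default().set(
    input_border_color_focus_dark='var(--border-color-dark)',
    input_shadow_focus='none',
    input_shadow_focus_dark='none',
)

with gr.Blocks(theme=theme) as demo:
    gr.Textbox(label='No focus glow')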