From 1c549d176b27233daf0ef6992bf5b5d8215784f9 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 16 May 2025 17:24:06 -0700
Subject: [PATCH 1/6] Fix GPU layers slider: honor saved settings and show true maximum

---
 modules/models_settings.py | 30 +++++++++++++++++++++---------
 modules/ui_model_menu.py   |  2 +-
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/modules/models_settings.py b/modules/models_settings.py
index 3fdf3c84..6715d494 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -72,6 +72,7 @@ def get_model_metadata(model):
             model_settings['compress_pos_emb'] = metadata[k]
         elif k.endswith('block_count'):
             model_settings['gpu_layers'] = metadata[k] + 1
+            model_settings['max_gpu_layers'] = metadata[k] + 1
 
     if 'tokenizer.chat_template' in metadata:
         template = metadata['tokenizer.chat_template']
@@ -450,17 +451,28 @@ def update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type,
     max_layers = gpu_layers
 
     if auto_adjust:
-        # Get max layers from model metadata
+        # Get model settings including user preferences
         model_settings = get_model_metadata(model)
-        max_layers = model_settings.get('gpu_layers', gpu_layers)
 
-        # Auto-adjust based on available VRAM
-        available_vram = get_nvidia_free_vram()
-        if available_vram > 0:
-            tolerance = 906
-            current_layers = max_layers
-            while current_layers > 0 and estimate_vram(model, current_layers, ctx_size, cache_type) > available_vram - tolerance:
-                current_layers -= 1
+        # Check if the value is from user config-user.yaml
+        user_config = shared.user_config
+        model_regex = Path(model).name + '$'
+        has_user_setting = model_regex in user_config and 'gpu_layers' in user_config[model_regex]
+
+        if has_user_setting:
+            # Just return the current user value without adjustment
+            max_layers = model_settings.get('max_gpu_layers', 256)
+        else:
+            # No user setting, use model's max and auto-adjust
+            max_layers = model_settings.get('max_gpu_layers', model_settings.get('gpu_layers', gpu_layers))
+            current_layers = max_layers  # Start from max
+
+            # Auto-adjust based on available VRAM
+            available_vram = get_nvidia_free_vram()
+            if available_vram > 0:
+                tolerance = 906
+                while current_layers > 0 and estimate_vram(model, current_layers, ctx_size, cache_type) > available_vram - tolerance:
+                    current_layers -= 1
 
     # Calculate VRAM with current layers
     vram_usage = estimate_vram(model, current_layers, ctx_size, cache_type)
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 59bb6759..5b7dfdd8 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -318,7 +318,7 @@ def get_initial_vram_info():
 def get_initial_gpu_layers_max():
     if shared.model_name != 'None' and shared.args.loader == 'llama.cpp':
         model_settings = get_model_metadata(shared.model_name)
-        return model_settings.get('gpu_layers', 256)
+        return model_settings.get('max_gpu_layers', model_settings.get('gpu_layers', 256))
 
     return 256

From d99fb0a22a44dc4fb4d695647ba07cbf55e044c6 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 16 May 2025 17:29:18 -0700
Subject: [PATCH 2/6] Add backward compatibility with saved n_gpu_layers values

---
 modules/models_settings.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/modules/models_settings.py b/modules/models_settings.py
index 6715d494..76bce7a9 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -154,6 +154,9 @@ def get_model_metadata(model):
         for pat in settings:
             if re.match(pat.lower(), Path(model).name.lower()):
                 for k in settings[pat]:
+                    if k == 'n_gpu_layers':
+                        k = 'gpu_layers'
+
                     model_settings[k] = settings[pat][k]
 
     # Load instruction template if defined by name rather than by value
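A quick illustration of what patch 2 restores: model settings saved to config-user.yaml before the n_gpu_layers -> gpu_layers rename keep working. The sketch below is reviewer scaffolding, not code from the tree; apply_user_overrides and the inline sample config are hypothetical, and it reads each value via items() before renaming the key so the lookup stays unambiguous.

    import re
    from pathlib import Path

    def apply_user_overrides(model, user_config, model_settings):
        # Merge config-user.yaml entries whose regex matches the model
        # name, translating the legacy 'n_gpu_layers' key to 'gpu_layers'.
        for pat in user_config:
            if re.match(pat.lower(), Path(model).name.lower()):
                for k, v in user_config[pat].items():
                    if k == 'n_gpu_layers':
                        k = 'gpu_layers'
                    model_settings[k] = v
        return model_settings

    # A layer count saved under the old key lands on the new one:
    merged = apply_user_overrides(
        'llama-3-8b.Q4_K_M.gguf',
        {'llama-3-8b.q4_k_m.gguf$': {'n_gpu_layers': 20}},
        {'gpu_layers': 33, 'max_gpu_layers': 33},
    )
    assert merged == {'gpu_layers': 20, 'max_gpu_layers': 33}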
From 71fa046c1708a235853c359ef95b363a20c762d3 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 16 May 2025 17:38:08 -0700
Subject: [PATCH 3/6] Minor changes after 1c549d176b27233daf0ef6992bf5b5d8215784f9

---
 modules/models_settings.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/modules/models_settings.py b/modules/models_settings.py
index 76bce7a9..3a2400d4 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -457,17 +457,20 @@ def update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type,
         # Get model settings including user preferences
         model_settings = get_model_metadata(model)
 
-        # Check if the value is from user config-user.yaml
+        # Get the true maximum layers
+        max_layers = model_settings.get('max_gpu_layers', model_settings.get('gpu_layers', gpu_layers))
+
+        # Check if this is a user-saved setting
         user_config = shared.user_config
         model_regex = Path(model).name + '$'
         has_user_setting = model_regex in user_config and 'gpu_layers' in user_config[model_regex]
 
         if has_user_setting:
-            # Just return the current user value without adjustment
-            max_layers = model_settings.get('max_gpu_layers', 256)
+            # For user settings, just use the current value (which already has user pref)
+            # but ensure the slider maximum is correct
+            current_layers = gpu_layers  # Already has user setting
         else:
-            # No user setting, use model's max and auto-adjust
-            max_layers = model_settings.get('max_gpu_layers', model_settings.get('gpu_layers', gpu_layers))
+            # No user setting, auto-adjust from the maximum
             current_layers = max_layers  # Start from max
 
             # Auto-adjust based on available VRAM

From e3bba510d443a0a447f85083a2dff4a116a50848 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 16 May 2025 17:48:54 -0700
Subject: [PATCH 4/6] UI: Only add a blank space to streaming messages in instruct mode

---
 css/main.css              | 2 +-
 js/main.js                | 2 +-
 modules/html_generator.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/css/main.css b/css/main.css
index 0902b184..3fec7bb0 100644
--- a/css/main.css
+++ b/css/main.css
@@ -390,7 +390,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
   margin-left: auto;
   margin-right: auto;
   flex: 1;
-  overflow-y: auto;
+  overflow-y: hidden;
   display: flex;
   flex-direction: column;
   word-break: break-word;
diff --git a/js/main.js b/js/main.js
index 205cf88e..6cecd341 100644
--- a/js/main.js
+++ b/js/main.js
@@ -152,7 +152,7 @@ const observer = new MutationObserver(function(mutations) {
   }
 
   const chatElement = document.getElementById("chat");
-  if (chatElement) {
+  if (chatElement && chatElement.getAttribute("data-mode") === "instruct") {
     const messagesContainer = chatElement.querySelector(".messages");
     const lastChild = messagesContainer?.lastElementChild;
     const prevSibling = lastChild?.previousElementSibling;
diff --git a/modules/html_generator.py b/modules/html_generator.py
index 67d15b6e..39659476 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -347,7 +347,7 @@ remove_button = f'
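Taken together, patches 1 and 3 leave update_gpu_layers_and_vram() with the decision logic sketched below. This is a condensed stand-in for review, not the function itself: the model, context-size, and cache-type arguments are folded into a hypothetical per-layer estimate_vram callable, while the max_gpu_layers fallback chain for the slider maximum, the honor-the-saved-value branch, and the auto-adjust loop with its tolerance of 906 follow the patches.

    def pick_gpu_layers(model_settings, gpu_layers, has_user_setting,
                        available_vram, estimate_vram, tolerance=906):
        # Slider maximum: 'max_gpu_layers' (block_count + 1 from the GGUF
        # metadata), falling back to older keys when it is absent.
        max_layers = model_settings.get(
            'max_gpu_layers', model_settings.get('gpu_layers', gpu_layers))

        if has_user_setting:
            # A value saved in config-user.yaml is kept as-is; only the
            # slider maximum is corrected.
            current_layers = gpu_layers
        else:
            # No saved value: start from the maximum and shed layers
            # until the estimate fits under the free-VRAM budget.
            current_layers = max_layers
            if available_vram > 0:
                while current_layers > 0 and estimate_vram(current_layers) > available_vram - tolerance:
                    current_layers -= 1

        return current_layers, max_layers

    # E.g. 500 MB per layer against 16000 MB free keeps 30 of 33 layers:
    layers, maximum = pick_gpu_layers(
        {'max_gpu_layers': 33}, 33, False, 16000, lambda n: 500 * n)
    assert (layers, maximum) == (30, 33)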