From 1c549d176b27233daf0ef6992bf5b5d8215784f9 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 16 May 2025 17:24:06 -0700
Subject: [PATCH 1/6] Fix GPU layers slider: honor saved settings and show true maximum

---
 modules/models_settings.py | 30 +++++++++++++++++++++---------
 modules/ui_model_menu.py   |  2 +-
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/modules/models_settings.py b/modules/models_settings.py
index 3fdf3c84..6715d494 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -72,6 +72,7 @@ def get_model_metadata(model):
             model_settings['compress_pos_emb'] = metadata[k]
         elif k.endswith('block_count'):
             model_settings['gpu_layers'] = metadata[k] + 1
+            model_settings['max_gpu_layers'] = metadata[k] + 1
 
     if 'tokenizer.chat_template' in metadata:
         template = metadata['tokenizer.chat_template']
@@ -450,17 +451,28 @@ def update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type,
     max_layers = gpu_layers
 
     if auto_adjust:
-        # Get max layers from model metadata
+        # Get model settings including user preferences
         model_settings = get_model_metadata(model)
-        max_layers = model_settings.get('gpu_layers', gpu_layers)
 
-        # Auto-adjust based on available VRAM
-        available_vram = get_nvidia_free_vram()
-        if available_vram > 0:
-            tolerance = 906
-            current_layers = max_layers
-            while current_layers > 0 and estimate_vram(model, current_layers, ctx_size, cache_type) > available_vram - tolerance:
-                current_layers -= 1
+        # Check if the value is from user config-user.yaml
+        user_config = shared.user_config
+        model_regex = Path(model).name + '$'
+        has_user_setting = model_regex in user_config and 'gpu_layers' in user_config[model_regex]
+
+        if has_user_setting:
+            # Just return the current user value without adjustment
+            max_layers = model_settings.get('max_gpu_layers', 256)
+        else:
+            # No user setting, use model's max and auto-adjust
+            max_layers = model_settings.get('max_gpu_layers', model_settings.get('gpu_layers', gpu_layers))
+            current_layers = max_layers  # Start from max
+
+            # Auto-adjust based on available VRAM
+            available_vram = get_nvidia_free_vram()
+            if available_vram > 0:
+                tolerance = 906
+                while current_layers > 0 and estimate_vram(model, current_layers, ctx_size, cache_type) > available_vram - tolerance:
+                    current_layers -= 1
 
     # Calculate VRAM with current layers
     vram_usage = estimate_vram(model, current_layers, ctx_size, cache_type)
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 59bb6759..5b7dfdd8 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -318,7 +318,7 @@ def get_initial_vram_info():
 def get_initial_gpu_layers_max():
     if shared.model_name != 'None' and shared.args.loader == 'llama.cpp':
         model_settings = get_model_metadata(shared.model_name)
-        return model_settings.get('gpu_layers', 256)
+        return model_settings.get('max_gpu_layers', model_settings.get('gpu_layers', 256))
 
     return 256

From d99fb0a22a44dc4fb4d695647ba07cbf55e044c6 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 16 May 2025 17:29:18 -0700
Subject: [PATCH 2/6] Add backward compatibility with saved n_gpu_layers values

---
 modules/models_settings.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/modules/models_settings.py b/modules/models_settings.py
index 6715d494..76bce7a9 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -154,6 +154,9 @@ def get_model_metadata(model):
         for pat in settings:
             if re.match(pat.lower(), Path(model).name.lower()):
                 for k in settings[pat]:
+                    if k == 'n_gpu_layers':
+                        k = 'gpu_layers'
+
                     model_settings[k] = settings[pat][k]
 
     # Load instruction template if defined by name rather than by value
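A quick illustration of what patch 2 restores: model settings saved to config-user.yaml before the n_gpu_layers -> gpu_layers rename keep working. The sketch below is reviewer scaffolding, not code from the tree; apply_user_overrides and the inline sample config are hypothetical, and it reads each value via items() before renaming the key so the lookup stays unambiguous.

    import re
    from pathlib import Path

    def apply_user_overrides(model, user_config, model_settings):
        # Merge config-user.yaml entries whose regex matches the model
        # name, translating the legacy 'n_gpu_layers' key to 'gpu_layers'.
        for pat in user_config:
            if re.match(pat.lower(), Path(model).name.lower()):
                for k, v in user_config[pat].items():
                    if k == 'n_gpu_layers':
                        k = 'gpu_layers'
                    model_settings[k] = v
        return model_settings

    # A layer count saved under the old key lands on the new one:
    merged = apply_user_overrides(
        'llama-3-8b.Q4_K_M.gguf',
        {'llama-3-8b.q4_k_m.gguf$': {'n_gpu_layers': 20}},
        {'gpu_layers': 33, 'max_gpu_layers': 33},
    )
    assert merged == {'gpu_layers': 20, 'max_gpu_layers': 33}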
From 71fa046c1708a235853c359ef95b363a20c762d3 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 16 May 2025 17:38:08 -0700
Subject: [PATCH 3/6] Minor changes after 1c549d176b27233daf0ef6992bf5b5d8215784f9

---
 modules/models_settings.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/modules/models_settings.py b/modules/models_settings.py
index 76bce7a9..3a2400d4 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -457,17 +457,20 @@ def update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type,
         # Get model settings including user preferences
         model_settings = get_model_metadata(model)
 
-        # Check if the value is from user config-user.yaml
+        # Get the true maximum layers
+        max_layers = model_settings.get('max_gpu_layers', model_settings.get('gpu_layers', gpu_layers))
+
+        # Check if this is a user-saved setting
         user_config = shared.user_config
         model_regex = Path(model).name + '$'
         has_user_setting = model_regex in user_config and 'gpu_layers' in user_config[model_regex]
 
         if has_user_setting:
-            # Just return the current user value without adjustment
-            max_layers = model_settings.get('max_gpu_layers', 256)
+            # For user settings, just use the current value (which already has user pref)
+            # but ensure the slider maximum is correct
+            current_layers = gpu_layers  # Already has user setting
         else:
-            # No user setting, use model's max and auto-adjust
-            max_layers = model_settings.get('max_gpu_layers', model_settings.get('gpu_layers', gpu_layers))
+            # No user setting, auto-adjust from the maximum
             current_layers = max_layers  # Start from max
 
             # Auto-adjust based on available VRAM

From e3bba510d443a0a447f85083a2dff4a116a50848 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 16 May 2025 17:48:54 -0700
Subject: [PATCH 4/6] UI: Only add a blank space to streaming messages in instruct mode

---
 css/main.css              | 2 +-
 js/main.js                | 2 +-
 modules/html_generator.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/css/main.css b/css/main.css
index 0902b184..3fec7bb0 100644
--- a/css/main.css
+++ b/css/main.css
@@ -390,7 +390,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
   margin-left: auto;
   margin-right: auto;
   flex: 1;
-  overflow-y: auto;
+  overflow-y: hidden;
   display: flex;
   flex-direction: column;
   word-break: break-word;
diff --git a/js/main.js b/js/main.js
index 205cf88e..6cecd341 100644
--- a/js/main.js
+++ b/js/main.js
@@ -152,7 +152,7 @@ const observer = new MutationObserver(function(mutations) {
   }
 
   const chatElement = document.getElementById("chat");
-  if (chatElement) {
+  if (chatElement && chatElement.getAttribute("data-mode") === "instruct") {
     const messagesContainer = chatElement.querySelector(".messages");
     const lastChild = messagesContainer?.lastElementChild;
     const prevSibling = lastChild?.previousElementSibling;
diff --git a/modules/html_generator.py b/modules/html_generator.py
index 67d15b6e..39659476 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -347,7 +347,7 @@ remove_button = f'
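Taken together, patches 1 and 3 leave update_gpu_layers_and_vram() with the decision logic sketched below. This is a condensed stand-in for review, not the function itself: the model, context-size, and cache-type arguments are folded into a hypothetical per-layer estimate_vram callable, while the max_gpu_layers fallback chain for the slider maximum, the honor-the-saved-value branch, and the auto-adjust loop with its tolerance of 906 follow the patches.

    def pick_gpu_layers(model_settings, gpu_layers, has_user_setting,
                        available_vram, estimate_vram, tolerance=906):
        # Slider maximum: 'max_gpu_layers' (block_count + 1 from the GGUF
        # metadata), falling back to older keys when it is absent.
        max_layers = model_settings.get(
            'max_gpu_layers', model_settings.get('gpu_layers', gpu_layers))

        if has_user_setting:
            # A value saved in config-user.yaml is kept as-is; only the
            # slider maximum is corrected.
            current_layers = gpu_layers
        else:
            # No saved value: start from the maximum and shed layers
            # until the estimate fits under the free-VRAM budget.
            current_layers = max_layers
            if available_vram > 0:
                while current_layers > 0 and estimate_vram(current_layers) > available_vram - tolerance:
                    current_layers -= 1

        return current_layers, max_layers

    # E.g. 500 MB per layer against 16000 MB free keeps 30 of 33 layers:
    layers, maximum = pick_gpu_layers(
        {'max_gpu_layers': 33}, 33, False, 16000, lambda n: 500 * n)
    assert (layers, maximum) == (30, 33)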