Set the maximum gpu_layers value automatically when the model is loaded with --model

2025-06-07 06:06:20 -04:00 · 2025-05-16 11:58:17 -07:00 · 2025-05-16 11:58:17 -07:00 · fc483650b5
commit fc483650b5
parent 38c50087fe
1 changed file with 9 additions and 1 deletion
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -38,7 +38,7 @@ def create_ui():
                    gr.Markdown("## Main options")
                    with gr.Row():
                        with gr.Column():
-                            shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=256, value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.')
+                            shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=get_initial_gpu_layers_max(), value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.')
                            shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. ⚠️ Lower this value if you can\'t load the model.')
                            shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
                            shared.gradio['cache_type'] = gr.Dropdown(label="cache-type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).')
@@ -315,6 +315,14 @@ def get_initial_vram_info():
    return "<div id=\"vram-info\"'>Estimated VRAM to load the model:</span>"


+def get_initial_gpu_layers_max():
+    if shared.model_name != 'None' and shared.args.loader == 'llama.cpp':
+        model_settings = get_model_metadata(shared.model_name)
+        return model_settings.get('gpu_layers', 256)
+
+    return 256
+
+
 def handle_load_model_event_initial(model, state):
    state = apply_model_settings_to_state(model, state)
    output = ui.apply_interface_values(state)