From bae1aa34aa020aa749f942708b96e28e2b85c4a4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 25 May 2025 17:19:26 -0700 Subject: [PATCH] Fix loading `Llama-3_3-Nemotron-Super-49B-v1` and similar models (closes #7012) --- modules/models_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/models_settings.py b/modules/models_settings.py index df5a8e8d..c914bdea 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -335,7 +335,7 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type): if key.endswith('.block_count'): n_layers = value elif key.endswith('.attention.head_count_kv'): - n_kv_heads = value + n_kv_heads = max(value) if isinstance(value, list) else value elif key.endswith('.embedding_length'): embedding_dim = value