Merge pull request #6987 from oobabooga/dev

Merge dev branch
Author: oobabooga
Date: 2025-05-16 22:23:59 -03:00, committed by GitHub
commit 17c29fa0a2
12 changed files with 44 additions and 26 deletions

View file

@@ -2,8 +2,8 @@
 display: grid;
 align-items: start;
 grid-template-columns: 60px minmax(0, 1fr);
-padding-bottom: 14px;
-padding-top: 14px;
+padding-bottom: 22px;
+padding-top: 6px;
 font-size: 18px;
 font-family: Roboto, Arial, sans-serif; /* Modern font */
 line-height: 1.5;

View file

@@ -4,8 +4,8 @@
 display: grid;
 align-items: start;
 grid-template-columns: 60px minmax(0, 1fr);
-padding-bottom: 14px;
-padding-top: 14px;
+padding-bottom: 21px;
+padding-top: 7px;
 font-size: 18px;
 font-family: 'Noto Sans', Arial, sans-serif;
 line-height: 1.428571429;

View file

@@ -16,7 +16,7 @@
 }

 .message {
-padding-bottom: 1em;
-padding-top: 1em;
+padding-bottom: 1.5em;
+padding-top: 0.5em;
 grid-template-columns: 70px minmax(0, 1fr);
 }

View file

@@ -2,8 +2,8 @@
 display: grid;
 align-items: start;
 grid-template-columns: 60px minmax(0, 1fr);
-padding-bottom: 1em;
-padding-top: 1em;
+padding-bottom: 1.5em;
+padding-top: 0.5em;
 font-size: 15px;
 font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
 line-height: 22.5px !important;

View file

@@ -1,6 +1,6 @@
 .message {
-padding-bottom: 12.5px;
-padding-top: 12.5px;
+padding-bottom: 22px;
+padding-top: 3px;
 font-size: 15px;
 font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
 line-height: 1.428571429;

View file

@@ -1,6 +1,6 @@
 .message {
-padding-bottom: 12.5px;
-padding-top: 12.5px;
+padding-bottom: 22px;
+padding-top: 3px;
 font-size: 15px;
 font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
 line-height: 1.428571429;

View file

@@ -390,7 +390,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 margin-left: auto;
 margin-right: auto;
 flex: 1;
-overflow-y: auto;
+overflow-y: hidden;
 display: flex;
 flex-direction: column;
 word-break: break-word;

View file

@@ -152,7 +152,7 @@ const observer = new MutationObserver(function(mutations) {
 }

 const chatElement = document.getElementById("chat");
-if (chatElement) {
+if (chatElement && chatElement.getAttribute("data-mode") === "instruct") {
 const messagesContainer = chatElement.querySelector(".messages");
 const lastChild = messagesContainer?.lastElementChild;
 const prevSibling = lastChild?.previousElementSibling;

View file

@@ -347,7 +347,7 @@ remove_button = f'<button class="footer-button footer-remove-button" title="Remo
 def generate_instruct_html(history):
-    output = f'<style>{instruct_css}</style><div class="chat" id="chat"><div class="messages">'
+    output = f'<style>{instruct_css}</style><div class="chat" id="chat" data-mode="instruct"><div class="messages">'
     for i in range(len(history['visible'])):
         row_visible = history['visible'][i]
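
The data-mode="instruct" attribute added here is the hook that the JavaScript change above keys on: the observer's last-message handling now runs only for instruct-style chats. A minimal sketch of the server-side half of that contract, assuming instruct_css holds the stylesheet text as in the diff:

    # Hypothetical, simplified version of the renderer's opening markup.
    def instruct_chat_header(instruct_css):
        # Tag the container so client-side code can distinguish
        # instruct mode from the other chat styles.
        return (f'<style>{instruct_css}</style>'
                '<div class="chat" id="chat" data-mode="instruct">'
                '<div class="messages">')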

View file

@@ -72,6 +72,7 @@ def get_model_metadata(model):
             model_settings['compress_pos_emb'] = metadata[k]
         elif k.endswith('block_count'):
             model_settings['gpu_layers'] = metadata[k] + 1
+            model_settings['max_gpu_layers'] = metadata[k] + 1

     if 'tokenizer.chat_template' in metadata:
         template = metadata['tokenizer.chat_template']
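
In GGUF metadata, block_count is the number of transformer blocks; the + 1 leaves room to offload the output layer as well, which llama.cpp treats as one extra offloadable layer. A minimal sketch of what the lookup above does, with a hypothetical metadata dict:

    # Hypothetical GGUF metadata as read from the model file.
    metadata = {'llama.block_count': 32}

    model_settings = {}
    for k in metadata:
        if k.endswith('block_count'):
            # block_count transformer blocks + 1 output layer.
            model_settings['gpu_layers'] = metadata[k] + 1
            model_settings['max_gpu_layers'] = metadata[k] + 1

    print(model_settings)  # {'gpu_layers': 33, 'max_gpu_layers': 33}
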
@@ -153,6 +154,9 @@ def get_model_metadata(model):
     for pat in settings:
         if re.match(pat.lower(), Path(model).name.lower()):
             for k in settings[pat]:
+                if k == 'n_gpu_layers':
+                    k = 'gpu_layers'
+
                 model_settings[k] = settings[pat][k]

     # Load instruction template if defined by name rather than by value
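
The n_gpu_layers → gpu_layers rewrite is a small key migration: user settings saved under the older llama.cpp-style name are folded into the canonical key before being applied. Sketched in isolation, with a hypothetical saved config:

    saved = {'n_gpu_layers': 33, 'ctx_size': 8192}  # hypothetical user config

    model_settings = {}
    for k, v in saved.items():
        if k == 'n_gpu_layers':
            k = 'gpu_layers'
        model_settings[k] = v

    print(model_settings)  # {'gpu_layers': 33, 'ctx_size': 8192}
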
@@ -450,17 +454,31 @@ def update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type,
     max_layers = gpu_layers
     if auto_adjust:
-        # Get max layers from model metadata
+        # Get model settings including user preferences
         model_settings = get_model_metadata(model)
-        max_layers = model_settings.get('gpu_layers', gpu_layers)
-
-        # Auto-adjust based on available VRAM
-        available_vram = get_nvidia_free_vram()
-        if available_vram > 0:
-            tolerance = 906
-            current_layers = max_layers
-            while current_layers > 0 and estimate_vram(model, current_layers, ctx_size, cache_type) > available_vram - tolerance:
-                current_layers -= 1
+
+        # Get the true maximum layers
+        max_layers = model_settings.get('max_gpu_layers', model_settings.get('gpu_layers', gpu_layers))
+
+        # Check if this is a user-saved setting
+        user_config = shared.user_config
+        model_regex = Path(model).name + '$'
+        has_user_setting = model_regex in user_config and 'gpu_layers' in user_config[model_regex]
+
+        if has_user_setting:
+            # For user settings, just use the current value (which already has user pref)
+            # but ensure the slider maximum is correct
+            current_layers = gpu_layers  # Already has user setting
+        else:
+            # No user setting, auto-adjust from the maximum
+            current_layers = max_layers  # Start from max
+
+            # Auto-adjust based on available VRAM
+            available_vram = get_nvidia_free_vram()
+            if available_vram > 0:
+                tolerance = 906
+                while current_layers > 0 and estimate_vram(model, current_layers, ctx_size, cache_type) > available_vram - tolerance:
+                    current_layers -= 1

     # Calculate VRAM with current layers
     vram_usage = estimate_vram(model, current_layers, ctx_size, cache_type)
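
Read in isolation, the new auto-adjust path boils down to the following sketch: it is bypassed when the user has saved a gpu_layers value for this model, and otherwise walks down from the model's true maximum until the VRAM estimate fits under the free VRAM minus a fixed tolerance (estimate_vram and get_nvidia_free_vram are the helpers used in the diff):

    def pick_gpu_layers(model, max_layers, ctx_size, cache_type):
        # Walk down from the maximum until the estimate fits in free VRAM.
        current_layers = max_layers
        available_vram = get_nvidia_free_vram()
        if available_vram > 0:
            tolerance = 906  # fixed headroom in MiB, as in the diff
            while current_layers > 0 and estimate_vram(
                    model, current_layers, ctx_size, cache_type) > available_vram - tolerance:
                current_layers -= 1
        return current_layers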

View file

@@ -318,7 +318,7 @@ def get_initial_vram_info():
 def get_initial_gpu_layers_max():
     if shared.model_name != 'None' and shared.args.loader == 'llama.cpp':
         model_settings = get_model_metadata(shared.model_name)
-        return model_settings.get('gpu_layers', 256)
+        return model_settings.get('max_gpu_layers', model_settings.get('gpu_layers', 256))

     return 256
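
The nested .get() calls form a three-step fallback for the slider maximum: the exact max_gpu_layers from metadata if present, then the legacy gpu_layers value, then a generic ceiling of 256. For example:

    settings = {'gpu_layers': 48}  # hypothetical: no 'max_gpu_layers' key
    maximum = settings.get('max_gpu_layers', settings.get('gpu_layers', 256))
    assert maximum == 48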

View file

@@ -137,7 +137,7 @@ def get_available_models():
     model_dirs = sorted(model_dirs, key=natural_keys)
-    return ['None'] + filtered_gguf_files + model_dirs
+    return filtered_gguf_files + model_dirs


 def get_available_ggufs():