mirror of https://github.com/oobabooga/text-generation-webui.git
synced 2025-06-07 14:17:09 -04:00

commit 17c29fa0a2
12 changed files with 44 additions and 26 deletions

@@ -2,8 +2,8 @@
   display: grid;
   align-items: start;
   grid-template-columns: 60px minmax(0, 1fr);
-  padding-bottom: 14px;
-  padding-top: 14px;
+  padding-bottom: 22px;
+  padding-top: 6px;
   font-size: 18px;
   font-family: Roboto, Arial, sans-serif; /* Modern font */
   line-height: 1.5;

@@ -4,8 +4,8 @@
   display: grid;
   align-items: start;
   grid-template-columns: 60px minmax(0, 1fr);
-  padding-bottom: 14px;
-  padding-top: 14px;
+  padding-bottom: 21px;
+  padding-top: 7px;
   font-size: 18px;
   font-family: 'Noto Sans', Arial, sans-serif;
   line-height: 1.428571429;

@@ -16,7 +16,7 @@
 }
 
 .message {
-  padding-bottom: 1em;
-  padding-top: 1em;
+  padding-bottom: 1.5em;
+  padding-top: 0.5em;
   grid-template-columns: 70px minmax(0, 1fr);
 }

@@ -2,8 +2,8 @@
   display: grid;
   align-items: start;
   grid-template-columns: 60px minmax(0, 1fr);
-  padding-bottom: 1em;
-  padding-top: 1em;
+  padding-bottom: 1.5em;
+  padding-top: 0.5em;
   font-size: 15px;
   font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
   line-height: 22.5px !important;

@@ -1,6 +1,6 @@
 .message {
-  padding-bottom: 12.5px;
-  padding-top: 12.5px;
+  padding-bottom: 22px;
+  padding-top: 3px;
   font-size: 15px;
   font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
   line-height: 1.428571429;

@@ -1,6 +1,6 @@
 .message {
-  padding-bottom: 12.5px;
-  padding-top: 12.5px;
+  padding-bottom: 22px;
+  padding-top: 3px;
   font-size: 15px;
   font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
   line-height: 1.428571429;

@@ -390,7 +390,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
   margin-left: auto;
   margin-right: auto;
   flex: 1;
-  overflow-y: auto;
+  overflow-y: hidden;
   display: flex;
   flex-direction: column;
   word-break: break-word;

@@ -152,7 +152,7 @@ const observer = new MutationObserver(function(mutations) {
     }
 
     const chatElement = document.getElementById("chat");
-    if (chatElement) {
+    if (chatElement && chatElement.getAttribute("data-mode") === "instruct") {
         const messagesContainer = chatElement.querySelector(".messages");
         const lastChild = messagesContainer?.lastElementChild;
         const prevSibling = lastChild?.previousElementSibling;

@@ -347,7 +347,7 @@ remove_button = f'<button class="footer-button footer-remove-button" title="Remo
 
 
 def generate_instruct_html(history):
-    output = f'<style>{instruct_css}</style><div class="chat" id="chat"><div class="messages">'
+    output = f'<style>{instruct_css}</style><div class="chat" id="chat" data-mode="instruct"><div class="messages">'
 
     for i in range(len(history['visible'])):
         row_visible = history['visible'][i]

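The JS hunk and this one are two halves of a single change: generate_instruct_html() stamps the chat container with data-mode="instruct", and the MutationObserver code only applies its last-message handling when that attribute is present. Below is a minimal sketch of the generator side; passing instruct_css as a parameter and reducing each message to a placeholder div are simplifications, not the module's real structure.

# Minimal sketch of the data-mode handshake; the real function builds far
# richer per-message markup, and instruct_css is module-level state there.
def generate_instruct_html(history, instruct_css=''):
    # data-mode="instruct" is the attribute the frontend JS now checks
    # before restyling the last message.
    output = f'<style>{instruct_css}</style><div class="chat" id="chat" data-mode="instruct"><div class="messages">'
    for row_visible in history['visible']:
        output += f'<div class="message">{row_visible}</div>'
    output += '</div></div>'
    return output

print(generate_instruct_html({'visible': ['hi', 'hello!']}))
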
@@ -72,6 +72,7 @@ def get_model_metadata(model):
             model_settings['compress_pos_emb'] = metadata[k]
         elif k.endswith('block_count'):
             model_settings['gpu_layers'] = metadata[k] + 1
+            model_settings['max_gpu_layers'] = metadata[k] + 1
 
     if 'tokenizer.chat_template' in metadata:
         template = metadata['tokenizer.chat_template']

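In GGUF metadata, *.block_count is the number of transformer blocks; the + 1 appears to leave room for offloading the output layer as well. The added line records that ceiling under a separate max_gpu_layers key so a later user override of gpu_layers cannot erase it. A toy version of this pass, with a hypothetical metadata key:

# Toy version of the metadata pass above; the key name is illustrative.
metadata = {'llama.block_count': 48}
model_settings = {}
for k in metadata:
    if k.endswith('block_count'):
        model_settings['gpu_layers'] = metadata[k] + 1      # default offload count
        model_settings['max_gpu_layers'] = metadata[k] + 1  # fixed ceiling
print(model_settings)  # {'gpu_layers': 49, 'max_gpu_layers': 49}
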
@@ -153,6 +154,9 @@
     for pat in settings:
         if re.match(pat.lower(), Path(model).name.lower()):
             for k in settings[pat]:
+                if k == 'n_gpu_layers':
+                    k = 'gpu_layers'
+
                 model_settings[k] = settings[pat][k]
 
     # Load instruction template if defined by name rather than by value

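The inserted branch translates the legacy n_gpu_layers key, which older saved configs may still contain, into the current gpu_layers name before the value is applied. The same pattern in isolation, with a hypothetical alias table:

# The key-normalization pattern from the hunk above, in isolation.
LEGACY_KEYS = {'n_gpu_layers': 'gpu_layers'}  # hypothetical alias table

def apply_saved_settings(model_settings, saved):
    for k, v in saved.items():
        k = LEGACY_KEYS.get(k, k)  # map old names to current ones
        model_settings[k] = v
    return model_settings

print(apply_saved_settings({}, {'n_gpu_layers': 33}))  # {'gpu_layers': 33}
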
@@ -450,17 +454,31 @@ def update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type,
     max_layers = gpu_layers
 
     if auto_adjust:
-        # Get max layers from model metadata
+        # Get model settings including user preferences
         model_settings = get_model_metadata(model)
-        max_layers = model_settings.get('gpu_layers', gpu_layers)
 
-        # Auto-adjust based on available VRAM
-        available_vram = get_nvidia_free_vram()
-        if available_vram > 0:
-            tolerance = 906
-            current_layers = max_layers
-            while current_layers > 0 and estimate_vram(model, current_layers, ctx_size, cache_type) > available_vram - tolerance:
-                current_layers -= 1
+        # Get the true maximum layers
+        max_layers = model_settings.get('max_gpu_layers', model_settings.get('gpu_layers', gpu_layers))
+
+        # Check if this is a user-saved setting
+        user_config = shared.user_config
+        model_regex = Path(model).name + '$'
+        has_user_setting = model_regex in user_config and 'gpu_layers' in user_config[model_regex]
+
+        if has_user_setting:
+            # For user settings, just use the current value (which already has user pref)
+            # but ensure the slider maximum is correct
+            current_layers = gpu_layers  # Already has user setting
+        else:
+            # No user setting, auto-adjust from the maximum
+            current_layers = max_layers  # Start from max
+
+        # Auto-adjust based on available VRAM
+        available_vram = get_nvidia_free_vram()
+        if available_vram > 0:
+            tolerance = 906
+            while current_layers > 0 and estimate_vram(model, current_layers, ctx_size, cache_type) > available_vram - tolerance:
+                current_layers -= 1
 
     # Calculate VRAM with current layers
     vram_usage = estimate_vram(model, current_layers, ctx_size, cache_type)

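The rewritten block separates the model's layer ceiling (max_gpu_layers) from the value to offload right now: a user-saved gpu_layers entry in shared.user_config wins as-is, and only in its absence does the loop walk down from the ceiling until the VRAM estimate fits under free VRAM minus the fixed 906 MiB tolerance. A self-contained sketch of that decision, with a stubbed linear estimator standing in for the model- and cache-aware estimate_vram:

# Self-contained sketch of the adjustment logic above; mib_per_layer is a
# stand-in for estimate_vram, and 906 is the MiB tolerance from the diff.
def pick_layers(max_layers, user_layers, free_vram_mib,
                mib_per_layer=350.0, tolerance=906):
    if user_layers is not None:
        return user_layers               # user-saved setting wins unchanged
    layers = max_layers                  # otherwise start from the ceiling
    while layers > 0 and layers * mib_per_layer > free_vram_mib - tolerance:
        layers -= 1                      # shed layers until the estimate fits
    return layers

print(pick_layers(49, None, 12000))  # auto-adjusted downward: 31
print(pick_layers(49, 20, 12000))    # user preference preserved: 20
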
@@ -318,7 +318,7 @@ def get_initial_vram_info():
 def get_initial_gpu_layers_max():
     if shared.model_name != 'None' and shared.args.loader == 'llama.cpp':
         model_settings = get_model_metadata(shared.model_name)
-        return model_settings.get('gpu_layers', 256)
+        return model_settings.get('max_gpu_layers', model_settings.get('gpu_layers', 256))
 
     return 256
 

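With this change, the slider maximum comes from max_gpu_layers even when gpu_layers holds a smaller user-saved value, so a saved cap no longer shrinks the slider's range. The nested .get() fallback chain, isolated with example values:

# The fallback chain in isolation: prefer the true ceiling, then the
# (possibly user-capped) gpu_layers value, then the 256 default.
model_settings = {'gpu_layers': 20, 'max_gpu_layers': 49}  # example values
slider_max = model_settings.get('max_gpu_layers',
                                model_settings.get('gpu_layers', 256))
print(slider_max)  # 49: the slider still spans the full model depth
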
@@ -137,7 +137,7 @@ def get_available_models():
 
     model_dirs = sorted(model_dirs, key=natural_keys)
 
-    return ['None'] + filtered_gguf_files + model_dirs
+    return filtered_gguf_files + model_dirs
 
 
 def get_available_ggufs():