Mirror of https://github.com/oobabooga/text-generation-webui.git, synced 2025-06-07 14:17:09 -04:00
UI: Fix gpu-layers being ignored (closes #6973)
parent c375b69413
commit 3fa1a899ae
4 changed files with 4 additions and 4 deletions
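The underlying problem is a key mismatch: the gpu-layers slider was registered under the element name 'n_gpu_layers', while the command-line argument, the loader parameter lists, and the metadata defaults all use 'gpu_layers', so the slider's value was never copied onto the loader settings. Below is a minimal sketch of that failure mode, using hypothetical helper names rather than the webui's actual functions:

from types import SimpleNamespace

# Hypothetical stand-in for the parsed --gpu-layers flag
# (the real project keeps this on shared.args).
args = SimpleNamespace(gpu_layers=0, ctx_size=2048)

# Element names that get copied from the UI onto args;
# this mirrors the role played by list_model_elements() below.
MODEL_ELEMENTS = ['gpu_layers', 'ctx_size']


def apply_ui_values(ui_values):
    """Copy UI widget values onto args, but only for known element names."""
    for name in MODEL_ELEMENTS:
        if name in ui_values:
            setattr(args, name, ui_values[name])


# Before the fix: the slider was registered under 'n_gpu_layers', so its
# value never matches a known element name and is silently dropped.
apply_ui_values({'n_gpu_layers': 33, 'ctx_size': 8192})
print(args.gpu_layers)  # 0 -> the model loads with no GPU offloading

# After the fix: the element name matches the argument name, so the
# slider value actually reaches the loader.
apply_ui_values({'gpu_layers': 33, 'ctx_size': 8192})
print(args.gpu_layers)  # 33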
@@ -5,7 +5,7 @@ import gradio as gr
 loaders_and_params = OrderedDict({
     'llama.cpp': [
-        'n_gpu_layers',
+        'gpu_layers',
         'threads',
         'threads_batch',
         'batch_size',
@@ -67,7 +67,7 @@ def get_model_metadata(model):
         elif k.endswith('rope.scaling.factor'):
             model_settings['compress_pos_emb'] = metadata[k]
         elif k.endswith('block_count'):
-            model_settings['n_gpu_layers'] = metadata[k] + 1
+            model_settings['gpu_layers'] = metadata[k] + 1

     if 'tokenizer.chat_template' in metadata:
         template = metadata['tokenizer.chat_template']
@@ -105,7 +105,7 @@ def list_model_elements():
         'filter_by_loader',
         'loader',
         'cpu_memory',
-        'n_gpu_layers',
+        'gpu_layers',
         'threads',
         'threads_batch',
         'batch_size',
@@ -50,7 +50,7 @@ def create_ui():
             gr.Markdown("## Main options")
             with gr.Row():
                 with gr.Column():
-                    shared.gradio['n_gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=256, value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.')
+                    shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=256, value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.')
                     shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. ⚠️ Lower this value if you can\'t load the model.')
                     shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
                     shared.gradio['cache_type'] = gr.Dropdown(label="cache-type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).')