Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2025-06-07 14:17:09 -04:00)
Remove obsolete references to llamacpp_HF
commit d68f0fbdf7
parent a0abf93425

3 changed files with 2 additions and 5 deletions
@@ -65,10 +65,8 @@ def _get_next_logits(prompt, state, use_samplers, previous, top_logits=25, retur
 
     if use_samplers:
         if is_non_hf_exllamav2:
+            # sampling is all done in C++ for exllama, so it is really hard to hijack
             logger.error("Sampler hijacking is not supported non-Huggingface loaders.")
-            # sampling is all done in c for exllama, so it is really hard to hijack
-            # it should be possible to hijack llamacpp sampler by hijacking all their sampling methods,
-            # but it is not implemented yet
             return 'Error: Sampler hijacking is not supported non-Huggingface loaders. Please disable the "Use samplers" option.', previous
 
         state['max_new_tokens'] = 1
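The flag checked in this hunk separates the standalone (non-Hugging Face) ExLlamaV2 loader, whose sampling happens entirely inside its C++ extension, from Transformers-style loaders whose samplers can be hijacked from Python. A minimal sketch of what such a check might look like; the helper name and class name below are assumptions for illustration, not taken from this diff:

def _is_non_hf_exllamav2(model) -> bool:
    # Hypothetical helper: the standalone ExLlamaV2 loader wraps its own model
    # class, so Python-side sampler hooks never see the logits it samples from.
    return model.__class__.__name__ == 'Exllamav2Model'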
@@ -220,7 +220,7 @@ def fix_loader_name(name):
         return name
 
     name = name.lower()
-    if name in ['llamacpp', 'llama.cpp', 'llama-cpp', 'llama cpp']:
+    if name in ['llama.cpp', 'llamacpp', 'llama-cpp', 'llama cpp']:
         return 'llama.cpp'
     elif name in ['transformers', 'huggingface', 'hf', 'hugging_face', 'hugging face']:
         return 'Transformers'
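fix_loader_name() lowercases whatever the user typed and maps each accepted spelling to a single canonical loader id. Illustrative calls, assuming the remaining branches follow the same pattern as the two shown above:

fix_loader_name('Llama.CPP')    # -> 'llama.cpp'
fix_loader_name('llama cpp')    # -> 'llama.cpp'
fix_loader_name('HuggingFace')  # -> 'Transformers'
fix_loader_name(None)           # -> None (assuming the early `return name` guards falsy input)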
@@ -127,7 +127,6 @@ def create_ui():
             shared.gradio['cpp_runner'] = gr.Checkbox(label="cpp-runner", value=shared.args.cpp_runner, info='Enable inference with ModelRunnerCpp, which is faster than the default ModelRunner.')
             shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='Set trust_remote_code=True while loading the tokenizer/model. To enable this option, start the web UI with the --trust-remote-code flag.', interactive=shared.args.trust_remote_code)
             shared.gradio['no_use_fast'] = gr.Checkbox(label="no_use_fast", value=shared.args.no_use_fast, info='Set use_fast=False while loading the tokenizer.')
-            shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to place your GGUF in a subfolder of models/ with the necessary tokenizer files.\n\nYou can use the \"llamacpp_HF creator\" menu to do that automatically.")
             shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
             shared.gradio['tensorrt_llm_info'] = gr.Markdown('* TensorRT-LLM has to be installed manually in a separate Python 3.10 environment at the moment. For a guide, consult the description of [this PR](https://github.com/oobabooga/text-generation-webui/pull/5715). \n\n* `max_seq_len` is only used when `cpp-runner` is checked.\n\n* `cpp_runner` does not support streaming at the moment.')
 
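The removed llamacpp_HF_info text described the old workflow: place the GGUF in a subfolder of models/ together with the tokenizer files from the source Hugging Face repository (or let the "llamacpp_HF creator" menu assemble it). An illustrative layout, with hypothetical folder and file names:

models/
└── MyModel-llamacpp-HF/
    ├── mymodel.Q4_K_M.gguf       # quantized weights
    ├── tokenizer.json            # tokenizer files copied from the source HF repo
    ├── tokenizer_config.json
    └── special_tokens_map.json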