mirror of https://github.com/oobabooga/text-generation-webui.git
synced 2025-06-07 14:17:09 -04:00

commit ae61c1a0f4
6 changed files with 46 additions and 13 deletions

README.md | 18
```diff
@@ -14,18 +14,18 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.
 - Supports multiple text generation backends in one UI/API, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), [ExLlamaV2](https://github.com/turboderp-org/exllamav2), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) (the latter via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile)).
 - Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory.
-- **File attachments**: Upload text files and PDF documents directly in conversations to talk about their contents.
-- **Web search**: Optionally search the internet with LLM-generated queries based on your input to add context to the conversation.
-- Advanced chat management: Edit messages, navigate between message versions, and branch conversations at any point.
+- 100% offline and private, with zero telemetry, external resources, or remote update requests.
 - Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats.
-- Automatic GPU layers for GGUF models (on NVIDIA GPUs).
-- UI that resembles the original ChatGPT style.
-- Three chat modes: `instruct`, `chat-instruct`, and `chat`, with automatic prompt templates in `chat-instruct`.
-- Free-form text generation in the Default/Notebook tabs without being limited to chat turns. You can send formatted conversations from the Chat tab to these.
+- **File attachments**: Upload text files and PDF documents to talk about their contents.
+- **Web search**: Optionally search the internet with LLM-generated queries to add context to the conversation.
+- Aesthetic UI with dark and light themes.
+- `instruct` mode for instruction-following (like ChatGPT), and `chat-instruct`/`chat` modes for talking to custom characters.
+- Edit messages, navigate between message versions, and branch conversations at any point.
 - Multiple sampling parameters and generation options for sophisticated text generation control.
-- Switch between different models easily in the UI without restarting, with fine control over settings.
+- Switch between different models in the UI without restarting.
+- Automatic GPU layers for GGUF models (on NVIDIA GPUs).
+- Free-form text generation in the Default/Notebook tabs without being limited to chat turns.
 - OpenAI-compatible API with Chat and Completions endpoints, including tool-calling support – see [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples).
-- 100% offline and private, with zero telemetry, external resources, or remote update requests. Web search is optional and user-controlled.
 - Extension support, with numerous built-in and user-contributed extensions available. See the [wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details.
 
 ## How to install
 
```
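Of the feature bullets above, the OpenAI-compatible API is the one most readers will want to try first. A minimal sketch, assuming a local server with the API enabled on its default port 5000; see the wiki examples linked in the bullet for authoritative usage:

```python
# Minimal sketch: call the OpenAI-compatible Chat Completions endpoint.
# Assumes the webui is running locally with the API enabled on the
# default port 5000; adjust host/port to your setup.
import requests

response = requests.post(
    "http://127.0.0.1:5000/v1/chat/completions",
    json={
        "messages": [{"role": "user", "content": "Hello! Who are you?"}],
        "max_tokens": 64,
    },
    timeout=60,
)
print(response.json()["choices"][0]["message"]["content"])
```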
```diff
@@ -1551,3 +1551,7 @@ strong {
     color: var(--body-text-color-subdued);
     margin-top: 4px;
 }
+
+button:focus {
+    outline: none;
+}
```
```diff
@@ -806,9 +806,12 @@ def remove_last_message(history):
     return html.unescape(last[0]), history
 
 
-def send_dummy_message(textbox, state):
+def send_dummy_message(text, state):
     history = state['history']
-    text = textbox['text']
+
+    # Handle both dict and string inputs
+    if isinstance(text, dict):
+        text = text['text']
 
     # Initialize metadata if not present
     if 'metadata' not in history:
@@ -822,9 +825,12 @@ def send_dummy_message(textbox, state):
     return history
 
 
-def send_dummy_reply(textbox, state):
+def send_dummy_reply(text, state):
     history = state['history']
-    text = textbox['text']
+
+    # Handle both dict and string inputs
+    if isinstance(text, dict):
+        text = text['text']
 
     # Initialize metadata if not present
     if 'metadata' not in history:
```
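Both hunks make the same change: `send_dummy_message` and `send_dummy_reply` now take the text value directly and accept either a plain string or the `{'text': ..., 'files': [...]}` dict that Gradio's multimodal textbox emits. A standalone sketch of that normalization (`normalize_text` is a hypothetical name, used here only for illustration):

```python
# Hypothetical helper mirroring the dict-or-string handling added above.
# Gradio's multimodal textbox yields {'text': ..., 'files': [...]}; other
# callers pass a bare string.
def normalize_text(text):
    if isinstance(text, dict):
        text = text['text']
    return text


assert normalize_text({'text': 'hi', 'files': []}) == 'hi'
assert normalize_text('hi') == 'hi'
```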
```diff
@@ -245,3 +245,20 @@ class Exllamav3HF(PreTrainedModel, GenerationMixin):
             pretrained_model_name_or_path = Path(f'{shared.args.model_dir}') / Path(pretrained_model_name_or_path)
 
         return Exllamav3HF(pretrained_model_name_or_path)
+
+    def unload(self):
+        """Properly unload the ExllamaV3 model and free GPU memory."""
+        if hasattr(self, 'ex_model') and self.ex_model is not None:
+            self.ex_model.unload()
+            self.ex_model = None
+
+        if hasattr(self, 'ex_cache') and self.ex_cache is not None:
+            self.ex_cache = None
+
+        # Clean up any additional ExllamaV3 resources
+        if hasattr(self, 'past_seq'):
+            self.past_seq = None
+        if hasattr(self, 'past_seq_negative'):
+            self.past_seq_negative = None
+        if hasattr(self, 'ex_cache_negative'):
+            self.ex_cache_negative = None
```
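The new method asks ExLlamaV3 to free its weights and then drops the wrapper's cached references; VRAM held by PyTorch's caching allocator still has to be returned separately, which is why the loader-side hunk below follows up with `clear_torch_cache()`. A hedged sketch of the overall release sequence, using only standard `torch` and `gc` calls:

```python
# Sketch of a full GPU-memory release sequence (assumes a CUDA build of
# PyTorch). The 'unload' hook is duck-typed, as in the diff above.
import gc

import torch


def release(model):
    if hasattr(model, 'unload'):
        model.unload()  # let the backend free its own weights/caches first
    del model
    gc.collect()  # collect lingering reference cycles
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # return cached blocks to the driver
```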
```diff
@@ -116,10 +116,13 @@ def unload_model(keep_model_name=False):
         return
 
     is_llamacpp = (shared.model.__class__.__name__ == 'LlamaServer')
+    if shared.args.loader == 'ExLlamav3_HF':
+        shared.model.unload()
+
     shared.model = shared.tokenizer = None
     shared.lora_names = []
     shared.model_dirty_from_training = False
 
     if not is_llamacpp:
         from modules.torch_utils import clear_torch_cache
         clear_torch_cache()
```
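One way to read the `is_llamacpp` line kept as context here: comparing `__class__.__name__` rather than calling `isinstance` means this function never has to import the llama.cpp backend class, so unloading works even when that backend isn't installed. A tiny illustration with a stand-in class:

```python
# Stand-in for the real LlamaServer class, which lives in a separate
# backend module; the name comparison below needs no such import.
class LlamaServer:
    pass


model = LlamaServer()
print(model.__class__.__name__ == 'LlamaServer')  # True
```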
```diff
@@ -71,6 +71,7 @@ if not shared.args.old_colors:
         block_background_fill_dark='transparent',
         block_border_color_dark='transparent',
         input_border_color_dark='var(--border-color-dark)',
+        input_border_color_focus_dark='var(--border-color-dark)',
         checkbox_border_color_dark='var(--border-color-dark)',
         border_color_primary_dark='var(--border-color-dark)',
         button_secondary_border_color_dark='var(--border-color-dark)',
@@ -89,6 +90,8 @@ if not shared.args.old_colors:
         checkbox_label_shadow='none',
         block_shadow='none',
         block_shadow_dark='none',
+        input_shadow_focus='none',
+        input_shadow_focus_dark='none',
         button_large_radius='0.375rem',
         button_large_padding='6px 12px',
         input_radius='0.375rem',
```
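Together with the `button:focus` rule added to the stylesheet earlier, these theme variables suppress Gradio's default focus glow around inputs. A minimal standalone sketch setting the same variables (the variable names come straight from the diff; `var(--border-color-dark)` is a custom property defined in the webui's own CSS, so an isolated app should substitute a plain color):

```python
# Minimal sketch: override input focus styling via Gradio theme variables.
import gradio as gr

theme = gr.themes.Default().set(
    input_border_color_focus_dark='#444444',  # webui uses var(--border-color-dark)
    input_shadow_focus='none',
    input_shadow_focus_dark='none',
)

with gr.Blocks(theme=theme) as demo:
    gr.Textbox(label="Prompt")

if __name__ == "__main__":
    demo.launch()
```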