Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2025-06-07 14:17:09 -04:00)

Commit fee48eb2a8: Merge branch 'dev' into dev
39 changed files with 629 additions and 262 deletions

README.md (10 changes)
@@ -12,10 +12,8 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.
 ## Features
-- Supports multiple text generation backends in one UI/API, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [ExLlamaV2](https://github.com/turboderp-org/exllamav2).
-- [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) is also supported via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile).
-- Additional quantization libraries like [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [HQQ](https://github.com/mobiusml/hqq), and [AQLM](https://github.com/Vahe1994/AQLM) can be used with the Transformers loader if you install them manually.
-- Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for llama.cpp GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory that doesn't interfere with your system environment.
+- Supports multiple text generation backends in one UI/API, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), [ExLlamaV2](https://github.com/turboderp-org/exllamav2), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) (the latter via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile)).
+- Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory that doesn't interfere with your system environment.
 - UI that resembles the original ChatGPT style.
 - Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats.
 - Three chat modes: `instruct`, `chat-instruct`, and `chat`, with automatic prompt templates in `chat-instruct`.

@@ -146,14 +144,14 @@ The `requirements*.txt` above contain various wheels precompiled through GitHub
 For NVIDIA GPU:
 ln -s docker/{nvidia/Dockerfile,nvidia/docker-compose.yml,.dockerignore} .
 For AMD GPU:
-ln -s docker/{amd/Dockerfile,intel/docker-compose.yml,.dockerignore} .
+ln -s docker/{amd/Dockerfile,amd/docker-compose.yml,.dockerignore} .
 For Intel GPU:
 ln -s docker/{intel/Dockerfile,amd/docker-compose.yml,.dockerignore} .
 For CPU only
 ln -s docker/{cpu/Dockerfile,cpu/docker-compose.yml,.dockerignore} .
 cp docker/.env.example .env
 #Create logs/cache dir :
-mkdir -p logs cache
+mkdir -p user_data/logs user_data/cache
 # Edit .env and set:
 # TORCH_CUDA_ARCH_LIST based on your GPU model
 # APP_RUNTIME_GID your host user's group id (run `id -g` in a terminal)
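The commands above can also be replayed from a small helper script. The sketch below is not part of the repository; it simply mirrors the symlink, .env, and directory steps for a chosen vendor directory (nvidia, amd, intel, or cpu), and the function name and defaults are assumptions.

# Hypothetical convenience script (not part of the repository): it replays the
# manual Docker setup steps from the README above for a chosen vendor.
import shutil
from pathlib import Path

def prepare_docker_files(vendor: str = "nvidia") -> None:
    """Symlink the Docker files for `vendor`, copy .env, and create the data dirs."""
    repo = Path(".")  # assumes the current directory is the repository root
    docker = repo / "docker"

    # Mirrors: ln -s docker/{<vendor>/Dockerfile,<vendor>/docker-compose.yml,.dockerignore} .
    links = {
        "Dockerfile": docker / vendor / "Dockerfile",
        "docker-compose.yml": docker / vendor / "docker-compose.yml",
        ".dockerignore": docker / ".dockerignore",
    }
    for name, target in links.items():
        link = repo / name
        if not link.exists():
            link.symlink_to(target)

    # Mirrors: cp docker/.env.example .env
    env = repo / ".env"
    if not env.exists():
        shutil.copy(docker / ".env.example", env)

    # Mirrors: mkdir -p user_data/logs user_data/cache (the layout used by this commit)
    for d in ("user_data/logs", "user_data/cache"):
        (repo / d).mkdir(parents=True, exist_ok=True)

if __name__ == "__main__":
    prepare_docker_files("nvidia")  # or "amd", "intel", "cpu"

You still need to edit .env by hand afterwards to set TORCH_CUDA_ARCH_LIST and APP_RUNTIME_GID, as noted above.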
css/main.css (121 changes)
|
@ -131,7 +131,7 @@ gradio-app > :first-child {
|
|||
}
|
||||
|
||||
.header_bar {
|
||||
box-shadow: 0 0 3px rgba(22 22 22 / 35%);
|
||||
border-right: var(--input-border-width) solid var(--input-border-color);
|
||||
margin-bottom: 0;
|
||||
overflow-x: scroll;
|
||||
text-wrap: nowrap;
|
||||
|
@ -419,6 +419,14 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
|
|||
padding-right: 1rem;
|
||||
}
|
||||
|
||||
.chat .message .timestamp {
|
||||
font-size: 0.7em;
|
||||
display: inline-block;
|
||||
font-weight: normal;
|
||||
opacity: 0.7;
|
||||
margin-left: 5px;
|
||||
}
|
||||
|
||||
.chat-parent.bigchat {
|
||||
flex: 1;
|
||||
}
|
||||
|
@ -584,6 +592,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
|
|||
padding: 0.65rem 2.5rem;
|
||||
border: 0;
|
||||
box-shadow: 0;
|
||||
border-radius: 8px;
|
||||
}
|
||||
|
||||
#chat-input textarea::placeholder {
|
||||
|
@ -603,6 +612,16 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
|
|||
display: none;
|
||||
}
|
||||
|
||||
#chat-input .submit-button {
|
||||
display: none;
|
||||
}
|
||||
|
||||
#chat-input .upload-button {
|
||||
margin-right: 16px;
|
||||
margin-bottom: 7px;
|
||||
background: transparent;
|
||||
}
|
||||
|
||||
.chat-input-positioned {
|
||||
max-width: 54rem;
|
||||
left: 50%;
|
||||
|
@ -827,7 +846,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
|
|||
}
|
||||
|
||||
#chat-col.bigchat {
|
||||
padding-bottom: 80px !important;
|
||||
padding-bottom: 15px !important;
|
||||
}
|
||||
|
||||
.message-body ol, .message-body ul {
|
||||
|
@ -1171,11 +1190,11 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
|
|||
background-color: var(--light-theme-gray);
|
||||
}
|
||||
|
||||
#chat-controls {
|
||||
.dark #chat-controls {
|
||||
border-left: 1px solid #d9d9d0;
|
||||
}
|
||||
|
||||
#past-chats-row {
|
||||
.dark #past-chats-row {
|
||||
border-right: 1px solid #d9d9d0;
|
||||
}
|
||||
|
||||
|
@ -1236,42 +1255,31 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
|
|||
position: relative;
|
||||
}
|
||||
|
||||
.footer-button {
|
||||
/* New container for the buttons */
|
||||
.message-actions {
|
||||
position: absolute;
|
||||
bottom: -23px;
|
||||
left: 0;
|
||||
display: flex;
|
||||
gap: 5px;
|
||||
opacity: 0;
|
||||
transition: opacity 0.2s;
|
||||
}
|
||||
|
||||
.footer-button {
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
border: none;
|
||||
border-radius: 3px;
|
||||
cursor: pointer;
|
||||
opacity: 0;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
transition: opacity 0.2s;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
.footer-button.footer-copy-button {
|
||||
bottom: -23px;
|
||||
left: 0;
|
||||
}
|
||||
|
||||
.footer-button.footer-refresh-button {
|
||||
bottom: -23px;
|
||||
left: 25px;
|
||||
}
|
||||
|
||||
.footer-button.footer-continue-button {
|
||||
bottom: -23px;
|
||||
left: 50px;
|
||||
}
|
||||
|
||||
.footer-button.footer-remove-button {
|
||||
bottom: -23px;
|
||||
left: 75px;
|
||||
}
|
||||
|
||||
.message:hover .footer-button,
|
||||
.user-message:hover .footer-button,
|
||||
.assistant-message:hover .footer-button {
|
||||
.message:hover .message-actions,
|
||||
.user-message:hover .message-actions,
|
||||
.assistant-message:hover .message-actions {
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
|
@ -1362,6 +1370,11 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
|
|||
contain: layout;
|
||||
}
|
||||
|
||||
.chat .message-body .thinking-content p,
|
||||
.chat .message-body .thinking-content li {
|
||||
font-size: 15px !important;
|
||||
}
|
||||
|
||||
/* Animation for opening thinking blocks */
|
||||
@keyframes fadeIn {
|
||||
from { opacity: 0; }
|
||||
|
@ -1399,6 +1412,53 @@ strong {
|
|||
color: #07ff07;
|
||||
}
|
||||
|
||||
|
||||
.message-attachments {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 8px;
|
||||
margin-top: 8px;
|
||||
}
|
||||
|
||||
.attachment-box {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: 8px;
|
||||
background: rgb(0 0 0 / 5%);
|
||||
border-radius: 6px;
|
||||
border: 1px solid rgb(0 0 0 / 10%);
|
||||
min-width: 80px;
|
||||
max-width: 120px;
|
||||
}
|
||||
|
||||
.attachment-icon {
|
||||
margin-bottom: 4px;
|
||||
color: #555;
|
||||
}
|
||||
|
||||
.attachment-name {
|
||||
font-size: 0.8em;
|
||||
text-align: center;
|
||||
word-break: break-word;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
display: -webkit-box;
|
||||
-webkit-line-clamp: 2;
|
||||
-webkit-box-orient: vertical;
|
||||
}
|
||||
|
||||
.dark .attachment-box {
|
||||
background: rgb(255 255 255 / 5%);
|
||||
border: 1px solid rgb(255 255 255 / 10%);
|
||||
}
|
||||
|
||||
.dark .attachment-icon {
|
||||
color: #ccc;
|
||||
}
|
||||
|
||||
|
||||
/* --- Message Versioning Styles --- */
|
||||
|
||||
.message-versioning-container {
|
||||
|
@ -1490,4 +1550,3 @@ strong {
|
|||
|
||||
.message-versioning-container[hidden] {
|
||||
display: none;
|
||||
}
|
||||
|
|
|
@@ -14,7 +14,7 @@ WORKDIR /home/app/
 RUN git clone https://github.com/oobabooga/text-generation-webui.git
 WORKDIR /home/app/text-generation-webui
 RUN GPU_CHOICE=B LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose
-COPY CMD_FLAGS.txt /home/app/text-generation-webui/
+COPY /user_data/CMD_FLAGS.txt /home/app/text-generation-webui/user_data
 EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
 WORKDIR /home/app/text-generation-webui
 # set umask to ensure group read / write at runtime
|
|
|
@@ -41,14 +41,4 @@ services:
     security_opt:
       - seccomp=unconfined
     volumes:
-      - ./cache:/home/app/text-generation-webui/cache
-      - ./characters:/home/app/text-generation-webui/characters
-      - ./extensions:/home/app/text-generation-webui/extensions
-      - ./loras:/home/app/text-generation-webui/loras
-      - ./logs:/home/app/text-generation-webui/logs
-      - ./models:/home/app/text-generation-webui/models
-      - ./presets:/home/app/text-generation-webui/presets
-      - ./prompts:/home/app/text-generation-webui/prompts
-      - ./softprompts:/home/app/text-generation-webui/softprompts
-      - ./training:/home/app/text-generation-webui/training
-      - ./cloudflared:/etc/cloudflared
+      - ./user_data:/home/app/text-generation-webui/user_data
||||
|
|
|
@@ -14,7 +14,7 @@ WORKDIR /home/app/
 RUN git clone https://github.com/oobabooga/text-generation-webui.git
 WORKDIR /home/app/text-generation-webui
 RUN GPU_CHOICE=D LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose
-COPY CMD_FLAGS.txt /home/app/text-generation-webui/
+COPY /user_data/CMD_FLAGS.txt /home/app/text-generation-webui/user_data
 EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
 # set umask to ensure group read / write at runtime
 WORKDIR /home/app/text-generation-webui
|
|
|
@@ -41,12 +41,4 @@ services:
     security_opt:
       - seccomp=unconfined
     volumes:
-      - ./characters:/home/app/text-generation-webui/characters
-      - ./extensions:/home/app/text-generation-webui/extensions
-      - ./loras:/home/app/text-generation-webui/loras
-      - ./models:/home/app/text-generation-webui/models
-      - ./presets:/home/app/text-generation-webui/presets
-      - ./prompts:/home/app/text-generation-webui/prompts
-      - ./softprompts:/home/app/text-generation-webui/softprompts
-      - ./training:/home/app/text-generation-webui/training
-      - ./cloudflared:/etc/cloudflared
+      - ./user_data:/home/app/text-generation-webui/user_data
|
|
|
@ -115,6 +115,7 @@ async def openai_completions(request: Request, request_data: CompletionRequest):
|
|||
if request_data.stream:
|
||||
async def generator():
|
||||
async with streaming_semaphore:
|
||||
try:
|
||||
response = OAIcompletions.stream_completions(to_dict(request_data), is_legacy=is_legacy)
|
||||
async for resp in iterate_in_threadpool(response):
|
||||
disconnected = await request.is_disconnected()
|
||||
|
@ -122,6 +123,9 @@ async def openai_completions(request: Request, request_data: CompletionRequest):
|
|||
break
|
||||
|
||||
yield {"data": json.dumps(resp)}
|
||||
finally:
|
||||
stop_everything_event()
|
||||
return
|
||||
|
||||
return EventSourceResponse(generator()) # SSE streaming
|
||||
|
||||
|
@ -143,6 +147,7 @@ async def openai_chat_completions(request: Request, request_data: ChatCompletion
|
|||
if request_data.stream:
|
||||
async def generator():
|
||||
async with streaming_semaphore:
|
||||
try:
|
||||
response = OAIcompletions.stream_chat_completions(to_dict(request_data), is_legacy=is_legacy)
|
||||
async for resp in iterate_in_threadpool(response):
|
||||
disconnected = await request.is_disconnected()
|
||||
|
@ -150,6 +155,9 @@ async def openai_chat_completions(request: Request, request_data: ChatCompletion
|
|||
break
|
||||
|
||||
yield {"data": json.dumps(resp)}
|
||||
finally:
|
||||
stop_everything_event()
|
||||
return
|
||||
|
||||
return EventSourceResponse(generator()) # SSE streaming
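Both streaming branches return an EventSourceResponse, so a client reads the output as server-sent events, and the new try/finally guarantees stop_everything_event() runs when the client goes away. The sketch below is a minimal consumer written under assumptions not stated in this diff: the API listening on localhost:5000 and exposing an OpenAI-compatible /v1/chat/completions route, with chunks following the usual OpenAI streaming shape.

# Minimal SSE client sketch. Assumptions: API on localhost:5000, an
# OpenAI-compatible /v1/chat/completions route, OpenAI-style chunk payloads.
import json
import requests

def stream_chat(prompt: str, url: str = "http://localhost:5000/v1/chat/completions"):
    payload = {
        "messages": [{"role": "user", "content": prompt}],
        "stream": True,
    }
    with requests.post(url, json=payload, stream=True) as r:
        r.raise_for_status()
        for line in r.iter_lines(decode_unicode=True):
            # Each SSE event arrives as a line of the form: data: {...json...}
            if not line or not line.startswith("data:"):
                continue
            data = line[len("data:"):].strip()
            if data == "[DONE]":  # terminator used by some OpenAI-style servers (assumption)
                break
            chunk = json.loads(data)
            # Assumes the OpenAI streaming shape: choices[0].delta.content
            delta = chunk["choices"][0].get("delta", {}).get("content", "")
            print(delta, end="", flush=True)

if __name__ == "__main__":
    stream_chat("Hello!")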
|
||||
|
||||
|
|
|
@ -18,6 +18,37 @@ function copyToClipboard(element) {
|
|||
});
|
||||
}
|
||||
|
||||
function branchHere(element) {
|
||||
if (!element) return;
|
||||
|
||||
const messageElement = element.closest(".message, .user-message, .assistant-message");
|
||||
if (!messageElement) return;
|
||||
|
||||
const index = messageElement.getAttribute("data-index");
|
||||
if (!index) return;
|
||||
|
||||
const branchIndexInput = document.getElementById("Branch-index").querySelector("input");
|
||||
if (!branchIndexInput) {
|
||||
console.error("Element with ID 'Branch-index' not found.");
|
||||
return;
|
||||
}
|
||||
const branchButton = document.getElementById("Branch");
|
||||
|
||||
if (!branchButton) {
|
||||
console.error("Required element 'Branch' not found.");
|
||||
return;
|
||||
}
|
||||
|
||||
branchIndexInput.value = index;
|
||||
|
||||
// Trigger any 'change' or 'input' events Gradio might be listening for
|
||||
const event = new Event("input", { bubbles: true }); // 'change' might also work
|
||||
branchIndexInput.dispatchEvent(event);
|
||||
|
||||
branchButton.click(); // Gradio will now pick up the 'index'
|
||||
|
||||
}
|
||||
|
||||
function regenerateClick() {
|
||||
document.getElementById("Regenerate").click();
|
||||
}
|
||||
|
|
js/main.js (28 changes)
|
@ -132,8 +132,6 @@ targetElement.addEventListener("scroll", function() {
|
|||
|
||||
// Create a MutationObserver instance
|
||||
const observer = new MutationObserver(function(mutations) {
|
||||
updateCssProperties();
|
||||
|
||||
if (targetElement.classList.contains("_generating")) {
|
||||
typing.parentNode.classList.add("visible-dots");
|
||||
document.getElementById("stop").style.display = "flex";
|
||||
|
@ -446,32 +444,6 @@ const chatInput = document.querySelector("#chat-input textarea");
|
|||
// Variables to store current dimensions
|
||||
let currentChatInputHeight = chatInput.clientHeight;
|
||||
|
||||
// Update chat layout based on chat and input dimensions
|
||||
function updateCssProperties() {
|
||||
const chatInputHeight = chatInput.clientHeight;
|
||||
|
||||
// Check if the chat container is visible
|
||||
if (chatContainer.clientHeight > 0) {
|
||||
// Adjust scrollTop based on input height change
|
||||
if (chatInputHeight !== currentChatInputHeight) {
|
||||
const deltaHeight = chatInputHeight - currentChatInputHeight;
|
||||
if (!isScrolled && deltaHeight < 0) {
|
||||
chatContainer.scrollTop = chatContainer.scrollHeight;
|
||||
} else {
|
||||
chatContainer.scrollTop += deltaHeight;
|
||||
}
|
||||
|
||||
currentChatInputHeight = chatInputHeight;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Observe textarea size changes and call update function
|
||||
new ResizeObserver(updateCssProperties).observe(document.querySelector("#chat-input textarea"));
|
||||
|
||||
// Handle changes in window size
|
||||
window.addEventListener("resize", updateCssProperties);
|
||||
|
||||
//------------------------------------------------
|
||||
// Focus on the rename text area when it becomes visible
|
||||
//------------------------------------------------
|
||||
|
|
modules/chat.py (281 changes)
|
@ -37,6 +37,30 @@ def strftime_now(format):
|
|||
return datetime.now().strftime(format)
|
||||
|
||||
|
||||
def get_current_timestamp():
|
||||
"""Returns the current time in 24-hour format"""
|
||||
return datetime.now().strftime('%b %d, %Y %H:%M')
|
||||
|
||||
|
||||
def update_message_metadata(metadata_dict, role, index, **fields):
|
||||
"""
|
||||
Updates or adds metadata fields for a specific message.
|
||||
|
||||
Args:
|
||||
metadata_dict: The metadata dictionary
|
||||
role: The role (user, assistant, etc)
|
||||
index: The message index
|
||||
**fields: Arbitrary metadata fields to update/add
|
||||
"""
|
||||
key = f"{role}_{index}"
|
||||
if key not in metadata_dict:
|
||||
metadata_dict[key] = {}
|
||||
|
||||
# Update with provided fields
|
||||
for field_name, field_value in fields.items():
|
||||
metadata_dict[key][field_name] = field_value
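A quick sketch of the resulting structure, assuming these helpers are imported from modules/chat.py as shown in this diff: each message's metadata lives under a "<role>_<index>" key, and repeated calls merge new fields into the same entry.

# Illustration only: assumes the helpers above are importable from modules.chat.
from modules.chat import get_current_timestamp, update_message_metadata

metadata = {}

# Stamp the first user message and the first assistant reply (row index 0).
update_message_metadata(metadata, "user", 0, timestamp=get_current_timestamp())
update_message_metadata(metadata, "assistant", 0, timestamp=get_current_timestamp())

# Later calls merge extra fields into the same "<role>_<index>" entry.
update_message_metadata(metadata, "user", 0, attachments=[])

print(metadata.keys())     # dict_keys(['user_0', 'assistant_0'])
print(metadata["user_0"])  # e.g. {'timestamp': 'May 25, 2025 14:30', 'attachments': []}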
|
||||
|
||||
|
||||
jinja_env = ImmutableSandboxedEnvironment(
|
||||
trim_blocks=True,
|
||||
lstrip_blocks=True,
|
||||
|
@ -133,7 +157,9 @@ def generate_chat_prompt(user_input, state, **kwargs):
|
|||
impersonate = kwargs.get('impersonate', False)
|
||||
_continue = kwargs.get('_continue', False)
|
||||
also_return_rows = kwargs.get('also_return_rows', False)
|
||||
history = kwargs.get('history', state['history'])['internal']
|
||||
history_data = kwargs.get('history', state['history'])
|
||||
history = history_data['internal']
|
||||
metadata = history_data.get('metadata', {})
|
||||
|
||||
# Templates
|
||||
chat_template_str = state['chat_template_str']
|
||||
|
@ -172,11 +198,13 @@ def generate_chat_prompt(user_input, state, **kwargs):
|
|||
messages.append({"role": "system", "content": context})
|
||||
|
||||
insert_pos = len(messages)
|
||||
for entry in reversed(history):
|
||||
for i, entry in enumerate(reversed(history)):
|
||||
user_msg = entry[0].strip()
|
||||
assistant_msg = entry[1].strip()
|
||||
tool_msg = entry[2].strip() if len(entry) > 2 else ''
|
||||
|
||||
row_idx = len(history) - i - 1
|
||||
|
||||
if tool_msg:
|
||||
messages.insert(insert_pos, {"role": "tool", "content": tool_msg})
|
||||
|
||||
|
@ -184,10 +212,40 @@ def generate_chat_prompt(user_input, state, **kwargs):
|
|||
messages.insert(insert_pos, {"role": "assistant", "content": assistant_msg})
|
||||
|
||||
if user_msg not in ['', '<|BEGIN-VISIBLE-CHAT|>']:
|
||||
messages.insert(insert_pos, {"role": "user", "content": user_msg})
|
||||
# Check for user message attachments in metadata
|
||||
user_key = f"user_{row_idx}"
|
||||
enhanced_user_msg = user_msg
|
||||
|
||||
# Add attachment content if present
|
||||
if user_key in metadata and "attachments" in metadata[user_key]:
|
||||
attachments_text = ""
|
||||
for attachment in metadata[user_key]["attachments"]:
|
||||
filename = attachment.get("name", "file")
|
||||
content = attachment.get("content", "")
|
||||
attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n"
|
||||
|
||||
if attachments_text:
|
||||
enhanced_user_msg = f"{user_msg}\n\nATTACHMENTS:\n{attachments_text}"
|
||||
|
||||
messages.insert(insert_pos, {"role": "user", "content": enhanced_user_msg})
|
||||
|
||||
user_input = user_input.strip()
|
||||
if user_input and not impersonate and not _continue:
|
||||
# For the current user input being processed, check if we need to add attachments
|
||||
if not impersonate and not _continue and len(history_data.get('metadata', {})) > 0:
|
||||
current_row_idx = len(history)
|
||||
user_key = f"user_{current_row_idx}"
|
||||
|
||||
if user_key in metadata and "attachments" in metadata[user_key]:
|
||||
attachments_text = ""
|
||||
for attachment in metadata[user_key]["attachments"]:
|
||||
filename = attachment.get("name", "file")
|
||||
content = attachment.get("content", "")
|
||||
attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n"
|
||||
|
||||
if attachments_text:
|
||||
user_input = f"{user_input}\n\nATTACHMENTS:\n{attachments_text}"
|
||||
|
||||
messages.append({"role": "user", "content": user_input})
|
||||
|
||||
def make_prompt(messages):
|
||||
|
@ -256,7 +314,6 @@ def generate_chat_prompt(user_input, state, **kwargs):
|
|||
|
||||
# Resort to truncating the user input
|
||||
else:
|
||||
|
||||
user_message = messages[-1]['content']
|
||||
|
||||
# Bisect the truncation point
|
||||
|
@ -341,12 +398,111 @@ def get_stopping_strings(state):
|
|||
return result
|
||||
|
||||
|
||||
def add_message_version(history, row_idx, is_current=True):
|
||||
"""Add the current message as a version in the history metadata"""
|
||||
if 'metadata' not in history:
|
||||
history['metadata'] = {}
|
||||
|
||||
if row_idx >= len(history['internal']) or not history['internal'][row_idx][1].strip():
|
||||
return # Skip if row doesn't exist or message is empty
|
||||
|
||||
key = f"assistant_{row_idx}"
|
||||
|
||||
# Initialize metadata structures if needed
|
||||
if key not in history['metadata']:
|
||||
history['metadata'][key] = {"timestamp": get_current_timestamp()}
|
||||
if "versions" not in history['metadata'][key]:
|
||||
history['metadata'][key]["versions"] = []
|
||||
|
||||
# Add current message as a version
|
||||
history['metadata'][key]["versions"].append({
|
||||
"content": history['internal'][row_idx][1],
|
||||
"visible_content": history['visible'][row_idx][1],
|
||||
"timestamp": get_current_timestamp()
|
||||
})
|
||||
|
||||
# Update index if this is the current version
|
||||
if is_current:
|
||||
history['metadata'][key]["current_version_index"] = len(history['metadata'][key]["versions"]) - 1
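For illustration, a minimal sketch of the versioning flow, assuming the helper above is imported from modules/chat.py: archiving a reply before regeneration appends it to the versions list under the corresponding assistant_<index> key.

# Illustration only: how regeneration archives the previous reply as a "version".
from modules.chat import add_message_version

history = {
    "internal": [["Hi", "Hello there."]],
    "visible": [["Hi", "Hello there."]],
    "metadata": {},
}

# Before regenerating row 0, archive the current reply (matches the regenerate path).
add_message_version(history, 0, is_current=False)

versions = history["metadata"]["assistant_0"]["versions"]
print(len(versions))           # 1
print(versions[0]["content"])  # 'Hello there.'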
|
||||
|
||||
|
||||
def add_message_attachment(history, row_idx, file_path, is_user=True):
|
||||
"""Add a file attachment to a message in history metadata"""
|
||||
if 'metadata' not in history:
|
||||
history['metadata'] = {}
|
||||
|
||||
key = f"{'user' if is_user else 'assistant'}_{row_idx}"
|
||||
|
||||
if key not in history['metadata']:
|
||||
history['metadata'][key] = {"timestamp": get_current_timestamp()}
|
||||
if "attachments" not in history['metadata'][key]:
|
||||
history['metadata'][key]["attachments"] = []
|
||||
|
||||
# Get file info using pathlib
|
||||
path = Path(file_path)
|
||||
filename = path.name
|
||||
file_extension = path.suffix.lower()
|
||||
|
||||
try:
|
||||
# Handle different file types
|
||||
if file_extension == '.pdf':
|
||||
# Process PDF file
|
||||
content = extract_pdf_text(path)
|
||||
file_type = "application/pdf"
|
||||
else:
|
||||
# Default handling for text files
|
||||
with open(path, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
file_type = "text/plain"
|
||||
|
||||
# Add attachment
|
||||
attachment = {
|
||||
"name": filename,
|
||||
"type": file_type,
|
||||
"content": content,
|
||||
}
|
||||
|
||||
history['metadata'][key]["attachments"].append(attachment)
|
||||
return content # Return the content for reuse
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing attachment {filename}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def extract_pdf_text(pdf_path):
|
||||
"""Extract text from a PDF file"""
|
||||
import PyPDF2
|
||||
|
||||
text = ""
|
||||
try:
|
||||
with open(pdf_path, 'rb') as file:
|
||||
pdf_reader = PyPDF2.PdfReader(file)
|
||||
for page_num in range(len(pdf_reader.pages)):
|
||||
page = pdf_reader.pages[page_num]
|
||||
text += page.extract_text() + "\n\n"
|
||||
|
||||
return text.strip()
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting text from PDF: {e}")
|
||||
return f"[Error extracting PDF text: {str(e)}]"
|
||||
|
||||
|
||||
def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_message=True, for_ui=False):
|
||||
# Handle dict format with text and files
|
||||
files = []
|
||||
if isinstance(text, dict):
|
||||
files = text.get('files', [])
|
||||
text = text.get('text', '')
|
||||
|
||||
history = state['history']
|
||||
output = copy.deepcopy(history)
|
||||
output = apply_extensions('history', output)
|
||||
state = apply_extensions('state', state)
|
||||
|
||||
# Initialize metadata if not present
|
||||
if 'metadata' not in output:
|
||||
output['metadata'] = {}
|
||||
|
||||
visible_text = None
|
||||
stopping_strings = get_stopping_strings(state)
|
||||
is_stream = state['stream']
|
||||
|
@ -355,44 +511,70 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
|
|||
if not (regenerate or _continue):
|
||||
visible_text = html.escape(text)
|
||||
|
||||
# Process file attachments and store in metadata
|
||||
row_idx = len(output['internal'])
|
||||
|
||||
# Add attachments to metadata only, not modifying the message text
|
||||
for file_path in files:
|
||||
add_message_attachment(output, row_idx, file_path, is_user=True)
|
||||
|
||||
# Apply extensions
|
||||
text, visible_text = apply_extensions('chat_input', text, visible_text, state)
|
||||
text = apply_extensions('input', text, state, is_chat=True)
|
||||
|
||||
# Current row index
|
||||
output['internal'].append([text, ''])
|
||||
output['visible'].append([visible_text, ''])
|
||||
# Add metadata with timestamp
|
||||
update_message_metadata(output['metadata'], "user", row_idx, timestamp=get_current_timestamp())
|
||||
|
||||
# *Is typing...*
|
||||
if loading_message:
|
||||
yield {
|
||||
'visible': output['visible'][:-1] + [[output['visible'][-1][0], shared.processing_message]],
|
||||
'internal': output['internal']
|
||||
'internal': output['internal'],
|
||||
'metadata': output['metadata']
|
||||
}
|
||||
else:
|
||||
text, visible_text = output['internal'][-1][0], output['visible'][-1][0]
|
||||
if regenerate:
|
||||
row_idx = len(output['internal']) - 1
|
||||
|
||||
# Store the existing response as a version before regenerating
|
||||
add_message_version(output, row_idx, is_current=False)
|
||||
|
||||
if loading_message:
|
||||
yield {
|
||||
'visible': output['visible'][:-1] + [[visible_text, shared.processing_message]],
|
||||
'internal': output['internal'][:-1] + [[text, '']]
|
||||
'internal': output['internal'][:-1] + [[text, '']],
|
||||
'metadata': output['metadata']
|
||||
}
|
||||
elif _continue:
|
||||
last_reply = [output['internal'][-1][1], output['visible'][-1][1]]
|
||||
if loading_message:
|
||||
yield {
|
||||
'visible': output['visible'][:-1] + [[visible_text, last_reply[1] + '...']],
|
||||
'internal': output['internal']
|
||||
'internal': output['internal'],
|
||||
'metadata': output['metadata']
|
||||
}
|
||||
|
||||
# Generate the prompt
|
||||
kwargs = {
|
||||
'_continue': _continue,
|
||||
'history': output if _continue else {k: v[:-1] for k, v in output.items()}
|
||||
'history': output if _continue else {
|
||||
k: (v[:-1] if k in ['internal', 'visible'] else v)
|
||||
for k, v in output.items()
|
||||
}
|
||||
}
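The reworked comprehension only trims the positional fields. A minimal standalone illustration of why: 'internal' and 'visible' are lists indexed by row, while 'metadata' is a dict keyed by "<role>_<index>", so slicing it makes no sense (and would raise a TypeError).

# Why only 'internal' and 'visible' are sliced: they are positional lists, while
# 'metadata' is a dict keyed by "<role>_<index>" (slicing a dict raises TypeError).
output = {
    "internal": [["hi", "hello"], ["how are you?", ""]],
    "visible": [["hi", "hello"], ["how are you?", ""]],
    "metadata": {"user_0": {"timestamp": "..."}, "user_1": {"timestamp": "..."}},
}

trimmed = {
    k: (v[:-1] if k in ["internal", "visible"] else v)
    for k, v in output.items()
}

print(len(trimmed["internal"]))    # 1 -> the in-progress row is excluded from the prompt
print(trimmed["metadata"].keys())  # dict_keys(['user_0', 'user_1']) -> kept intact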
|
||||
|
||||
prompt = apply_extensions('custom_generate_chat_prompt', text, state, **kwargs)
|
||||
if prompt is None:
|
||||
prompt = generate_chat_prompt(text, state, **kwargs)
|
||||
|
||||
# Add timestamp for assistant's response at the start of generation
|
||||
row_idx = len(output['internal']) - 1
|
||||
update_message_metadata(output['metadata'], "assistant", row_idx, timestamp=get_current_timestamp())
|
||||
|
||||
# Generate
|
||||
reply = None
|
||||
for j, reply in enumerate(generate_reply(prompt, state, stopping_strings=stopping_strings, is_chat=True, for_ui=for_ui)):
|
||||
|
@ -421,6 +603,11 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
|
|||
if is_stream:
|
||||
yield output
|
||||
|
||||
# Add the newly generated response as a version (only for regeneration)
|
||||
if regenerate:
|
||||
row_idx = len(output['internal']) - 1
|
||||
add_message_version(output, row_idx, is_current=True)
|
||||
|
||||
output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True)
|
||||
yield output
|
||||
|
||||
|
@ -508,9 +695,19 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False):
|
|||
|
||||
|
||||
def remove_last_message(history):
|
||||
if 'metadata' not in history:
|
||||
history['metadata'] = {}
|
||||
|
||||
if len(history['visible']) > 0 and history['internal'][-1][0] != '<|BEGIN-VISIBLE-CHAT|>':
|
||||
row_idx = len(history['internal']) - 1
|
||||
last = history['visible'].pop()
|
||||
history['internal'].pop()
|
||||
|
||||
# Remove metadata directly by known keys
|
||||
if f"user_{row_idx}" in history['metadata']:
|
||||
del history['metadata'][f"user_{row_idx}"]
|
||||
if f"assistant_{row_idx}" in history['metadata']:
|
||||
del history['metadata'][f"assistant_{row_idx}"]
|
||||
else:
|
||||
last = ['', '']
|
||||
|
||||
|
@ -527,30 +724,54 @@ def send_last_reply_to_input(history):
|
|||
def replace_last_reply(text, state):
|
||||
history = state['history']
|
||||
|
||||
# Initialize metadata if not present
|
||||
if 'metadata' not in history:
|
||||
history['metadata'] = {}
|
||||
|
||||
if len(text.strip()) == 0:
|
||||
return history
|
||||
elif len(history['visible']) > 0:
|
||||
row_idx = len(history['internal']) - 1
|
||||
history['visible'][-1][1] = html.escape(text)
|
||||
history['internal'][-1][1] = apply_extensions('input', text, state, is_chat=True)
|
||||
update_message_metadata(history['metadata'], "assistant", row_idx, timestamp=get_current_timestamp())
|
||||
|
||||
return history
|
||||
|
||||
|
||||
def send_dummy_message(text, state):
|
||||
history = state['history']
|
||||
|
||||
# Initialize metadata if not present
|
||||
if 'metadata' not in history:
|
||||
history['metadata'] = {}
|
||||
|
||||
row_idx = len(history['internal'])
|
||||
history['visible'].append([html.escape(text), ''])
|
||||
history['internal'].append([apply_extensions('input', text, state, is_chat=True), ''])
|
||||
update_message_metadata(history['metadata'], "user", row_idx, timestamp=get_current_timestamp())
|
||||
|
||||
return history
|
||||
|
||||
|
||||
def send_dummy_reply(text, state):
|
||||
history = state['history']
|
||||
|
||||
# Initialize metadata if not present
|
||||
if 'metadata' not in history:
|
||||
history['metadata'] = {}
|
||||
|
||||
if len(history['visible']) > 0 and not history['visible'][-1][1] == '':
|
||||
row_idx = len(history['internal'])
|
||||
history['visible'].append(['', ''])
|
||||
history['internal'].append(['', ''])
|
||||
# We don't need to add system metadata
|
||||
|
||||
row_idx = len(history['internal']) - 1
|
||||
history['visible'][-1][1] = html.escape(text)
|
||||
history['internal'][-1][1] = apply_extensions('input', text, state, is_chat=True)
|
||||
update_message_metadata(history['metadata'], "assistant", row_idx, timestamp=get_current_timestamp())
|
||||
|
||||
return history
|
||||
|
||||
|
||||
|
@ -560,7 +781,8 @@ def redraw_html(history, name1, name2, mode, style, character, reset_cache=False
|
|||
|
||||
def start_new_chat(state):
|
||||
mode = state['mode']
|
||||
history = {'internal': [], 'visible': []}
|
||||
# Initialize with empty metadata dictionary
|
||||
history = {'internal': [], 'visible': [], 'metadata': {}}
|
||||
|
||||
if mode != 'instruct':
|
||||
greeting = replace_character_names(state['greeting'], state['name1'], state['name2'])
|
||||
|
@ -568,6 +790,9 @@ def start_new_chat(state):
|
|||
history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', greeting]]
|
||||
history['visible'] += [['', apply_extensions('output', html.escape(greeting), state, is_chat=True)]]
|
||||
|
||||
# Add timestamp for assistant's greeting
|
||||
update_message_metadata(history['metadata'], "assistant", 0, timestamp=get_current_timestamp())
|
||||
|
||||
unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S')
|
||||
save_history(history, unique_id, state['character_menu'], state['mode'])
|
||||
|
||||
|
@ -749,6 +974,16 @@ def load_history(unique_id, character, mode):
|
|||
'visible': f['data_visible']
|
||||
}
|
||||
|
||||
# Add metadata if it doesn't exist
|
||||
if 'metadata' not in history:
|
||||
history['metadata'] = {}
|
||||
# Add placeholder timestamps for existing messages
|
||||
for i, (user_msg, asst_msg) in enumerate(history['internal']):
|
||||
if user_msg and user_msg != '<|BEGIN-VISIBLE-CHAT|>':
|
||||
update_message_metadata(history['metadata'], "user", i, timestamp="")
|
||||
if asst_msg:
|
||||
update_message_metadata(history['metadata'], "assistant", i, timestamp="")
|
||||
|
||||
return history
|
||||
|
||||
|
||||
|
@ -764,6 +999,16 @@ def load_history_json(file, history):
|
|||
'visible': f['data_visible']
|
||||
}
|
||||
|
||||
# Add metadata if it doesn't exist
|
||||
if 'metadata' not in history:
|
||||
history['metadata'] = {}
|
||||
# Add placeholder timestamps
|
||||
for i, (user_msg, asst_msg) in enumerate(history['internal']):
|
||||
if user_msg and user_msg != '<|BEGIN-VISIBLE-CHAT|>':
|
||||
update_message_metadata(history['metadata'], "user", i, timestamp="")
|
||||
if asst_msg:
|
||||
update_message_metadata(history['metadata'], "assistant", i, timestamp="")
|
||||
|
||||
return history
|
||||
except:
|
||||
return history
|
||||
|
@ -1093,7 +1338,7 @@ def handle_replace_last_reply_click(text, state):
|
|||
message_versioning.append_message_version(history, state, is_bot=True)
|
||||
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
||||
|
||||
return [history, html, ""]
|
||||
return [history, html, {"text": "", "files": []}]
|
||||
|
||||
|
||||
def handle_send_dummy_message_click(text, state):
|
||||
|
@ -1102,7 +1347,7 @@ def handle_send_dummy_message_click(text, state):
|
|||
message_versioning.append_message_version(history, state, is_bot=False)
|
||||
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
||||
|
||||
return [history, html, ""]
|
||||
return [history, html, {"text": "", "files": []}]
|
||||
|
||||
|
||||
def handle_send_dummy_reply_click(text, state):
|
||||
|
@ -1111,7 +1356,7 @@ def handle_send_dummy_reply_click(text, state):
|
|||
message_versioning.append_message_version(history, state, is_bot=True)
|
||||
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
||||
|
||||
return [history, html, ""]
|
||||
return [history, html, {"text": "", "files": []}]
|
||||
|
||||
|
||||
def handle_remove_last_click(state):
|
||||
|
@ -1119,7 +1364,7 @@ def handle_remove_last_click(state):
|
|||
save_history(history, state['unique_id'], state['character_menu'], state['mode'])
|
||||
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
||||
|
||||
return [history, html, last_input]
|
||||
return [history, html, {"text": last_input, "files": []}]
|
||||
|
||||
|
||||
def handle_unique_id_select(state):
|
||||
|
@ -1175,7 +1420,13 @@ def handle_delete_chat_confirm_click(state):
|
|||
|
||||
|
||||
def handle_branch_chat_click(state):
|
||||
branch_from_index = state['branch_index']
|
||||
if branch_from_index == -1:
|
||||
history = state['history']
|
||||
else:
|
||||
history = state['history']
|
||||
history['visible'] = history['visible'][:branch_from_index + 1]
|
||||
history['internal'] = history['internal'][:branch_from_index + 1]
|
||||
new_unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S')
|
||||
save_history(history, new_unique_id, state['character_menu'], state['mode'])
|
||||
|
||||
|
@ -1186,7 +1437,7 @@ def handle_branch_chat_click(state):
|
|||
|
||||
past_chats_update = gr.update(choices=histories, value=new_unique_id)
|
||||
|
||||
return [history, html, past_chats_update]
|
||||
return [history, html, past_chats_update, -1]
|
||||
|
||||
|
||||
def handle_rename_chat_click():
|
||||
|
@ -1328,7 +1579,7 @@ def handle_your_picture_change(picture, state):
|
|||
|
||||
def handle_send_instruction_click(state):
|
||||
state['mode'] = 'instruct'
|
||||
state['history'] = {'internal': [], 'visible': []}
|
||||
state['history'] = {'internal': [], 'visible': [], 'metadata': {}}
|
||||
|
||||
output = generate_chat_prompt("Input", state)
|
||||
|
||||
|
|
|
@ -169,11 +169,7 @@ def convert_to_markdown(string, message_id=None):
|
|||
thinking_block = f'''
|
||||
<details class="thinking-block" data-block-id="{block_id}" data-streaming="{str(is_streaming).lower()}">
|
||||
<summary class="thinking-header">
|
||||
<svg class="thinking-icon" width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M8 1.33334C4.31868 1.33334 1.33334 4.31868 1.33334 8.00001C1.33334 11.6813 4.31868 14.6667 8 14.6667C11.6813 14.6667 14.6667 11.6813 14.6667 8.00001C14.6667 4.31868 11.6813 1.33334 8 1.33334Z" stroke="currentColor" stroke-width="1.33" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M8 10.6667V8.00001" stroke="currentColor" stroke-width="1.33" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M8 5.33334H8.00667" stroke="currentColor" stroke-width="1.33" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
</svg>
|
||||
{info_svg_small}
|
||||
<span class="thinking-title">{title_text}</span>
|
||||
</summary>
|
||||
<div class="thinking-content pretty_scrollbar">{thinking_html}</div>
|
||||
|
@ -339,11 +335,59 @@ copy_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" vie
|
|||
refresh_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="tabler-icon tabler-icon-repeat"><path d="M4 12v-3a3 3 0 0 1 3 -3h13m-3 -3l3 3l-3 3"></path><path d="M20 12v3a3 3 0 0 1 -3 3h-13m3 3l-3 -3l3 -3"></path></svg>'''
|
||||
continue_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-tabler icons-tabler-outline icon-tabler-player-play"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M7 4v16l13 -8z" /></svg>'''
|
||||
remove_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-tabler icons-tabler-outline icon-tabler-trash"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M4 7l16 0" /><path d="M10 11l0 6" /><path d="M14 11l0 6" /><path d="M5 7l1 12a2 2 0 0 0 2 2h8a2 2 0 0 0 2 -2l1 -12" /><path d="M9 7v-3a1 1 0 0 1 1 -1h4a1 1 0 0 1 1 1v3" /></svg>'''
|
||||
branch_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-tabler icons-tabler-outline icon-tabler-git-branch"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M7 18m-2 0a2 2 0 1 0 4 0a2 2 0 1 0 -4 0" /><path d="M7 6m-2 0a2 2 0 1 0 4 0a2 2 0 1 0 -4 0" /><path d="M17 6m-2 0a2 2 0 1 0 4 0a2 2 0 1 0 -4 0" /><path d="M7 8l0 8" /><path d="M9 18h6a2 2 0 0 0 2 -2v-5" /><path d="M14 14l3 -3l3 3" /></svg>'''
|
||||
info_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="thinking-icon tabler-icon tabler-icon-info-circle"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M12 2a10 10 0 0 1 0 20a10 10 0 0 1 0 -20z" /><path d="M12 16v-4" /><path d="M12 8h.01" /></svg>'''
|
||||
info_svg_small = '''<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="thinking-icon tabler-icon tabler-icon-info-circle"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M12 2a10 10 0 0 1 0 20a10 10 0 0 1 0 -20z" /><path d="M12 16v-4" /><path d="M12 8h.01" /></svg>'''
|
||||
attachment_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M21.44 11.05l-9.19 9.19a6 6 0 0 1-8.48-8.48l9.19-9.19a4 4 0 0 1 5.66 5.66l-9.2 9.19a2 2 0 0 1-2.83-2.83l8.49-8.48"></path></svg>'''
|
||||
|
||||
copy_button = f'<button class="footer-button footer-copy-button" title="Copy" onclick="copyToClipboard(this)">{copy_svg}</button>'
|
||||
branch_button = f'<button class="footer-button footer-branch-button" title="Branch here" onclick="branchHere(this)">{branch_svg}</button>'
|
||||
refresh_button = f'<button class="footer-button footer-refresh-button" title="Regenerate" onclick="regenerateClick()">{refresh_svg}</button>'
|
||||
continue_button = f'<button class="footer-button footer-continue-button" title="Continue" onclick="continueClick()">{continue_svg}</button>'
|
||||
remove_button = f'<button class="footer-button footer-remove-button" title="Remove last reply" onclick="removeLastClick()">{remove_svg}</button>'
|
||||
info_button = f'<button class="footer-button footer-info-button" title="message">{info_svg}</button>'
|
||||
|
||||
|
||||
def format_message_timestamp(history, role, index):
|
||||
"""Get a formatted timestamp HTML span for a message if available"""
|
||||
key = f"{role}_{index}"
|
||||
if 'metadata' in history and key in history['metadata'] and history['metadata'][key].get('timestamp'):
|
||||
timestamp = history['metadata'][key]['timestamp']
|
||||
return f"<span class='timestamp'>{timestamp}</span>"
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
def format_message_attachments(history, role, index):
|
||||
"""Get formatted HTML for message attachments if available"""
|
||||
key = f"{role}_{index}"
|
||||
if 'metadata' in history and key in history['metadata'] and 'attachments' in history['metadata'][key]:
|
||||
attachments = history['metadata'][key]['attachments']
|
||||
if not attachments:
|
||||
return ""
|
||||
|
||||
attachments_html = '<div class="message-attachments">'
|
||||
for attachment in attachments:
|
||||
attachments_html += (
|
||||
f'<div class="attachment-box">'
|
||||
f'<div class="attachment-icon">{attachment_svg}</div>'
|
||||
f'<div class="attachment-name">{html.escape(attachment["name"])}</div>'
|
||||
f'</div>'
|
||||
)
|
||||
attachments_html += '</div>'
|
||||
return attachments_html
|
||||
|
||||
return ""
|
||||
|
||||
def actions_html(history, i, info_message=""):
|
||||
return (f'<div class="message-actions">'
|
||||
f'{copy_button}'
|
||||
f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
|
||||
f'{continue_button if i == len(history["visible"]) - 1 else ""}'
|
||||
f'{remove_button if i == len(history["visible"]) - 1 else ""}'
|
||||
f'{branch_button}'
|
||||
f'{info_message}'
|
||||
f'</div>')
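For reference, a sketch of what these helpers return for a message that has both a timestamp and an attachment. The import path is a guess, since the file name for this hunk is not shown here; adjust it to wherever these helpers actually live.

# Illustration only: the import path below is an assumption.
from modules.html_generator import format_message_attachments, format_message_timestamp

history = {
    "internal": [["hi", "hello"]],
    "visible": [["hi", "hello"]],
    "metadata": {
        "user_0": {
            "timestamp": "May 25, 2025 14:30",
            "attachments": [{"name": "notes.txt", "type": "text/plain", "content": "..."}],
        }
    },
}

print(format_message_timestamp(history, "user", 0))
# <span class='timestamp'>May 25, 2025 14:30</span>

print(format_message_attachments(history, "user", 0))
# <div class="message-attachments"><div class="attachment-box">...notes.txt...</div></div>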
|
||||
|
||||
|
||||
def generate_instruct_html(history):
|
||||
|
@ -356,6 +400,27 @@ def generate_instruct_html(history):
|
|||
versioning_nav_user = message_versioning.get_message_version_nav_elements(i, 0)
|
||||
versioning_nav_bot = message_versioning.get_message_version_nav_elements(i, 1)
|
||||
|
||||
# Get timestamps
|
||||
user_timestamp = format_message_timestamp(history, "user", i)
|
||||
assistant_timestamp = format_message_timestamp(history, "assistant", i)
|
||||
|
||||
# Get attachments
|
||||
user_attachments = format_message_attachments(history, "user", i)
|
||||
assistant_attachments = format_message_attachments(history, "assistant", i)
|
||||
|
||||
# Create info buttons for timestamps if they exist
|
||||
info_message_user = ""
|
||||
if user_timestamp != "":
|
||||
# Extract the timestamp value from the span
|
||||
user_timestamp_value = user_timestamp.split('>', 1)[1].split('<', 1)[0]
|
||||
info_message_user = info_button.replace("message", user_timestamp_value)
|
||||
|
||||
info_message_assistant = ""
|
||||
if assistant_timestamp != "":
|
||||
# Extract the timestamp value from the span
|
||||
assistant_timestamp_value = assistant_timestamp.split('>', 1)[1].split('<', 1)[0]
|
||||
info_message_assistant = info_button.replace("message", assistant_timestamp_value)
|
||||
|
||||
if converted_visible[0]: # Don't display empty user messages
|
||||
selected_class = " selected-message" if message_versioning.is_message_selected(i, 0) else ""
|
||||
output += (
|
||||
|
@ -364,8 +429,8 @@ def generate_instruct_html(history):
|
|||
f'data-raw="{html.escape(row_internal[0], quote=True)}">'
|
||||
f'<div class="text">'
|
||||
f'<div class="message-body">{converted_visible[0]}</div>'
|
||||
f'{copy_button}'
|
||||
f'{versioning_nav_user}'
|
||||
f'{user_attachments}'
|
||||
f'<div class="message-actions">{copy_button}{info_message_user}</div>'
|
||||
f'</div>'
|
||||
f'</div>'
|
||||
)
|
||||
|
@ -373,15 +438,12 @@ def generate_instruct_html(history):
|
|||
selected_class = " selected-message" if message_versioning.is_message_selected(i, 1) else ""
|
||||
output += (
|
||||
f'<div class="assistant-message{selected_class}" '
|
||||
f'data-history-index="{i}" '
|
||||
f'data-raw="{html.escape(row_internal[1], quote=True)}">'
|
||||
f'data-raw="{html.escape(row_internal[1], quote=True)}"'
|
||||
f'data-index={i}>'
|
||||
f'<div class="text">'
|
||||
f'<div class="message-body">{converted_visible[1]}</div>'
|
||||
f'{copy_button}'
|
||||
f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
|
||||
f'{continue_button if i == len(history["visible"]) - 1 else ""}'
|
||||
f'{remove_button if i == len(history["visible"]) - 1 else ""}'
|
||||
f'{versioning_nav_bot}'
|
||||
f'{assistant_attachments}'
|
||||
f'{actions_html(history, i, info_message_assistant)}'
|
||||
f'</div>'
|
||||
f'</div>'
|
||||
)
|
||||
|
@ -408,10 +470,17 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=
|
|||
row_visible = history['visible'][i]
|
||||
row_internal = history['internal'][i]
|
||||
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
|
||||
|
||||
versioning_nav_user = message_versioning.get_message_version_nav_elements(i, 0)
|
||||
versioning_nav_bot = message_versioning.get_message_version_nav_elements(i, 1)
|
||||
|
||||
# Get timestamps
|
||||
user_timestamp = format_message_timestamp(history, "user", i)
|
||||
assistant_timestamp = format_message_timestamp(history, "assistant", i)
|
||||
|
||||
# Get attachments
|
||||
user_attachments = format_message_attachments(history, "user", i)
|
||||
assistant_attachments = format_message_attachments(history, "assistant", i)
|
||||
|
||||
if converted_visible[0]: # Don't display empty user messages
|
||||
selected_class = " selected-message" if message_versioning.is_message_selected(i, 0) else ""
|
||||
output += (
|
||||
|
@ -420,28 +489,25 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=
|
|||
f'data-raw="{html.escape(row_internal[0], quote=True)}">'
|
||||
f'<div class="circle-you">{img_me}</div>'
|
||||
f'<div class="text">'
|
||||
f'<div class="username">{name1}</div>'
|
||||
f'<div class="username">{name1}{user_timestamp}</div>'
|
||||
f'<div class="message-body">{converted_visible[0]}</div>'
|
||||
f'{copy_button}'
|
||||
f'{versioning_nav_user}'
|
||||
f'{user_attachments}'
|
||||
f'<div class="message-actions">{copy_button}</div>'
|
||||
f'</div>'
|
||||
f'</div>'
|
||||
)
|
||||
|
||||
selected_class = " selected-message" if message_versioning.is_message_selected(i, 1) else ""
|
||||
output += (
|
||||
f'<div class="message{selected_class}" '
|
||||
f'data-history-index="{i}" data-message-type="1" '
|
||||
f'data-raw="{html.escape(row_internal[1], quote=True)}">'
|
||||
f'<div class="message"{selected_class}'
|
||||
f'data-raw="{html.escape(row_internal[1], quote=True)}"'
|
||||
f'data-index={i}>'
|
||||
f'<div class="circle-bot">{img_bot}</div>'
|
||||
f'<div class="text">'
|
||||
f'<div class="username">{name2}</div>'
|
||||
f'<div class="username">{name2}{assistant_timestamp}</div>'
|
||||
f'<div class="message-body">{converted_visible[1]}</div>'
|
||||
f'{copy_button}'
|
||||
f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
|
||||
f'{continue_button if i == len(history["visible"]) - 1 else ""}'
|
||||
f'{remove_button if i == len(history["visible"]) - 1 else ""}'
|
||||
f'{versioning_nav_bot}'
|
||||
f'{assistant_attachments}'
|
||||
f'{actions_html(history, i)}'
|
||||
f'</div>'
|
||||
f'</div>'
|
||||
)
|
||||
|
@ -457,20 +523,40 @@ def generate_chat_html(history, name1, name2, reset_cache=False):
|
|||
row_visible = history['visible'][i]
|
||||
row_internal = history['internal'][i]
|
||||
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
|
||||
|
||||
versioning_nav_user = message_versioning.get_message_version_nav_elements(i, 0)
|
||||
versioning_nav_bot = message_versioning.get_message_version_nav_elements(i, 1)
|
||||
|
||||
# Get timestamps
|
||||
user_timestamp = format_message_timestamp(history, "user", i)
|
||||
assistant_timestamp = format_message_timestamp(history, "assistant", i)
|
||||
|
||||
# Get attachments
|
||||
user_attachments = format_message_attachments(history, "user", i)
|
||||
assistant_attachments = format_message_attachments(history, "assistant", i)
|
||||
|
||||
# Create info buttons for timestamps if they exist
|
||||
info_message_user = ""
|
||||
if user_timestamp != "":
|
||||
# Extract the timestamp value from the span
|
||||
user_timestamp_value = user_timestamp.split('>', 1)[1].split('<', 1)[0]
|
||||
info_message_user = info_button.replace("message", user_timestamp_value)
|
||||
|
||||
info_message_assistant = ""
|
||||
if assistant_timestamp != "":
|
||||
# Extract the timestamp value from the span
|
||||
assistant_timestamp_value = assistant_timestamp.split('>', 1)[1].split('<', 1)[0]
|
||||
info_message_assistant = info_button.replace("message", assistant_timestamp_value)
|
||||
|
||||
if converted_visible[0]: # Don't display empty user messages
|
||||
selected_class = " selected-message" if message_versioning.is_message_selected(i, 0) else ""
|
||||
output += (
|
||||
f'<div class="message{selected_class}" '
|
||||
f'data-history-index="{i}" data-message-type="0" '
|
||||
f'data-history-index="{i}"'
|
||||
f'data-raw="{html.escape(row_internal[0], quote=True)}">'
|
||||
f'<div class="text-you">'
|
||||
f'<div class="message-body">{converted_visible[0]}</div>'
|
||||
f'{copy_button}'
|
||||
f'{versioning_nav_user}'
|
||||
f'{user_attachments}'
|
||||
f'<div class="message-actions">{copy_button}{info_message_user}</div>'
|
||||
f'</div>'
|
||||
f'</div>'
|
||||
)
|
||||
|
@ -478,15 +564,12 @@ def generate_chat_html(history, name1, name2, reset_cache=False):
|
|||
selected_class = " selected-message" if message_versioning.is_message_selected(i, 1) else ""
|
||||
output += (
|
||||
f'<div class="message{selected_class}" '
|
||||
f'data-history-index="{i}" data-message-type="1" '
|
||||
f'data-raw="{html.escape(row_internal[1], quote=True)}">'
|
||||
f'data-raw="{html.escape(row_internal[1], quote=True)}"'
|
||||
f'data-index={i}>'
|
||||
f'<div class="text-bot">'
|
||||
f'<div class="message-body">{converted_visible[1]}</div>'
|
||||
f'{copy_button}'
|
||||
f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
|
||||
f'{continue_button if i == len(history["visible"]) - 1 else ""}'
|
||||
f'{remove_button if i == len(history["visible"]) - 1 else ""}'
|
||||
f'{versioning_nav_bot}'
|
||||
f'{assistant_attachments}'
|
||||
f'{actions_html(history, i, info_message_assistant)}'
|
||||
f'</div>'
|
||||
f'</div>'
|
||||
)
|
||||
|
|
|
@ -90,11 +90,6 @@ loaders_and_params = OrderedDict({
|
|||
'ctx_size_draft',
|
||||
'speculative_decoding_accordion',
|
||||
],
|
||||
'HQQ': [
|
||||
'hqq_backend',
|
||||
'trust_remote_code',
|
||||
'no_use_fast',
|
||||
],
|
||||
'TensorRT-LLM': [
|
||||
'ctx_size',
|
||||
'cpp_runner',
|
||||
|
@ -158,7 +153,6 @@ def transformers_samplers():
|
|||
|
||||
loaders_samplers = {
|
||||
'Transformers': transformers_samplers(),
|
||||
'HQQ': transformers_samplers(),
|
||||
'ExLlamav3_HF': {
|
||||
'temperature',
|
||||
'dynatemp_low',
|
||||
|
|
|
@ -21,7 +21,6 @@ def load_model(model_name, loader=None):
|
|||
'ExLlamav3_HF': ExLlamav3_HF_loader,
|
||||
'ExLlamav2_HF': ExLlamav2_HF_loader,
|
||||
'ExLlamav2': ExLlamav2_loader,
|
||||
'HQQ': HQQ_loader,
|
||||
'TensorRT-LLM': TensorRT_LLM_loader,
|
||||
}
|
||||
|
||||
|
@ -102,21 +101,6 @@ def ExLlamav2_loader(model_name):
|
|||
return model, tokenizer
|
||||
|
||||
|
||||
def HQQ_loader(model_name):
|
||||
try:
|
||||
from hqq.core.quantize import HQQBackend, HQQLinear
|
||||
from hqq.models.hf.base import AutoHQQHFModel
|
||||
except ModuleNotFoundError:
|
||||
raise ModuleNotFoundError("Failed to import 'hqq'. Please install it manually following the instructions in the HQQ GitHub repository.")
|
||||
|
||||
logger.info(f"Loading HQQ model with backend: \"{shared.args.hqq_backend}\"")
|
||||
|
||||
model_dir = Path(f'{shared.args.model_dir}/{model_name}')
|
||||
model = AutoHQQHFModel.from_quantized(str(model_dir))
|
||||
HQQLinear.set_backend(getattr(HQQBackend, shared.args.hqq_backend))
|
||||
return model
|
||||
|
||||
|
||||
def TensorRT_LLM_loader(model_name):
|
||||
try:
|
||||
from modules.tensorrt_llm import TensorRTLLMModel
|
||||
|
|
|
@ -2,7 +2,7 @@ import functools
|
|||
import json
|
||||
import re
|
||||
import subprocess
|
||||
from math import exp
|
||||
from math import floor
|
||||
from pathlib import Path
|
||||
|
||||
import gradio as gr
|
||||
|
@ -154,10 +154,11 @@ def get_model_metadata(model):
|
|||
for pat in settings:
|
||||
if re.match(pat.lower(), Path(model).name.lower()):
|
||||
for k in settings[pat]:
|
||||
new_k = k
|
||||
if k == 'n_gpu_layers':
|
||||
k = 'gpu_layers'
|
||||
new_k = 'gpu_layers'
|
||||
|
||||
model_settings[k] = settings[pat][k]
|
||||
model_settings[new_k] = settings[pat][k]
|
||||
|
||||
# Load instruction template if defined by name rather than by value
|
||||
if model_settings['instruction_template'] != 'Custom (obtained from model metadata)':
|
||||
|
@ -182,8 +183,6 @@ def infer_loader(model_name, model_settings, hf_quant_method=None):
|
|||
loader = 'ExLlamav3_HF'
|
||||
elif re.match(r'.*exl2', model_name.lower()):
|
||||
loader = 'ExLlamav2_HF'
|
||||
elif re.match(r'.*-hqq', model_name.lower()):
|
||||
return 'HQQ'
|
||||
else:
|
||||
loader = 'Transformers'
|
||||
|
||||
|
@ -331,8 +330,6 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):
|
|||
n_layers = None
|
||||
n_kv_heads = None
|
||||
embedding_dim = None
|
||||
context_length = None
|
||||
feed_forward_dim = None
|
||||
|
||||
for key, value in metadata.items():
|
||||
if key.endswith('.block_count'):
|
||||
|
@ -341,10 +338,6 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):
|
|||
n_kv_heads = value
|
||||
elif key.endswith('.embedding_length'):
|
||||
embedding_dim = value
|
||||
elif key.endswith('.context_length'):
|
||||
context_length = value
|
||||
elif key.endswith('.feed_forward_length'):
|
||||
feed_forward_dim = value
|
||||
|
||||
if gpu_layers > n_layers:
|
||||
gpu_layers = n_layers
|
||||
|
@ -359,22 +352,16 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):
|
|||
|
||||
# Derived features
|
||||
size_per_layer = size_in_mb / max(n_layers, 1e-6)
|
||||
context_per_layer = context_length / max(n_layers, 1e-6)
|
||||
ffn_per_embedding = feed_forward_dim / max(embedding_dim, 1e-6)
|
||||
kv_cache_factor = n_kv_heads * cache_type * ctx_size
|
||||
|
||||
# Helper function for smaller
|
||||
def smaller(x, y):
|
||||
return 1 if x < y else 0
|
||||
embedding_per_context = embedding_dim / ctx_size
|
||||
|
||||
# Calculate VRAM using the model
|
||||
# Details: https://oobabooga.github.io/blog/posts/gguf-vram-formula/
|
||||
vram = (
|
||||
(size_per_layer - 21.19195204848197)
|
||||
* exp(0.0001047328491557063 * size_in_mb * smaller(ffn_per_embedding, 2.671096993407845))
|
||||
+ 0.0006621544775632052 * context_per_layer
|
||||
+ 3.34664386576376e-05 * kv_cache_factor
|
||||
) * (1.363306170123392 + gpu_layers) + 1255.163594536052
|
||||
(size_per_layer - 17.99552795246051 + 3.148552680382576e-05 * kv_cache_factor)
|
||||
* (gpu_layers + max(0.9690636483914102, cache_type - (floor(50.77817218646521 * embedding_per_context) + 9.987899908205632)))
|
||||
+ 1516.522943869404
|
||||
)
|
||||
|
||||
return vram
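The replacement formula above can be exercised on its own. The sketch below copies the constants and the derived features (size_per_layer, kv_cache_factor, embedding_per_context) from the code above into a standalone function; the example inputs are made up, and treating cache_type as a plain numeric factor (for example 16 for an fp16 cache) is an assumption about how the caller encodes it.

# Standalone re-implementation of the updated VRAM estimate, for quick sanity
# checks outside the web UI. Constants are copied from the diff above; the
# example inputs below are made-up values, not real GGUF metadata.
from math import floor

def estimate_vram_mb(size_in_mb, n_layers, n_kv_heads, embedding_dim,
                     ctx_size, cache_type, gpu_layers):
    # Derived features (same definitions as in estimate_vram above)
    size_per_layer = size_in_mb / max(n_layers, 1e-6)
    kv_cache_factor = n_kv_heads * cache_type * ctx_size
    embedding_per_context = embedding_dim / ctx_size

    # Details: https://oobabooga.github.io/blog/posts/gguf-vram-formula/
    vram = (
        (size_per_layer - 17.99552795246051 + 3.148552680382576e-05 * kv_cache_factor)
        * (gpu_layers + max(0.9690636483914102, cache_type - (floor(50.77817218646521 * embedding_per_context) + 9.987899908205632)))
        + 1516.522943869404
    )
    return vram

# Hypothetical 7B-class GGUF: ~4.5 GB file, 32 layers, 8 KV heads, 4096-dim
# embeddings, 8192 context, fp16 cache (cache_type=16), all layers on GPU.
print(f"{estimate_vram_mb(4500, 32, 8, 4096, 8192, 16, 32):.0f} MiB")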
|
||||
|
||||
|
@@ -451,7 +438,7 @@ def update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type,
    - If for_ui=False: (vram_usage, adjusted_layers) or just vram_usage
    """
    if loader != 'llama.cpp' or model in ["None", None] or not model.endswith(".gguf"):
        vram_info = "<div id=\"vram-info\"'>Estimated VRAM to load the model:</span>"
        vram_info = "<div id=\"vram-info\"'>Estimated VRAM to load the model:</div>"
        if for_ui:
            return (vram_info, gr.update()) if auto_adjust else vram_info
        else:
@@ -485,7 +472,7 @@ def update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type,
        return_free = False if (for_ui and shared.model_name not in [None, 'None']) else True
        available_vram = get_nvidia_vram(return_free=return_free)
        if available_vram > 0:
            tolerance = 906
            tolerance = 577
            while current_layers > 0 and estimate_vram(model, current_layers, ctx_size, cache_type) > available_vram - tolerance:
                current_layers -= 1
@@ -493,7 +480,7 @@ def update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type,
    vram_usage = estimate_vram(model, current_layers, ctx_size, cache_type)

    if for_ui:
        vram_info = f"<div id=\"vram-info\"'>Estimated VRAM to load the model: <span class=\"value\">{vram_usage:.0f} MiB</span>"
        vram_info = f"<div id=\"vram-info\"'>Estimated VRAM to load the model: <span class=\"value\">{vram_usage:.0f} MiB</span></div>"
        if auto_adjust:
            return vram_info, gr.update(value=current_layers, maximum=max_layers)
        else:
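Putting the hunks above together: the auto-adjustment simply walks gpu-layers downward until the estimate fits under the detected VRAM minus the (now smaller) safety tolerance. A self-contained sketch of that loop, with the estimator passed in as a callable and all names hypothetical:

```python
def auto_adjust_gpu_layers(estimate_vram_mb, max_layers, ctx_size, cache_type,
                           available_vram_mb, tolerance_mb=577):
    """Return the largest gpu-layers value whose VRAM estimate fits within
    available_vram_mb - tolerance_mb. estimate_vram_mb(layers, ctx, cache)
    stands in for the estimate_vram() shown earlier."""
    layers = max_layers
    while layers > 0 and estimate_vram_mb(layers, ctx_size, cache_type) > available_vram_mb - tolerance_mb:
        layers -= 1
    return layers

# Example with a toy linear estimator: ~150 MiB per layer plus 1500 MiB overhead
layers = auto_adjust_gpu_layers(lambda n, c, k: 150 * n + 1500, 48, 8192, 16, 8192)
print(layers)  # 40 layers fit an 8 GiB budget under these toy numbers
```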
@@ -47,6 +47,7 @@ settings = {
    'max_new_tokens_max': 4096,
    'prompt_lookup_num_tokens': 0,
    'max_tokens_second': 0,
    'max_updates_second': 12,
    'auto_max_new_tokens': True,
    'ban_eos_token': False,
    'add_bos_token': True,
@@ -86,7 +87,7 @@ group.add_argument('--idle-timeout', type=int, default=0, help='Unload model aft

# Model loader
group = parser.add_argument_group('Model loader')
group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav2_HF, ExLlamav2, HQQ, TensorRT-LLM.')
group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav2_HF, ExLlamav2, TensorRT-LLM.')

# Transformers/Accelerate
group = parser.add_argument_group('Transformers/Accelerate')
@@ -151,10 +152,6 @@ group.add_argument('--no_sdpa', action='store_true', help='Force Torch SDPA to n
group.add_argument('--num_experts_per_token', type=int, default=2, metavar='N', help='Number of experts to use for generation. Applies to MoE models like Mixtral.')
group.add_argument('--enable_tp', action='store_true', help='Enable Tensor Parallelism (TP) in ExLlamaV2.')

# HQQ
group = parser.add_argument_group('HQQ')
group.add_argument('--hqq-backend', type=str, default='PYTORCH_COMPILE', help='Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.')

# TensorRT-LLM
group = parser.add_argument_group('TensorRT-LLM')
group.add_argument('--cpp-runner', action='store_true', help='Use the ModelRunnerCpp runner, which is faster than the default ModelRunner but doesn\'t support streaming yet.')
@@ -262,8 +259,6 @@ def fix_loader_name(name):
        return 'ExLlamav2_HF'
    elif name in ['exllamav3-hf', 'exllamav3_hf', 'exllama-v3-hf', 'exllama_v3_hf', 'exllama-v3_hf', 'exllama3-hf', 'exllama3_hf', 'exllama-3-hf', 'exllama_3_hf', 'exllama-3_hf']:
        return 'ExLlamav3_HF'
    elif name in ['hqq']:
        return 'HQQ'
    elif name in ['tensorrt', 'tensorrtllm', 'tensorrt_llm', 'tensorrt-llm', 'tensort', 'tensortllm']:
        return 'TensorRT-LLM'
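The `fix_loader_name` chain above is a straightforward alias normalizer: any common spelling of a backend name is folded to its canonical form before loading. A compact dictionary-based sketch of the same idea (illustrative only, not the project's implementation):

```python
# Hypothetical alias table covering a few of the spellings handled above
_LOADER_ALIASES = {
    'llamacpp': 'llama.cpp',
    'llama-cpp': 'llama.cpp',
    'exllamav2-hf': 'ExLlamav2_HF',
    'exllamav3-hf': 'ExLlamav3_HF',
    'tensorrt-llm': 'TensorRT-LLM',
    'tensorrtllm': 'TensorRT-LLM',
}

def normalize_loader_name(name):
    """Map a user-supplied loader string to its canonical spelling."""
    if not name:
        return name
    return _LOADER_ALIASES.get(name.strip().lower(), name)

print(normalize_loader_name('ExLlamaV3-HF'))  # -> ExLlamav3_HF
```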
@@ -65,39 +65,41 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap
            all_stop_strings += st

    shared.stop_everything = False
    last_update = -1
    reply = ''
    is_stream = state['stream']
    if len(all_stop_strings) > 0 and not state['stream']:
        state = copy.deepcopy(state)
        state['stream'] = True

    min_update_interval = 0
    if state.get('max_updates_second', 0) > 0:
        min_update_interval = 1 / state['max_updates_second']

    # Generate
    last_update = -1
    latency_threshold = 1 / 1000
    for reply in generate_func(question, original_question, state, stopping_strings, is_chat=is_chat):
        cur_time = time.monotonic()
        reply, stop_found = apply_stopping_strings(reply, all_stop_strings)
        if escape_html:
            reply = html.escape(reply)

        if is_stream:
            cur_time = time.time()

            # Limit number of tokens/second to make text readable in real time
            if state['max_tokens_second'] > 0:
                diff = 1 / state['max_tokens_second'] - (cur_time - last_update)
                if diff > 0:
                    time.sleep(diff)

                last_update = time.monotonic()
                last_update = time.time()
                yield reply

            # Limit updates to avoid lag in the Gradio UI
            # API updates are not limited
            else:
                # If 'generate_func' takes less than 0.001 seconds to yield the next token
                # (equivalent to more than 1000 tok/s), assume that the UI is lagging behind and skip yielding
                if (cur_time - last_update) > latency_threshold:
                if cur_time - last_update > min_update_interval:
                    last_update = cur_time
                    yield reply
                    last_update = time.monotonic()

        if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything):
            break
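Both variants of the streaming loop above solve the same problem: the generator can produce partial replies far faster than the UI can render them, so intermediate updates are skipped based on elapsed time. A minimal sketch of that throttling idea, independent of the webui's internals (names and rate are illustrative):

```python
import time

def throttle_stream(token_stream, max_updates_second=12):
    """Re-yield items from token_stream, dropping intermediate updates that
    arrive faster than the configured UI rate. The last item is always
    yielded so the final reply is never lost (a repeated yield is harmless)."""
    min_update_interval = 1 / max_updates_second if max_updates_second > 0 else 0
    last_update = -1
    latest = None
    for latest in token_stream:
        now = time.time()
        if now - last_update > min_update_interval:
            last_update = now
            yield latest
    if latest is not None:
        yield latest

# Example: an instantly-produced fake stream collapses to only a couple of UI updates
for update in throttle_stream(f"partial reply {i}" for i in range(1000)):
    pass
```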
@@ -109,7 +109,6 @@ def list_model_elements():
        'threads',
        'threads_batch',
        'batch_size',
        'hqq_backend',
        'ctx_size',
        'cache_type',
        'tensor_split',
@@ -192,6 +191,7 @@ def list_interface_input_elements():
        'max_new_tokens',
        'prompt_lookup_num_tokens',
        'max_tokens_second',
        'max_updates_second',
        'do_sample',
        'dynamic_temperature',
        'temperature_last',
@@ -210,6 +210,7 @@ def list_interface_input_elements():
        'negative_prompt',
        'dry_sequence_breakers',
        'grammar_string',
        'branch_index'
    ]

    # Chat elements
@@ -24,7 +24,8 @@ def create_ui():
        with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):
            with gr.Column():
                with gr.Row(elem_id='past-chats-buttons'):
                    shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes='refresh-button', interactive=not mu)
                    shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes='refresh-button', elem_id='Branch', interactive=not mu)
                    shared.gradio['branch_index'] = gr.Number(value=-1, precision=0, visible=False, elem_id="Branch-index", interactive=True)
                    shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes='refresh-button', interactive=not mu)
                    shared.gradio['delete_chat'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
                shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'focus-on-chat-input'])
@@ -47,13 +48,13 @@ def create_ui():
        with gr.Row():
            with gr.Column(elem_id='chat-col'):
                shared.gradio['display'] = gr.JSON(value={}, visible=False)  # Hidden buffer
                shared.gradio['html_display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, '', '', 'chat', 'cai-chat', '')['html'], visible=True)
                shared.gradio['html_display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': [], 'metadata': {}}, '', '', 'chat', 'cai-chat', '')['html'], visible=True)
                with gr.Row(elem_id="chat-input-row"):
                    with gr.Column(scale=1, elem_id='gr-hover-container'):
                        gr.HTML(value='<div class="hover-element" onclick="void(0)"><span style="width: 100px; display: block" id="hover-element-button">☰</span><div class="hover-menu" id="hover-menu"></div>', elem_id='gr-hover')

                    with gr.Column(scale=10, elem_id='chat-input-container'):
                        shared.gradio['textbox'] = gr.Textbox(label='', placeholder='Send a message', elem_id='chat-input', elem_classes=['add_scrollbar'])
                        shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar'])
                        shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls')
                        shared.gradio['typing-dots'] = gr.HTML(value='<div class="typing"><span></span><span class="dot1"></span><span class="dot2"></span></div>', label='typing', elem_id='typing-container')
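The `gr.Textbox` → `gr.MultimodalTextbox` swap changes the component's value from a plain string to a dict with "text" and "files" keys, which is why the event handlers further down now clear the input with `{"text": "", "files": []}`. A hedged sketch of reading such a value (helper name assumed for illustration):

```python
def unpack_chat_input(value):
    """Split a MultimodalTextbox-style value into (text, file_paths).
    Also accepts the old plain-string form so callers work with either
    component type. Illustrative only."""
    if isinstance(value, str):
        return value, []
    return value.get("text", ""), value.get("files", [])

text, files = unpack_chat_input({"text": "Summarize this PDF", "files": ["report.pdf"]})
print(text, files)
```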
@@ -79,8 +80,8 @@ def create_ui():
                shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply')

            with gr.Row():
                shared.gradio['send-chat-to-default'] = gr.Button('Send to default')
                shared.gradio['send-chat-to-notebook'] = gr.Button('Send to notebook')
                shared.gradio['send-chat-to-default'] = gr.Button('Send to Default')
                shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook')

    with gr.Row(elem_id='chat-controls', elem_classes=['pretty_scrollbar']):
        with gr.Column():
@@ -195,7 +196,7 @@ def create_event_handlers():

    shared.gradio['Generate'].click(
        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
        lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
        lambda x: (x, {"text": "", "files": []}), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
        lambda: None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.add("_generating")').then(
        chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then(
        None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then(
@@ -203,7 +204,7 @@ def create_event_handlers():

    shared.gradio['textbox'].submit(
        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
        lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
        lambda x: (x, {"text": "", "files": []}), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
        lambda: None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.add("_generating")').then(
        chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then(
        None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then(
@@ -271,7 +272,7 @@ def create_event_handlers():

    shared.gradio['branch_chat'].click(
        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
        chat.handle_branch_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False)
        chat.handle_branch_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id', 'branch_index'), show_progress=False)

    shared.gradio['rename_chat'].click(chat.handle_rename_chat_click, None, gradio('rename_to', 'rename-row'), show_progress=False)
    shared.gradio['rename_to-cancel'].click(lambda: gr.update(visible=False), None, gradio('rename-row'), show_progress=False)
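All of the handlers above follow the same Gradio pattern: a single `.click()` or `.submit()` event whose `.then()` steps run strictly in sequence (gather state, clear the input, generate, clean up). A minimal standalone sketch of that chaining, with toy components unrelated to the webui's own elements:

```python
import gradio as gr

with gr.Blocks() as demo:
    textbox = gr.Textbox(label="Input")
    output = gr.Textbox(label="Output")
    button = gr.Button("Generate")

    # Each .then() waits for the previous step to finish before running
    button.click(
        lambda x: x, textbox, output).then(
        lambda: "", None, textbox).then(
        lambda s: s.upper(), output, output)

if __name__ == "__main__":
    demo.launch()
```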
@@ -39,11 +39,9 @@ def create_ui():
            with gr.Row():
                with gr.Column():
                    shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.')
                    shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. ⚠️ Lower this value if you can\'t load the model.')
                    shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. Common values: 4096, 8192, 16384, 32768, 65536, 131072. ⚠️ Lower this value if you can\'t load the model.')
                    shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
                    shared.gradio['cache_type'] = gr.Dropdown(label="cache-type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).')
                    shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend)

                with gr.Column():
                    shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
                    shared.gradio['flash_attn'] = gr.Checkbox(label="flash-attn", value=shared.args.flash_attn, info='Use flash-attention.')
@@ -312,7 +310,7 @@ def get_initial_vram_info():
        for_ui=True
    )

    return "<div id=\"vram-info\"'>Estimated VRAM to load the model:</span>"
    return "<div id=\"vram-info\"'>Estimated VRAM to load the model:</div>"


def get_initial_gpu_layers_max():
@@ -71,6 +71,8 @@ def create_ui(default_preset):
                        shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.')
                        shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
                        shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
                        shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.')

                    with gr.Column():
                        with gr.Row():
                            with gr.Column():
@@ -13,6 +13,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -30,8 +31,8 @@ sse-starlette==1.6.5
tiktoken

# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
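Every wheel line in these requirements files is gated by a PEP 508 environment marker (the expression after the `;`), so pip only installs the binary matching the current OS, architecture, and Python version. A small sketch of how such a marker evaluates on the running interpreter, using the `packaging` library:

```python
from packaging.markers import Marker

marker = Marker('platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"')
print(marker.evaluate())  # True only on x86_64 Linux running Python 3.11
```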
@@ -12,6 +12,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -29,7 +30,7 @@ sse-starlette==1.6.5
tiktoken

# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
@@ -12,6 +12,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -29,7 +30,7 @@ sse-starlette==1.6.5
tiktoken

# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
@@ -12,6 +12,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -29,7 +30,7 @@ sse-starlette==1.6.5
tiktoken

# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl
@@ -12,6 +12,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -29,8 +30,8 @@ sse-starlette==1.6.5
tiktoken

# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl
@@ -12,6 +12,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -29,5 +30,5 @@ sse-starlette==1.6.5
tiktoken

# llama.cpp (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
@@ -12,6 +12,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -29,5 +30,5 @@ sse-starlette==1.6.5
tiktoken

# llama.cpp (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
@@ -13,6 +13,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -30,8 +31,8 @@ sse-starlette==1.6.5
tiktoken

# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
@@ -12,6 +12,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -4,6 +4,7 @@ jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -15,5 +16,5 @@ sse-starlette==1.6.5
tiktoken

# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
@@ -4,6 +4,7 @@ jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -15,5 +16,5 @@ sse-starlette==1.6.5
tiktoken

# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
@@ -4,6 +4,7 @@ jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -15,6 +16,6 @@ sse-starlette==1.6.5
tiktoken

# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
@@ -4,6 +4,7 @@ jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -15,5 +16,5 @@ sse-starlette==1.6.5
tiktoken

# llama.cpp (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
@@ -4,6 +4,7 @@ jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -15,5 +16,5 @@ sse-starlette==1.6.5
tiktoken

# llama.cpp (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
@@ -4,6 +4,7 @@ jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -15,5 +16,5 @@ sse-starlette==1.6.5
tiktoken

# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
@@ -4,6 +4,7 @@ jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -4,6 +4,7 @@ jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -15,5 +16,5 @@ sse-starlette==1.6.5
tiktoken

# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
@@ -4,6 +4,7 @@ jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
pyyaml
requests
rich
@@ -15,5 +16,5 @@ sse-starlette==1.6.5
tiktoken

# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
@@ -18,6 +18,7 @@ max_new_tokens_min: 1
max_new_tokens_max: 4096
prompt_lookup_num_tokens: 0
max_tokens_second: 0
max_updates_second: 12
auto_max_new_tokens: true
ban_eos_token: false
add_bos_token: true
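The template above is plain YAML mirroring the `settings = {...}` dict shown earlier in this diff; new keys such as `prompt_lookup_num_tokens` take effect once they appear in the user's settings file. A minimal sketch of loading such a file (the path is assumed for illustration):

```python
import yaml

# Hypothetical path; the webui keeps user settings under its user_data directory
with open("user_data/settings.yaml") as f:
    user_settings = yaml.safe_load(f) or {}

print(user_settings.get("prompt_lookup_num_tokens", 0))
```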