From 27641ac1823751165615a1a53b62ae24977e37a0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 28 May 2025 17:09:05 -0700 Subject: [PATCH 01/59] UI: Make message editing work the same for user and assistant messages --- js/global_scope_js.js | 28 ++++++------ modules/chat.py | 94 ++++++++++++++++++++------------------- modules/html_generator.py | 42 ++++++++++------- modules/ui.py | 3 +- modules/ui_chat.py | 4 +- 5 files changed, 94 insertions(+), 77 deletions(-) diff --git a/js/global_scope_js.js b/js/global_scope_js.js index 0e86d450..3274f47e 100644 --- a/js/global_scope_js.js +++ b/js/global_scope_js.js @@ -186,31 +186,33 @@ function navigateVersion(element, direction) { const index = messageElement.getAttribute("data-index"); if (!index) return; - const indexInput = document.getElementById("Navigate-message-index").querySelector("input"); - if (!indexInput) { - console.error("Element with ID 'Navigate-message-index' not found."); - return; - } - - const directionInput = document.getElementById("Navigate-direction").querySelector("textarea"); - if (!directionInput) { - console.error("Element with ID 'Navigate-direction' not found."); - return; + // Determine role based on message element classes + let role = "assistant"; // Default role + if (messageElement.classList.contains("user-message") || + messageElement.querySelector(".text-you") || + messageElement.querySelector(".circle-you")) { + role = "user"; } + const indexInput = document.getElementById("Navigate-message-index")?.querySelector("input"); + const directionInput = document.getElementById("Navigate-direction")?.querySelector("textarea"); + const roleInput = document.getElementById("Navigate-message-role")?.querySelector("textarea"); const navigateButton = document.getElementById("Navigate-version"); - if (!navigateButton) { - console.error("Required element 'Navigate-version' not found."); + + if (!indexInput || !directionInput || !roleInput || !navigateButton) { + console.error("Navigation control elements (index, direction, role, or button) not found."); return; } indexInput.value = index; directionInput.value = direction; + roleInput.value = role; - // Trigger any 'change' or 'input' events Gradio might be listening for + // Trigger 'input' events for Gradio to pick up changes const event = new Event("input", { bubbles: true }); indexInput.dispatchEvent(event); directionInput.dispatchEvent(event); + roleInput.dispatchEvent(event); navigateButton.click(); } diff --git a/modules/chat.py b/modules/chat.py index b2aacd5c..8bac680c 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -451,19 +451,21 @@ def get_stopping_strings(state): return result -def add_message_version(history, row_idx, is_current=True): - key = f"assistant_{row_idx}" +def add_message_version(history, role, row_idx, is_current=True): + key = f"{role}_{row_idx}" + if 'metadata' not in history: + history['metadata'] = {} if key not in history['metadata']: history['metadata'][key] = {} if "versions" not in history['metadata'][key]: history['metadata'][key]["versions"] = [] - current_content = history['internal'][row_idx][1] - current_visible = history['visible'][row_idx][1] + # Determine which index to use for content based on role + content_idx = 0 if role == 'user' else 1 + current_content = history['internal'][row_idx][content_idx] + current_visible = history['visible'][row_idx][content_idx] - # Always add the current message as a new version entry. 
- # The timestamp will differentiate it even if content is identical to a previous version. history['metadata'][key]["versions"].append({ "content": current_content, "visible_content": current_visible, @@ -594,7 +596,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess # Store the first response as a version before regenerating if not output['metadata'].get(f"assistant_{row_idx}", {}).get('versions'): - add_message_version(output, row_idx, is_current=False) + add_message_version(output, "assistant", row_idx, is_current=False) if loading_message: yield { @@ -656,12 +658,13 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess if is_stream: yield output + output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True) + # Add the newly generated response as a version (only for regeneration) if regenerate: row_idx = len(output['internal']) - 1 - add_message_version(output, row_idx, is_current=True) + add_message_version(output, "assistant", row_idx, is_current=True) - output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True) yield output @@ -1441,37 +1444,35 @@ def handle_edit_message_click(state): if message_index >= len(history['internal']): html_output = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) - return [history, html_output, gr.update()] + return [history, html_output, gr.update()] # No unique_id change - # Use the role passed from frontend - is_user_msg = (role == "user") - role_idx = 0 if is_user_msg else 1 + role_idx = 0 if role == "user" else 1 - # For assistant messages, save the original version BEFORE updating content - if not is_user_msg: - if not history['metadata'].get(f"assistant_{message_index}", {}).get('versions'): - add_message_version(history, message_index, is_current=False) + if 'metadata' not in history: + history['metadata'] = {} + + key = f"{role}_{message_index}" + if key not in history['metadata']: + history['metadata'][key] = {} + + # If no versions exist yet for this message, store the current (pre-edit) content as the first version. 
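For reference, the per-message metadata that this handler and add_message_version() build up has roughly the following shape. This is an illustrative sketch: the key names come from the patch, while the row index, the values, and the timestamp format are invented.

    # Hypothetical contents of history['metadata'] for the assistant message in
    # row 3 after one edit. "versions" keeps every revision; "current_version_index"
    # marks the one currently shown, which the navigate-version handler moves left/right.
    metadata_entry = {
        "versions": [
            {"content": "first reply",  "visible_content": "first reply",  "timestamp": "May 28, 2025 17:01"},
            {"content": "edited reply", "visible_content": "edited reply", "timestamp": "May 28, 2025 17:09"},
        ],
        "current_version_index": 1,
    }
    history = {"metadata": {"assistant_3": metadata_entry}}  # user edits use a "user_<row>" key instead
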
+ if "versions" not in history['metadata'][key] or not history['metadata'][key]["versions"]: + original_content = history['internal'][message_index][role_idx] + original_visible = history['visible'][message_index][role_idx] + + history['metadata'][key]["versions"] = [{ + "content": original_content, + "visible_content": original_visible, + "timestamp": get_current_timestamp() + }] - # NOW update the message content history['internal'][message_index][role_idx] = apply_extensions('input', new_text, state, is_chat=True) history['visible'][message_index][role_idx] = html.escape(new_text) - # Branch if editing user message, add version if editing assistant message - if is_user_msg: - # Branch like branch-here - history['visible'] = history['visible'][:message_index + 1] - history['internal'] = history['internal'][:message_index + 1] - new_unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S') - save_history(history, new_unique_id, state['character_menu'], state['mode']) - histories = find_all_histories_with_first_prompts(state) - past_chats_update = gr.update(choices=histories, value=new_unique_id) - state['unique_id'] = new_unique_id - elif not is_user_msg: - # Add the new version as current - add_message_version(history, message_index, is_current=True) - past_chats_update = gr.update() - else: - past_chats_update = gr.update() + add_message_version(history, role, message_index, is_current=True) + + # Since we are not branching, unique_id does not change. + past_chats_update = gr.update() save_history(history, state['unique_id'], state['character_menu'], state['mode']) html_output = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) @@ -1483,33 +1484,36 @@ def handle_navigate_version_click(state): history = state['history'] message_index = int(state['navigate_message_index']) direction = state['navigate_direction'] + role = state['navigate_message_role'] - # Get assistant message metadata - key = f"assistant_{message_index}" - if key not in history['metadata'] or 'versions' not in history['metadata'][key]: - # No versions to navigate + if not role: + logger.error("Role not provided for version navigation.") + html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + return [history, html] + + key = f"{role}_{message_index}" + if 'metadata' not in history or key not in history['metadata'] or 'versions' not in history['metadata'][key]: html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) return [history, html] metadata = history['metadata'][key] - current_idx = metadata.get('current_version_index', 0) versions = metadata['versions'] + # Default to the last version if current_version_index is not set + current_idx = metadata.get('current_version_index', len(versions) - 1 if versions else 0) - # Calculate new index if direction == 'left': new_idx = max(0, current_idx - 1) else: # right new_idx = min(len(versions) - 1, current_idx + 1) if new_idx == current_idx: - # No change needed html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) return [history, html] - # Update history with new version - version = versions[new_idx] - history['internal'][message_index][1] = version['content'] - history['visible'][message_index][1] = version['visible_content'] + msg_content_idx = 0 if role == 'user' else 1 # 0 for user content, 1 for assistant content 
in the pair + version_to_load = versions[new_idx] + history['internal'][message_index][msg_content_idx] = version_to_load['content'] + history['visible'][message_index][msg_content_idx] = version_to_load['visible_content'] metadata['current_version_index'] = new_idx # Redraw and save diff --git a/modules/html_generator.py b/modules/html_generator.py index bfb278cd..cbf3e19c 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -388,16 +388,17 @@ def format_message_attachments(history, role, index): return "" -def get_version_navigation_html(history, i): +def get_version_navigation_html(history, i, role): """Generate simple navigation arrows for message versions""" - key = f"assistant_{i}" + key = f"{role}_{i}" metadata = history.get('metadata', {}) if key not in metadata or 'versions' not in metadata[key]: return "" versions = metadata[key]['versions'] - current_idx = metadata[key].get('current_version_index', 0) + # Default to the last version if current_version_index isn't set in metadata + current_idx = metadata[key].get('current_version_index', len(versions) - 1 if versions else 0) if len(versions) <= 1: return "" @@ -413,22 +414,33 @@ def get_version_navigation_html(history, i): def actions_html(history, i, role, info_message=""): + action_buttons = "" + version_nav_html = "" + if role == "assistant": - return (f'
' - f'{copy_button}' - f'{edit_button}' - f'{refresh_button if i == len(history["visible"]) - 1 else ""}' - f'{continue_button if i == len(history["visible"]) - 1 else ""}' - f'{remove_button if i == len(history["visible"]) - 1 else ""}' - f'{branch_button}' - f'{info_message}' - f'
' - f'{get_version_navigation_html(history, i)}') - return (f'
' + action_buttons = ( f'{copy_button}' f'{edit_button}' + f'{refresh_button if i == len(history["visible"]) - 1 else ""}' + f'{continue_button if i == len(history["visible"]) - 1 else ""}' + f'{remove_button if i == len(history["visible"]) - 1 else ""}' + f'{branch_button}' + ) + + version_nav_html = get_version_navigation_html(history, i, "assistant") + elif role == "user": + action_buttons = ( + f'{copy_button}' + f'{edit_button}' + ) + + version_nav_html = get_version_navigation_html(history, i, "user") + + return (f'
' + f'{action_buttons}' f'{info_message}' - f'
') + f'
' + f'{version_nav_html}') def generate_instruct_html(history): diff --git a/modules/ui.py b/modules/ui.py index e24e6402..a2662e14 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -212,14 +212,13 @@ def list_interface_input_elements(): 'grammar_string', 'navigate_message_index', 'navigate_direction', + 'navigate_message_role', 'edit_message_index', 'edit_message_text', 'edit_message_role', 'branch_index', 'enable_web_search', 'web_search_pages', - 'navigate_message_index', - 'navigate_direction', ] # Chat elements diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 719af85a..df3d3929 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -110,6 +110,7 @@ def create_ui(): with gr.Row(visible=False): shared.gradio['navigate_message_index'] = gr.Number(value=-1, precision=0, elem_id="Navigate-message-index") shared.gradio['navigate_direction'] = gr.Textbox(value="", elem_id="Navigate-direction") + shared.gradio['navigate_message_role'] = gr.Textbox(value="", elem_id="Navigate-message-role") shared.gradio['navigate_version'] = gr.Button(elem_id="Navigate-version") shared.gradio['edit_message_index'] = gr.Number(value=-1, precision=0, elem_id="Edit-message-index") shared.gradio['edit_message_text'] = gr.Textbox(value="", elem_id="Edit-message-text") @@ -313,8 +314,7 @@ def create_event_handlers(): shared.gradio['edit_message'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.handle_edit_message_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False).then( - lambda: None, None, None, js='() => { const role = document.getElementById("Edit-message-role").querySelector("textarea").value; if (role === "user") document.getElementById("Regenerate").click(); }') + chat.handle_edit_message_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False) # Save/delete a character shared.gradio['save_character'].click(chat.handle_save_character_click, gradio('name2'), gradio('save_character_filename', 'character_saver'), show_progress=False) From 3eb0b77427ad7b87c128999fd915f97b22104819 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 28 May 2025 18:14:51 -0700 Subject: [PATCH 02/59] Improve the web search query generation --- modules/chat.py | 25 ++++++++++++++++++++++++- modules/web_search.py | 29 ++++------------------------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 8bac680c..495fe934 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -538,6 +538,27 @@ def extract_pdf_text(pdf_path): return f"[Error extracting PDF text: {str(e)}]" +def generate_search_query(user_message, state): + """Generate a search query from user message using the LLM""" + # Augment the user message with search instruction + augmented_message = f"{user_message}\n\n=====\n\nPlease turn the message above into a short web search query in the same language as the message. Respond with only the search query, nothing else." 
+ + # Use a minimal state for search query generation but keep the full history + search_state = state.copy() + search_state['max_new_tokens'] = 64 + search_state['auto_max_new_tokens'] = False + search_state['enable_thinking'] = False + + # Generate the full prompt using existing history + augmented message + formatted_prompt = generate_chat_prompt(augmented_message, search_state) + + query = "" + for reply in generate_reply(formatted_prompt, search_state, stopping_strings=[], is_chat=True): + query = reply.strip() + + return query + + def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_message=True, for_ui=False): # Handle dict format with text and files files = [] @@ -570,7 +591,9 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess add_message_attachment(output, row_idx, file_path, is_user=True) # Add web search results as attachments if enabled - add_web_search_attachments(output, row_idx, text, state) + if state.get('enable_web_search', False): + search_query = generate_search_query(text, state) + add_web_search_attachments(output, row_idx, text, search_query, state) # Apply extensions text, visible_text = apply_extensions('chat_input', text, visible_text, state) diff --git a/modules/web_search.py b/modules/web_search.py index d3387ac9..667178c5 100644 --- a/modules/web_search.py +++ b/modules/web_search.py @@ -13,22 +13,6 @@ def get_current_timestamp(): return datetime.now().strftime('%b %d, %Y %H:%M') -def generate_search_query(user_message, state): - """Generate a search query from user message using the LLM""" - search_prompt = f"{user_message}\n\n=====\n\nPlease turn the message above into a short web search query in the same language as the message. Respond with only the search query, nothing else." 
- - # Use a minimal state for search query generation - search_state = state.copy() - search_state['max_new_tokens'] = 64 - search_state['temperature'] = 0.1 - - query = "" - for reply in generate_reply(search_prompt, search_state, stopping_strings=[], is_chat=False): - query = reply.strip() - - return query - - def download_web_page(url, timeout=10): """Download and extract text from a web page""" try: @@ -82,19 +66,14 @@ def perform_web_search(query, num_pages=3): return [] -def add_web_search_attachments(history, row_idx, user_message, state): +def add_web_search_attachments(history, row_idx, user_message, search_query, state): """Perform web search and add results as attachments""" - if not state.get('enable_web_search', False): + if not search_query: + logger.warning("No search query provided") return try: - # Generate search query - search_query = generate_search_query(user_message, state) - if not search_query: - logger.warning("Failed to generate search query") - return - - logger.info(f"Generated search query: {search_query}") + logger.info(f"Using search query: {search_query}") # Perform web search num_pages = int(state.get('web_search_pages', 3)) From 7080a02252b9949297950ef3669361d21f4a6bcf Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 28 May 2025 18:15:21 -0700 Subject: [PATCH 03/59] Reduce the timeout for downloading web pages --- modules/web_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/web_search.py b/modules/web_search.py index 667178c5..070f850c 100644 --- a/modules/web_search.py +++ b/modules/web_search.py @@ -13,7 +13,7 @@ def get_current_timestamp(): return datetime.now().strftime('%b %d, %Y %H:%M') -def download_web_page(url, timeout=10): +def download_web_page(url, timeout=5): """Download and extract text from a web page""" try: headers = { From 75d6cfd14d1aed5ba19bd747479794cbd34212d0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 28 May 2025 20:34:14 -0700 Subject: [PATCH 04/59] Download fetched web search results in parallel --- modules/web_search.py | 44 +++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/modules/web_search.py b/modules/web_search.py index 070f850c..1f670349 100644 --- a/modules/web_search.py +++ b/modules/web_search.py @@ -1,3 +1,5 @@ +import concurrent.futures +from concurrent.futures import as_completed from datetime import datetime import requests @@ -5,7 +7,6 @@ from bs4 import BeautifulSoup from duckduckgo_search import DDGS from modules.logging_colors import logger -from modules.text_generation import generate_reply def get_current_timestamp(): @@ -40,27 +41,50 @@ def download_web_page(url, timeout=5): return f"[Error downloading content from {url}: {str(e)}]" -def perform_web_search(query, num_pages=3): +def perform_web_search(query, num_pages=3, max_workers=5): """Perform web search and return results with content""" try: with DDGS() as ddgs: results = list(ddgs.text(query, max_results=num_pages)) - search_results = [] + # Prepare download tasks + download_tasks = [] for i, result in enumerate(results): url = result.get('href', '') title = result.get('title', f'Search Result {i+1}') + download_tasks.append((url, title, i)) - # Download page content - content = download_web_page(url) + search_results = [None] * len(download_tasks) # Pre-allocate to maintain order - search_results.append({ - 'title': title, - 'url': url, - 'content': content - 
}) + # Download pages in parallel + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: + # Submit all download tasks + future_to_task = { + executor.submit(download_web_page, task[0]): task + for task in download_tasks + } + + # Collect results as they complete + for future in as_completed(future_to_task): + url, title, index = future_to_task[future] + try: + content = future.result() + search_results[index] = { + 'title': title, + 'url': url, + 'content': content + } + except Exception as e: + logger.error(f"Error downloading {url}: {e}") + # Include failed downloads with empty content + search_results[index] = { + 'title': title, + 'url': url, + 'content': '' + } return search_results + except Exception as e: logger.error(f"Error performing web search: {e}") return [] From 63234b9b6f60ec4f276480b4e7f9d4cd1395dcaf Mon Sep 17 00:00:00 2001 From: Underscore <47636331+Th-Underscore@users.noreply.github.com> Date: Thu, 29 May 2025 07:22:03 -0400 Subject: [PATCH 05/59] UI: Fix impersonate (#7025) --- modules/chat.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 495fe934..7afd906d 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -691,16 +691,19 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess yield output -def impersonate_wrapper(text, state): +def impersonate_wrapper(textbox, state): + text = textbox['text'] static_output = chat_html_wrapper(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) prompt = generate_chat_prompt('', state, impersonate=True) stopping_strings = get_stopping_strings(state) - yield text + '...', static_output + textbox['text'] = text + '...' 
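The fix here hinges on the chat input no longer being a plain string: it is the same `{'text': ..., 'files': [...]}` dict that chatbot_wrapper unpacks in the earlier patch, so impersonate has to read from and yield back that dict rather than a bare string. A minimal sketch of the shape involved (values invented; not an exact Gradio schema):

    # Hypothetical payload of the multimodal chat textbox; the 'text'/'files'
    # keys mirror the handling in chatbot_wrapper ("files = text.get('files', [])").
    textbox = {"text": "Tell me about", "files": []}

    # impersonate_wrapper now mutates the dict in place and yields the whole dict
    # back, so the textbox keeps its structure (and any attached files) while the
    # impersonated text streams in.
    textbox["text"] += "..."
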
+ yield textbox, static_output reply = None for reply in generate_reply(prompt + text, state, stopping_strings=stopping_strings, is_chat=True): - yield (text + reply).lstrip(' '), static_output + textbox['text'] = (text + reply).lstrip(' ') + yield textbox, static_output if shared.stop_everything: return From a8d02dec8f5e6a054a153b3b09425b51e090ae11 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 04:24:21 -0700 Subject: [PATCH 06/59] Bump llama.cpp --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 6 +++--- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- requirements/portable/requirements_apple_intel.txt | 4 ++-- requirements/portable/requirements_apple_silicon.txt | 6 +++--- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 16 files changed, 34 insertions(+), 34 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 0eaf10da..5f61aff9 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -33,8 +33,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 65f184bf..a718b6ca 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; 
platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index d20b2ec3..5fddc623 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 2613d787..8e014445 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index af583b00..77779f3d 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -32,8 +32,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 9bf2a37d..79efc607 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -32,5 +32,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 1731448e..8b29453e 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -32,5 +32,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index fc481a1a..f1f4a02e 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -33,8 +33,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index fdae681d..adf50d9a 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" 
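The `;` suffixes on these wheel URLs are standard PEP 508 environment markers: pip evaluates them against the local machine and installs only the wheels whose condition is true. A small sketch of that evaluation using the packaging library (assumed to be available; it is not part of this patch):

    from packaging.markers import Marker

    # True only on 64-bit x86 Linux running Python 3.11, mirroring the condition
    # attached to the Linux wheels above; on other machines it evaluates to False.
    marker = Marker('platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"')
    print(marker.evaluate())
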
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index a58f39f7..46b36791 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 91ea3a6d..66052711 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -18,6 +18,6 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt 
b/requirements/portable/requirements_cpu_only.txt index 37e5aa40..4013abcc 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index dcb2884b..41808854 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 8f1295bb..cff79ec6 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 858b4488..762b3fa3 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index 569bae99..b425d305 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From 685cfe254036111711de027f6d3a8198d02e7545 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 04:26:43 -0700 Subject: [PATCH 07/59] Lint --- css/main.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index 181a19b8..8af87b42 100644 --- a/css/main.css +++ b/css/main.css @@ -265,7 +265,7 @@ button { .dark .pretty_scrollbar::-webkit-scrollbar-thumb, .dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover { - background: rgba(255, 255, 255, 0.2); + background: rgb(255 255 255 / 20%); border-radius: 10px; } From f2ee917d4f600ebbc5fa9d5fcf65cf5feef27fc1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 04:55:05 -0700 Subject: [PATCH 08/59] Update README --- README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7105ce23..afb21cb0 100644 --- a/README.md +++ b/README.md @@ -14,14 +14,17 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. - Supports multiple text generation backends in one UI/API, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), [ExLlamaV2](https://github.com/turboderp-org/exllamav2), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) (the latter via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile)). - Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory that doesn't interfere with your system environment. 
+- **File attachments**: Upload text files and PDF documents directly in conversations to talk about their contents. +- **Web search**: Optionally search the internet with LLM-generated queries based on your input to add context to the conversation. +- **Advanced chat management**: Edit messages, navigate between message versions (like "swipes"), and branch conversations at any point. +- **Automatic prompt formatting** using Jinja2 templates. You don't need to ever worry about prompt formats. - UI that resembles the original ChatGPT style. -- Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats. - Three chat modes: `instruct`, `chat-instruct`, and `chat`, with automatic prompt templates in `chat-instruct`. - Free-form text generation in the Default/Notebook tabs without being limited to chat turns. You can send formatted conversations from the Chat tab to these. - Multiple sampling parameters and generation options for sophisticated text generation control. - Switch between different models easily in the UI without restarting, with fine control over settings. - OpenAI-compatible API with Chat and Completions endpoints, including tool-calling support – see [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). -- 100% offline and private, with zero telemetry, external resources, or remote update requests. +- 100% offline and private, with zero telemetry, external resources, or remote update requests. Web search is optional and user-controlled. - Extension support, with numerous built-in and user-contributed extensions available. See the [wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details. ## How to install From 2a9699033d90f4ffedfb22cbba7003c6441d08dc Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 04:55:59 -0700 Subject: [PATCH 09/59] Update README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index afb21cb0..05809436 100644 --- a/README.md +++ b/README.md @@ -16,8 +16,8 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. - Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory that doesn't interfere with your system environment. - **File attachments**: Upload text files and PDF documents directly in conversations to talk about their contents. - **Web search**: Optionally search the internet with LLM-generated queries based on your input to add context to the conversation. -- **Advanced chat management**: Edit messages, navigate between message versions (like "swipes"), and branch conversations at any point. -- **Automatic prompt formatting** using Jinja2 templates. You don't need to ever worry about prompt formats. +- Advanced chat management: Edit messages, navigate between message versions (like "swipes"), and branch conversations at any point. +- Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats. - UI that resembles the original ChatGPT style. - Three chat modes: `instruct`, `chat-instruct`, and `chat`, with automatic prompt templates in `chat-instruct`. 
- Free-form text generation in the Default/Notebook tabs without being limited to chat turns. You can send formatted conversations from the Chat tab to these. From 9a94d7b4f6ae95b6b4b2fc521b5b25c300915dc9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 05:02:52 -0700 Subject: [PATCH 10/59] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 05809436..900d5fbd 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Features - Supports multiple text generation backends in one UI/API, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), [ExLlamaV2](https://github.com/turboderp-org/exllamav2), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) (the latter via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile)). -- Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory that doesn't interfere with your system environment. +- Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory without affecting your system. - **File attachments**: Upload text files and PDF documents directly in conversations to talk about their contents. - **Web search**: Optionally search the internet with LLM-generated queries based on your input to add context to the conversation. - Advanced chat management: Edit messages, navigate between message versions (like "swipes"), and branch conversations at any point. From 0986d075fb22dc5aa582bbefdfdb0ebdb6ee92c8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 05:03:59 -0700 Subject: [PATCH 11/59] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 900d5fbd..ec01c0aa 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. - Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory without affecting your system. - **File attachments**: Upload text files and PDF documents directly in conversations to talk about their contents. - **Web search**: Optionally search the internet with LLM-generated queries based on your input to add context to the conversation. -- Advanced chat management: Edit messages, navigate between message versions (like "swipes"), and branch conversations at any point. +- Advanced chat management: Edit messages, navigate between message versions, and branch conversations at any point. - Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats. - UI that resembles the original ChatGPT style. - Three chat modes: `instruct`, `chat-instruct`, and `chat`, with automatic prompt templates in `chat-instruct`. 
From 36bc2760058ed4e6998f4c55176c7311b0facabe Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 05:39:26 -0700 Subject: [PATCH 12/59] Update README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index ec01c0aa..9accffb7 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. - **Web search**: Optionally search the internet with LLM-generated queries based on your input to add context to the conversation. - Advanced chat management: Edit messages, navigate between message versions, and branch conversations at any point. - Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats. +- Automatic GPU layers for GGUF models (on NVIDIA GPUs). - UI that resembles the original ChatGPT style. - Three chat modes: `instruct`, `chat-instruct`, and `chat`, with automatic prompt templates in `chat-instruct`. - Free-form text generation in the Default/Notebook tabs without being limited to chat turns. You can send formatted conversations from the Chat tab to these. From 81794692ab6fbc0ef24c7484b6571de090984dde Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 08:07:14 -0700 Subject: [PATCH 13/59] UI: Make the dark theme darker --- css/main.css | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/css/main.css b/css/main.css index 8af87b42..0d0a13cf 100644 --- a/css/main.css +++ b/css/main.css @@ -1,11 +1,11 @@ :root { --darker-gray: #202123; - --dark-gray: #343541; - --light-gray: #444654; + --dark-gray: #2A2B32; + --light-gray: #373943; --light-theme-gray: #f9fbff; --border-color-dark: #525252; --header-width: 112px; - --selected-item-color-dark: #32333e; + --selected-item-color-dark: #2E2F38; } @font-face { From c970c5f1665c3966c84ba50a05a45d2598038ea6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 08:15:13 -0700 Subject: [PATCH 14/59] Make scrollbars darker in dark theme --- css/main.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index 0d0a13cf..7f9d4618 100644 --- a/css/main.css +++ b/css/main.css @@ -265,7 +265,7 @@ button { .dark .pretty_scrollbar::-webkit-scrollbar-thumb, .dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover { - background: rgb(255 255 255 / 20%); + background: rgb(255 255 255 / 10%); border-radius: 10px; } From 3f37a2e915a31b273caddd12a80412a199d753a7 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 08:49:31 -0700 Subject: [PATCH 15/59] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9accffb7..361584f8 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Features - Supports multiple text generation backends in one UI/API, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), [ExLlamaV2](https://github.com/turboderp-org/exllamav2), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) (the latter via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile)). 
-- Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory without affecting your system. +- Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory. - **File attachments**: Upload text files and PDF documents directly in conversations to talk about their contents. - **Web search**: Optionally search the internet with LLM-generated queries based on your input to add context to the conversation. - Advanced chat management: Edit messages, navigate between message versions, and branch conversations at any point. From faa5c82c64e2036762ed3ff60a38fc5b37dac36d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 09:02:34 -0700 Subject: [PATCH 16/59] Fix message version count not updating during regeneration streaming --- modules/chat.py | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 7afd906d..90d66687 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -617,10 +617,19 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess if regenerate: row_idx = len(output['internal']) - 1 - # Store the first response as a version before regenerating + # Store the old response as a version before regenerating if not output['metadata'].get(f"assistant_{row_idx}", {}).get('versions'): add_message_version(output, "assistant", row_idx, is_current=False) + # Add new empty version (will be filled during streaming) + key = f"assistant_{row_idx}" + output['metadata'][key]["versions"].append({ + "content": "", + "visible_content": "", + "timestamp": get_current_timestamp() + }) + output['metadata'][key]["current_version_index"] = len(output['metadata'][key]["versions"]) - 1 + if loading_message: yield { 'visible': output['visible'][:-1] + [[visible_text, shared.processing_message]], @@ -673,20 +682,34 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess if _continue: output['internal'][-1] = [text, last_reply[0] + reply] output['visible'][-1] = [visible_text, last_reply[1] + visible_reply] - if is_stream: - yield output elif not (j == 0 and visible_reply.strip() == ''): output['internal'][-1] = [text, reply.lstrip(' ')] output['visible'][-1] = [visible_text, visible_reply.lstrip(' ')] - if is_stream: - yield output + + # Keep version metadata in sync during streaming (for regeneration) + if regenerate: + row_idx = len(output['internal']) - 1 + key = f"assistant_{row_idx}" + current_idx = output['metadata'][key]['current_version_index'] + output['metadata'][key]['versions'][current_idx].update({ + 'content': output['internal'][row_idx][1], + 'visible_content': output['visible'][row_idx][1] + }) + + if is_stream: + yield output output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True) - # Add the newly generated response as a version (only for regeneration) + # Final sync for version metadata (in case streaming was disabled) if regenerate: row_idx = len(output['internal']) - 1 - add_message_version(output, "assistant", row_idx, is_current=True) + key = f"assistant_{row_idx}" + current_idx = output['metadata'][key]['current_version_index'] + 
output['metadata'][key]['versions'][current_idx].update({ + 'content': output['internal'][row_idx][1], + 'visible_content': output['visible'][row_idx][1] + }) yield output From 724147ffabce95b5d20528b83b6e44c1523d58f0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 10:49:29 -0700 Subject: [PATCH 17/59] Better detect when no model is available --- modules/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/utils.py b/modules/utils.py index 0e8bdd18..577c55b8 100644 --- a/modules/utils.py +++ b/modules/utils.py @@ -74,7 +74,7 @@ def natural_keys(text): def check_model_loaded(): if shared.model_name == 'None' or shared.model is None: - if len(get_available_models()) <= 1: + if len(get_available_models()) == 0: error_msg = "No model is loaded.\n\nTo get started:\n1) Place a GGUF file in your user_data/models folder\n2) Go to the Model tab and select it" logger.error(error_msg) return False, error_msg From e7129f9dbefbe87fa4c425b5873f80cbddaf7cf0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 12:45:53 -0700 Subject: [PATCH 18/59] Prevent footer buttons below last assistant message from always appearing --- js/main.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/js/main.js b/js/main.js index 48bb8632..ea3ff46a 100644 --- a/js/main.js +++ b/js/main.js @@ -171,7 +171,6 @@ const observer = new MutationObserver(function(mutations) { document.getElementById("Generate").style.display = "flex"; } - doSyntaxHighlighting(); if (!isScrolled && targetElement.scrollTop !== targetElement.scrollHeight) { @@ -184,7 +183,7 @@ const observer = new MutationObserver(function(mutations) { const lastChild = messagesContainer?.lastElementChild; const prevSibling = lastChild?.previousElementSibling; if (lastChild && prevSibling) { - lastChild.style.minHeight = `calc(max(70vh, 100vh - ${prevSibling.offsetHeight}px - 102px))`; + lastChild.style.setProperty("margin-bottom", `calc(max(70vh, 100vh - ${prevSibling.offsetHeight}px - 102px) - ${lastChild.offsetHeight}px)`, "important"); } } }); From aff41f3482bc7045334b0d81ac514723fdbd4f97 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 12:53:41 -0700 Subject: [PATCH 19/59] Update README --- README.md | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 361584f8..daf409d0 100644 --- a/README.md +++ b/README.md @@ -189,13 +189,13 @@ usage: server.py [-h] [--multi-user] [--character CHARACTER] [--model MODEL] [-- [--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code] [--force-safetensors] [--no_use_fast] [--use_flash_attention_2] [--use_eager_attention] [--torch-compile] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn] [--threads THREADS] [--threads-batch THREADS_BATCH] [--batch-size BATCH_SIZE] [--no-mmap] - [--mlock] [--n-gpu-layers N_GPU_LAYERS] [--tensor-split TENSOR_SPLIT] [--numa] [--no-kv-offload] [--row-split] [--extra-flags EXTRA_FLAGS] [--streaming-llm] [--ctx-size N] + [--mlock] [--gpu-layers N] [--tensor-split TENSOR_SPLIT] [--numa] [--no-kv-offload] [--row-split] [--extra-flags EXTRA_FLAGS] [--streaming-llm] 
[--ctx-size N] [--cache-type N] [--model-draft MODEL_DRAFT] [--draft-max DRAFT_MAX] [--gpu-layers-draft GPU_LAYERS_DRAFT] [--device-draft DEVICE_DRAFT] [--ctx-size-draft CTX_SIZE_DRAFT] [--gpu-split GPU_SPLIT] - [--autosplit] [--cfg-cache] [--no_flash_attn] [--no_xformers] [--no_sdpa] [--num_experts_per_token N] [--enable_tp] [--hqq-backend HQQ_BACKEND] [--cpp-runner] - [--cache_type CACHE_TYPE] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR] [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] - [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT] [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] - [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] [--subpath SUBPATH] [--old-colors] [--api] [--public-api] - [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--api-enable-ipv6] [--api-disable-ipv4] [--nowebui] + [--autosplit] [--cfg-cache] [--no_flash_attn] [--no_xformers] [--no_sdpa] [--num_experts_per_token N] [--enable_tp] [--cpp-runner] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR] + [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT] + [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] + [--subpath SUBPATH] [--old-colors] [--portable] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] + [--api-enable-ipv6] [--api-disable-ipv4] [--nowebui] Text generation web UI @@ -217,7 +217,7 @@ Basic settings: --idle-timeout IDLE_TIMEOUT Unload model after this many minutes of inactivity. It will be automatically reloaded when you try to use it again. Model loader: - --loader LOADER Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav2_HF, ExLlamav2, HQQ, + --loader LOADER Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav2_HF, ExLlamav2, TensorRT-LLM. Transformers/Accelerate: @@ -248,16 +248,18 @@ llama.cpp: --batch-size BATCH_SIZE Maximum number of prompt tokens to batch together when calling llama_eval. --no-mmap Prevent mmap from being used. --mlock Force the system to keep the model in RAM. - --n-gpu-layers N_GPU_LAYERS Number of layers to offload to the GPU. + --gpu-layers N, --n-gpu-layers N Number of layers to offload to the GPU. --tensor-split TENSOR_SPLIT Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40. --numa Activate NUMA task allocation for llama.cpp. --no-kv-offload Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance. --row-split Split the model by rows across GPUs. This may improve multi-gpu performance. - --extra-flags EXTRA_FLAGS Extra flags to pass to llama-server. Format: "flag1=value1;flag2;flag3=value3". Example: "override-tensor=exps=CPU" + --extra-flags EXTRA_FLAGS Extra flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU" --streaming-llm Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed. 
-Context and cache management: +Context and cache: --ctx-size N, --n_ctx N, --max_seq_len N Context size in tokens. + --cache-type N, --cache_type N KV cache type; valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8 (can specify k_bits and v_bits + separately, e.g. q4_q8). Speculative decoding: --model-draft MODEL_DRAFT Path to the draft model for speculative decoding. @@ -276,15 +278,9 @@ ExLlamaV2: --num_experts_per_token N Number of experts to use for generation. Applies to MoE models like Mixtral. --enable_tp Enable Tensor Parallelism (TP) in ExLlamaV2. -HQQ: - --hqq-backend HQQ_BACKEND Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN. - TensorRT-LLM: --cpp-runner Use the ModelRunnerCpp runner, which is faster than the default ModelRunner but doesn't support streaming yet. -Cache: - --cache_type CACHE_TYPE KV cache type; valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4. - DeepSpeed: --deepspeed Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration. --nvme-offload-dir NVME_OFFLOAD_DIR DeepSpeed: Directory to use for ZeRO-3 NVME offloading. @@ -307,6 +303,7 @@ Gradio: --ssl-certfile SSL_CERTFILE The path to the SSL certificate cert file. --subpath SUBPATH Customize the subpath for gradio, use with reverse proxy --old-colors Use the legacy Gradio colors, before the December/2024 update. + --portable Hide features not available in portable mode like training. API: --api Enable the API extension. From f59998d2680f346038320b536617c4738c393947 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 13:08:48 -0700 Subject: [PATCH 20/59] Don't limit the number of prompt characters printed with --verbose --- modules/text_generation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/text_generation.py b/modules/text_generation.py index 962311df..1fd6d810 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -505,11 +505,11 @@ def generate_reply_custom(question, original_question, state, stopping_strings=N return -def print_prompt(prompt, max_chars=2000): +def print_prompt(prompt, max_chars=-1): DARK_YELLOW = "\033[38;5;3m" RESET = "\033[0m" - if len(prompt) > max_chars: + if max_chars > 0 and len(prompt) > max_chars: half_chars = max_chars // 2 hidden_len = len(prompt[half_chars:-half_chars]) hidden_msg = f"{DARK_YELLOW}[...{hidden_len} characters hidden...]{RESET}" From a45a65213052dad02d696ed54af1b9f2ea82cd4a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 13:28:51 -0700 Subject: [PATCH 21/59] CSS fix --- js/main.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/js/main.js b/js/main.js index ea3ff46a..f23dc246 100644 --- a/js/main.js +++ b/js/main.js @@ -183,7 +183,10 @@ const observer = new MutationObserver(function(mutations) { const lastChild = messagesContainer?.lastElementChild; const prevSibling = lastChild?.previousElementSibling; if (lastChild && prevSibling) { - lastChild.style.setProperty("margin-bottom", `calc(max(70vh, 100vh - ${prevSibling.offsetHeight}px - 102px) - ${lastChild.offsetHeight}px)`, "important"); + lastChild.style.setProperty("margin-bottom", + `max(0px, calc(max(70vh, 100vh - ${prevSibling.offsetHeight}px - 102px) - ${lastChild.offsetHeight}px))`, + "important" + ); } } }); From 8078c41ec67b96656d7e96128d915290b319e4f5 Mon Sep 17 00:00:00 
2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 13:32:19 -0700 Subject: [PATCH 22/59] Revert "Bump llama.cpp" This reverts commit a8d02dec8f5e6a054a153b3b09425b51e090ae11. --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 6 +++--- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- requirements/portable/requirements_apple_intel.txt | 4 ++-- requirements/portable/requirements_apple_silicon.txt | 6 +++--- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 16 files changed, 34 insertions(+), 34 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 5f61aff9..0eaf10da 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -33,8 +33,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index a718b6ca..65f184bf 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 5fddc623..d20b2ec3 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 8e014445..2613d787 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" 
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 77779f3d..af583b00 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -32,8 +32,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 79efc607..9bf2a37d 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -32,5 +32,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 8b29453e..1731448e 100644 --- 
a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -32,5 +32,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index f1f4a02e..fc481a1a 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -33,8 +33,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index adf50d9a..fdae681d 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index 46b36791..a58f39f7 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 66052711..91ea3a6d 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -18,6 +18,6 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index 4013abcc..37e5aa40 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index 41808854..dcb2884b 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index cff79ec6..8f1295bb 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 762b3fa3..858b4488 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index b425d305..569bae99 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From dce02732a4caef16157ffbc288dfe079053e0bb4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 14:08:48 -0700 Subject: [PATCH 23/59] Fix timestamp issues when editing/swiping messages --- modules/chat.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/chat.py b/modules/chat.py index 90d66687..6b3ff4fc 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -1508,11 +1508,12 @@ def handle_edit_message_click(state): if "versions" not in history['metadata'][key] or not history['metadata'][key]["versions"]: original_content = history['internal'][message_index][role_idx] original_visible = history['visible'][message_index][role_idx] + original_timestamp = history['metadata'][key].get('timestamp', get_current_timestamp()) history['metadata'][key]["versions"] = [{ "content": original_content, "visible_content": original_visible, - "timestamp": get_current_timestamp() + "timestamp": original_timestamp }] history['internal'][message_index][role_idx] = apply_extensions('input', new_text, state, is_chat=True) @@ -1564,6 +1565,7 @@ def handle_navigate_version_click(state): history['internal'][message_index][msg_content_idx] = version_to_load['content'] history['visible'][message_index][msg_content_idx] = version_to_load['visible_content'] metadata['current_version_index'] = new_idx + update_message_metadata(history['metadata'], role, message_index, timestamp=version_to_load['timestamp']) # Redraw and save html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) From acbcc12e7b19cc9f540d32b8d601ceefde77b7a1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 14:11:21 -0700 Subject: [PATCH 24/59] Clean up --- modules/chat.py | 7 ++----- modules/ui_chat.py | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 6b3ff4fc..e526a9a0 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -1493,7 +1493,7 @@ def handle_edit_message_click(state): if message_index >= 
len(history['internal']): html_output = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) - return [history, html_output, gr.update()] # No unique_id change + return [history, html_output] role_idx = 0 if role == "user" else 1 @@ -1521,13 +1521,10 @@ def handle_edit_message_click(state): add_message_version(history, role, message_index, is_current=True) - # Since we are not branching, unique_id does not change. - past_chats_update = gr.update() - save_history(history, state['unique_id'], state['character_menu'], state['mode']) html_output = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) - return [history, html_output, past_chats_update] + return [history, html_output] def handle_navigate_version_click(state): diff --git a/modules/ui_chat.py b/modules/ui_chat.py index df3d3929..d79aa523 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -314,7 +314,7 @@ def create_event_handlers(): shared.gradio['edit_message'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.handle_edit_message_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False) + chat.handle_edit_message_click, gradio('interface_state'), gradio('history', 'display'), show_progress=False) # Save/delete a character shared.gradio['save_character'].click(chat.handle_save_character_click, gradio('name2'), gradio('save_character_filename', 'character_saver'), show_progress=False) From d1bfb08e8d4bab174e6b4467eff20f8a01a2a613 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 14:27:47 -0700 Subject: [PATCH 25/59] Improve the style of message editing --- css/main.css | 1 + 1 file changed, 1 insertion(+) diff --git a/css/main.css b/css/main.css index 7f9d4618..9685c863 100644 --- a/css/main.css +++ b/css/main.css @@ -1462,6 +1462,7 @@ strong { .editing-textarea { width: 100%; min-height: 200px; + max-height: 65vh; padding: 10px; border-radius: 5px; border: 1px solid #ccc; From 28e6bd4fcd8cd385cc92cc56c0c49fc474006147 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 14:49:07 -0700 Subject: [PATCH 26/59] Revert "Update transformers requirement in /requirements/full (#7017)" This reverts commit cc9b7253c1216e5340da85cba9b65a13cf3526e9. 
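
Note on the message-version bookkeeping touched by the chat.py patches earlier in this series (PATCH 16, 23, and 24): the diffs all assume per-message metadata keyed as "assistant_<row>" or "user_<row>", each holding a "versions" list and a "current_version_index". The sketch below is a simplified illustration of that layout, not code from the repository; the field names come from the diffs, while the `history` contents and the timestamp helper are placeholders (the real `get_current_timestamp()` lives in modules/chat.py and may use a different format).

    from datetime import datetime

    def get_current_timestamp():
        # Simplified stand-in for the helper used in modules/chat.py.
        return datetime.now().strftime("%b %d, %Y %H:%M")

    # Minimal history object with one user/assistant pair (illustrative values).
    history = {
        'internal': [["hello", "hi there"]],
        'visible': [["hello", "hi there"]],
        'metadata': {
            'assistant_0': {
                'timestamp': get_current_timestamp(),
                'versions': [
                    {
                        'content': "hi there",
                        'visible_content': "hi there",
                        'timestamp': get_current_timestamp(),
                    },
                ],
                'current_version_index': 0,
            },
        },
    }

    # On regeneration, an empty version is appended up front and then kept in
    # sync on every streamed chunk, mirroring the loop in chatbot_wrapper().
    key = 'assistant_0'
    versions = history['metadata'][key]['versions']
    versions.append({
        'content': "",
        'visible_content': "",
        'timestamp': get_current_timestamp(),
    })
    history['metadata'][key]['current_version_index'] = len(versions) - 1
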
--- requirements/full/requirements.txt | 2 +- requirements/full/requirements_amd.txt | 2 +- requirements/full/requirements_amd_noavx2.txt | 2 +- requirements/full/requirements_apple_intel.txt | 2 +- requirements/full/requirements_apple_silicon.txt | 2 +- requirements/full/requirements_cpu_only.txt | 2 +- requirements/full/requirements_cpu_only_noavx2.txt | 2 +- requirements/full/requirements_noavx2.txt | 2 +- requirements/full/requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 0eaf10da..2c322715 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -23,7 +23,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 65f184bf..6aeb325e 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -22,7 +22,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index d20b2ec3..3b052423 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -22,7 +22,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 2613d787..8c51459e 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -22,7 +22,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index af583b00..b9f15d45 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -22,7 +22,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 9bf2a37d..0877d968 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -22,7 +22,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 1731448e..cab78237 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -22,7 +22,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index fc481a1a..dfd42577 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -23,7 +23,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index 2ed8affa..5d9f84ce 100644 --- 
a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -22,7 +22,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb From 7c29879e795776ceb742a8ddb47fd3843069cf34 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 30 May 2025 11:17:47 -0700 Subject: [PATCH 27/59] Fix 'Start reply with' (closes #7033) --- modules/chat.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index e526a9a0..881f7330 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -806,9 +806,12 @@ def remove_last_message(history): return html.unescape(last[0]), history -def send_dummy_message(textbox, state): +def send_dummy_message(text, state): history = state['history'] - text = textbox['text'] + + # Handle both dict and string inputs + if isinstance(text, dict): + text = text['text'] # Initialize metadata if not present if 'metadata' not in history: @@ -822,9 +825,12 @@ def send_dummy_message(textbox, state): return history -def send_dummy_reply(textbox, state): +def send_dummy_reply(text, state): history = state['history'] - text = textbox['text'] + + # Handle both dict and string inputs + if isinstance(text, dict): + text = text['text'] # Initialize metadata if not present if 'metadata' not in history: From 298d4719c6c9545a701a9cc9e8f4efceb108599a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 30 May 2025 11:32:24 -0700 Subject: [PATCH 28/59] Multiple small style improvements --- css/main.css | 4 ++++ modules/ui.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/css/main.css b/css/main.css index 9685c863..967d94ed 100644 --- a/css/main.css +++ b/css/main.css @@ -1551,3 +1551,7 @@ strong { color: var(--body-text-color-subdued); margin-top: 4px; } + +button:focus { + outline: none; +} diff --git a/modules/ui.py b/modules/ui.py index a2662e14..9f4d67cb 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -71,6 +71,7 @@ if not shared.args.old_colors: block_background_fill_dark='transparent', block_border_color_dark='transparent', input_border_color_dark='var(--border-color-dark)', + input_border_color_focus_dark='var(--border-color-dark)', checkbox_border_color_dark='var(--border-color-dark)', border_color_primary_dark='var(--border-color-dark)', button_secondary_border_color_dark='var(--border-color-dark)', @@ -89,6 +90,8 @@ if not shared.args.old_colors: checkbox_label_shadow='none', block_shadow='none', block_shadow_dark='none', + input_shadow_focus='none', + input_shadow_focus_dark='none', button_large_radius='0.375rem', button_large_padding='6px 12px', input_radius='0.375rem', From 219f0a773166deeb0326c2874b29e66e382df524 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 30 May 2025 12:05:49 -0700 Subject: [PATCH 29/59] Fix exllamav3_hf models failing to unload (closes #7031) --- modules/exllamav3_hf.py | 17 +++++++++++++++++ modules/models.py | 3 +++ 2 files changed, 20 insertions(+) diff --git a/modules/exllamav3_hf.py b/modules/exllamav3_hf.py index 417df473..1254ff5d 100644 --- a/modules/exllamav3_hf.py +++ b/modules/exllamav3_hf.py @@ -245,3 +245,20 @@ class Exllamav3HF(PreTrainedModel, GenerationMixin): pretrained_model_name_or_path = Path(f'{shared.args.model_dir}') / Path(pretrained_model_name_or_path) return Exllamav3HF(pretrained_model_name_or_path) + + def unload(self): + """Properly unload 
the ExllamaV3 model and free GPU memory.""" + if hasattr(self, 'ex_model') and self.ex_model is not None: + self.ex_model.unload() + self.ex_model = None + + if hasattr(self, 'ex_cache') and self.ex_cache is not None: + self.ex_cache = None + + # Clean up any additional ExllamaV3 resources + if hasattr(self, 'past_seq'): + self.past_seq = None + if hasattr(self, 'past_seq_negative'): + self.past_seq_negative = None + if hasattr(self, 'ex_cache_negative'): + self.ex_cache_negative = None diff --git a/modules/models.py b/modules/models.py index 4218d58c..d329ae3c 100644 --- a/modules/models.py +++ b/modules/models.py @@ -116,10 +116,13 @@ def unload_model(keep_model_name=False): return is_llamacpp = (shared.model.__class__.__name__ == 'LlamaServer') + if shared.args.loader == 'ExLlamav3_HF': + shared.model.unload() shared.model = shared.tokenizer = None shared.lora_names = [] shared.model_dirty_from_training = False + if not is_llamacpp: from modules.torch_utils import clear_torch_cache clear_torch_cache() From 15f466ca3f8255f2566f016db8d7b8fd9ebef3f4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 30 May 2025 15:49:57 -0700 Subject: [PATCH 30/59] Update README --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index daf409d0..55df33d2 100644 --- a/README.md +++ b/README.md @@ -14,18 +14,18 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. - Supports multiple text generation backends in one UI/API, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), [ExLlamaV2](https://github.com/turboderp-org/exllamav2), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) (the latter via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile)). - Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory. -- **File attachments**: Upload text files and PDF documents directly in conversations to talk about their contents. -- **Web search**: Optionally search the internet with LLM-generated queries based on your input to add context to the conversation. -- Advanced chat management: Edit messages, navigate between message versions, and branch conversations at any point. +- 100% offline and private, with zero telemetry, external resources, or remote update requests. - Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats. -- Automatic GPU layers for GGUF models (on NVIDIA GPUs). -- UI that resembles the original ChatGPT style. -- Three chat modes: `instruct`, `chat-instruct`, and `chat`, with automatic prompt templates in `chat-instruct`. -- Free-form text generation in the Default/Notebook tabs without being limited to chat turns. You can send formatted conversations from the Chat tab to these. +- **File attachments**: Upload text files and PDF documents to talk about their contents. +- **Web search**: Optionally search the internet with LLM-generated queries to add context to the conversation. +- Aesthetic UI with dark and light themes. +- `instruct` mode for instruction-following (like ChatGPT), and `chat-instruct`/`chat` modes for talking to custom characters. 
+- Edit messages, navigate between message versions, and branch conversations at any point. - Multiple sampling parameters and generation options for sophisticated text generation control. -- Switch between different models easily in the UI without restarting, with fine control over settings. +- Switch between different models in the UI without restarting. +- Automatic GPU layers for GGUF models (on NVIDIA GPUs). +- Free-form text generation in the Default/Notebook tabs without being limited to chat turns. - OpenAI-compatible API with Chat and Completions endpoints, including tool-calling support – see [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). -- 100% offline and private, with zero telemetry, external resources, or remote update requests. Web search is optional and user-controlled. - Extension support, with numerous built-in and user-contributed extensions available. See the [wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details. ## How to install From c55d3c61c6e44712e90fa60c1e434d7687e90947 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 14:21:42 -0700 Subject: [PATCH 31/59] Bump exllamav2 to 0.3.1 --- requirements/full/requirements.txt | 6 +++--- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 2 +- requirements/full/requirements_apple_silicon.txt | 2 +- requirements/full/requirements_noavx2.txt | 6 +++--- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 2c322715..dd631341 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -37,8 +37,8 @@ https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_ https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 6aeb325e..acdbd455 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -34,5 +34,5 @@ tiktoken # AMD wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 3b052423..a478d7d3 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -34,5 +34,5 @@ tiktoken # AMD wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 8c51459e..98ed90a2 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -35,4 +35,4 @@ tiktoken https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index b9f15d45..cb72d036 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -36,4 +36,4 @@ https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_ https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index dfd42577..f6982134 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -37,8 +37,8 @@ https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_ https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version 
== "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" From dc8ed6dbe769457b3a2758780abefab0ab04c8a4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 14:27:33 -0700 Subject: [PATCH 32/59] Bump exllamav3 to 0.0.3 --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 2 +- requirements/full/requirements_apple_silicon.txt | 2 +- requirements/full/requirements_noavx2.txt | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index dd631341..ec055876 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -35,8 +35,8 @@ tiktoken # CUDA wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 98ed90a2..96a48f32 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -34,5 +34,5 @@ tiktoken # Mac wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl +https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index cb72d036..14b74081 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -35,5 +35,5 @@ tiktoken https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl +https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index f6982134..de507308 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -35,8 +35,8 @@ tiktoken # CUDA wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" From 1d88456659d8e71800f6fb732b8cad7d36fa4c20 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 20:15:07 -0700 Subject: [PATCH 33/59] Add support for .docx attachments --- README.md | 2 +- modules/chat.py | 50 +++++++++++++++++++ requirements/full/requirements.txt | 1 + requirements/full/requirements_amd.txt | 1 + requirements/full/requirements_amd_noavx2.txt | 1 + .../full/requirements_apple_intel.txt | 1 + .../full/requirements_apple_silicon.txt | 1 + requirements/full/requirements_cpu_only.txt | 1 + .../full/requirements_cpu_only_noavx2.txt | 1 + requirements/full/requirements_noavx2.txt | 1 + requirements/full/requirements_nowheels.txt | 1 + requirements/portable/requirements.txt | 1 + .../portable/requirements_apple_intel.txt | 1 + .../portable/requirements_apple_silicon.txt | 1 + .../portable/requirements_cpu_only.txt | 1 + .../portable/requirements_cpu_only_noavx2.txt | 1 + requirements/portable/requirements_noavx2.txt | 1 + .../portable/requirements_nowheels.txt | 1 + requirements/portable/requirements_vulkan.txt | 1 + .../portable/requirements_vulkan_noavx2.txt | 1 + 20 files changed, 69 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 55df33d2..16b02539 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. - Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory. - 100% offline and private, with zero telemetry, external resources, or remote update requests. - Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats. -- **File attachments**: Upload text files and PDF documents to talk about their contents. +- **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents. - **Web search**: Optionally search the internet with LLM-generated queries to add context to the conversation. - Aesthetic UI with dark and light themes. 
- `instruct` mode for instruction-following (like ChatGPT), and `chat-instruct`/`chat` modes for talking to custom characters. diff --git a/modules/chat.py b/modules/chat.py index 881f7330..ba61c7a9 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -500,6 +500,9 @@ def add_message_attachment(history, row_idx, file_path, is_user=True): # Process PDF file content = extract_pdf_text(path) file_type = "application/pdf" + elif file_extension == '.docx': + content = extract_docx_text(path) + file_type = "application/docx" else: # Default handling for text files with open(path, 'r', encoding='utf-8') as f: @@ -538,6 +541,53 @@ def extract_pdf_text(pdf_path): return f"[Error extracting PDF text: {str(e)}]" +def extract_docx_text(docx_path): + """ + Extract text from a .docx file, including headers, + body (paragraphs and tables), and footers. + """ + try: + import docx + + doc = docx.Document(docx_path) + parts = [] + + # 1) Extract non-empty header paragraphs from each section + for section in doc.sections: + for para in section.header.paragraphs: + text = para.text.strip() + if text: + parts.append(text) + + # 2) Extract body blocks (paragraphs and tables) in document order + parent_elm = doc.element.body + for child in parent_elm.iterchildren(): + if isinstance(child, docx.oxml.text.paragraph.CT_P): + para = docx.text.paragraph.Paragraph(child, doc) + text = para.text.strip() + if text: + parts.append(text) + + elif isinstance(child, docx.oxml.table.CT_Tbl): + table = docx.table.Table(child, doc) + for row in table.rows: + cells = [cell.text.strip() for cell in row.cells] + parts.append("\t".join(cells)) + + # 3) Extract non-empty footer paragraphs from each section + for section in doc.sections: + for para in section.footer.paragraphs: + text = para.text.strip() + if text: + parts.append(text) + + return "\n".join(parts) + + except Exception as e: + logger.error(f"Error extracting text from DOCX: {e}") + return f"[Error extracting DOCX text: {str(e)}]" + + def generate_search_query(user_message, state): """Generate a search query from user message using the LLM""" # Augment the user message with search instruction diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index ec055876..e61677a6 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -16,6 +16,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index acdbd455..f807199d 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index a478d7d3..4fb70eb1 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 96a48f32..a311ab9b 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git 
a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 14b74081..30e8409a 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 0877d968..70949949 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index cab78237..318bb93a 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index de507308..e0cb84b4 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -16,6 +16,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index 5d9f84ce..a412367c 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index fdae681d..bde310e1 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index a58f39f7..521edc0c 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 91ea3a6d..ef7946ff 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index 37e5aa40..a3ad743e 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index dcb2884b..eec052d3 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 
+python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 8f1295bb..c9898a05 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt index 21805fe2..f6c866cf 100644 --- a/requirements/portable/requirements_nowheels.txt +++ b/requirements/portable/requirements_nowheels.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 858b4488..0de9c7cb 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index 569bae99..2bfb4d51 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich From 4a2727b71d8976366cc35e18048ad9742ccb1898 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 20:24:31 -0700 Subject: [PATCH 34/59] Add a tooltip to the file upload button --- js/main.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/js/main.js b/js/main.js index f23dc246..0fdd7ffd 100644 --- a/js/main.js +++ b/js/main.js @@ -872,3 +872,10 @@ function navigateLastAssistantMessage(direction) { return false; } + +//------------------------------------------------ +// Tooltips +//------------------------------------------------ + +// File upload button +document.querySelector("#chat-input .upload-button").title = "Upload text files, PDFs, and DOCX documents"; From f8d220c1e6c0263e76797b0e34dc9ce20335875b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 21:22:36 -0700 Subject: [PATCH 35/59] Add a tooltip to the web search checkbox --- js/main.js | 3 +++ modules/ui_chat.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/js/main.js b/js/main.js index 0fdd7ffd..b9cb3cdd 100644 --- a/js/main.js +++ b/js/main.js @@ -879,3 +879,6 @@ function navigateLastAssistantMessage(direction) { // File upload button document.querySelector("#chat-input .upload-button").title = "Upload text files, PDFs, and DOCX documents"; + +// Activate web search +document.getElementById("web-search").title = "Search the internet with DuckDuckGo"; diff --git a/modules/ui_chat.py b/modules/ui_chat.py index d79aa523..73528a92 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -87,7 +87,7 @@ def create_ui(): shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar']) with gr.Row(): - shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search') + shared.gradio['enable_web_search'] = 
gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search', elem_id='web-search') with gr.Row(visible=shared.settings.get('enable_web_search', False)) as shared.gradio['web_search_row']: shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label='Number of pages to download', minimum=1, maximum=10) From 85f2f01a3a78dc85bce9eeded71d9ff9f5bd4ab3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 21:29:25 -0700 Subject: [PATCH 36/59] UI: Fix extra gaps on the right sidebar --- css/main.css | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/css/main.css b/css/main.css index 967d94ed..bdaacd4f 100644 --- a/css/main.css +++ b/css/main.css @@ -1555,3 +1555,8 @@ strong { button:focus { outline: none; } + +/* Fix extra gaps for hidden elements on the right sidebar */ +.svelte-sa48pu.stretch:has(> .hidden:only-child) { + display: none; +} From 98a7508a99f2c3bcb2139f7ef975b692f004c695 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 22:18:17 -0700 Subject: [PATCH 37/59] UI: Move 'Show controls' inside the hover menu --- css/main.css | 52 +++++++++++++++++++++++----------------------- js/main.js | 40 ++++++++++++++--------------------- modules/ui_chat.py | 25 ++++++++-------------- 3 files changed, 51 insertions(+), 66 deletions(-) diff --git a/css/main.css b/css/main.css index bdaacd4f..adc59fba 100644 --- a/css/main.css +++ b/css/main.css @@ -582,7 +582,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { #chat-input { padding: 0; - padding-top: 18px; background: transparent; border: none; } @@ -661,31 +660,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { } } -#show-controls { - position: absolute; - background-color: transparent; - border: 0 !important; - border-radius: 0; -} - -#show-controls label { - z-index: 1000; - position: absolute; - right: 30px; - top: 10px; - white-space: nowrap; - overflow: hidden; - text-overflow: ellipsis; -} - -.dark #show-controls span { - color: var(--neutral-400); -} - -#show-controls span { - color: var(--neutral-600); -} - #typing-container { display: none; position: absolute; @@ -785,6 +759,32 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { background: var(--selected-item-color-dark) !important; } +#show-controls { + height: 36px; + border-top: 1px solid var(--border-color-dark) !important; + border-left: 1px solid var(--border-color-dark) !important; + border-right: 1px solid var(--border-color-dark) !important; + border-radius: 0; + border-bottom: 0 !important; + background-color: var(--darker-gray); + padding-top: 3px; + padding-left: 4px; + display: flex; +} + +#show-controls label { + display: flex; + flex-direction: row-reverse; + font-weight: bold; + justify-content: space-between; + width: 100%; + padding-right: 12px; +} + +#show-controls label input { + margin-top: 4px; +} + .transparent-substring { opacity: 0.333; } diff --git a/js/main.js b/js/main.js index b9cb3cdd..3652daa0 100644 --- a/js/main.js +++ b/js/main.js @@ -277,7 +277,7 @@ for (i = 0; i < slimDropdownElements.length; i++) { // The show/hide events were adapted from: // https://github.com/SillyTavern/SillyTavern/blob/6c8bd06308c69d51e2eb174541792a870a83d2d6/public/script.js //------------------------------------------------ -var buttonsInChat = document.querySelectorAll("#chat-tab #chat-buttons button"); +var buttonsInChat = document.querySelectorAll("#chat-tab 
#chat-buttons button, #chat-tab #chat-buttons #show-controls"); var button = document.getElementById("hover-element-button"); var menu = document.getElementById("hover-menu"); var istouchscreen = (navigator.maxTouchPoints > 0) || "ontouchstart" in document.documentElement; @@ -298,18 +298,21 @@ if (buttonsInChat.length > 0) { const thisButton = buttonsInChat[i]; menu.appendChild(thisButton); - thisButton.addEventListener("click", () => { - hideMenu(); - }); + // Only apply transformations to button elements + if (thisButton.tagName.toLowerCase() === 'button') { + thisButton.addEventListener("click", () => { + hideMenu(); + }); + + const buttonText = thisButton.textContent; + const matches = buttonText.match(/(\(.*?\))/); - const buttonText = thisButton.textContent; - const matches = buttonText.match(/(\(.*?\))/); - - if (matches && matches.length > 1) { - // Apply the transparent-substring class to the matched substring - const substring = matches[1]; - const newText = buttonText.replace(substring, ` ${substring.slice(1, -1)}`); - thisButton.innerHTML = newText; + if (matches && matches.length > 1) { + // Apply the transparent-substring class to the matched substring + const substring = matches[1]; + const newText = buttonText.replace(substring, ` ${substring.slice(1, -1)}`); + thisButton.innerHTML = newText; + } } } } @@ -382,21 +385,10 @@ document.addEventListener("click", function (event) { } }); -//------------------------------------------------ -// Relocate the "Show controls" checkbox -//------------------------------------------------ -var elementToMove = document.getElementById("show-controls"); -var parent = elementToMove.parentNode; -for (var i = 0; i < 2; i++) { - parent = parent.parentNode; -} - -parent.insertBefore(elementToMove, parent.firstChild); - //------------------------------------------------ // Position the chat input //------------------------------------------------ -document.getElementById("show-controls").parentNode.classList.add("chat-input-positioned"); +document.getElementById("chat-input-row").classList.add("chat-input-positioned"); //------------------------------------------------ // Focus on the chat input diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 73528a92..822b77b8 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -55,7 +55,6 @@ def create_ui(): with gr.Column(scale=10, elem_id='chat-input-container'): shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar']) - shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls') shared.gradio['typing-dots'] = gr.HTML(value='
', label='typing', elem_id='typing-container') with gr.Column(scale=1, elem_id='generate-stop-container'): @@ -65,21 +64,15 @@ def create_ui(): # Hover menu buttons with gr.Column(elem_id='chat-buttons'): - with gr.Row(): - shared.gradio['Regenerate'] = gr.Button('Regenerate (Ctrl + Enter)', elem_id='Regenerate') - shared.gradio['Continue'] = gr.Button('Continue (Alt + Enter)', elem_id='Continue') - shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last') - - with gr.Row(): - shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate') - - with gr.Row(): - shared.gradio['Send dummy message'] = gr.Button('Send dummy message') - shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply') - - with gr.Row(): - shared.gradio['send-chat-to-default'] = gr.Button('Send to Default') - shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook') + shared.gradio['Regenerate'] = gr.Button('Regenerate (Ctrl + Enter)', elem_id='Regenerate') + shared.gradio['Continue'] = gr.Button('Continue (Alt + Enter)', elem_id='Continue') + shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last') + shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate') + shared.gradio['Send dummy message'] = gr.Button('Send dummy message') + shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply') + shared.gradio['send-chat-to-default'] = gr.Button('Send to Default') + shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook') + shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls') with gr.Row(elem_id='chat-controls', elem_classes=['pretty_scrollbar']): with gr.Column(): From 0816ecedb75add2dd1a61c9bd9a477e5d847c88a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 22:24:39 -0700 Subject: [PATCH 38/59] Lint --- js/main.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/js/main.js b/js/main.js index 3652daa0..d152a572 100644 --- a/js/main.js +++ b/js/main.js @@ -299,11 +299,11 @@ if (buttonsInChat.length > 0) { menu.appendChild(thisButton); // Only apply transformations to button elements - if (thisButton.tagName.toLowerCase() === 'button') { + if (thisButton.tagName.toLowerCase() === "button") { thisButton.addEventListener("click", () => { hideMenu(); }); - + const buttonText = thisButton.textContent; const matches = buttonText.match(/(\(.*?\))/); From 9e801930087170bb24628e680ad4cbd4f6a5b098 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 22:39:07 -0700 Subject: [PATCH 39/59] Add the model name to each message's metadata --- modules/chat.py | 2 +- modules/html_generator.py | 47 ++++++++++++++++++++++++++------------- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index ba61c7a9..1222d2bb 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -710,7 +710,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess # Add timestamp for assistant's response at the start of generation row_idx = len(output['internal']) - 1 - update_message_metadata(output['metadata'], "assistant", row_idx, timestamp=get_current_timestamp()) + update_message_metadata(output['metadata'], "assistant", row_idx, 
timestamp=get_current_timestamp(), model_name=shared.model_name) # Generate reply = None diff --git a/modules/html_generator.py b/modules/html_generator.py index cbf3e19c..03b5d485 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -350,12 +350,14 @@ remove_button = f'' -def format_message_timestamp(history, role, index): +def format_message_timestamp(history, role, index, tooltip_include_timestamp=True): """Get a formatted timestamp HTML span for a message if available""" key = f"{role}_{index}" if 'metadata' in history and key in history['metadata'] and history['metadata'][key].get('timestamp'): timestamp = history['metadata'][key]['timestamp'] - return f"{timestamp}" + tooltip_text = get_message_tooltip(history, role, index, include_timestamp=tooltip_include_timestamp) + title_attr = f' title="{html.escape(tooltip_text)}"' if tooltip_text else '' + return f"{timestamp}" return "" @@ -388,6 +390,23 @@ def format_message_attachments(history, role, index): return "" +def get_message_tooltip(history, role, index, include_timestamp=True): + """Get tooltip text combining timestamp and model name for a message""" + key = f"{role}_{index}" + if 'metadata' not in history or key not in history['metadata']: + return "" + + meta = history['metadata'][key] + tooltip_parts = [] + + if include_timestamp and meta.get('timestamp'): + tooltip_parts.append(meta['timestamp']) + if meta.get('model_name'): + tooltip_parts.append(f"Model: {meta['model_name']}") + + return " | ".join(tooltip_parts) + + def get_version_navigation_html(history, i, role): """Generate simple navigation arrows for message versions""" key = f"{role}_{i}" @@ -462,15 +481,13 @@ def generate_instruct_html(history): # Create info buttons for timestamps if they exist info_message_user = "" if user_timestamp != "": - # Extract the timestamp value from the span - user_timestamp_value = user_timestamp.split('>', 1)[1].split('<', 1)[0] - info_message_user = info_button.replace("message", user_timestamp_value) + tooltip_text = get_message_tooltip(history, "user", i) + info_message_user = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') info_message_assistant = "" if assistant_timestamp != "": - # Extract the timestamp value from the span - assistant_timestamp_value = assistant_timestamp.split('>', 1)[1].split('<', 1)[0] - info_message_assistant = info_button.replace("message", assistant_timestamp_value) + tooltip_text = get_message_tooltip(history, "assistant", i) + info_message_assistant = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') if converted_visible[0]: # Don't display empty user messages output += ( @@ -521,8 +538,8 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache= converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] # Get timestamps - user_timestamp = format_message_timestamp(history, "user", i) - assistant_timestamp = format_message_timestamp(history, "assistant", i) + user_timestamp = format_message_timestamp(history, "user", i, tooltip_include_timestamp=False) + assistant_timestamp = format_message_timestamp(history, "assistant", i, tooltip_include_timestamp=False) # Get attachments user_attachments = format_message_attachments(history, "user", i) @@ -580,15 +597,13 @@ def generate_chat_html(history, name1, name2, reset_cache=False): # Create info buttons for timestamps if they exist info_message_user = "" if user_timestamp != 
"": - # Extract the timestamp value from the span - user_timestamp_value = user_timestamp.split('>', 1)[1].split('<', 1)[0] - info_message_user = info_button.replace("message", user_timestamp_value) + tooltip_text = get_message_tooltip(history, "user", i) + info_message_user = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') info_message_assistant = "" if assistant_timestamp != "": - # Extract the timestamp value from the span - assistant_timestamp_value = assistant_timestamp.split('>', 1)[1].split('<', 1)[0] - info_message_assistant = info_button.replace("message", assistant_timestamp_value) + tooltip_text = get_message_tooltip(history, "assistant", i) + info_message_assistant = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') if converted_visible[0]: # Don't display empty user messages output += ( From 88ff3e6ad8ddf96aabf6d7ceb4c228ed6fb08980 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 08:00:37 -0700 Subject: [PATCH 40/59] CSS fixes after 98a7508a99f2c3bcb2139f7ef975b692f004c695 --- css/main.css | 2 +- js/main.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/css/main.css b/css/main.css index adc59fba..0c6dc16e 100644 --- a/css/main.css +++ b/css/main.css @@ -665,7 +665,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { position: absolute; background-color: transparent; left: -2px; - top: 4px; + top: -14px; padding: var(--block-padding); } diff --git a/js/main.js b/js/main.js index d152a572..05c19571 100644 --- a/js/main.js +++ b/js/main.js @@ -184,7 +184,7 @@ const observer = new MutationObserver(function(mutations) { const prevSibling = lastChild?.previousElementSibling; if (lastChild && prevSibling) { lastChild.style.setProperty("margin-bottom", - `max(0px, calc(max(70vh, 100vh - ${prevSibling.offsetHeight}px - 102px) - ${lastChild.offsetHeight}px))`, + `max(0px, calc(max(70vh, 100vh - ${prevSibling.offsetHeight}px - 84px) - ${lastChild.offsetHeight}px))`, "important" ); } From 3e3746283cd60409f83b6cf5549ba08d12612bde Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 10:55:31 -0700 Subject: [PATCH 41/59] Improve the typing dots position --- css/main.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index 0c6dc16e..296476cd 100644 --- a/css/main.css +++ b/css/main.css @@ -665,7 +665,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { position: absolute; background-color: transparent; left: -2px; - top: -14px; + top: -5px; padding: var(--block-padding); } From 83849336d8efcae0340b768a39c83106ee406264 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 10:58:28 -0700 Subject: [PATCH 42/59] Improve how Show controls looks in the hover menu --- css/main.css | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index 296476cd..71d67ff4 100644 --- a/css/main.css +++ b/css/main.css @@ -776,9 +776,10 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { display: flex; flex-direction: row-reverse; font-weight: bold; - justify-content: space-between; + justify-content: start; width: 100%; padding-right: 12px; + gap: 10px; } #show-controls label input { From bf42b2c3a1175266dcc7c481f589d53805d956f3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 11:02:04 -0700 Subject: [PATCH 43/59] 
Fix thinking blocks sometimes showing a white outline --- css/main.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/css/main.css b/css/main.css index 71d67ff4..a9cb36ab 100644 --- a/css/main.css +++ b/css/main.css @@ -1327,6 +1327,10 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { overflow: hidden; } +.thinking-content:focus, .thinking-header:focus { + outline: 0 !important; +} + .dark .thinking-block { background-color: var(--darker-gray); } From 7a81beb0c16ff51a90fbe77e6300076714af1fd0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 18:23:23 -0700 Subject: [PATCH 44/59] Turn long pasted text into an attachment automatically --- js/main.js | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/js/main.js b/js/main.js index 05c19571..8090937f 100644 --- a/js/main.js +++ b/js/main.js @@ -865,6 +865,46 @@ function navigateLastAssistantMessage(direction) { return false; } +//------------------------------------------------ +// Paste Handler for Long Text +//------------------------------------------------ + +const MAX_PLAIN_TEXT_LENGTH = 2500; + +function setupPasteHandler() { + const textbox = document.querySelector("#chat-input textarea[data-testid=\"textbox\"]"); + const fileInput = document.querySelector("#chat-input input[data-testid=\"file-upload\"]"); + + if (!textbox || !fileInput) { + setTimeout(setupPasteHandler, 500); + return; + } + + textbox.addEventListener("paste", async (event) => { + const text = event.clipboardData?.getData("text"); + + if (text && text.length > MAX_PLAIN_TEXT_LENGTH) { + event.preventDefault(); + + const file = new File([text], "pasted_text.txt", { + type: "text/plain", + lastModified: Date.now() + }); + + const dataTransfer = new DataTransfer(); + dataTransfer.items.add(file); + fileInput.files = dataTransfer.files; + fileInput.dispatchEvent(new Event("change", { bubbles: true })); + } + }); +} + +if (document.readyState === "loading") { + document.addEventListener("DOMContentLoaded", setupPasteHandler); +} else { + setupPasteHandler(); +} + //------------------------------------------------ // Tooltips //------------------------------------------------ From 92adceb7b57464ef03886cba5324a32e7d8f8b67 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 19:22:21 -0700 Subject: [PATCH 45/59] UI: Fix the model downloader progress bar --- download-model.py | 52 ++++++++++++------ modules/ui_model_menu.py | 115 +++++++++++++++++++++++++++------------ 2 files changed, 115 insertions(+), 52 deletions(-) diff --git a/download-model.py b/download-model.py index 25517491..576a8b79 100644 --- a/download-model.py +++ b/download-model.py @@ -32,6 +32,7 @@ class ModelDownloader: self.max_retries = max_retries self.session = self.get_session() self._progress_bar_slots = None + self.progress_queue = None def get_session(self): session = requests.Session() @@ -218,33 +219,45 @@ class ModelDownloader: max_retries = self.max_retries attempt = 0 + file_downloaded_count_for_progress = 0 + try: while attempt < max_retries: attempt += 1 session = self.session headers = {} mode = 'wb' + current_file_size_on_disk = 0 try: if output_path.exists() and not start_from_scratch: - # Resume download - r = session.get(url, stream=True, timeout=20) - total_size = int(r.headers.get('content-length', 0)) - if output_path.stat().st_size >= total_size: + current_file_size_on_disk = output_path.stat().st_size + r_head = 
session.head(url, timeout=20) + r_head.raise_for_status() + total_size = int(r_head.headers.get('content-length', 0)) + + if current_file_size_on_disk >= total_size and total_size > 0: + if self.progress_queue is not None and total_size > 0: + self.progress_queue.put((1.0, str(filename))) return - headers = {'Range': f'bytes={output_path.stat().st_size}-'} + headers = {'Range': f'bytes={current_file_size_on_disk}-'} mode = 'ab' with session.get(url, stream=True, headers=headers, timeout=30) as r: - r.raise_for_status() # If status is not 2xx, raise an error - total_size = int(r.headers.get('content-length', 0)) - block_size = 1024 * 1024 # 1MB + r.raise_for_status() + total_size_from_stream = int(r.headers.get('content-length', 0)) + if mode == 'ab': + effective_total_size = current_file_size_on_disk + total_size_from_stream + else: + effective_total_size = total_size_from_stream - filename_str = str(filename) # Convert PosixPath to string if necessary + block_size = 1024 * 1024 + filename_str = str(filename) tqdm_kwargs = { - 'total': total_size, + 'total': effective_total_size, + 'initial': current_file_size_on_disk if mode == 'ab' else 0, 'unit': 'B', 'unit_scale': True, 'unit_divisor': 1024, @@ -261,16 +274,20 @@ class ModelDownloader: }) with open(output_path, mode) as f: + if mode == 'ab': + f.seek(current_file_size_on_disk) + with tqdm.tqdm(**tqdm_kwargs) as t: - count = 0 + file_downloaded_count_for_progress = current_file_size_on_disk for data in r.iter_content(block_size): f.write(data) t.update(len(data)) - if total_size != 0 and self.progress_bar is not None: - count += len(data) - self.progress_bar(float(count) / float(total_size), f"{filename_str}") + if effective_total_size != 0 and self.progress_queue is not None: + file_downloaded_count_for_progress += len(data) + progress_fraction = float(file_downloaded_count_for_progress) / float(effective_total_size) + self.progress_queue.put((progress_fraction, filename_str)) + break - break # Exit loop if successful except (RequestException, ConnectionError, Timeout) as e: print(f"Error downloading {filename}: {e}.") print(f"That was attempt {attempt}/{max_retries}.", end=' ') @@ -295,10 +312,9 @@ class ModelDownloader: finally: print(f"\nDownload of {len(file_list)} files to {output_folder} completed.") - def download_model_files(self, model, branch, links, sha256, output_folder, progress_bar=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False): - self.progress_bar = progress_bar + def download_model_files(self, model, branch, links, sha256, output_folder, progress_queue=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False): + self.progress_queue = progress_queue - # Create the folder and writing the metadata output_folder.mkdir(parents=True, exist_ok=True) if not is_llamacpp: diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 862b3893..2a7d3d9d 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -1,4 +1,6 @@ import importlib +import queue +import threading import traceback from functools import partial from pathlib import Path @@ -205,48 +207,51 @@ def load_lora_wrapper(selected_loras): def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), return_links=False, check=False): + downloader_module = importlib.import_module("download-model") + downloader = downloader_module.ModelDownloader() + update_queue = queue.Queue() + try: # Handle direct GGUF URLs if repo_id.startswith("https://") and ("huggingface.co" in repo_id) 
and (repo_id.endswith(".gguf") or repo_id.endswith(".gguf?download=true")): try: path = repo_id.split("huggingface.co/")[1] - - # Extract the repository ID (first two parts of the path) parts = path.split("/") if len(parts) >= 2: extracted_repo_id = f"{parts[0]}/{parts[1]}" - - # Extract the filename (last part of the path) - filename = repo_id.split("/")[-1] - if "?download=true" in filename: - filename = filename.replace("?download=true", "") - + filename = repo_id.split("/")[-1].replace("?download=true", "") repo_id = extracted_repo_id specific_file = filename - except: - pass + except Exception as e: + yield f"Error parsing GGUF URL: {e}" + progress(0.0) + return - if repo_id == "": - yield ("Please enter a model path") + if not repo_id: + yield "Please enter a model path." + progress(0.0) return repo_id = repo_id.strip() specific_file = specific_file.strip() - downloader = importlib.import_module("download-model").ModelDownloader() - progress(0.0) + progress(0.0, "Preparing download...") + model, branch = downloader.sanitize_model_and_branch_names(repo_id, None) - - yield ("Getting the download links from Hugging Face") + yield "Getting download links from Hugging Face..." links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file) + if not links: + yield "No files found to download for the given model/criteria." + progress(0.0) + return + # Check for multiple GGUF files gguf_files = [link for link in links if link.lower().endswith('.gguf')] if len(gguf_files) > 1 and not specific_file: output = "Multiple GGUF files found. Please copy one of the following filenames to the 'File name' field:\n\n```\n" for link in gguf_files: output += f"{Path(link).name}\n" - output += "```" yield output return @@ -255,17 +260,13 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur output = "```\n" for link in links: output += f"{Path(link).name}" + "\n" - output += "```" yield output return - yield ("Getting the output folder") + yield "Determining output folder..." output_folder = downloader.get_output_folder( - model, - branch, - is_lora, - is_llamacpp=is_llamacpp, + model, branch, is_lora, is_llamacpp=is_llamacpp, model_dir=shared.args.model_dir if shared.args.model_dir != shared.args_defaults.model_dir else None ) @@ -275,19 +276,65 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur output_folder = Path(shared.args.lora_dir) if check: - progress(0.5) - - yield ("Checking previously downloaded files") + yield "Checking previously downloaded files..." + progress(0.5, "Verifying files...") downloader.check_model_files(model, branch, links, sha256, output_folder) - progress(1.0) - else: - yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`") - downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp) + progress(1.0, "Verification complete.") + yield "File check complete." 
+ return - yield (f"Model successfully saved to `{output_folder}/`.") - except: - progress(1.0) - yield traceback.format_exc().replace('\n', '\n\n') + yield "" + progress(0.0, "Download starting...") + + def downloader_thread_target(): + try: + downloader.download_model_files( + model, branch, links, sha256, output_folder, + progress_queue=update_queue, + threads=4, + is_llamacpp=is_llamacpp, + specific_file=specific_file + ) + update_queue.put(("COMPLETED", f"Model successfully saved to `{output_folder}/`.")) + except Exception as e: + tb_str = traceback.format_exc().replace('\n', '\n\n') + update_queue.put(("ERROR", tb_str)) + + download_thread = threading.Thread(target=downloader_thread_target) + download_thread.start() + + while True: + try: + message = update_queue.get(timeout=0.2) + if not isinstance(message, tuple) or len(message) != 2: + continue + + msg_identifier, data = message + + if msg_identifier == "COMPLETED": + progress(1.0, "Download complete!") + yield data + break + elif msg_identifier == "ERROR": + progress(0.0, "Error occurred") + yield data + break + elif isinstance(msg_identifier, float): + progress_value = msg_identifier + description_str = data + progress(progress_value, f"Downloading: {description_str}") + + except queue.Empty: + if not download_thread.is_alive(): + yield "Download process finished." + break + + download_thread.join() + + except Exception as e: + progress(0.0) + tb_str = traceback.format_exc().replace('\n', '\n\n') + yield tb_str def update_truncation_length(current_length, state): From ad6d0218ae0c015694bef7a43f5f628d281a1c36 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 19:27:14 -0700 Subject: [PATCH 46/59] Fix after 219f0a773166deeb0326c2874b29e66e382df524 --- modules/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/models.py b/modules/models.py index d329ae3c..c1e7fb56 100644 --- a/modules/models.py +++ b/modules/models.py @@ -116,7 +116,7 @@ def unload_model(keep_model_name=False): return is_llamacpp = (shared.model.__class__.__name__ == 'LlamaServer') - if shared.args.loader == 'ExLlamav3_HF': + if shared.model.__class__.__name__ == 'Exllamav3HF': shared.model.unload() shared.model = shared.tokenizer = None From 2db7745cbde543d7e1abd81c0389c544c84621db Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 22:12:24 -0700 Subject: [PATCH 47/59] Show llama.cpp prompt processing on one line instead of many lines --- modules/llama_cpp_server.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py index d695c74e..aa712541 100644 --- a/modules/llama_cpp_server.py +++ b/modules/llama_cpp_server.py @@ -409,14 +409,31 @@ class LlamaServer: def filter_stderr_with_progress(process_stderr): progress_pattern = re.compile(r'slot update_slots: id.*progress = (\d+\.\d+)') + last_was_progress = False + try: for line in iter(process_stderr.readline, ''): + line = line.rstrip('\n\r') # Remove existing newlines progress_match = progress_pattern.search(line) + if progress_match: - sys.stderr.write(line) + if last_was_progress: + # Overwrite the previous progress line using carriage return + sys.stderr.write(f'\r{line}') + else: + # First progress line - print normally + sys.stderr.write(line) sys.stderr.flush() + last_was_progress = True elif not line.startswith(('srv ', 'slot ')) and 'log_server_r: request: GET 
/health' not in line: - sys.stderr.write(line) + if last_was_progress: + # Finish the progress line with a newline, then print the new line + sys.stderr.write(f'\n{line}\n') + else: + # Normal line - print with newline + sys.stderr.write(f'{line}\n') sys.stderr.flush() + last_was_progress = False + # For filtered lines, don't change last_was_progress state except (ValueError, IOError): pass From 45c9ae312c1ff60ce13c721d1290b65f01bf9660 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 22:17:22 -0700 Subject: [PATCH 48/59] Use the flash-attention wheels in https://github.com/kingbri1/flash-attention --- requirements/full/requirements.txt | 2 +- requirements/full/requirements_noavx2.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index e61677a6..04d97220 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -41,5 +41,5 @@ https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+ https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index e0cb84b4..7c3635cc 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -41,5 +41,5 @@ https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+ https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
+https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" From bb409c926e986e57b8c3eea3582abb466f32ad08 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Mon, 2 Jun 2025 09:50:17 -0300 Subject: [PATCH 49/59] Update only the last message during streaming + add back dynamic UI update speed (#7038) --- js/global_scope_js.js | 25 ++- modules/chat.py | 4 +- modules/html_generator.py | 274 ++++++++++++++++--------------- modules/shared.py | 3 +- modules/text_generation.py | 18 +- modules/ui.py | 6 +- modules/ui_chat.py | 4 +- modules/ui_parameters.py | 2 - user_data/settings-template.yaml | 1 - 9 files changed, 181 insertions(+), 156 deletions(-) diff --git a/js/global_scope_js.js b/js/global_scope_js.js index 3274f47e..d5140c93 100644 --- a/js/global_scope_js.js +++ b/js/global_scope_js.js @@ -229,10 +229,23 @@ function removeLastClick() { document.getElementById("Remove-last").click(); } -function handleMorphdomUpdate(text) { +function handleMorphdomUpdate(data) { + // Determine target element and use it as query scope + var target_element, target_html; + if (data.last_message_only) { + const childNodes = document.getElementsByClassName("messages")[0].childNodes; + target_element = childNodes[childNodes.length - 1]; + target_html = data.html; + } else { + target_element = document.getElementById("chat").parentNode; + target_html = "
" + data.html + "
"; + } + + const queryScope = target_element; + // Track open blocks const openBlocks = new Set(); - document.querySelectorAll(".thinking-block").forEach(block => { + queryScope.querySelectorAll(".thinking-block").forEach(block => { const blockId = block.getAttribute("data-block-id"); // If block exists and is open, add to open set if (blockId && block.hasAttribute("open")) { @@ -242,7 +255,7 @@ function handleMorphdomUpdate(text) { // Store scroll positions for any open blocks const scrollPositions = {}; - document.querySelectorAll(".thinking-block[open]").forEach(block => { + queryScope.querySelectorAll(".thinking-block[open]").forEach(block => { const content = block.querySelector(".thinking-content"); const blockId = block.getAttribute("data-block-id"); if (content && blockId) { @@ -255,8 +268,8 @@ function handleMorphdomUpdate(text) { }); morphdom( - document.getElementById("chat").parentNode, - "
" + text + "
", + target_element, + target_html, { onBeforeElUpdated: function(fromEl, toEl) { // Preserve code highlighting @@ -307,7 +320,7 @@ function handleMorphdomUpdate(text) { ); // Add toggle listeners for new blocks - document.querySelectorAll(".thinking-block").forEach(block => { + queryScope.querySelectorAll(".thinking-block").forEach(block => { if (!block._hasToggleListener) { block.addEventListener("toggle", function(e) { if (this.open) { diff --git a/modules/chat.py b/modules/chat.py index 1222d2bb..f1ea16f1 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -825,7 +825,9 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False): last_save_time = time.monotonic() save_interval = 8 for i, history in enumerate(generate_chat_reply(text, state, regenerate, _continue, loading_message=True, for_ui=True)): - yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']), history + yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'], last_message_only=(i > 0)), history + if i == 0: + time.sleep(0.125) # We need this to make sure the first update goes through current_time = time.monotonic() # Save on first iteration or if save_interval seconds have passed diff --git a/modules/html_generator.py b/modules/html_generator.py index 03b5d485..f90e3b04 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -462,64 +462,69 @@ def actions_html(history, i, role, info_message=""): f'{version_nav_html}') -def generate_instruct_html(history): - output = f'
' +def generate_instruct_html(history, last_message_only=False): + if not last_message_only: + output = f'
' + else: + output = "" - for i in range(len(history['visible'])): - row_visible = history['visible'][i] - row_internal = history['internal'][i] - converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + def create_message(role, content, raw_content): + """Inner function that captures variables from outer scope.""" + class_name = "user-message" if role == "user" else "assistant-message" - # Get timestamps - user_timestamp = format_message_timestamp(history, "user", i) - assistant_timestamp = format_message_timestamp(history, "assistant", i) + # Get role-specific data + timestamp = format_message_timestamp(history, role, i) + attachments = format_message_attachments(history, role, i) - # Get attachments - user_attachments = format_message_attachments(history, "user", i) - assistant_attachments = format_message_attachments(history, "assistant", i) + # Create info button if timestamp exists + info_message = "" + if timestamp: + tooltip_text = get_message_tooltip(history, role, i) + info_message = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - # Create info buttons for timestamps if they exist - info_message_user = "" - if user_timestamp != "": - tooltip_text = get_message_tooltip(history, "user", i) - info_message_user = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - - info_message_assistant = "" - if assistant_timestamp != "": - tooltip_text = get_message_tooltip(history, "assistant", i) - info_message_assistant = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - - if converted_visible[0]: # Don't display empty user messages - output += ( - f'
' - f'
' - f'
{converted_visible[0]}
' - f'{user_attachments}' - f'{actions_html(history, i, "user", info_message_user)}' - f'
' - f'
' - ) - - output += ( - f'
' f'
' - f'
{converted_visible[1]}
' - f'{assistant_attachments}' - f'{actions_html(history, i, "assistant", info_message_assistant)}' + f'
{content}
' + f'{attachments}' + f'{actions_html(history, i, role, info_message)}' f'
' f'
' ) - output += "
" + # Determine range + start_idx = len(history['visible']) - 1 if last_message_only else 0 + end_idx = len(history['visible']) + + for i in range(start_idx, end_idx): + row_visible = history['visible'][i] + row_internal = history['internal'][i] + + # Convert content + if last_message_only: + converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)] + else: + converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + + # Generate messages + if not last_message_only and converted_visible[0]: + output += create_message("user", converted_visible[0], row_internal[0]) + + output += create_message("assistant", converted_visible[1], row_internal[1]) + + if not last_message_only: + output += "
" + return output -def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=False): - output = f'
' +def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=False, last_message_only=False): + if not last_message_only: + output = f'
' + else: + output = "" # We use ?character and ?time.time() to force the browser to reset caches img_bot = ( @@ -527,110 +532,117 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache= if Path("user_data/cache/pfp_character_thumb.png").exists() else '' ) - img_me = ( - f'' - if Path("user_data/cache/pfp_me.png").exists() else '' - ) + def create_message(role, content, raw_content): + """Inner function for CAI-style messages.""" + circle_class = "circle-you" if role == "user" else "circle-bot" + name = name1 if role == "user" else name2 - for i in range(len(history['visible'])): - row_visible = history['visible'][i] - row_internal = history['internal'][i] - converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + # Get role-specific data + timestamp = format_message_timestamp(history, role, i, tooltip_include_timestamp=False) + attachments = format_message_attachments(history, role, i) - # Get timestamps - user_timestamp = format_message_timestamp(history, "user", i, tooltip_include_timestamp=False) - assistant_timestamp = format_message_timestamp(history, "assistant", i, tooltip_include_timestamp=False) + # Get appropriate image + if role == "user": + img = (f'' + if Path("user_data/cache/pfp_me.png").exists() else '') + else: + img = img_bot - # Get attachments - user_attachments = format_message_attachments(history, "user", i) - assistant_attachments = format_message_attachments(history, "assistant", i) - - if converted_visible[0]: # Don't display empty user messages - output += ( - f'
' - f'
{img_me}
' - f'
' - f'
{name1}{user_timestamp}
' - f'
{converted_visible[0]}
' - f'{user_attachments}' - f'{actions_html(history, i, "user")}' - f'
' - f'
' - ) - - output += ( + return ( f'
' - f'
{img_bot}
' + f'
{img}
' f'
' - f'
{name2}{assistant_timestamp}
' - f'
{converted_visible[1]}
' - f'{assistant_attachments}' - f'{actions_html(history, i, "assistant")}' + f'
{name}{timestamp}
' + f'
{content}
' + f'{attachments}' + f'{actions_html(history, i, role)}' f'
' f'
' ) - output += "
" + # Determine range + start_idx = len(history['visible']) - 1 if last_message_only else 0 + end_idx = len(history['visible']) + + for i in range(start_idx, end_idx): + row_visible = history['visible'][i] + row_internal = history['internal'][i] + + # Convert content + if last_message_only: + converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)] + else: + converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + + # Generate messages + if not last_message_only and converted_visible[0]: + output += create_message("user", converted_visible[0], row_internal[0]) + + output += create_message("assistant", converted_visible[1], row_internal[1]) + + if not last_message_only: + output += "
" + return output -def generate_chat_html(history, name1, name2, reset_cache=False): - output = f'
' +def generate_chat_html(history, name1, name2, reset_cache=False, last_message_only=False): + if not last_message_only: + output = f'
' + else: + output = "" - for i in range(len(history['visible'])): - row_visible = history['visible'][i] - row_internal = history['internal'][i] - converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + def create_message(role, content, raw_content): + """Inner function for WPP-style messages.""" + text_class = "text-you" if role == "user" else "text-bot" - # Get timestamps - user_timestamp = format_message_timestamp(history, "user", i) - assistant_timestamp = format_message_timestamp(history, "assistant", i) + # Get role-specific data + timestamp = format_message_timestamp(history, role, i) + attachments = format_message_attachments(history, role, i) - # Get attachments - user_attachments = format_message_attachments(history, "user", i) - assistant_attachments = format_message_attachments(history, "assistant", i) + # Create info button if timestamp exists + info_message = "" + if timestamp: + tooltip_text = get_message_tooltip(history, role, i) + info_message = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - # Create info buttons for timestamps if they exist - info_message_user = "" - if user_timestamp != "": - tooltip_text = get_message_tooltip(history, "user", i) - info_message_user = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - - info_message_assistant = "" - if assistant_timestamp != "": - tooltip_text = get_message_tooltip(history, "assistant", i) - info_message_assistant = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - - if converted_visible[0]: # Don't display empty user messages - output += ( - f'
' - f'
' - f'
{converted_visible[0]}
' - f'{user_attachments}' - f'{actions_html(history, i, "user", info_message_user)}' - f'
' - f'
' - ) - - output += ( + return ( f'
' - f'
' - f'
{converted_visible[1]}
' - f'{assistant_attachments}' - f'{actions_html(history, i, "assistant", info_message_assistant)}' + f'
' + f'
{content}
' + f'{attachments}' + f'{actions_html(history, i, role, info_message)}' f'
' f'
' ) - output += "
" + # Determine range + start_idx = len(history['visible']) - 1 if last_message_only else 0 + end_idx = len(history['visible']) + + for i in range(start_idx, end_idx): + row_visible = history['visible'][i] + row_internal = history['internal'][i] + + # Convert content + if last_message_only: + converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)] + else: + converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + + # Generate messages + if not last_message_only and converted_visible[0]: + output += create_message("user", converted_visible[0], row_internal[0]) + + output += create_message("assistant", converted_visible[1], row_internal[1]) + + if not last_message_only: + output += "
" + return output @@ -644,15 +656,15 @@ def time_greeting(): return "Good evening!" -def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False): +def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False, last_message_only=False): if len(history['visible']) == 0: greeting = f"
{time_greeting()} How can I help you today?
" result = f'
{greeting}
' elif mode == 'instruct': - result = generate_instruct_html(history) + result = generate_instruct_html(history, last_message_only=last_message_only) elif style == 'wpp': - result = generate_chat_html(history, name1, name2) + result = generate_chat_html(history, name1, name2, last_message_only=last_message_only) else: - result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache) + result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache=reset_cache, last_message_only=last_message_only) - return {'html': result} + return {'html': result, 'last_message_only': last_message_only} diff --git a/modules/shared.py b/modules/shared.py index d2305f30..f712f7f8 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -21,7 +21,7 @@ lora_names = [] # Generation variables stop_everything = False generation_lock = None -processing_message = '*Is typing...*' +processing_message = '' # UI variables gradio = {} @@ -47,7 +47,6 @@ settings = { 'max_new_tokens_max': 4096, 'prompt_lookup_num_tokens': 0, 'max_tokens_second': 0, - 'max_updates_second': 12, 'auto_max_new_tokens': True, 'ban_eos_token': False, 'add_bos_token': True, diff --git a/modules/text_generation.py b/modules/text_generation.py index 1fd6d810..0d499d50 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -65,41 +65,39 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap all_stop_strings += st shared.stop_everything = False - last_update = -1 reply = '' is_stream = state['stream'] if len(all_stop_strings) > 0 and not state['stream']: state = copy.deepcopy(state) state['stream'] = True - min_update_interval = 0 - if state.get('max_updates_second', 0) > 0: - min_update_interval = 1 / state['max_updates_second'] - # Generate + last_update = -1 + latency_threshold = 1 / 1000 for reply in generate_func(question, original_question, state, stopping_strings, is_chat=is_chat): + cur_time = time.monotonic() reply, stop_found = apply_stopping_strings(reply, all_stop_strings) if escape_html: reply = html.escape(reply) if is_stream: - cur_time = time.time() - # Limit number of tokens/second to make text readable in real time if state['max_tokens_second'] > 0: diff = 1 / state['max_tokens_second'] - (cur_time - last_update) if diff > 0: time.sleep(diff) - last_update = time.time() + last_update = time.monotonic() yield reply # Limit updates to avoid lag in the Gradio UI # API updates are not limited else: - if cur_time - last_update > min_update_interval: - last_update = cur_time + # If 'generate_func' takes less than 0.001 seconds to yield the next token + # (equivalent to more than 1000 tok/s), assume that the UI is lagging behind and skip yielding + if (cur_time - last_update) > latency_threshold: yield reply + last_update = time.monotonic() if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything): break diff --git a/modules/ui.py b/modules/ui.py index 9f4d67cb..14a09d2b 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -6,6 +6,7 @@ import yaml import extensions from modules import shared +from modules.chat import load_history with open(Path(__file__).resolve().parent / '../css/NotoSans/stylesheet.css', 'r') as f: css = f.read() @@ -194,7 +195,6 @@ def list_interface_input_elements(): 'max_new_tokens', 'prompt_lookup_num_tokens', 'max_tokens_second', - 'max_updates_second', 'do_sample', 'dynamic_temperature', 'temperature_last', @@ -270,6 +270,10 @@ def gather_interface_values(*args): if not shared.args.multi_user: 
shared.persistent_interface_state = output + # Prevent history loss if backend is restarted but UI is not refreshed + if output['history'] is None and output['unique_id'] is not None: + output['history'] = load_history(output['unique_id'], output['character_menu'], output['mode']) + return output diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 822b77b8..0d5a2c18 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -18,7 +18,7 @@ def create_ui(): mu = shared.args.multi_user shared.gradio['Chat input'] = gr.State() - shared.gradio['history'] = gr.JSON(visible=False) + shared.gradio['history'] = gr.State({'internal': [], 'visible': [], 'metadata': {}}) with gr.Tab('Chat', id='Chat', elem_id='chat-tab'): with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']): @@ -195,7 +195,7 @@ def create_event_handlers(): shared.reload_inputs = gradio(reload_arr) # Morph HTML updates instead of updating everything - shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data.html)") + shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data)") shared.gradio['Generate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 733d0901..84f9fbfc 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -71,8 +71,6 @@ def create_ui(default_preset): shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.') shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.') shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.') - shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.') - with gr.Column(): with gr.Row(): with gr.Column(): diff --git a/user_data/settings-template.yaml b/user_data/settings-template.yaml index ce0f77e1..db481e84 100644 --- a/user_data/settings-template.yaml +++ b/user_data/settings-template.yaml @@ -18,7 +18,6 @@ max_new_tokens_min: 1 max_new_tokens_max: 4096 prompt_lookup_num_tokens: 0 max_tokens_second: 0 -max_updates_second: 12 auto_max_new_tokens: true ban_eos_token: false add_bos_token: true From 7278548cd18a9ba05062eb2db59d7f2965d8a9f6 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Mon, 2 Jun 2025 09:57:55 -0300 Subject: [PATCH 50/59] Simplify the one-click installer (#7039) --- one_click.py | 253 +++++++++++++++++++++++++-------------------------- 1 file changed, 123 insertions(+), 130 deletions(-) diff --git a/one_click.py b/one_click.py index 482a6aa9..cccb0dc9 100644 --- a/one_click.py +++ b/one_click.py @@ -70,12 +70,8 @@ def is_installed(): def cpu_has_avx2(): try: import cpuinfo - info = cpuinfo.get_cpu_info() - if 'avx2' in info['flags']: - return True - else: - return False + return 'avx2' in info['flags'] except: return True @@ -83,30 +79,112 @@ def 
cpu_has_avx2(): def cpu_has_amx(): try: import cpuinfo - info = cpuinfo.get_cpu_info() - if 'amx' in info['flags']: - return True - else: - return False + return 'amx' in info['flags'] except: return True -def torch_version(): - site_packages_path = None - for sitedir in site.getsitepackages(): - if "site-packages" in sitedir and conda_env_path in sitedir: - site_packages_path = sitedir - break +def load_state(): + """Load installer state from JSON file""" + if os.path.exists(state_file): + try: + with open(state_file, 'r') as f: + return json.load(f) + except: + return {} + return {} - if site_packages_path: - torch_version_file = open(os.path.join(site_packages_path, 'torch', 'version.py')).read().splitlines() - torver = [line for line in torch_version_file if line.startswith('__version__')][0].split('__version__ = ')[1].strip("'") + +def save_state(state): + """Save installer state to JSON file""" + with open(state_file, 'w') as f: + json.dump(state, f) + + +def get_gpu_choice(): + """Get GPU choice from state file or ask user""" + state = load_state() + gpu_choice = state.get('gpu_choice') + + if not gpu_choice: + if "GPU_CHOICE" in os.environ: + choice = os.environ["GPU_CHOICE"].upper() + print_big_message(f"Selected GPU choice \"{choice}\" based on the GPU_CHOICE environment variable.") + else: + choice = get_user_choice( + "What is your GPU?", + { + 'A': 'NVIDIA - CUDA 12.4', + 'B': 'AMD - Linux/macOS only, requires ROCm 6.2.4', + 'C': 'Apple M Series', + 'D': 'Intel Arc (beta)', + 'N': 'CPU mode' + }, + ) + + # Convert choice to GPU name + gpu_choice = {"A": "NVIDIA", "B": "AMD", "C": "APPLE", "D": "INTEL", "N": "NONE"}[choice] + + # Save choice to state + state['gpu_choice'] = gpu_choice + save_state(state) + + return gpu_choice + + +def get_pytorch_install_command(gpu_choice): + """Get PyTorch installation command based on GPU choice""" + base_cmd = f"python -m pip install torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} " + + if gpu_choice == "NVIDIA": + return base_cmd + "--index-url https://download.pytorch.org/whl/cu124" + elif gpu_choice == "AMD": + return base_cmd + "--index-url https://download.pytorch.org/whl/rocm6.2.4" + elif gpu_choice in ["APPLE", "NONE"]: + return base_cmd + "--index-url https://download.pytorch.org/whl/cpu" + elif gpu_choice == "INTEL": + if is_linux(): + return "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" + else: + return "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" else: - from torch import __version__ as torver + return base_cmd - return torver + +def get_pytorch_update_command(gpu_choice): + """Get PyTorch update command based on GPU choice""" + base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}" + + if gpu_choice == "NVIDIA": + return f"{base_cmd} --index-url https://download.pytorch.org/whl/cu124" + elif gpu_choice == "AMD": + return f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.2.4" + elif gpu_choice in ["APPLE", "NONE"]: + return f"{base_cmd} --index-url https://download.pytorch.org/whl/cpu" + elif gpu_choice == "INTEL": + intel_extension = "intel-extension-for-pytorch==2.1.10+xpu" if is_linux() 
else "intel-extension-for-pytorch==2.1.10" + return f"{base_cmd} {intel_extension} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" + else: + return base_cmd + + +def get_requirements_file(gpu_choice): + """Get requirements file path based on GPU choice""" + requirements_base = os.path.join("requirements", "full") + + if gpu_choice == "AMD": + file_name = f"requirements_amd{'_noavx2' if not cpu_has_avx2() else ''}.txt" + elif gpu_choice == "APPLE": + file_name = f"requirements_apple_{'intel' if is_x86_64() else 'silicon'}.txt" + elif gpu_choice in ["INTEL", "NONE"]: + file_name = f"requirements_cpu_only{'_noavx2' if not cpu_has_avx2() else ''}.txt" + elif gpu_choice == "NVIDIA": + file_name = f"requirements{'_noavx2' if not cpu_has_avx2() else ''}.txt" + else: + raise ValueError(f"Unknown GPU choice: {gpu_choice}") + + return os.path.join(requirements_base, file_name) def get_current_commit(): @@ -209,28 +287,8 @@ def get_user_choice(question, options_dict): def update_pytorch_and_python(): print_big_message("Checking for PyTorch updates.") - - # Update the Python version. Left here for future reference in case this becomes necessary. - # print_big_message("Checking for PyTorch and Python updates.") - # current_python_version = f"{sys.version_info.major}.{sys.version_info.minor}" - # if current_python_version != PYTHON_VERSION: - # run_cmd(f"conda install -y python={PYTHON_VERSION}", assert_success=True, environment=True) - - torver = torch_version() - base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}" - - if "+cu" in torver: - install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu124" - elif "+rocm" in torver: - install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.2.4" - elif "+cpu" in torver: - install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cpu" - elif "+cxx11" in torver: - intel_extension = "intel-extension-for-pytorch==2.1.10+xpu" if is_linux() else "intel-extension-for-pytorch==2.1.10" - install_cmd = f"{base_cmd} {intel_extension} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" - else: - install_cmd = base_cmd - + gpu_choice = get_gpu_choice() + install_cmd = get_pytorch_update_command(gpu_choice) run_cmd(install_cmd, assert_success=True, environment=True) @@ -256,43 +314,11 @@ def install_webui(): if os.path.isfile(state_file): os.remove(state_file) - # Ask the user for the GPU vendor - if "GPU_CHOICE" in os.environ: - choice = os.environ["GPU_CHOICE"].upper() - print_big_message(f"Selected GPU choice \"{choice}\" based on the GPU_CHOICE environment variable.") - - # Warn about changed meanings and handle old choices - if choice == "B": - print_big_message("Warning: GPU_CHOICE='B' now means 'AMD' in the new version.") - elif choice == "C": - print_big_message("Warning: GPU_CHOICE='C' now means 'Apple M Series' in the new version.") - elif choice == "D": - print_big_message("Warning: GPU_CHOICE='D' now means 'Intel Arc' in the new version.") - else: - choice = get_user_choice( - "What is your GPU?", - { - 'A': 'NVIDIA - CUDA 12.4', - 'B': 'AMD - Linux/macOS only, requires ROCm 6.2.4', - 'C': 'Apple M Series', - 'D': 'Intel Arc (beta)', - 'N': 'CPU mode' - }, - ) - - # Convert choices to GPU names for compatibility - gpu_choice_to_name = { - "A": "NVIDIA", - "B": "AMD", - "C": "APPLE", - "D": "INTEL", - "N": "NONE" - } - - selected_gpu = gpu_choice_to_name[choice] + 
# Get GPU choice and save it to state + gpu_choice = get_gpu_choice() # Write a flag to CMD_FLAGS.txt for CPU mode - if selected_gpu == "NONE": + if gpu_choice == "NONE": cmd_flags_path = os.path.join(script_dir, "user_data", "CMD_FLAGS.txt") with open(cmd_flags_path, 'r+') as cmd_flags_file: if "--cpu" not in cmd_flags_file.read(): @@ -300,34 +326,20 @@ def install_webui(): cmd_flags_file.write("\n--cpu\n") # Handle CUDA version display - elif any((is_windows(), is_linux())) and selected_gpu == "NVIDIA": + elif any((is_windows(), is_linux())) and gpu_choice == "NVIDIA": print("CUDA: 12.4") # No PyTorch for AMD on Windows (?) - elif is_windows() and selected_gpu == "AMD": + elif is_windows() and gpu_choice == "AMD": print("PyTorch setup on Windows is not implemented yet. Exiting...") sys.exit(1) - # Find the Pytorch installation command - install_pytorch = f"python -m pip install torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} " - - if selected_gpu == "NVIDIA": - install_pytorch += "--index-url https://download.pytorch.org/whl/cu124" - elif selected_gpu == "AMD": - install_pytorch += "--index-url https://download.pytorch.org/whl/rocm6.2.4" - elif selected_gpu in ["APPLE", "NONE"]: - install_pytorch += "--index-url https://download.pytorch.org/whl/cpu" - elif selected_gpu == "INTEL": - if is_linux(): - install_pytorch = "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" - else: - install_pytorch = "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" - # Install Git and then Pytorch print_big_message("Installing PyTorch.") + install_pytorch = get_pytorch_install_command(gpu_choice) run_cmd(f"conda install -y ninja git && {install_pytorch} && python -m pip install py-cpuinfo==9.0.0", assert_success=True, environment=True) - if selected_gpu == "INTEL": + if gpu_choice == "INTEL": # Install oneAPI dependencies via conda print_big_message("Installing Intel oneAPI runtime libraries.") run_cmd("conda install -y -c https://software.repos.intel.com/python/conda/ -c conda-forge dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0", environment=True) @@ -349,31 +361,15 @@ def update_requirements(initial_installation=False, pull=True): assert_success=True ) - torver = torch_version() - requirements_base = os.path.join("requirements", "full") - - if "+rocm" in torver: - file_name = f"requirements_amd{'_noavx2' if not cpu_has_avx2() else ''}.txt" - elif "+cpu" in torver or "+cxx11" in torver: - file_name = f"requirements_cpu_only{'_noavx2' if not cpu_has_avx2() else ''}.txt" - elif is_macos(): - file_name = f"requirements_apple_{'intel' if is_x86_64() else 'silicon'}.txt" - else: - file_name = f"requirements{'_noavx2' if not cpu_has_avx2() else ''}.txt" - - requirements_file = os.path.join(requirements_base, file_name) - - # Load state from JSON file current_commit = get_current_commit() - wheels_changed = False - if os.path.exists(state_file): - with open(state_file, 'r') as f: - last_state = json.load(f) - - if 'wheels_changed' in last_state or last_state.get('last_installed_commit') != current_commit: + wheels_changed = not os.path.exists(state_file) + if not wheels_changed: + state = load_state() + if 'wheels_changed' in state or state.get('last_installed_commit') != current_commit: 
wheels_changed = True - else: - wheels_changed = True + + gpu_choice = get_gpu_choice() + requirements_file = get_requirements_file(gpu_choice) if pull: # Read .whl lines before pulling @@ -409,19 +405,17 @@ def update_requirements(initial_installation=False, pull=True): print_big_message(f"File '{file}' was updated during 'git pull'. Please run the script again.") # Save state before exiting - current_state = {} + state = load_state() if wheels_changed: - current_state['wheels_changed'] = True - - with open(state_file, 'w') as f: - json.dump(current_state, f) - + state['wheels_changed'] = True + save_state(state) sys.exit(1) # Save current state - current_state = {'last_installed_commit': current_commit} - with open(state_file, 'w') as f: - json.dump(current_state, f) + state = load_state() + state['last_installed_commit'] = current_commit + state.pop('wheels_changed', None) # Remove wheels_changed flag + save_state(state) if os.environ.get("INSTALL_EXTENSIONS", "").lower() in ("yes", "y", "true", "1", "t", "on"): install_extensions_requirements() @@ -432,11 +426,10 @@ def update_requirements(initial_installation=False, pull=True): # Update PyTorch if not initial_installation: update_pytorch_and_python() - torver = torch_version() clean_outdated_pytorch_cuda_dependencies() print_big_message(f"Installing webui requirements from file: {requirements_file}") - print(f"TORCH: {torver}\n") + print(f"GPU Choice: {gpu_choice}\n") # Prepare the requirements file textgen_requirements = open(requirements_file).read().splitlines() From b30a73016d626e985e248de15fa65e5a531c8bd2 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 2 Jun 2025 07:49:22 -0700 Subject: [PATCH 51/59] Remove the "Is typing..." yield by default --- modules/chat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index f1ea16f1..3c4c3636 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -656,7 +656,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess update_message_metadata(output['metadata'], "user", row_idx, timestamp=get_current_timestamp()) # *Is typing...* - if loading_message: + if loading_message and shared.processing_message: yield { 'visible': output['visible'][:-1] + [[output['visible'][-1][0], shared.processing_message]], 'internal': output['internal'], @@ -680,7 +680,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess }) output['metadata'][key]["current_version_index"] = len(output['metadata'][key]["versions"]) - 1 - if loading_message: + if loading_message and shared.processing_message: yield { 'visible': output['visible'][:-1] + [[visible_text, shared.processing_message]], 'internal': output['internal'][:-1] + [[text, '']], From b38ec0ec385d44d49d3fe7adf2ad77ae62302214 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 2 Jun 2025 11:33:17 -0700 Subject: [PATCH 52/59] Update llama.cpp --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 6 +++--- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- 
requirements/portable/requirements_apple_intel.txt | 4 ++-- requirements/portable/requirements_apple_silicon.txt | 6 +++--- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 16 files changed, 34 insertions(+), 34 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 04d97220..277f8249 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -34,8 +34,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index f807199d..dbf35c34 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -33,7 +33,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git 
a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 4fb70eb1..2e5eb6c9 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -33,7 +33,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index a311ab9b..9a19ab29 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -33,7 +33,7 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 30e8409a..973d9bfb 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -33,8 +33,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 70949949..4a48a51f 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -33,5 +33,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 318bb93a..76bde864 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -33,5 +33,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and 
platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 7c3635cc..6cd0fa65 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -34,8 +34,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index bde310e1..60ce941e 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index 521edc0c..b1649bc9 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index ef7946ff..571eba52 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -19,6 +19,6 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index a3ad743e..88170cf3 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index 
eec052d3..e96cef49 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index c9898a05..78f94aa5 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 0de9c7cb..3e41427d 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index 2bfb4d51..022ebb61 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From 93b3752cdf9f43dd391462168e2e14dd2ab75643 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 4 Jun 2025 09:40:30 -0700 Subject: [PATCH 53/59] Revert "Remove the "Is typing..." yield by default" This reverts commit b30a73016d626e985e248de15fa65e5a531c8bd2. --- modules/chat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 3c4c3636..f1ea16f1 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -656,7 +656,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess update_message_metadata(output['metadata'], "user", row_idx, timestamp=get_current_timestamp()) # *Is typing...* - if loading_message and shared.processing_message: + if loading_message: yield { 'visible': output['visible'][:-1] + [[output['visible'][-1][0], shared.processing_message]], 'internal': output['internal'], @@ -680,7 +680,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess }) output['metadata'][key]["current_version_index"] = len(output['metadata'][key]["versions"]) - 1 - if loading_message and shared.processing_message: + if loading_message: yield { 'visible': output['visible'][:-1] + [[visible_text, shared.processing_message]], 'internal': output['internal'][:-1] + [[text, '']], From 9bd7359ffab5e434b7cdfdb43ee91cb3ad397c0d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 4 Jun 2025 10:47:14 -0700 Subject: [PATCH 54/59] Scroll the textarea into view when editing a message --- js/global_scope_js.js | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/js/global_scope_js.js b/js/global_scope_js.js index d5140c93..801f1574 100644 --- a/js/global_scope_js.js +++ b/js/global_scope_js.js @@ -95,6 +95,12 @@ function startEditing(messageElement, messageBody, isUserMessage) { editingInterface.textarea.focus(); editingInterface.textarea.setSelectionRange(rawText.length, rawText.length); + // Scroll the textarea into view + editingInterface.textarea.scrollIntoView({ + behavior: "smooth", + block: "center" + }); + // Setup event handlers setupEditingHandlers(editingInterface.textarea, messageElement, originalHTML, messageBody, isUserMessage); } From 66a75c899a4b0786cd8744886a189864923287b5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 4 Jun 2025 10:59:43 -0700 Subject: [PATCH 55/59] Improve the scrollbars in code blocks --- js/main.js | 1 + 1 file changed, 1 insertion(+) diff --git a/js/main.js b/js/main.js index 8090937f..2e72d82e 100644 --- a/js/main.js +++ b/js/main.js @@ -229,6 +229,7 @@ function doSyntaxHighlighting() { codeBlocks.forEach((codeBlock) => { hljs.highlightElement(codeBlock); codeBlock.setAttribute("data-highlighted", "true"); + codeBlock.classList.add("pretty_scrollbar"); }); renderMathInElement(messageBody, { From 3d676cd50f8661ca96a20a452611422acb47177c Mon Sep 17 00:00:00 2001 From: 
oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 4 Jun 2025 11:02:04 -0700 Subject: [PATCH 56/59] Optimize syntax highlighting --- js/main.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/main.js b/js/main.js index 2e72d82e..9a620fa9 100644 --- a/js/main.js +++ b/js/main.js @@ -217,7 +217,7 @@ function isElementVisibleOnScreen(element) { } function doSyntaxHighlighting() { - const messageBodies = document.querySelectorAll(".message-body"); + const messageBodies = document.getElementById("chat").querySelectorAll(".message-body"); if (messageBodies.length > 0) { observer.disconnect(); From 3829507d0fd66eccc532b5d8d0e3d77c38143d0c Mon Sep 17 00:00:00 2001 From: Hanusz Leszek Date: Wed, 4 Jun 2025 20:13:36 +0200 Subject: [PATCH 57/59] Stop model during graceful shutdown (#7042) --- server.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/server.py b/server.py index c22ed1f1..99d2e171 100644 --- a/server.py +++ b/server.py @@ -60,6 +60,14 @@ from modules.utils import gradio def signal_handler(sig, frame): logger.info("Received Ctrl+C. Shutting down Text generation web UI gracefully.") + + # Explicitly stop LlamaServer to avoid __del__ cleanup issues during shutdown + if shared.model and shared.model.__class__.__name__ == 'LlamaServer': + try: + shared.model.stop() + except: + pass + sys.exit(0) From 977ec801b7682c3239fe3e6fdfcb8b90c1e802f3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 5 Jun 2025 06:33:45 -0700 Subject: [PATCH 58/59] Improve table colors in instruct mode --- css/html_instruct_style.css | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css index 6ad250aa..9831ee8f 100644 --- a/css/html_instruct_style.css +++ b/css/html_instruct_style.css @@ -17,6 +17,14 @@ color: #d1d5db !important; } +.chat .message-body :is(th, td) { + border-color: #40404096 !important; +} + +.dark .chat .message-body :is(th, td) { + border-color: #ffffff75 !important; +} + .chat .message-body :is(p, ul, ol) { margin: 1.25em 0 !important; } From d47c8eb956a72ebc7c1f582718758697aef62118 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 5 Jun 2025 06:56:24 -0700 Subject: [PATCH 59/59] Remove quotes from LLM-generated websearch query (closes #7045). Fix by @Quiet-Joker --- modules/chat.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/chat.py b/modules/chat.py index f1ea16f1..14f2a4f7 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -604,7 +604,12 @@ def generate_search_query(user_message, state): query = "" for reply in generate_reply(formatted_prompt, search_state, stopping_strings=[], is_chat=True): - query = reply.strip() + query = reply + + # Strip and remove surrounding quotes if present + query = query.strip() + if len(query) >= 2 and query.startswith('"') and query.endswith('"'): + query = query[1:-1] return query
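Note on the last patch above: the web-search query post-processing it adds can be exercised on its own. The following is a minimal standalone sketch of that trimming step, not part of the patch itself; the helper name and the sample replies are illustrative only.

def strip_search_query(reply: str) -> str:
    # Same post-processing as in generate_search_query():
    # trim whitespace, then drop one pair of surrounding double quotes if present.
    query = reply.strip()
    if len(query) >= 2 and query.startswith('"') and query.endswith('"'):
        query = query[1:-1]
    return query


if __name__ == "__main__":
    # Hypothetical LLM replies and the resulting search-engine queries
    for raw in ['"latest llama.cpp release notes"', '  weather in Lisbon today  ', '"']:
        print(repr(raw), "->", repr(strip_search_query(raw)))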
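Similarly, the UI-update gating introduced earlier in this series ("Update only the last message during streaming + add back dynamic UI update speed", the latency_threshold branch in modules/text_generation.py) can be illustrated in isolation. This is a simplified sketch under the assumption of a generic iterator of partial replies; the function and constant names below are illustrative, not part of the patch.

import time

LATENCY_THRESHOLD = 1 / 1000  # seconds; faster than ~1000 tok/s is treated as the UI lagging behind


def throttled_ui_stream(partial_replies):
    """Yield UI updates only when the generator is not outpacing the UI.

    partial_replies is any iterable of progressively longer reply strings,
    mirroring what generate_func yields inside _generate_reply().
    """
    last_update = -1
    reply = ""
    for reply in partial_replies:
        cur_time = time.monotonic()
        # Skip this UI update if the previous one happened less than
        # LATENCY_THRESHOLD seconds ago (tokens are arriving too fast to render).
        if (cur_time - last_update) > LATENCY_THRESHOLD:
            yield reply
            last_update = time.monotonic()
    # Always emit the final state so the UI ends up consistent.
    yield reply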