mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2025-06-07 14:17:09 -04:00
Merge 596ac3bf4c
into bf42b2c3a1
This commit is contained in:
commit
a080abc45c
9 changed files with 173 additions and 155 deletions
|
@ -229,10 +229,23 @@ function removeLastClick() {
|
||||||
document.getElementById("Remove-last").click();
|
document.getElementById("Remove-last").click();
|
||||||
}
|
}
|
||||||
|
|
||||||
function handleMorphdomUpdate(text) {
|
function handleMorphdomUpdate(data) {
|
||||||
|
// Determine target element and use it as query scope
|
||||||
|
var target_element, target_html;
|
||||||
|
if (data.last_message_only) {
|
||||||
|
const childNodes = document.getElementsByClassName("messages")[0].childNodes;
|
||||||
|
target_element = childNodes[childNodes.length - 1];
|
||||||
|
target_html = data.html;
|
||||||
|
} else {
|
||||||
|
target_element = document.getElementById("chat").parentNode;
|
||||||
|
target_html = "<div class=\"prose svelte-1ybaih5\">" + data.html + "</div>";
|
||||||
|
}
|
||||||
|
|
||||||
|
const queryScope = target_element;
|
||||||
|
|
||||||
// Track open blocks
|
// Track open blocks
|
||||||
const openBlocks = new Set();
|
const openBlocks = new Set();
|
||||||
document.querySelectorAll(".thinking-block").forEach(block => {
|
queryScope.querySelectorAll(".thinking-block").forEach(block => {
|
||||||
const blockId = block.getAttribute("data-block-id");
|
const blockId = block.getAttribute("data-block-id");
|
||||||
// If block exists and is open, add to open set
|
// If block exists and is open, add to open set
|
||||||
if (blockId && block.hasAttribute("open")) {
|
if (blockId && block.hasAttribute("open")) {
|
||||||
|
@ -242,7 +255,7 @@ function handleMorphdomUpdate(text) {
|
||||||
|
|
||||||
// Store scroll positions for any open blocks
|
// Store scroll positions for any open blocks
|
||||||
const scrollPositions = {};
|
const scrollPositions = {};
|
||||||
document.querySelectorAll(".thinking-block[open]").forEach(block => {
|
queryScope.querySelectorAll(".thinking-block[open]").forEach(block => {
|
||||||
const content = block.querySelector(".thinking-content");
|
const content = block.querySelector(".thinking-content");
|
||||||
const blockId = block.getAttribute("data-block-id");
|
const blockId = block.getAttribute("data-block-id");
|
||||||
if (content && blockId) {
|
if (content && blockId) {
|
||||||
|
@ -255,8 +268,8 @@ function handleMorphdomUpdate(text) {
|
||||||
});
|
});
|
||||||
|
|
||||||
morphdom(
|
morphdom(
|
||||||
document.getElementById("chat").parentNode,
|
target_element,
|
||||||
"<div class=\"prose svelte-1ybaih5\">" + text + "</div>",
|
target_html,
|
||||||
{
|
{
|
||||||
onBeforeElUpdated: function(fromEl, toEl) {
|
onBeforeElUpdated: function(fromEl, toEl) {
|
||||||
// Preserve code highlighting
|
// Preserve code highlighting
|
||||||
|
@ -307,7 +320,7 @@ function handleMorphdomUpdate(text) {
|
||||||
);
|
);
|
||||||
|
|
||||||
// Add toggle listeners for new blocks
|
// Add toggle listeners for new blocks
|
||||||
document.querySelectorAll(".thinking-block").forEach(block => {
|
queryScope.querySelectorAll(".thinking-block").forEach(block => {
|
||||||
if (!block._hasToggleListener) {
|
if (!block._hasToggleListener) {
|
||||||
block.addEventListener("toggle", function(e) {
|
block.addEventListener("toggle", function(e) {
|
||||||
if (this.open) {
|
if (this.open) {
|
||||||
|
|
|
@ -825,7 +825,7 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False):
|
||||||
last_save_time = time.monotonic()
|
last_save_time = time.monotonic()
|
||||||
save_interval = 8
|
save_interval = 8
|
||||||
for i, history in enumerate(generate_chat_reply(text, state, regenerate, _continue, loading_message=True, for_ui=True)):
|
for i, history in enumerate(generate_chat_reply(text, state, regenerate, _continue, loading_message=True, for_ui=True)):
|
||||||
yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']), history
|
yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'], last_message_only=(i > 0)), history
|
||||||
|
|
||||||
current_time = time.monotonic()
|
current_time = time.monotonic()
|
||||||
# Save on first iteration or if save_interval seconds have passed
|
# Save on first iteration or if save_interval seconds have passed
|
||||||
|
|
|
@ -462,64 +462,69 @@ def actions_html(history, i, role, info_message=""):
|
||||||
f'{version_nav_html}')
|
f'{version_nav_html}')
|
||||||
|
|
||||||
|
|
||||||
def generate_instruct_html(history):
|
def generate_instruct_html(history, last_message_only=False):
|
||||||
output = f'<style>{instruct_css}</style><div class="chat" id="chat" data-mode="instruct"><div class="messages">'
|
if not last_message_only:
|
||||||
|
output = f'<style>{instruct_css}</style><div class="chat" id="chat" data-mode="instruct"><div class="messages">'
|
||||||
|
else:
|
||||||
|
output = ""
|
||||||
|
|
||||||
for i in range(len(history['visible'])):
|
def create_message(role, content, raw_content):
|
||||||
row_visible = history['visible'][i]
|
"""Inner function that captures variables from outer scope."""
|
||||||
row_internal = history['internal'][i]
|
class_name = "user-message" if role == "user" else "assistant-message"
|
||||||
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
|
|
||||||
|
|
||||||
# Get timestamps
|
# Get role-specific data
|
||||||
user_timestamp = format_message_timestamp(history, "user", i)
|
timestamp = format_message_timestamp(history, role, i)
|
||||||
assistant_timestamp = format_message_timestamp(history, "assistant", i)
|
attachments = format_message_attachments(history, role, i)
|
||||||
|
|
||||||
# Get attachments
|
# Create info button if timestamp exists
|
||||||
user_attachments = format_message_attachments(history, "user", i)
|
info_message = ""
|
||||||
assistant_attachments = format_message_attachments(history, "assistant", i)
|
if timestamp:
|
||||||
|
tooltip_text = get_message_tooltip(history, role, i)
|
||||||
|
info_message = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"')
|
||||||
|
|
||||||
# Create info buttons for timestamps if they exist
|
return (
|
||||||
info_message_user = ""
|
f'<div class="{class_name}" '
|
||||||
if user_timestamp != "":
|
f'data-raw="{html.escape(raw_content, quote=True)}"'
|
||||||
tooltip_text = get_message_tooltip(history, "user", i)
|
|
||||||
info_message_user = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"')
|
|
||||||
|
|
||||||
info_message_assistant = ""
|
|
||||||
if assistant_timestamp != "":
|
|
||||||
tooltip_text = get_message_tooltip(history, "assistant", i)
|
|
||||||
info_message_assistant = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"')
|
|
||||||
|
|
||||||
if converted_visible[0]: # Don't display empty user messages
|
|
||||||
output += (
|
|
||||||
f'<div class="user-message" '
|
|
||||||
f'data-raw="{html.escape(row_internal[0], quote=True)}"'
|
|
||||||
f'data-index={i}>'
|
|
||||||
f'<div class="text">'
|
|
||||||
f'<div class="message-body">{converted_visible[0]}</div>'
|
|
||||||
f'{user_attachments}'
|
|
||||||
f'{actions_html(history, i, "user", info_message_user)}'
|
|
||||||
f'</div>'
|
|
||||||
f'</div>'
|
|
||||||
)
|
|
||||||
|
|
||||||
output += (
|
|
||||||
f'<div class="assistant-message" '
|
|
||||||
f'data-raw="{html.escape(row_internal[1], quote=True)}"'
|
|
||||||
f'data-index={i}>'
|
f'data-index={i}>'
|
||||||
f'<div class="text">'
|
f'<div class="text">'
|
||||||
f'<div class="message-body">{converted_visible[1]}</div>'
|
f'<div class="message-body">{content}</div>'
|
||||||
f'{assistant_attachments}'
|
f'{attachments}'
|
||||||
f'{actions_html(history, i, "assistant", info_message_assistant)}'
|
f'{actions_html(history, i, role, info_message)}'
|
||||||
f'</div>'
|
f'</div>'
|
||||||
f'</div>'
|
f'</div>'
|
||||||
)
|
)
|
||||||
|
|
||||||
output += "</div></div>"
|
# Determine range
|
||||||
|
start_idx = len(history['visible']) - 1 if last_message_only else 0
|
||||||
|
end_idx = len(history['visible'])
|
||||||
|
|
||||||
|
for i in range(start_idx, end_idx):
|
||||||
|
row_visible = history['visible'][i]
|
||||||
|
row_internal = history['internal'][i]
|
||||||
|
|
||||||
|
# Convert content
|
||||||
|
if last_message_only:
|
||||||
|
converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)]
|
||||||
|
else:
|
||||||
|
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
|
||||||
|
|
||||||
|
# Generate messages
|
||||||
|
if not last_message_only and converted_visible[0]:
|
||||||
|
output += create_message("user", converted_visible[0], row_internal[0])
|
||||||
|
|
||||||
|
output += create_message("assistant", converted_visible[1], row_internal[1])
|
||||||
|
|
||||||
|
if not last_message_only:
|
||||||
|
output += "</div></div>"
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=False):
|
def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=False, last_message_only=False):
|
||||||
output = f'<style>{chat_styles[style]}</style><div class="chat" id="chat"><div class="messages">'
|
if not last_message_only:
|
||||||
|
output = f'<style>{chat_styles[style]}</style><div class="chat" id="chat"><div class="messages">'
|
||||||
|
else:
|
||||||
|
output = ""
|
||||||
|
|
||||||
# We use ?character and ?time.time() to force the browser to reset caches
|
# We use ?character and ?time.time() to force the browser to reset caches
|
||||||
img_bot = (
|
img_bot = (
|
||||||
|
@ -527,110 +532,117 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=
|
||||||
if Path("user_data/cache/pfp_character_thumb.png").exists() else ''
|
if Path("user_data/cache/pfp_character_thumb.png").exists() else ''
|
||||||
)
|
)
|
||||||
|
|
||||||
img_me = (
|
def create_message(role, content, raw_content):
|
||||||
f'<img src="file/user_data/cache/pfp_me.png?{time.time() if reset_cache else ""}">'
|
"""Inner function for CAI-style messages."""
|
||||||
if Path("user_data/cache/pfp_me.png").exists() else ''
|
circle_class = "circle-you" if role == "user" else "circle-bot"
|
||||||
)
|
name = name1 if role == "user" else name2
|
||||||
|
|
||||||
for i in range(len(history['visible'])):
|
# Get role-specific data
|
||||||
row_visible = history['visible'][i]
|
timestamp = format_message_timestamp(history, role, i, tooltip_include_timestamp=False)
|
||||||
row_internal = history['internal'][i]
|
attachments = format_message_attachments(history, role, i)
|
||||||
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
|
|
||||||
|
|
||||||
# Get timestamps
|
# Get appropriate image
|
||||||
user_timestamp = format_message_timestamp(history, "user", i, tooltip_include_timestamp=False)
|
if role == "user":
|
||||||
assistant_timestamp = format_message_timestamp(history, "assistant", i, tooltip_include_timestamp=False)
|
img = (f'<img src="file/user_data/cache/pfp_me.png?{time.time() if reset_cache else ""}">'
|
||||||
|
if Path("user_data/cache/pfp_me.png").exists() else '')
|
||||||
|
else:
|
||||||
|
img = img_bot
|
||||||
|
|
||||||
# Get attachments
|
return (
|
||||||
user_attachments = format_message_attachments(history, "user", i)
|
|
||||||
assistant_attachments = format_message_attachments(history, "assistant", i)
|
|
||||||
|
|
||||||
if converted_visible[0]: # Don't display empty user messages
|
|
||||||
output += (
|
|
||||||
f'<div class="message" '
|
|
||||||
f'data-raw="{html.escape(row_internal[0], quote=True)}"'
|
|
||||||
f'data-index={i}>'
|
|
||||||
f'<div class="circle-you">{img_me}</div>'
|
|
||||||
f'<div class="text">'
|
|
||||||
f'<div class="username">{name1}{user_timestamp}</div>'
|
|
||||||
f'<div class="message-body">{converted_visible[0]}</div>'
|
|
||||||
f'{user_attachments}'
|
|
||||||
f'{actions_html(history, i, "user")}'
|
|
||||||
f'</div>'
|
|
||||||
f'</div>'
|
|
||||||
)
|
|
||||||
|
|
||||||
output += (
|
|
||||||
f'<div class="message" '
|
f'<div class="message" '
|
||||||
f'data-raw="{html.escape(row_internal[1], quote=True)}"'
|
f'data-raw="{html.escape(raw_content, quote=True)}"'
|
||||||
f'data-index={i}>'
|
f'data-index={i}>'
|
||||||
f'<div class="circle-bot">{img_bot}</div>'
|
f'<div class="{circle_class}">{img}</div>'
|
||||||
f'<div class="text">'
|
f'<div class="text">'
|
||||||
f'<div class="username">{name2}{assistant_timestamp}</div>'
|
f'<div class="username">{name}{timestamp}</div>'
|
||||||
f'<div class="message-body">{converted_visible[1]}</div>'
|
f'<div class="message-body">{content}</div>'
|
||||||
f'{assistant_attachments}'
|
f'{attachments}'
|
||||||
f'{actions_html(history, i, "assistant")}'
|
f'{actions_html(history, i, role)}'
|
||||||
f'</div>'
|
f'</div>'
|
||||||
f'</div>'
|
f'</div>'
|
||||||
)
|
)
|
||||||
|
|
||||||
output += "</div></div>"
|
# Determine range
|
||||||
|
start_idx = len(history['visible']) - 1 if last_message_only else 0
|
||||||
|
end_idx = len(history['visible'])
|
||||||
|
|
||||||
|
for i in range(start_idx, end_idx):
|
||||||
|
row_visible = history['visible'][i]
|
||||||
|
row_internal = history['internal'][i]
|
||||||
|
|
||||||
|
# Convert content
|
||||||
|
if last_message_only:
|
||||||
|
converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)]
|
||||||
|
else:
|
||||||
|
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
|
||||||
|
|
||||||
|
# Generate messages
|
||||||
|
if not last_message_only and converted_visible[0]:
|
||||||
|
output += create_message("user", converted_visible[0], row_internal[0])
|
||||||
|
|
||||||
|
output += create_message("assistant", converted_visible[1], row_internal[1])
|
||||||
|
|
||||||
|
if not last_message_only:
|
||||||
|
output += "</div></div>"
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
def generate_chat_html(history, name1, name2, reset_cache=False):
|
def generate_chat_html(history, name1, name2, reset_cache=False, last_message_only=False):
|
||||||
output = f'<style>{chat_styles["wpp"]}</style><div class="chat" id="chat"><div class="messages">'
|
if not last_message_only:
|
||||||
|
output = f'<style>{chat_styles["wpp"]}</style><div class="chat" id="chat"><div class="messages">'
|
||||||
|
else:
|
||||||
|
output = ""
|
||||||
|
|
||||||
for i in range(len(history['visible'])):
|
def create_message(role, content, raw_content):
|
||||||
row_visible = history['visible'][i]
|
"""Inner function for WPP-style messages."""
|
||||||
row_internal = history['internal'][i]
|
text_class = "text-you" if role == "user" else "text-bot"
|
||||||
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
|
|
||||||
|
|
||||||
# Get timestamps
|
# Get role-specific data
|
||||||
user_timestamp = format_message_timestamp(history, "user", i)
|
timestamp = format_message_timestamp(history, role, i)
|
||||||
assistant_timestamp = format_message_timestamp(history, "assistant", i)
|
attachments = format_message_attachments(history, role, i)
|
||||||
|
|
||||||
# Get attachments
|
# Create info button if timestamp exists
|
||||||
user_attachments = format_message_attachments(history, "user", i)
|
info_message = ""
|
||||||
assistant_attachments = format_message_attachments(history, "assistant", i)
|
if timestamp:
|
||||||
|
tooltip_text = get_message_tooltip(history, role, i)
|
||||||
|
info_message = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"')
|
||||||
|
|
||||||
# Create info buttons for timestamps if they exist
|
return (
|
||||||
info_message_user = ""
|
|
||||||
if user_timestamp != "":
|
|
||||||
tooltip_text = get_message_tooltip(history, "user", i)
|
|
||||||
info_message_user = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"')
|
|
||||||
|
|
||||||
info_message_assistant = ""
|
|
||||||
if assistant_timestamp != "":
|
|
||||||
tooltip_text = get_message_tooltip(history, "assistant", i)
|
|
||||||
info_message_assistant = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"')
|
|
||||||
|
|
||||||
if converted_visible[0]: # Don't display empty user messages
|
|
||||||
output += (
|
|
||||||
f'<div class="message" '
|
|
||||||
f'data-raw="{html.escape(row_internal[0], quote=True)}"'
|
|
||||||
f'data-index={i}>'
|
|
||||||
f'<div class="text-you">'
|
|
||||||
f'<div class="message-body">{converted_visible[0]}</div>'
|
|
||||||
f'{user_attachments}'
|
|
||||||
f'{actions_html(history, i, "user", info_message_user)}'
|
|
||||||
f'</div>'
|
|
||||||
f'</div>'
|
|
||||||
)
|
|
||||||
|
|
||||||
output += (
|
|
||||||
f'<div class="message" '
|
f'<div class="message" '
|
||||||
f'data-raw="{html.escape(row_internal[1], quote=True)}"'
|
f'data-raw="{html.escape(raw_content, quote=True)}"'
|
||||||
f'data-index={i}>'
|
f'data-index={i}>'
|
||||||
f'<div class="text-bot">'
|
f'<div class="{text_class}">'
|
||||||
f'<div class="message-body">{converted_visible[1]}</div>'
|
f'<div class="message-body">{content}</div>'
|
||||||
f'{assistant_attachments}'
|
f'{attachments}'
|
||||||
f'{actions_html(history, i, "assistant", info_message_assistant)}'
|
f'{actions_html(history, i, role, info_message)}'
|
||||||
f'</div>'
|
f'</div>'
|
||||||
f'</div>'
|
f'</div>'
|
||||||
)
|
)
|
||||||
|
|
||||||
output += "</div></div>"
|
# Determine range
|
||||||
|
start_idx = len(history['visible']) - 1 if last_message_only else 0
|
||||||
|
end_idx = len(history['visible'])
|
||||||
|
|
||||||
|
for i in range(start_idx, end_idx):
|
||||||
|
row_visible = history['visible'][i]
|
||||||
|
row_internal = history['internal'][i]
|
||||||
|
|
||||||
|
# Convert content
|
||||||
|
if last_message_only:
|
||||||
|
converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)]
|
||||||
|
else:
|
||||||
|
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
|
||||||
|
|
||||||
|
# Generate messages
|
||||||
|
if not last_message_only and converted_visible[0]:
|
||||||
|
output += create_message("user", converted_visible[0], row_internal[0])
|
||||||
|
|
||||||
|
output += create_message("assistant", converted_visible[1], row_internal[1])
|
||||||
|
|
||||||
|
if not last_message_only:
|
||||||
|
output += "</div></div>"
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
@ -644,15 +656,15 @@ def time_greeting():
|
||||||
return "Good evening!"
|
return "Good evening!"
|
||||||
|
|
||||||
|
|
||||||
def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False):
|
def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False, last_message_only=False):
|
||||||
if len(history['visible']) == 0:
|
if len(history['visible']) == 0:
|
||||||
greeting = f"<div class=\"welcome-greeting\">{time_greeting()} How can I help you today?</div>"
|
greeting = f"<div class=\"welcome-greeting\">{time_greeting()} How can I help you today?</div>"
|
||||||
result = f'<div class="chat" id="chat">{greeting}</div>'
|
result = f'<div class="chat" id="chat">{greeting}</div>'
|
||||||
elif mode == 'instruct':
|
elif mode == 'instruct':
|
||||||
result = generate_instruct_html(history)
|
result = generate_instruct_html(history, last_message_only=last_message_only)
|
||||||
elif style == 'wpp':
|
elif style == 'wpp':
|
||||||
result = generate_chat_html(history, name1, name2)
|
result = generate_chat_html(history, name1, name2, last_message_only=last_message_only)
|
||||||
else:
|
else:
|
||||||
result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache)
|
result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache=reset_cache, last_message_only=last_message_only)
|
||||||
|
|
||||||
return {'html': result}
|
return {'html': result, 'last_message_only': last_message_only}
|
||||||
|
|
|
@ -47,7 +47,6 @@ settings = {
|
||||||
'max_new_tokens_max': 4096,
|
'max_new_tokens_max': 4096,
|
||||||
'prompt_lookup_num_tokens': 0,
|
'prompt_lookup_num_tokens': 0,
|
||||||
'max_tokens_second': 0,
|
'max_tokens_second': 0,
|
||||||
'max_updates_second': 12,
|
|
||||||
'auto_max_new_tokens': True,
|
'auto_max_new_tokens': True,
|
||||||
'ban_eos_token': False,
|
'ban_eos_token': False,
|
||||||
'add_bos_token': True,
|
'add_bos_token': True,
|
||||||
|
|
|
@ -65,41 +65,39 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap
|
||||||
all_stop_strings += st
|
all_stop_strings += st
|
||||||
|
|
||||||
shared.stop_everything = False
|
shared.stop_everything = False
|
||||||
last_update = -1
|
|
||||||
reply = ''
|
reply = ''
|
||||||
is_stream = state['stream']
|
is_stream = state['stream']
|
||||||
if len(all_stop_strings) > 0 and not state['stream']:
|
if len(all_stop_strings) > 0 and not state['stream']:
|
||||||
state = copy.deepcopy(state)
|
state = copy.deepcopy(state)
|
||||||
state['stream'] = True
|
state['stream'] = True
|
||||||
|
|
||||||
min_update_interval = 0
|
|
||||||
if state.get('max_updates_second', 0) > 0:
|
|
||||||
min_update_interval = 1 / state['max_updates_second']
|
|
||||||
|
|
||||||
# Generate
|
# Generate
|
||||||
|
last_update = -1
|
||||||
|
latency_threshold = 1 / 1000
|
||||||
for reply in generate_func(question, original_question, state, stopping_strings, is_chat=is_chat):
|
for reply in generate_func(question, original_question, state, stopping_strings, is_chat=is_chat):
|
||||||
|
cur_time = time.monotonic()
|
||||||
reply, stop_found = apply_stopping_strings(reply, all_stop_strings)
|
reply, stop_found = apply_stopping_strings(reply, all_stop_strings)
|
||||||
if escape_html:
|
if escape_html:
|
||||||
reply = html.escape(reply)
|
reply = html.escape(reply)
|
||||||
|
|
||||||
if is_stream:
|
if is_stream:
|
||||||
cur_time = time.time()
|
|
||||||
|
|
||||||
# Limit number of tokens/second to make text readable in real time
|
# Limit number of tokens/second to make text readable in real time
|
||||||
if state['max_tokens_second'] > 0:
|
if state['max_tokens_second'] > 0:
|
||||||
diff = 1 / state['max_tokens_second'] - (cur_time - last_update)
|
diff = 1 / state['max_tokens_second'] - (cur_time - last_update)
|
||||||
if diff > 0:
|
if diff > 0:
|
||||||
time.sleep(diff)
|
time.sleep(diff)
|
||||||
|
|
||||||
last_update = time.time()
|
last_update = time.monotonic()
|
||||||
yield reply
|
yield reply
|
||||||
|
|
||||||
# Limit updates to avoid lag in the Gradio UI
|
# Limit updates to avoid lag in the Gradio UI
|
||||||
# API updates are not limited
|
# API updates are not limited
|
||||||
else:
|
else:
|
||||||
if cur_time - last_update > min_update_interval:
|
# If 'generate_func' takes less than 0.001 seconds to yield the next token
|
||||||
last_update = cur_time
|
# (equivalent to more than 1000 tok/s), assume that the UI is lagging behind and skip yielding
|
||||||
|
if (cur_time - last_update) > latency_threshold:
|
||||||
yield reply
|
yield reply
|
||||||
|
last_update = time.monotonic()
|
||||||
|
|
||||||
if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything):
|
if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything):
|
||||||
break
|
break
|
||||||
|
|
|
@ -194,7 +194,6 @@ def list_interface_input_elements():
|
||||||
'max_new_tokens',
|
'max_new_tokens',
|
||||||
'prompt_lookup_num_tokens',
|
'prompt_lookup_num_tokens',
|
||||||
'max_tokens_second',
|
'max_tokens_second',
|
||||||
'max_updates_second',
|
|
||||||
'do_sample',
|
'do_sample',
|
||||||
'dynamic_temperature',
|
'dynamic_temperature',
|
||||||
'temperature_last',
|
'temperature_last',
|
||||||
|
|
|
@ -18,7 +18,7 @@ def create_ui():
|
||||||
mu = shared.args.multi_user
|
mu = shared.args.multi_user
|
||||||
|
|
||||||
shared.gradio['Chat input'] = gr.State()
|
shared.gradio['Chat input'] = gr.State()
|
||||||
shared.gradio['history'] = gr.JSON(visible=False)
|
shared.gradio['history'] = gr.State()
|
||||||
|
|
||||||
with gr.Tab('Chat', id='Chat', elem_id='chat-tab'):
|
with gr.Tab('Chat', id='Chat', elem_id='chat-tab'):
|
||||||
with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):
|
with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):
|
||||||
|
@ -195,7 +195,7 @@ def create_event_handlers():
|
||||||
shared.reload_inputs = gradio(reload_arr)
|
shared.reload_inputs = gradio(reload_arr)
|
||||||
|
|
||||||
# Morph HTML updates instead of updating everything
|
# Morph HTML updates instead of updating everything
|
||||||
shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data.html)")
|
shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data)")
|
||||||
|
|
||||||
shared.gradio['Generate'].click(
|
shared.gradio['Generate'].click(
|
||||||
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
|
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
|
||||||
|
|
|
@ -71,8 +71,6 @@ def create_ui(default_preset):
|
||||||
shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.')
|
shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.')
|
||||||
shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
|
shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
|
||||||
shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
|
shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
|
||||||
shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.')
|
|
||||||
|
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
|
|
|
@ -18,7 +18,6 @@ max_new_tokens_min: 1
|
||||||
max_new_tokens_max: 4096
|
max_new_tokens_max: 4096
|
||||||
prompt_lookup_num_tokens: 0
|
prompt_lookup_num_tokens: 0
|
||||||
max_tokens_second: 0
|
max_tokens_second: 0
|
||||||
max_updates_second: 12
|
|
||||||
auto_max_new_tokens: true
|
auto_max_new_tokens: true
|
||||||
ban_eos_token: false
|
ban_eos_token: false
|
||||||
add_bos_token: true
|
add_bos_token: true
|
||||||
|
|
Loading…
Add table
Reference in a new issue