diff --git a/js/global_scope_js.js b/js/global_scope_js.js
index 3274f47e..d5140c93 100644
--- a/js/global_scope_js.js
+++ b/js/global_scope_js.js
@@ -229,10 +229,23 @@ function removeLastClick() {
document.getElementById("Remove-last").click();
}
-function handleMorphdomUpdate(text) {
+function handleMorphdomUpdate(data) {
+ // Determine target element and use it as query scope
+ var target_element, target_html;
+ if (data.last_message_only) {
+ const childNodes = document.getElementsByClassName("messages")[0].childNodes;
+ target_element = childNodes[childNodes.length - 1];
+ target_html = data.html;
+ } else {
+ target_element = document.getElementById("chat").parentNode;
+ target_html = "
'
+def generate_chat_html(history, name1, name2, reset_cache=False, last_message_only=False):
+ if not last_message_only:
+ output = f'
'
+ else:
+ output = ""
- for i in range(len(history['visible'])):
- row_visible = history['visible'][i]
- row_internal = history['internal'][i]
- converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
+ def create_message(role, content, raw_content):
+ """Inner function for WPP-style messages."""
+ text_class = "text-you" if role == "user" else "text-bot"
- # Get timestamps
- user_timestamp = format_message_timestamp(history, "user", i)
- assistant_timestamp = format_message_timestamp(history, "assistant", i)
+ # Get role-specific data
+ timestamp = format_message_timestamp(history, role, i)
+ attachments = format_message_attachments(history, role, i)
- # Get attachments
- user_attachments = format_message_attachments(history, "user", i)
- assistant_attachments = format_message_attachments(history, "assistant", i)
+ # Create info button if timestamp exists
+ info_message = ""
+ if timestamp:
+ tooltip_text = get_message_tooltip(history, role, i)
+ info_message = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"')
- # Create info buttons for timestamps if they exist
- info_message_user = ""
- if user_timestamp != "":
- tooltip_text = get_message_tooltip(history, "user", i)
- info_message_user = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"')
-
- info_message_assistant = ""
- if assistant_timestamp != "":
- tooltip_text = get_message_tooltip(history, "assistant", i)
- info_message_assistant = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"')
-
- if converted_visible[0]: # Don't display empty user messages
- output += (
- f'
'
- f'
'
- f'
{converted_visible[0]}
'
- f'{user_attachments}'
- f'{actions_html(history, i, "user", info_message_user)}'
- f'
'
- f'
'
- )
-
- output += (
+ return (
f'
'
- f'
'
- f'
{converted_visible[1]}
'
- f'{assistant_attachments}'
- f'{actions_html(history, i, "assistant", info_message_assistant)}'
+ f'
'
+ f'
{content}
'
+ f'{attachments}'
+ f'{actions_html(history, i, role, info_message)}'
f'
'
f'
'
)
- output += "
"
+    # Determine the range of history rows to render: only the final row when last_message_only is set
+ start_idx = len(history['visible']) - 1 if last_message_only else 0
+ end_idx = len(history['visible'])
+
+ for i in range(start_idx, end_idx):
+ row_visible = history['visible'][i]
+ row_internal = history['internal'][i]
+
+        # Convert message content to markdown (only the assistant side is needed when rendering just the last message)
+ if last_message_only:
+ converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)]
+ else:
+ converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
+
+ # Generate messages
+ if not last_message_only and converted_visible[0]:
+ output += create_message("user", converted_visible[0], row_internal[0])
+
+ output += create_message("assistant", converted_visible[1], row_internal[1])
+
+ if not last_message_only:
+ output += "
"
+
return output
@@ -644,15 +656,15 @@ def time_greeting():
return "Good evening!"
-def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False):
+def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False, last_message_only=False):
if len(history['visible']) == 0:
greeting = f"
{time_greeting()} How can I help you today?
"
result = f'
{greeting}
'
elif mode == 'instruct':
- result = generate_instruct_html(history)
+ result = generate_instruct_html(history, last_message_only=last_message_only)
elif style == 'wpp':
- result = generate_chat_html(history, name1, name2)
+ result = generate_chat_html(history, name1, name2, last_message_only=last_message_only)
else:
- result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache)
+ result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache=reset_cache, last_message_only=last_message_only)
- return {'html': result}
+ return {'html': result, 'last_message_only': last_message_only}
diff --git a/modules/shared.py b/modules/shared.py
index d2305f30..9a181f3e 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -47,7 +47,6 @@ settings = {
'max_new_tokens_max': 4096,
'prompt_lookup_num_tokens': 0,
'max_tokens_second': 0,
- 'max_updates_second': 12,
'auto_max_new_tokens': True,
'ban_eos_token': False,
'add_bos_token': True,
diff --git a/modules/text_generation.py b/modules/text_generation.py
index 1fd6d810..0d499d50 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -65,41 +65,39 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap
all_stop_strings += st
shared.stop_everything = False
- last_update = -1
reply = ''
is_stream = state['stream']
if len(all_stop_strings) > 0 and not state['stream']:
state = copy.deepcopy(state)
state['stream'] = True
- min_update_interval = 0
- if state.get('max_updates_second', 0) > 0:
- min_update_interval = 1 / state['max_updates_second']
-
# Generate
+ last_update = -1
+ latency_threshold = 1 / 1000
for reply in generate_func(question, original_question, state, stopping_strings, is_chat=is_chat):
+ cur_time = time.monotonic()
reply, stop_found = apply_stopping_strings(reply, all_stop_strings)
if escape_html:
reply = html.escape(reply)
if is_stream:
- cur_time = time.time()
-
# Limit number of tokens/second to make text readable in real time
if state['max_tokens_second'] > 0:
diff = 1 / state['max_tokens_second'] - (cur_time - last_update)
if diff > 0:
time.sleep(diff)
- last_update = time.time()
+ last_update = time.monotonic()
yield reply
# Limit updates to avoid lag in the Gradio UI
# API updates are not limited
else:
- if cur_time - last_update > min_update_interval:
- last_update = cur_time
+ # If 'generate_func' takes less than 0.001 seconds to yield the next token
+ # (equivalent to more than 1000 tok/s), assume that the UI is lagging behind and skip yielding
+ if (cur_time - last_update) > latency_threshold:
yield reply
+ last_update = time.monotonic()
if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything):
break
diff --git a/modules/ui.py b/modules/ui.py
index 9f4d67cb..422db740 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -194,7 +194,6 @@ def list_interface_input_elements():
'max_new_tokens',
'prompt_lookup_num_tokens',
'max_tokens_second',
- 'max_updates_second',
'do_sample',
'dynamic_temperature',
'temperature_last',
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 822b77b8..a8eaadfa 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -18,7 +18,7 @@ def create_ui():
mu = shared.args.multi_user
shared.gradio['Chat input'] = gr.State()
- shared.gradio['history'] = gr.JSON(visible=False)
+ shared.gradio['history'] = gr.State()
with gr.Tab('Chat', id='Chat', elem_id='chat-tab'):
with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):
@@ -195,7 +195,7 @@ def create_event_handlers():
shared.reload_inputs = gradio(reload_arr)
# Morph HTML updates instead of updating everything
- shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data.html)")
+ shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data)")
shared.gradio['Generate'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py
index 733d0901..84f9fbfc 100644
--- a/modules/ui_parameters.py
+++ b/modules/ui_parameters.py
@@ -71,8 +71,6 @@ def create_ui(default_preset):
shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.')
shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
- shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.')
-
with gr.Column():
with gr.Row():
with gr.Column():
diff --git a/user_data/settings-template.yaml b/user_data/settings-template.yaml
index ce0f77e1..db481e84 100644
--- a/user_data/settings-template.yaml
+++ b/user_data/settings-template.yaml
@@ -18,7 +18,6 @@ max_new_tokens_min: 1
max_new_tokens_max: 4096
prompt_lookup_num_tokens: 0
max_tokens_second: 0
-max_updates_second: 12
auto_max_new_tokens: true
ban_eos_token: false
add_bos_token: true