diff --git a/js/global_scope_js.js b/js/global_scope_js.js index 3274f47e..d5140c93 100644 --- a/js/global_scope_js.js +++ b/js/global_scope_js.js @@ -229,10 +229,23 @@ function removeLastClick() { document.getElementById("Remove-last").click(); } -function handleMorphdomUpdate(text) { +function handleMorphdomUpdate(data) { + // Determine target element and use it as query scope + var target_element, target_html; + if (data.last_message_only) { + const childNodes = document.getElementsByClassName("messages")[0].childNodes; + target_element = childNodes[childNodes.length - 1]; + target_html = data.html; + } else { + target_element = document.getElementById("chat").parentNode; + target_html = "
" + data.html + "
"; + } + + const queryScope = target_element; + // Track open blocks const openBlocks = new Set(); - document.querySelectorAll(".thinking-block").forEach(block => { + queryScope.querySelectorAll(".thinking-block").forEach(block => { const blockId = block.getAttribute("data-block-id"); // If block exists and is open, add to open set if (blockId && block.hasAttribute("open")) { @@ -242,7 +255,7 @@ function handleMorphdomUpdate(text) { // Store scroll positions for any open blocks const scrollPositions = {}; - document.querySelectorAll(".thinking-block[open]").forEach(block => { + queryScope.querySelectorAll(".thinking-block[open]").forEach(block => { const content = block.querySelector(".thinking-content"); const blockId = block.getAttribute("data-block-id"); if (content && blockId) { @@ -255,8 +268,8 @@ function handleMorphdomUpdate(text) { }); morphdom( - document.getElementById("chat").parentNode, - "
" + text + "
", + target_element, + target_html, { onBeforeElUpdated: function(fromEl, toEl) { // Preserve code highlighting @@ -307,7 +320,7 @@ function handleMorphdomUpdate(text) { ); // Add toggle listeners for new blocks - document.querySelectorAll(".thinking-block").forEach(block => { + queryScope.querySelectorAll(".thinking-block").forEach(block => { if (!block._hasToggleListener) { block.addEventListener("toggle", function(e) { if (this.open) { diff --git a/modules/chat.py b/modules/chat.py index 1222d2bb..fff82613 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -825,7 +825,7 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False): last_save_time = time.monotonic() save_interval = 8 for i, history in enumerate(generate_chat_reply(text, state, regenerate, _continue, loading_message=True, for_ui=True)): - yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']), history + yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'], last_message_only=(i > 0)), history current_time = time.monotonic() # Save on first iteration or if save_interval seconds have passed diff --git a/modules/html_generator.py b/modules/html_generator.py index 03b5d485..f90e3b04 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -462,64 +462,69 @@ def actions_html(history, i, role, info_message=""): f'{version_nav_html}') -def generate_instruct_html(history): - output = f'
' +def generate_instruct_html(history, last_message_only=False): + if not last_message_only: + output = f'
' + else: + output = "" - for i in range(len(history['visible'])): - row_visible = history['visible'][i] - row_internal = history['internal'][i] - converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + def create_message(role, content, raw_content): + """Inner function that captures variables from outer scope.""" + class_name = "user-message" if role == "user" else "assistant-message" - # Get timestamps - user_timestamp = format_message_timestamp(history, "user", i) - assistant_timestamp = format_message_timestamp(history, "assistant", i) + # Get role-specific data + timestamp = format_message_timestamp(history, role, i) + attachments = format_message_attachments(history, role, i) - # Get attachments - user_attachments = format_message_attachments(history, "user", i) - assistant_attachments = format_message_attachments(history, "assistant", i) + # Create info button if timestamp exists + info_message = "" + if timestamp: + tooltip_text = get_message_tooltip(history, role, i) + info_message = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - # Create info buttons for timestamps if they exist - info_message_user = "" - if user_timestamp != "": - tooltip_text = get_message_tooltip(history, "user", i) - info_message_user = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - - info_message_assistant = "" - if assistant_timestamp != "": - tooltip_text = get_message_tooltip(history, "assistant", i) - info_message_assistant = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - - if converted_visible[0]: # Don't display empty user messages - output += ( - f'
' - f'
' - f'
{converted_visible[0]}
' - f'{user_attachments}' - f'{actions_html(history, i, "user", info_message_user)}' - f'
' - f'
' - ) - - output += ( - f'
' f'
' - f'
{converted_visible[1]}
' - f'{assistant_attachments}' - f'{actions_html(history, i, "assistant", info_message_assistant)}' + f'
{content}
' + f'{attachments}' + f'{actions_html(history, i, role, info_message)}' f'
' f'
' ) - output += "
" + # Determine range + start_idx = len(history['visible']) - 1 if last_message_only else 0 + end_idx = len(history['visible']) + + for i in range(start_idx, end_idx): + row_visible = history['visible'][i] + row_internal = history['internal'][i] + + # Convert content + if last_message_only: + converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)] + else: + converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + + # Generate messages + if not last_message_only and converted_visible[0]: + output += create_message("user", converted_visible[0], row_internal[0]) + + output += create_message("assistant", converted_visible[1], row_internal[1]) + + if not last_message_only: + output += "
" + return output -def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=False): - output = f'
' +def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=False, last_message_only=False): + if not last_message_only: + output = f'
' + else: + output = "" # We use ?character and ?time.time() to force the browser to reset caches img_bot = ( @@ -527,110 +532,117 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache= if Path("user_data/cache/pfp_character_thumb.png").exists() else '' ) - img_me = ( - f'' - if Path("user_data/cache/pfp_me.png").exists() else '' - ) + def create_message(role, content, raw_content): + """Inner function for CAI-style messages.""" + circle_class = "circle-you" if role == "user" else "circle-bot" + name = name1 if role == "user" else name2 - for i in range(len(history['visible'])): - row_visible = history['visible'][i] - row_internal = history['internal'][i] - converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + # Get role-specific data + timestamp = format_message_timestamp(history, role, i, tooltip_include_timestamp=False) + attachments = format_message_attachments(history, role, i) - # Get timestamps - user_timestamp = format_message_timestamp(history, "user", i, tooltip_include_timestamp=False) - assistant_timestamp = format_message_timestamp(history, "assistant", i, tooltip_include_timestamp=False) + # Get appropriate image + if role == "user": + img = (f'' + if Path("user_data/cache/pfp_me.png").exists() else '') + else: + img = img_bot - # Get attachments - user_attachments = format_message_attachments(history, "user", i) - assistant_attachments = format_message_attachments(history, "assistant", i) - - if converted_visible[0]: # Don't display empty user messages - output += ( - f'
' - f'
{img_me}
' - f'
' - f'
{name1}{user_timestamp}
' - f'
{converted_visible[0]}
' - f'{user_attachments}' - f'{actions_html(history, i, "user")}' - f'
' - f'
' - ) - - output += ( + return ( f'
' - f'
{img_bot}
' + f'
{img}
' f'
' - f'
{name2}{assistant_timestamp}
' - f'
{converted_visible[1]}
' - f'{assistant_attachments}' - f'{actions_html(history, i, "assistant")}' + f'
{name}{timestamp}
' + f'
{content}
' + f'{attachments}' + f'{actions_html(history, i, role)}' f'
' f'
' ) - output += "
" + # Determine range + start_idx = len(history['visible']) - 1 if last_message_only else 0 + end_idx = len(history['visible']) + + for i in range(start_idx, end_idx): + row_visible = history['visible'][i] + row_internal = history['internal'][i] + + # Convert content + if last_message_only: + converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)] + else: + converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + + # Generate messages + if not last_message_only and converted_visible[0]: + output += create_message("user", converted_visible[0], row_internal[0]) + + output += create_message("assistant", converted_visible[1], row_internal[1]) + + if not last_message_only: + output += "
" + return output -def generate_chat_html(history, name1, name2, reset_cache=False): - output = f'
' +def generate_chat_html(history, name1, name2, reset_cache=False, last_message_only=False): + if not last_message_only: + output = f'
' + else: + output = "" - for i in range(len(history['visible'])): - row_visible = history['visible'][i] - row_internal = history['internal'][i] - converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + def create_message(role, content, raw_content): + """Inner function for WPP-style messages.""" + text_class = "text-you" if role == "user" else "text-bot" - # Get timestamps - user_timestamp = format_message_timestamp(history, "user", i) - assistant_timestamp = format_message_timestamp(history, "assistant", i) + # Get role-specific data + timestamp = format_message_timestamp(history, role, i) + attachments = format_message_attachments(history, role, i) - # Get attachments - user_attachments = format_message_attachments(history, "user", i) - assistant_attachments = format_message_attachments(history, "assistant", i) + # Create info button if timestamp exists + info_message = "" + if timestamp: + tooltip_text = get_message_tooltip(history, role, i) + info_message = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - # Create info buttons for timestamps if they exist - info_message_user = "" - if user_timestamp != "": - tooltip_text = get_message_tooltip(history, "user", i) - info_message_user = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - - info_message_assistant = "" - if assistant_timestamp != "": - tooltip_text = get_message_tooltip(history, "assistant", i) - info_message_assistant = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - - if converted_visible[0]: # Don't display empty user messages - output += ( - f'
' - f'
' - f'
{converted_visible[0]}
' - f'{user_attachments}' - f'{actions_html(history, i, "user", info_message_user)}' - f'
' - f'
' - ) - - output += ( + return ( f'
' - f'
' - f'
{converted_visible[1]}
' - f'{assistant_attachments}' - f'{actions_html(history, i, "assistant", info_message_assistant)}' + f'
' + f'
{content}
' + f'{attachments}' + f'{actions_html(history, i, role, info_message)}' f'
' f'
' ) - output += "
" + # Determine range + start_idx = len(history['visible']) - 1 if last_message_only else 0 + end_idx = len(history['visible']) + + for i in range(start_idx, end_idx): + row_visible = history['visible'][i] + row_internal = history['internal'][i] + + # Convert content + if last_message_only: + converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)] + else: + converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + + # Generate messages + if not last_message_only and converted_visible[0]: + output += create_message("user", converted_visible[0], row_internal[0]) + + output += create_message("assistant", converted_visible[1], row_internal[1]) + + if not last_message_only: + output += "
" + return output @@ -644,15 +656,15 @@ def time_greeting(): return "Good evening!" -def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False): +def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False, last_message_only=False): if len(history['visible']) == 0: greeting = f"
{time_greeting()} How can I help you today?
" result = f'
{greeting}
' elif mode == 'instruct': - result = generate_instruct_html(history) + result = generate_instruct_html(history, last_message_only=last_message_only) elif style == 'wpp': - result = generate_chat_html(history, name1, name2) + result = generate_chat_html(history, name1, name2, last_message_only=last_message_only) else: - result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache) + result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache=reset_cache, last_message_only=last_message_only) - return {'html': result} + return {'html': result, 'last_message_only': last_message_only} diff --git a/modules/shared.py b/modules/shared.py index d2305f30..9a181f3e 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -47,7 +47,6 @@ settings = { 'max_new_tokens_max': 4096, 'prompt_lookup_num_tokens': 0, 'max_tokens_second': 0, - 'max_updates_second': 12, 'auto_max_new_tokens': True, 'ban_eos_token': False, 'add_bos_token': True, diff --git a/modules/text_generation.py b/modules/text_generation.py index 1fd6d810..0d499d50 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -65,41 +65,39 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap all_stop_strings += st shared.stop_everything = False - last_update = -1 reply = '' is_stream = state['stream'] if len(all_stop_strings) > 0 and not state['stream']: state = copy.deepcopy(state) state['stream'] = True - min_update_interval = 0 - if state.get('max_updates_second', 0) > 0: - min_update_interval = 1 / state['max_updates_second'] - # Generate + last_update = -1 + latency_threshold = 1 / 1000 for reply in generate_func(question, original_question, state, stopping_strings, is_chat=is_chat): + cur_time = time.monotonic() reply, stop_found = apply_stopping_strings(reply, all_stop_strings) if escape_html: reply = html.escape(reply) if is_stream: - cur_time = time.time() - # Limit number of tokens/second to make text readable in real time if state['max_tokens_second'] > 0: diff = 1 / state['max_tokens_second'] - (cur_time - last_update) if diff > 0: time.sleep(diff) - last_update = time.time() + last_update = time.monotonic() yield reply # Limit updates to avoid lag in the Gradio UI # API updates are not limited else: - if cur_time - last_update > min_update_interval: - last_update = cur_time + # If 'generate_func' takes less than 0.001 seconds to yield the next token + # (equivalent to more than 1000 tok/s), assume that the UI is lagging behind and skip yielding + if (cur_time - last_update) > latency_threshold: yield reply + last_update = time.monotonic() if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything): break diff --git a/modules/ui.py b/modules/ui.py index 9f4d67cb..422db740 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -194,7 +194,6 @@ def list_interface_input_elements(): 'max_new_tokens', 'prompt_lookup_num_tokens', 'max_tokens_second', - 'max_updates_second', 'do_sample', 'dynamic_temperature', 'temperature_last', diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 822b77b8..a8eaadfa 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -18,7 +18,7 @@ def create_ui(): mu = shared.args.multi_user shared.gradio['Chat input'] = gr.State() - shared.gradio['history'] = gr.JSON(visible=False) + shared.gradio['history'] = gr.State() with gr.Tab('Chat', id='Chat', elem_id='chat-tab'): with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']): @@ -195,7 +195,7 @@ def create_event_handlers(): shared.reload_inputs = gradio(reload_arr) # Morph HTML updates instead of updating everything - shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data.html)") + shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data)") shared.gradio['Generate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 733d0901..84f9fbfc 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -71,8 +71,6 @@ def create_ui(default_preset): shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.') shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.') shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.') - shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.') - with gr.Column(): with gr.Row(): with gr.Column(): diff --git a/user_data/settings-template.yaml b/user_data/settings-template.yaml index ce0f77e1..db481e84 100644 --- a/user_data/settings-template.yaml +++ b/user_data/settings-template.yaml @@ -18,7 +18,6 @@ max_new_tokens_min: 1 max_new_tokens_max: 4096 prompt_lookup_num_tokens: 0 max_tokens_second: 0 -max_updates_second: 12 auto_max_new_tokens: true ban_eos_token: false add_bos_token: true