import datetime
import functools
import html
import os
import re
import time
from pathlib import Path

import markdown
from PIL import Image, ImageOps

from modules import shared
from modules.sane_markdown_lists import SaneListExtension
from modules.utils import get_available_chat_styles

# This is to store the paths to the thumbnails of the profile pictures
image_cache = {}


def minify_css(css: str) -> str:
    """Return *css* with comments and redundant whitespace removed, on one line."""
    # Step 1: Remove comments
    css = re.sub(r'/\*.*?\*/', '', css, flags=re.DOTALL)

    # Step 2: Remove leading and trailing whitespace
    css = re.sub(r'^[ \t]*|[ \t]*$', '', css, flags=re.MULTILINE)

    # Step 3: Remove spaces after specific characters ({ : ; ,})
    css = re.sub(r'([:{;,])\s+', r'\1', css)

    # Step 4: Remove spaces before `{`
    css = re.sub(r'\s+{', '{', css)

    # Step 5: Remove empty lines
    css = re.sub(r'^\s*$', '', css, flags=re.MULTILINE)

    # Step 6: Collapse all lines into one
    css = re.sub(r'\n', '', css)

    return css


# The stylesheets are read with an explicit encoding so the result does not
# depend on the platform's default locale encoding.
with open(Path(__file__).resolve().parent / '../css/html_readable_style.css', 'r', encoding='utf-8') as f:
    readable_css = f.read()
with open(Path(__file__).resolve().parent / '../css/html_instruct_style.css', 'r', encoding='utf-8') as f:
    instruct_css = f.read()

# Custom chat styles
chat_styles = {}
for k in get_available_chat_styles():
    # Use a context manager so each file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(Path(f'css/chat_style-{k}.css'), 'r', encoding='utf-8') as f:
        chat_styles[k] = f.read()

# Handle styles that derive from other styles: when the first line of a style
# names another chat_style-<name>.css file, that style's CSS is prepended and
# the first line is dropped.
for k in chat_styles:
    lines = chat_styles[k].split('\n')
    input_string = lines[0]
    match = re.search(r'chat_style-([a-z\-]*)\.css', input_string)

    if match:
        style = match.group(1)
        chat_styles[k] = chat_styles.get(style, '') + '\n\n' + '\n'.join(lines[1:])

# Reduce the size of the CSS sources above
readable_css = minify_css(readable_css)
instruct_css = minify_css(instruct_css)
for k in chat_styles:
    chat_styles[k] = minify_css(chat_styles[k])


def fix_newlines(string):
    """Double every newline, cap runs of newlines at two, and strip the ends."""
    string = string.replace('\n', '\n\n')
    string = re.sub(r"\n{3,}", "\n\n", string)
    string = string.strip()
    return string


def replace_quotes(text):
    """Run every quoted span in *text* through the quote replacer.

    A span is an opening quote, the shortest possible run of characters
    (newlines included), and the matching closing quote from ``quote_pairs``.
    """
    # Define a list of quote pairs (opening and closing)
    quote_pairs = [
        ('"', '"'),  # Double quotes
        ('“', '”'),  # Unicode left and right double quotation marks
        ('‘', '’'),  # Unicode left and right single quotation marks
        ('«', '»'),  # French quotes
        ('„', '“'),  # German quotes
        ('‘', '’'),  # Alternative single quotes
        ('“', '”'),  # Unicode quotes (numeric entities)
        ('“', '”'),  # Unicode quotes (hex entities)
        ('\u201C', '\u201D'),  # Unicode quotes (literal chars)
    ]

    # Create a regex pattern that matches any of the quote pairs, including newlines.
    # Each alternative contributes three groups: opening quote, content, closing quote.
    pattern = '|'.join(f'({re.escape(open_q)})(.*?)({re.escape(close_q)})' for open_q, close_q in quote_pairs)

    # NOTE(review): as written, the replacement re-emits the opening quote,
    # the content and the closing quote with no markup in between — confirm
    # this matches the intended output.
    def replacer(m):
        # Find the first non-None group set
        for i in range(1, len(m.groups()), 3):  # Step through each sub-pattern's groups
            if m.group(i):  # If this sub-pattern matched
                return f'{m.group(i)}{m.group(i + 1)}{m.group(i + 2)}'

        return m.group(0)  # Fallback (shouldn't happen)

    replaced_text = re.sub(pattern, replacer, text, flags=re.DOTALL)
    return replaced_text


def replace_blockquote(m):
    """Rewrite a matched blockquote environment as '> '-prefixed lines.

    Every newline in the match is followed by '> ', and the \\begin / \\end
    markers themselves are removed.
    """
    return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')


def extract_thinking_block(string):
    """Split *string* into its thinking block and the content after it.

    Returns a ``(thinking_content, remaining_content)`` tuple:

    * no opening tag (or empty input): ``(None, string)``;
    * opening and closing tag: the text between them, and the text after the
      closing tag;
    * opening tag only: everything after it, and ``""``.

    The first opening tag is used wherever it occurs; text before it is not
    part of either returned value.
    """
    if not string:
        return None, string

    THINK_START_TAG = "<think>"
    THINK_END_TAG = "</think>"

    # Look for opening tag. The tag starts with a non-whitespace character,
    # so searching the original string finds it in exactly the same cases as
    # searching a left-stripped copy would.
    start_pos = string.find(THINK_START_TAG)
    if start_pos == -1:
        return None, string

    # Find the content after the opening tag
    content_start = start_pos + len(THINK_START_TAG)

    # Look for closing tag
    end_pos = string.find(THINK_END_TAG, content_start)

    if end_pos != -1:
        # Both tags found - extract content between them
        thinking_content = string[content_start:end_pos]
        remaining_content = string[end_pos + len(THINK_END_TAG):]
        return thinking_content, remaining_content
    else:
        # Only opening tag found - everything else is thinking content
        thinking_content = string[content_start:]
        return thinking_content, ""


@functools.lru_cache(maxsize=None)
def convert_to_markdown(string, message_id=None):
    """Render *string* through the markdown pipeline, thinking block first.

    Results are memoized on ``(string, message_id)`` by ``lru_cache``.
    Returns ``""`` for empty input.
    """
    if not string:
        return ""

    # Use a default message ID if none provided
    if message_id is None:
        message_id = "unknown"

    # Extract thinking block if present
    thinking_content, remaining_content = extract_thinking_block(string)

    # Process the main content
    html_output = process_markdown_content(remaining_content)

    # If thinking content was found, process it using the same function
    if thinking_content is not None:
        thinking_html = process_markdown_content(thinking_content)

        # Generate unique ID for the thinking block
        # NOTE(review): block_id is not referenced by the template as it
        # appears below — confirm against the intended markup.
        block_id = f"thinking-{message_id}-0"

        # Check if thinking is complete or still in progress: nothing after
        # the thinking block is treated as "still streaming".
        is_streaming = not remaining_content
        title_text = "Thinking..." if is_streaming else "Thought"

        thinking_block = f'''
{info_svg_small} {title_text}
{thinking_html}
'''

        # Prepend the thinking block to the message HTML
        html_output = thinking_block + html_output

    return html_output


def process_markdown_content(string):
    """Process a string through the markdown conversion pipeline."""
    if not string:
        return ""

    # Make \[ \] LaTeX equations inline
    pattern = r'^\s*\\\[\s*\n([\s\S]*?)\n\s*\\\]\s*$'
    replacement = r'\\[ \1 \\]'
    string = re.sub(pattern, replacement, string, flags=re.MULTILINE)

    # Escape backslashes
    string = string.replace('\\', '\\\\')

    # Quote to
    string = replace_quotes(string)

    # Blockquote
    string = re.sub(r'(^|[\n])>', r'\1>', string)
    pattern = re.compile(r'\\begin{blockquote}(.*?)\\end{blockquote}', re.DOTALL)
    string = pattern.sub(replace_blockquote, string)

    # Code
    string = string.replace('\\begin{code}', '```')
    string = string.replace('\\end{code}', '```')
    string = string.replace('\\begin{align*}', '$$')
    string = string.replace('\\end{align*}', '$$')
    string = string.replace('\\begin{align}', '$$')
    string = string.replace('\\end{align}', '$$')
    string = string.replace('\\begin{equation}', '$$')
    string = string.replace('\\end{equation}', '$$')
    string = string.replace('\\begin{equation*}', '$$')
    string = string.replace('\\end{equation*}', '$$')
    string = re.sub(r"(.)```", r"\1\n```", string)

    result = ''
    is_code = False
    is_latex = False
    for line in string.split('\n'):
        stripped_line = line.strip()

        if stripped_line.startswith('```'):
            is_code = not is_code
        elif stripped_line.startswith('$$'):
            is_latex = not is_latex
        elif stripped_line.endswith('$$'):
            is_latex = False
        elif stripped_line.startswith('\\\\['):
            is_latex = True
        elif stripped_line.startswith('\\\\]'):
            is_latex = False
        elif stripped_line.endswith('\\\\]'):
            is_latex = False

        result += line

        # Don't add an extra \n for code, LaTeX, or tables
        if is_code or is_latex or line.startswith('|'):
            result += '\n'
        # Also don't add an extra \n for lists
        elif stripped_line.startswith('-') or stripped_line.startswith('*') or stripped_line.startswith('+') or stripped_line.startswith('>') or re.match(r'\d+\.', stripped_line):
            result += ' \n'
        else:
            result += ' \n'

    result = result.strip()
    if is_code:
        result += '\n```'  # Unfinished code block

    # Unfinished list, like "\n1.". A |delete| string is added and then
    # removed to force a
    # or