Simplify things

This commit is contained in:
oobabooga 2025-05-28 12:14:51 -07:00
parent d702a2a962
commit 1f3b1a1b94

View file

@ -3,7 +3,6 @@ import copy
import json import json
import time import time
from collections import deque from collections import deque
from datetime import datetime
import requests import requests
import tiktoken import tiktoken
@ -86,49 +85,34 @@ def process_parameters(body, is_legacy=False):
return generate_params return generate_params
def get_current_timestamp():
"""Returns the current time in 24-hour format"""
return datetime.now().strftime('%b %d, %Y %H:%M')
def process_image_url(url, image_id): def process_image_url(url, image_id):
"""Process an image URL and return attachment data""" """Process an image URL and return attachment data for llama.cpp"""
try: try:
if url.startswith("data:"): if url.startswith("data:"):
# Handle data URL (data:image/jpeg;base64,...)
if "base64," in url: if "base64," in url:
image_data = url.split("base64,", 1)[1] image_data = url.split("base64,", 1)[1]
else: else:
raise ValueError("Unsupported data URL format") raise ValueError("Unsupported data URL format")
else: else:
# Handle regular URL - download image headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}
response = requests.get(url, timeout=10, headers=headers) response = requests.get(url, timeout=10, headers=headers)
response.raise_for_status() response.raise_for_status()
image_data = base64.b64encode(response.content).decode('utf-8') image_data = base64.b64encode(response.content).decode('utf-8')
return { return {"image_data": image_data, "image_id": image_id}
"name": f"image_{image_id}",
"type": "image",
"image_data": image_data,
"image_id": image_id,
"file_path": f"api_image_{image_id}",
}
except Exception as e: except Exception as e:
logger.error(f"Error processing image URL {url}: {e}") logger.error(f"Error processing image URL {url}: {e}")
return None return None
def process_multimodal_content(content): def process_multimodal_content(content):
"""Process multimodal content and return text content and attachments""" """Extract text and images from OpenAI multimodal format"""
if isinstance(content, str): if isinstance(content, str):
return content, [] return content, []
if isinstance(content, list): if isinstance(content, list):
text_content = "" text_content = ""
attachments = [] images = []
for item in content: for item in content:
if item.get("type") == "text": if item.get("type") == "text":
@ -136,14 +120,11 @@ def process_multimodal_content(content):
elif item.get("type") == "image_url": elif item.get("type") == "image_url":
image_url = item.get("image_url", {}).get("url", "") image_url = item.get("image_url", {}).get("url", "")
if image_url: if image_url:
attachment = process_image_url(image_url, len(attachments) + 1) image = process_image_url(image_url, len(images) + 1)
if attachment: if image:
attachments.append(attachment) images.append(image)
else:
# Log warning but continue processing
logger.warning(f"Failed to process image URL: {image_url}")
return text_content, attachments return text_content, images
return str(content), [] return str(content), []
@ -159,10 +140,7 @@ def convert_history(history):
user_input = "" user_input = ""
user_input_last = True user_input_last = True
system_message = "" system_message = ""
metadata = {} all_images = [] # Simple list to collect all images
# Keep track of attachments for the current message being built
pending_attachments = []
for entry in history: for entry in history:
content = entry["content"] content = entry["content"]
@ -170,16 +148,20 @@ def convert_history(history):
if role == "user": if role == "user":
# Process multimodal content # Process multimodal content
processed_content, attachments = process_multimodal_content(content) processed_content, images = process_multimodal_content(content)
if images:
image_refs = "".join(f"[img-{img['image_id']}]" for img in images)
processed_content = f"{processed_content} {image_refs}"
user_input = processed_content user_input = processed_content
user_input_last = True user_input_last = True
all_images.extend(images) # Add any images to our collection
if current_message: if current_message:
chat_dialogue.append([current_message, '', '']) chat_dialogue.append([current_message, '', ''])
current_message = "" current_message = ""
current_message = processed_content current_message = processed_content
pending_attachments = attachments # Store attachments for when message is added
elif role == "assistant": elif role == "assistant":
if "tool_calls" in entry and isinstance(entry["tool_calls"], list) and len(entry["tool_calls"]) > 0 and content.strip() == "": if "tool_calls" in entry and isinstance(entry["tool_calls"], list) and len(entry["tool_calls"]) > 0 and content.strip() == "":
@ -187,18 +169,7 @@ def convert_history(history):
current_reply = content current_reply = content
user_input_last = False user_input_last = False
if current_message: if current_message:
row_idx = len(chat_dialogue) # Calculate index here, right before adding
chat_dialogue.append([current_message, current_reply, '']) chat_dialogue.append([current_message, current_reply, ''])
# Add attachments to metadata if any
if pending_attachments:
user_key = f"user_{row_idx}"
metadata[user_key] = {
"timestamp": get_current_timestamp(),
"attachments": pending_attachments
}
pending_attachments = [] # Clear pending attachments
current_message = "" current_message = ""
current_reply = "" current_reply = ""
else: else:
@ -209,19 +180,14 @@ def convert_history(history):
elif role == "system": elif role == "system":
system_message += f"\n{content}" if system_message else content system_message += f"\n{content}" if system_message else content
# Handle case where there's a pending user message at the end
if current_message and pending_attachments:
row_idx = len(chat_dialogue) # This will be the index when the message is processed
user_key = f"user_{row_idx}"
metadata[user_key] = {
"timestamp": get_current_timestamp(),
"attachments": pending_attachments
}
if not user_input_last: if not user_input_last:
user_input = "" user_input = ""
return user_input, system_message, {'internal': chat_dialogue, 'visible': copy.deepcopy(chat_dialogue), 'metadata': metadata} return user_input, system_message, {
'internal': chat_dialogue,
'visible': copy.deepcopy(chat_dialogue),
'images': all_images # Simple list of all images from the conversation
}
def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, prompt_only=False) -> dict: def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, prompt_only=False) -> dict:
@ -298,15 +264,6 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
# History # History
user_input, custom_system_message, history = convert_history(messages) user_input, custom_system_message, history = convert_history(messages)
# Collect image attachments for multimodal support
image_attachments = []
if 'metadata' in history:
for key, value in history['metadata'].items():
if 'attachments' in value:
for attachment in value['attachments']:
if attachment.get('type') == 'image':
image_attachments.append(attachment)
generate_params.update({ generate_params.update({
'mode': body['mode'], 'mode': body['mode'],
'name1': name1, 'name1': name1,
@ -323,9 +280,9 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
'stream': stream 'stream': stream
}) })
# Add image attachments to state for llama.cpp multimodal support # Add images to state for llama.cpp multimodal support
if image_attachments: if history.get('images'):
generate_params['image_attachments'] = image_attachments generate_params['image_attachments'] = history['images']
max_tokens = generate_params['max_new_tokens'] max_tokens = generate_params['max_new_tokens']
if max_tokens in [None, 0]: if max_tokens in [None, 0]: