Integrate with the API

This commit is contained in:
oobabooga 2025-05-28 09:14:26 -07:00
parent f92e1f44a0
commit 2e21b1f5e3

View file

@ -1,8 +1,11 @@
import base64
import copy import copy
import json import json
import time import time
from collections import deque from collections import deque
from datetime import datetime
import requests
import tiktoken import tiktoken
from pydantic import ValidationError from pydantic import ValidationError
@ -16,6 +19,7 @@ from modules.chat import (
load_character_memoized, load_character_memoized,
load_instruction_template_memoized load_instruction_template_memoized
) )
from modules.logging_colors import logger
from modules.presets import load_preset_memoized from modules.presets import load_preset_memoized
from modules.text_generation import decode, encode, generate_reply from modules.text_generation import decode, encode, generate_reply
@ -82,6 +86,67 @@ def process_parameters(body, is_legacy=False):
return generate_params return generate_params
def get_current_timestamp():
"""Returns the current time in 24-hour format"""
return datetime.now().strftime('%b %d, %Y %H:%M')
def process_image_url(url, image_id):
"""Process an image URL and return attachment data"""
try:
if url.startswith("data:"):
# Handle data URL (data:image/jpeg;base64,...)
if "base64," in url:
image_data = url.split("base64,", 1)[1]
else:
raise ValueError("Unsupported data URL format")
else:
# Handle regular URL - download image
response = requests.get(url, timeout=10)
response.raise_for_status()
image_data = base64.b64encode(response.content).decode('utf-8')
return {
"name": f"image_{image_id}",
"type": "image",
"image_data": image_data,
"image_id": image_id,
"file_path": f"api_image_{image_id}", # Add this for consistency with UI
}
except Exception as e:
logger.error(f"Error processing image URL {url}: {e}")
return None
def process_multimodal_content(content):
"""Process multimodal content and return text content and attachments"""
if isinstance(content, str):
return content, []
if isinstance(content, list):
text_content = ""
image_refs = ""
attachments = []
for item in content:
if item.get("type") == "text":
text_content += item.get("text", "")
elif item.get("type") == "image_url":
image_url = item.get("image_url", {}).get("url", "")
if image_url:
attachment = process_image_url(image_url, len(attachments) + 1)
if attachment:
attachments.append(attachment)
image_refs += f"[img-{attachment['image_id']}]"
else:
# Log warning but continue processing
logger.warning(f"Failed to process image URL: {image_url}")
return f"{image_refs}{text_content}", attachments
return str(content), []
def convert_history(history): def convert_history(history):
''' '''
Chat histories in this program are in the format [message, reply]. Chat histories in this program are in the format [message, reply].
@ -93,26 +158,46 @@ def convert_history(history):
user_input = "" user_input = ""
user_input_last = True user_input_last = True
system_message = "" system_message = ""
metadata = {}
# Keep track of attachments for the current message being built
pending_attachments = []
for entry in history: for entry in history:
content = entry["content"] content = entry["content"]
role = entry["role"] role = entry["role"]
if role == "user": if role == "user":
user_input = content # Process multimodal content
processed_content, attachments = process_multimodal_content(content)
user_input = processed_content
user_input_last = True user_input_last = True
if current_message: if current_message:
chat_dialogue.append([current_message, '', '']) chat_dialogue.append([current_message, '', ''])
current_message = "" current_message = ""
current_message = content current_message = processed_content
pending_attachments = attachments # Store attachments for when message is added
elif role == "assistant": elif role == "assistant":
if "tool_calls" in entry and isinstance(entry["tool_calls"], list) and len(entry["tool_calls"]) > 0 and content.strip() == "": if "tool_calls" in entry and isinstance(entry["tool_calls"], list) and len(entry["tool_calls"]) > 0 and content.strip() == "":
continue # skip tool calls continue # skip tool calls
current_reply = content current_reply = content
user_input_last = False user_input_last = False
if current_message: if current_message:
row_idx = len(chat_dialogue) # Calculate index here, right before adding
chat_dialogue.append([current_message, current_reply, '']) chat_dialogue.append([current_message, current_reply, ''])
# Add attachments to metadata if any
if pending_attachments:
user_key = f"user_{row_idx}"
metadata[user_key] = {
"timestamp": get_current_timestamp(),
"attachments": pending_attachments
}
pending_attachments = [] # Clear pending attachments
current_message = "" current_message = ""
current_reply = "" current_reply = ""
else: else:
@ -123,10 +208,19 @@ def convert_history(history):
elif role == "system": elif role == "system":
system_message += f"\n{content}" if system_message else content system_message += f"\n{content}" if system_message else content
# Handle case where there's a pending user message at the end
if current_message and pending_attachments:
row_idx = len(chat_dialogue) # This will be the index when the message is processed
user_key = f"user_{row_idx}"
metadata[user_key] = {
"timestamp": get_current_timestamp(),
"attachments": pending_attachments
}
if not user_input_last: if not user_input_last:
user_input = "" user_input = ""
return user_input, system_message, {'internal': chat_dialogue, 'visible': copy.deepcopy(chat_dialogue)} return user_input, system_message, {'internal': chat_dialogue, 'visible': copy.deepcopy(chat_dialogue), 'metadata': metadata}
def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, prompt_only=False) -> dict: def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, prompt_only=False) -> dict:
@ -150,9 +244,23 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
elif m['role'] == 'function': elif m['role'] == 'function':
raise InvalidRequestError(message="role: function is not supported.", param='messages') raise InvalidRequestError(message="role: function is not supported.", param='messages')
if 'content' not in m and "image_url" not in m: # Handle multimodal content validation
content = m.get('content')
if content is None:
raise InvalidRequestError(message="messages: missing content", param='messages') raise InvalidRequestError(message="messages: missing content", param='messages')
# Validate multimodal content structure
if isinstance(content, list):
for item in content:
if not isinstance(item, dict) or 'type' not in item:
raise InvalidRequestError(message="messages: invalid content item format", param='messages')
if item['type'] not in ['text', 'image_url']:
raise InvalidRequestError(message="messages: unsupported content type", param='messages')
if item['type'] == 'text' and 'text' not in item:
raise InvalidRequestError(message="messages: missing text in content item", param='messages')
if item['type'] == 'image_url' and ('image_url' not in item or 'url' not in item['image_url']):
raise InvalidRequestError(message="messages: missing image_url in content item", param='messages')
# Chat Completions # Chat Completions
object_type = 'chat.completion' if not stream else 'chat.completion.chunk' object_type = 'chat.completion' if not stream else 'chat.completion.chunk'
created_time = int(time.time()) created_time = int(time.time())
@ -189,6 +297,15 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
# History # History
user_input, custom_system_message, history = convert_history(messages) user_input, custom_system_message, history = convert_history(messages)
# Collect image attachments for multimodal support
image_attachments = []
if 'metadata' in history:
for key, value in history['metadata'].items():
if 'attachments' in value:
for attachment in value['attachments']:
if attachment.get('type') == 'image':
image_attachments.append(attachment)
generate_params.update({ generate_params.update({
'mode': body['mode'], 'mode': body['mode'],
'name1': name1, 'name1': name1,
@ -205,6 +322,10 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
'stream': stream 'stream': stream
}) })
# Add image attachments to state for llama.cpp multimodal support
if image_attachments:
generate_params['image_attachments'] = image_attachments
max_tokens = generate_params['max_new_tokens'] max_tokens = generate_params['max_new_tokens']
if max_tokens in [None, 0]: if max_tokens in [None, 0]:
generate_params['max_new_tokens'] = 512 generate_params['max_new_tokens'] = 512