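Fix API issues: acquire streaming_semaphore outside the try/finally in the streaming generators and call stop_everything_event() when a stream ends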

This commit is contained in:
oobabooga 2025-05-18 12:45:01 -07:00
parent 126b3a768f
commit 83bfd5c64b

View file

@ -114,8 +114,8 @@ async def openai_completions(request: Request, request_data: CompletionRequest):
if request_data.stream: if request_data.stream:
async def generator(): async def generator():
try: async with streaming_semaphore:
async with streaming_semaphore: try:
response = OAIcompletions.stream_completions(to_dict(request_data), is_legacy=is_legacy) response = OAIcompletions.stream_completions(to_dict(request_data), is_legacy=is_legacy)
async for resp in iterate_in_threadpool(response): async for resp in iterate_in_threadpool(response):
disconnected = await request.is_disconnected() disconnected = await request.is_disconnected()
@ -123,8 +123,9 @@ async def openai_completions(request: Request, request_data: CompletionRequest):
break break
yield {"data": json.dumps(resp)} yield {"data": json.dumps(resp)}
finally: finally:
return stop_everything_event()
return
return EventSourceResponse(generator()) # SSE streaming return EventSourceResponse(generator()) # SSE streaming
@ -145,8 +146,8 @@ async def openai_chat_completions(request: Request, request_data: ChatCompletion
if request_data.stream: if request_data.stream:
async def generator(): async def generator():
try: async with streaming_semaphore:
async with streaming_semaphore: try:
response = OAIcompletions.stream_chat_completions(to_dict(request_data), is_legacy=is_legacy) response = OAIcompletions.stream_chat_completions(to_dict(request_data), is_legacy=is_legacy)
async for resp in iterate_in_threadpool(response): async for resp in iterate_in_threadpool(response):
disconnected = await request.is_disconnected() disconnected = await request.is_disconnected()
@ -154,8 +155,9 @@ async def openai_chat_completions(request: Request, request_data: ChatCompletion
break break
yield {"data": json.dumps(resp)} yield {"data": json.dumps(resp)}
finally: finally:
return stop_everything_event()
return
return EventSourceResponse(generator()) # SSE streaming return EventSourceResponse(generator()) # SSE streaming