API: Fix a regression

2025-06-07 14:17:09 -04:00 · 2025-05-16 13:02:27 -07:00 · 2025-05-16 13:02:27 -07:00 · e4d3f4449d
commit e4d3f4449d
parent 470c822f44
1 changed file with 5 additions and 2 deletions
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -146,8 +146,9 @@ class LlamaServer:
            pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(printable_payload)
            print()
-        # Make a direct request with streaming enabled using a context manager
+        # Make the generation request
-        with self.session.post(url, json=payload, stream=True) as response:
+        response = self.session.post(url, json=payload, stream=True)
+        try:
            response.raise_for_status()  # Raise an exception for HTTP errors
            full_text = ""
@@ -184,6 +185,8 @@ class LlamaServer:
                    print(f"JSON decode error: {e}")
                    print(f"Problematic line: {line}")
                    continue
+        finally:
+            response.close()
    def generate(self, prompt, state):
        output = ""