diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py index 3fc7a0cc..d695c74e 100644 --- a/modules/llama_cpp_server.py +++ b/modules/llama_cpp_server.py @@ -146,8 +146,9 @@ class LlamaServer: pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(printable_payload) print() - # Make a direct request with streaming enabled using a context manager - with self.session.post(url, json=payload, stream=True) as response: + # Make the generation request + response = self.session.post(url, json=payload, stream=True) + try: response.raise_for_status() # Raise an exception for HTTP errors full_text = "" @@ -184,6 +185,8 @@ class LlamaServer: print(f"JSON decode error: {e}") print(f"Problematic line: {line}") continue + finally: + response.close() def generate(self, prompt, state): output = ""