diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index 123f9471..5319e7af 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -8,6 +8,7 @@ import time
 
 import llama_cpp_binaries
 import requests
+import sseclient
 
 from modules import shared
 from modules.logging_colors import logger
@@ -138,42 +139,43 @@ class LlamaServer:
             pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(printable_payload)
             print()
 
-        # Make a direct request with streaming enabled
-        response = requests.post(url, json=payload, stream=True)
-        response.raise_for_status()  # Raise an exception for HTTP errors
+        # Configure headers for Server-Sent Events
+        headers = {
+            'Content-Type': 'application/json',
+            'Accept': 'text/event-stream'
+        }
+
+        response = requests.post(url, json=payload, stream=True, headers=headers)
+        response.raise_for_status()
+
+        # Initialize SSE client for proper event stream parsing
+        client = sseclient.SSEClient(response)
 
         full_text = ""
 
-        # Process the streaming response
-        for line in response.iter_lines():
+        for event in client.events():
             if shared.stop_everything:
                 break
 
-            if line:
-                try:
-                    # Check if the line starts with "data: " and remove it
-                    line_str = line.decode('utf-8')
-                    if line_str.startswith('data: '):
-                        line_str = line_str[6:]  # Remove the "data: " prefix
+            try:
+                # Handle stream termination marker
+                if event.data == '[DONE]':
+                    break
 
-                    # Parse the JSON data
-                    data = json.loads(line_str)
+                data = json.loads(event.data)
 
-                    # Extract the token content
-                    if 'content' in data:
-                        token_text = data['content']
-                        full_text += token_text
-                        yield full_text
+                if 'content' in data:
+                    token_text = data['content']
+                    full_text += token_text
+                    yield full_text
 
-                    # Check if generation is complete
-                    if data.get('stop', False):
-                        break
+                if data.get('stop', False):
+                    break
 
-                except json.JSONDecodeError as e:
-                    # Log the error and the problematic line
-                    print(f"JSON decode error: {e}")
-                    print(f"Problematic line: {line}")
-                    continue
+            except json.JSONDecodeError as e:
+                print(f"JSON decode error: {e}")
+                print(f"Problematic data: {event.data}")
+                continue
 
     def generate(self, prompt, state):
         output = ""
diff --git a/requirements.txt b/requirements.txt
index 607efda0..b6759806 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,6 +19,7 @@ requests
 rich
 safetensors==0.5.*
 scipy
+sseclient-py==1.8.0
 sentencepiece
 tensorboard
 transformers==4.50.*
diff --git a/requirements_amd.txt b/requirements_amd.txt
index b242d4ad..e156bc55 100644
--- a/requirements_amd.txt
+++ b/requirements_amd.txt
@@ -18,6 +18,7 @@ requests
 rich
 safetensors==0.5.*
 scipy
+sseclient-py==1.8.0
 sentencepiece
 tensorboard
 transformers==4.50.*
diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt
index b6105209..6becd514 100644
--- a/requirements_amd_noavx2.txt
+++ b/requirements_amd_noavx2.txt
@@ -18,6 +18,7 @@ requests
 rich
 safetensors==0.5.*
 scipy
+sseclient-py==1.8.0
 sentencepiece
 tensorboard
 transformers==4.50.*
diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt
index ce730f63..1223b4d3 100644
--- a/requirements_apple_intel.txt
+++ b/requirements_apple_intel.txt
@@ -18,6 +18,7 @@ requests
 rich
 safetensors==0.5.*
 scipy
+sseclient-py==1.8.0
 sentencepiece
 tensorboard
 transformers==4.50.*
diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt
index a7be282d..f47f9991 100644
--- a/requirements_apple_silicon.txt
+++ b/requirements_apple_silicon.txt
@@ -18,6 +18,7 @@ requests
 rich
 safetensors==0.5.*
 scipy
+sseclient-py==1.8.0
 sentencepiece
 tensorboard
 transformers==4.50.*
diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt
index 2437c2ae..007f9ef1 100644
--- a/requirements_cpu_only.txt
+++ b/requirements_cpu_only.txt
@@ -18,6 +18,7 @@ requests
 rich
 safetensors==0.5.*
 scipy
+sseclient-py==1.8.0
 sentencepiece
 tensorboard
 transformers==4.50.*
diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt
index cbaa8e96..4219273d 100644
--- a/requirements_cpu_only_noavx2.txt
+++ b/requirements_cpu_only_noavx2.txt
@@ -18,6 +18,7 @@ requests
 rich
 safetensors==0.5.*
 scipy
+sseclient-py==1.8.0
 sentencepiece
 tensorboard
 transformers==4.50.*
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index cce27aa2..58e0e5a1 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -19,6 +19,7 @@ requests
 rich
 safetensors==0.5.*
 scipy
+sseclient-py==1.8.0
 sentencepiece
 tensorboard
 transformers==4.50.*
diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt
index 3b61ca39..c210f6a0 100644
--- a/requirements_nowheels.txt
+++ b/requirements_nowheels.txt
@@ -18,6 +18,7 @@ requests
 rich
 safetensors==0.5.*
 scipy
+sseclient-py==1.8.0
 sentencepiece
 tensorboard
 transformers==4.50.*