mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2025-06-07 14:17:09 -04:00
Add support for .docx attachments
This commit is contained in:
parent
dc8ed6dbe7
commit
1d88456659
20 changed files with 69 additions and 1 deletions
|
@ -16,7 +16,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.
|
||||||
- Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory.
|
- Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory.
|
||||||
- 100% offline and private, with zero telemetry, external resources, or remote update requests.
|
- 100% offline and private, with zero telemetry, external resources, or remote update requests.
|
||||||
- Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats.
|
- Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats.
|
||||||
- **File attachments**: Upload text files and PDF documents to talk about their contents.
|
- **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents.
|
||||||
- **Web search**: Optionally search the internet with LLM-generated queries to add context to the conversation.
|
- **Web search**: Optionally search the internet with LLM-generated queries to add context to the conversation.
|
||||||
- Aesthetic UI with dark and light themes.
|
- Aesthetic UI with dark and light themes.
|
||||||
- `instruct` mode for instruction-following (like ChatGPT), and `chat-instruct`/`chat` modes for talking to custom characters.
|
- `instruct` mode for instruction-following (like ChatGPT), and `chat-instruct`/`chat` modes for talking to custom characters.
|
||||||
|
|
|
@ -500,6 +500,9 @@ def add_message_attachment(history, row_idx, file_path, is_user=True):
|
||||||
# Process PDF file
|
# Process PDF file
|
||||||
content = extract_pdf_text(path)
|
content = extract_pdf_text(path)
|
||||||
file_type = "application/pdf"
|
file_type = "application/pdf"
|
||||||
|
elif file_extension == '.docx':
|
||||||
|
content = extract_docx_text(path)
|
||||||
|
file_type = "application/docx"
|
||||||
else:
|
else:
|
||||||
# Default handling for text files
|
# Default handling for text files
|
||||||
with open(path, 'r', encoding='utf-8') as f:
|
with open(path, 'r', encoding='utf-8') as f:
|
||||||
|
@ -538,6 +541,53 @@ def extract_pdf_text(pdf_path):
|
||||||
return f"[Error extracting PDF text: {str(e)}]"
|
return f"[Error extracting PDF text: {str(e)}]"
|
||||||
|
|
||||||
|
|
||||||
|
def extract_docx_text(docx_path):
    """
    Extract text from a .docx file, including headers,
    body (paragraphs and tables), and footers.

    Output order is: header text of every section, then the body blocks in
    document order, then footer text of every section. Table rows are emitted
    one per line with cell texts separated by tabs.

    Args:
        docx_path: Path of the .docx file, passed straight to ``docx.Document``.

    Returns:
        The extracted text joined with newlines. If anything goes wrong
        (including python-docx not being installed), the error is logged and
        a ``"[Error extracting DOCX text: ...]"`` placeholder string is
        returned instead of raising.
    """
    try:
        # Imported inside the try block so that a missing python-docx install
        # is reported through the same error path as a malformed document.
        from docx import Document
        from docx.oxml.table import CT_Tbl
        from docx.oxml.text.paragraph import CT_P
        from docx.table import Table
        from docx.text.paragraph import Paragraph

        doc = Document(docx_path)
        parts = []

        # 1) Extract non-empty header paragraphs from each section
        parts.extend(_docx_header_footer_text(doc.sections, "header"))

        # 2) Extract body blocks (paragraphs and tables) in document order.
        #    Walking the raw body children (rather than doc.paragraphs and
        #    doc.tables separately) keeps paragraphs and tables interleaved
        #    the way they appear in the document.
        for child in doc.element.body.iterchildren():
            if isinstance(child, CT_P):
                text = Paragraph(child, doc).text.strip()
                if text:
                    parts.append(text)

            elif isinstance(child, CT_Tbl):
                for row in Table(child, doc).rows:
                    cells = [cell.text.strip() for cell in row.cells]
                    parts.append("\t".join(cells))

        # 3) Extract non-empty footer paragraphs from each section
        parts.extend(_docx_header_footer_text(doc.sections, "footer"))

        return "\n".join(parts)

    except Exception as e:
        logger.error(f"Error extracting text from DOCX: {e}")
        return f"[Error extracting DOCX text: {str(e)}]"


def _docx_header_footer_text(sections, attr_name):
    """
    Collect non-empty paragraph text from each section's own header or footer.

    Args:
        sections: Iterable of python-docx ``Section`` objects.
        attr_name: ``"header"`` or ``"footer"`` - the section attribute to read.

    Returns:
        A list of stripped, non-empty paragraph strings.
    """
    texts = []
    for section in sections:
        part = getattr(section, attr_name)

        # A header/footer that is linked to the previous section has no
        # content of its own; reading it would yield the prior section's text
        # again and duplicate it once per section, so skip it.
        if part.is_linked_to_previous:
            continue

        for para in part.paragraphs:
            text = para.text.strip()
            if text:
                texts.append(text)

    return texts
|
||||||
|
|
||||||
|
|
||||||
def generate_search_query(user_message, state):
|
def generate_search_query(user_message, state):
|
||||||
"""Generate a search query from user message using the LLM"""
|
"""Generate a search query from user message using the LLM"""
|
||||||
# Augment the user message with search instruction
|
# Augment the user message with search instruction
|
||||||
|
|
|
@ -16,6 +16,7 @@ Pillow>=9.5.0
|
||||||
psutil
|
psutil
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
|
@ -15,6 +15,7 @@ Pillow>=9.5.0
|
||||||
psutil
|
psutil
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
|
@ -15,6 +15,7 @@ Pillow>=9.5.0
|
||||||
psutil
|
psutil
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
|
@ -15,6 +15,7 @@ Pillow>=9.5.0
|
||||||
psutil
|
psutil
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
|
@ -15,6 +15,7 @@ Pillow>=9.5.0
|
||||||
psutil
|
psutil
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
|
@ -15,6 +15,7 @@ Pillow>=9.5.0
|
||||||
psutil
|
psutil
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
|
@ -15,6 +15,7 @@ Pillow>=9.5.0
|
||||||
psutil
|
psutil
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
|
@ -16,6 +16,7 @@ Pillow>=9.5.0
|
||||||
psutil
|
psutil
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
|
@ -15,6 +15,7 @@ Pillow>=9.5.0
|
||||||
psutil
|
psutil
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
||||||
numpy==1.26.*
|
numpy==1.26.*
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
||||||
numpy==1.26.*
|
numpy==1.26.*
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
||||||
numpy==1.26.*
|
numpy==1.26.*
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
||||||
numpy==1.26.*
|
numpy==1.26.*
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
||||||
numpy==1.26.*
|
numpy==1.26.*
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
||||||
numpy==1.26.*
|
numpy==1.26.*
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
||||||
numpy==1.26.*
|
numpy==1.26.*
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
||||||
numpy==1.26.*
|
numpy==1.26.*
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
||||||
numpy==1.26.*
|
numpy==1.26.*
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
pyyaml
|
pyyaml
|
||||||
requests
|
requests
|
||||||
rich
|
rich
|
||||||
|
|
Loading…
Add table
Reference in a new issue