mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2025-06-07 14:17:09 -04:00
Add support for .docx attachments
This commit is contained in:
parent
dc8ed6dbe7
commit
1d88456659
20 changed files with 69 additions and 1 deletions
|
@ -16,7 +16,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.
|
|||
- Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory.
|
||||
- 100% offline and private, with zero telemetry, external resources, or remote update requests.
|
||||
- Automatic prompt formatting using Jinja2 templates. You never need to worry about prompt formats.
|
||||
- **File attachments**: Upload text files and PDF documents to talk about their contents.
|
||||
- **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents.
|
||||
- **Web search**: Optionally search the internet with LLM-generated queries to add context to the conversation.
|
||||
- Aesthetic UI with dark and light themes.
|
||||
- `instruct` mode for instruction-following (like ChatGPT), and `chat-instruct`/`chat` modes for talking to custom characters.
|
||||
|
|
|
@ -500,6 +500,9 @@ def add_message_attachment(history, row_idx, file_path, is_user=True):
|
|||
# Process PDF file
|
||||
content = extract_pdf_text(path)
|
||||
file_type = "application/pdf"
|
||||
elif file_extension == '.docx':
|
||||
content = extract_docx_text(path)
|
||||
file_type = "application/docx"
|
||||
else:
|
||||
# Default handling for text files
|
||||
with open(path, 'r', encoding='utf-8') as f:
|
||||
|
@ -538,6 +541,53 @@ def extract_pdf_text(pdf_path):
|
|||
return f"[Error extracting PDF text: {str(e)}]"
|
||||
|
||||
|
||||
def _docx_section_story_text(sections, story_name):
    """
    Collect the non-empty paragraph text of one per-section story
    ("header" or "footer") across the given document sections.

    Stories that are linked to the previous section are skipped: they
    have no content of their own and python-docx resolves them to the
    prior section's story, which would repeat the same text once per
    section.
    """
    collected = []
    for section in sections:
        story = getattr(section, story_name)
        if story.is_linked_to_previous:
            # Inherited (or absent) story; its text, if any, was already
            # collected from the section that actually defines it.
            continue

        for para in story.paragraphs:
            text = para.text.strip()
            if text:
                collected.append(text)

    return collected


def extract_docx_text(docx_path):
    """
    Extract text from a .docx file, including headers,
    body (paragraphs and tables), and footers.

    The output is ordered as: header text, body blocks in document
    order, then footer text, joined with newlines. Table rows are
    emitted one per line with cells separated by tabs. Headers and
    footers that a section merely inherits from the previous section
    are emitted only once.

    Returns the extracted text, or a bracketed error message string if
    the file cannot be read (errors are logged, never raised).
    """
    try:
        # Imported lazily so the module loads even when python-docx is
        # missing; the classes are imported explicitly rather than
        # reached through attribute access on the top-level package.
        from docx import Document
        from docx.oxml.table import CT_Tbl
        from docx.oxml.text.paragraph import CT_P
        from docx.table import Table
        from docx.text.paragraph import Paragraph

        doc = Document(docx_path)
        parts = []

        # 1) Extract non-empty header paragraphs from each section
        parts.extend(_docx_section_story_text(doc.sections, "header"))

        # 2) Extract body blocks (paragraphs and tables) in document order
        for child in doc.element.body.iterchildren():
            if isinstance(child, CT_P):
                text = Paragraph(child, doc).text.strip()
                if text:
                    parts.append(text)

            elif isinstance(child, CT_Tbl):
                for row in Table(child, doc).rows:
                    parts.append("\t".join(cell.text.strip() for cell in row.cells))

        # 3) Extract non-empty footer paragraphs from each section
        parts.extend(_docx_section_story_text(doc.sections, "footer"))

        return "\n".join(parts)

    except Exception as e:
        logger.error(f"Error extracting text from DOCX: {e}")
        return f"[Error extracting DOCX text: {str(e)}]"
|
||||
|
||||
|
||||
def generate_search_query(user_message, state):
|
||||
"""Generate a search query from user message using the LLM"""
|
||||
# Augment the user message with search instruction
|
||||
|
|
|
@ -16,6 +16,7 @@ Pillow>=9.5.0
|
|||
psutil
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
|
@ -15,6 +15,7 @@ Pillow>=9.5.0
|
|||
psutil
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
|
@ -15,6 +15,7 @@ Pillow>=9.5.0
|
|||
psutil
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
|
@ -15,6 +15,7 @@ Pillow>=9.5.0
|
|||
psutil
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
|
@ -15,6 +15,7 @@ Pillow>=9.5.0
|
|||
psutil
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
|
@ -15,6 +15,7 @@ Pillow>=9.5.0
|
|||
psutil
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
|
@ -15,6 +15,7 @@ Pillow>=9.5.0
|
|||
psutil
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
|
@ -16,6 +16,7 @@ Pillow>=9.5.0
|
|||
psutil
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
|
@ -15,6 +15,7 @@ Pillow>=9.5.0
|
|||
psutil
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
|||
numpy==1.26.*
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
|||
numpy==1.26.*
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
|||
numpy==1.26.*
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
|||
numpy==1.26.*
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
|||
numpy==1.26.*
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
|||
numpy==1.26.*
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
|||
numpy==1.26.*
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
|||
numpy==1.26.*
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
|
@ -7,6 +7,7 @@ markdown
|
|||
numpy==1.26.*
|
||||
pydantic==2.8.2
|
||||
PyPDF2==3.0.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
|
|
Loading…
Add table
Reference in a new issue