From 1d88456659d8e71800f6fb732b8cad7d36fa4c20 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 20:15:07 -0700 Subject: [PATCH] Add support for .docx attachments --- README.md | 2 +- modules/chat.py | 50 +++++++++++++++++++ requirements/full/requirements.txt | 1 + requirements/full/requirements_amd.txt | 1 + requirements/full/requirements_amd_noavx2.txt | 1 + .../full/requirements_apple_intel.txt | 1 + .../full/requirements_apple_silicon.txt | 1 + requirements/full/requirements_cpu_only.txt | 1 + .../full/requirements_cpu_only_noavx2.txt | 1 + requirements/full/requirements_noavx2.txt | 1 + requirements/full/requirements_nowheels.txt | 1 + requirements/portable/requirements.txt | 1 + .../portable/requirements_apple_intel.txt | 1 + .../portable/requirements_apple_silicon.txt | 1 + .../portable/requirements_cpu_only.txt | 1 + .../portable/requirements_cpu_only_noavx2.txt | 1 + requirements/portable/requirements_noavx2.txt | 1 + .../portable/requirements_nowheels.txt | 1 + requirements/portable/requirements_vulkan.txt | 1 + .../portable/requirements_vulkan_noavx2.txt | 1 + 20 files changed, 69 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 55df33d2..16b02539 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. - Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory. - 100% offline and private, with zero telemetry, external resources, or remote update requests. - Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats. -- **File attachments**: Upload text files and PDF documents to talk about their contents. 
def extract_docx_text(docx_path):
    """
    Extract plain text from a .docx file.

    The result contains, in this order: header text, the document body
    (paragraphs and tables, in document order), and footer text.
    Paragraphs are stripped and empty ones are dropped; every table row
    becomes one line with its cell texts separated by tabs.

    Headers and footers that are "linked to previous" are skipped: in
    python-docx such a header/footer exposes the content of the prior
    section, so reading it again would repeat the same text once per
    section.

    Args:
        docx_path: Path to the .docx file; passed straight to
            ``docx.Document``.

    Returns:
        The extracted text joined with newlines. On any failure
        (including python-docx not being installed) the error is logged
        and a bracketed error message is returned instead of raising.
    """
    try:
        # Imported inside the function so that a missing python-docx
        # install is reported through the same error path below.
        import docx
        from docx.oxml.table import CT_Tbl
        from docx.oxml.text.paragraph import CT_P
        from docx.table import Table
        from docx.text.paragraph import Paragraph

        def nonempty_texts(paragraphs):
            """Return the stripped text of every non-blank paragraph."""
            stripped = (para.text.strip() for para in paragraphs)
            return [text for text in stripped if text]

        doc = docx.Document(docx_path)
        sections = list(doc.sections)
        parts = []

        # 1) Headers: only sections that define their own header. A linked
        #    header would just replay the previous section's text.
        for section in sections:
            header = section.header
            if header.is_linked_to_previous:
                continue
            parts.extend(nonempty_texts(header.paragraphs))

        # 2) Body blocks (paragraphs and tables) in document order. The raw
        #    XML children are walked because doc.paragraphs / doc.tables
        #    would lose the relative ordering between the two kinds.
        for child in doc.element.body.iterchildren():
            if isinstance(child, CT_P):
                text = Paragraph(child, doc).text.strip()
                if text:
                    parts.append(text)
            elif isinstance(child, CT_Tbl):
                for row in Table(child, doc).rows:
                    parts.append("\t".join(cell.text.strip() for cell in row.cells))

        # 3) Footers: same linked-to-previous rule as for headers.
        for section in sections:
            footer = section.footer
            if footer.is_linked_to_previous:
                continue
            parts.extend(nonempty_texts(footer.paragraphs))

        return "\n".join(parts)

    except Exception as e:
        logger.error(f"Error extracting text from DOCX: {e}")
        return f"[Error extracting DOCX text: {str(e)}]"
b/requirements/full/requirements_amd_noavx2.txt index a478d7d3..4fb70eb1 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 96a48f32..a311ab9b 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 14b74081..30e8409a 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 0877d968..70949949 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index cab78237..318bb93a 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index de507308..e0cb84b4 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -16,6 +16,7 @@ Pillow>=9.5.0 psutil 
pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index 5d9f84ce..a412367c 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index fdae681d..bde310e1 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index a58f39f7..521edc0c 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 91ea3a6d..ef7946ff 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index 37e5aa40..a3ad743e 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index 
dcb2884b..eec052d3 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 8f1295bb..c9898a05 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt index 21805fe2..f6c866cf 100644 --- a/requirements/portable/requirements_nowheels.txt +++ b/requirements/portable/requirements_nowheels.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 858b4488..0de9c7cb 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index 569bae99..2bfb4d51 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich