mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2025-06-07 06:06:20 -04:00
Add web search support (#7023)
This commit is contained in:
parent
1b0e2d8750
commit
077bbc6b10
22 changed files with 181 additions and 2 deletions
|
@ -31,6 +31,7 @@ from modules.text_generation import (
|
|||
get_max_prompt_length
|
||||
)
|
||||
from modules.utils import delete_file, get_available_characters, save_file
|
||||
from modules.web_search import add_web_search_attachments
|
||||
|
||||
|
||||
def strftime_now(format):
|
||||
|
@ -566,6 +567,9 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
|
|||
for file_path in files:
|
||||
add_message_attachment(output, row_idx, file_path, is_user=True)
|
||||
|
||||
# Add web search results as attachments if enabled
|
||||
add_web_search_attachments(output, row_idx, text, state)
|
||||
|
||||
# Apply extensions
|
||||
text, visible_text = apply_extensions('chat_input', text, visible_text, state)
|
||||
text = apply_extensions('input', text, state, is_chat=True)
|
||||
|
|
|
@ -157,8 +157,6 @@ def list_model_elements():
|
|||
|
||||
def list_interface_input_elements():
|
||||
elements = [
|
||||
'navigate_message_index',
|
||||
'navigate_direction',
|
||||
'temperature',
|
||||
'dynatemp_low',
|
||||
'dynatemp_high',
|
||||
|
@ -218,6 +216,10 @@ def list_interface_input_elements():
|
|||
'edit_message_text',
|
||||
'edit_message_role',
|
||||
'branch_index',
|
||||
'enable_web_search',
|
||||
'web_search_pages',
|
||||
'navigate_message_index',
|
||||
'navigate_direction',
|
||||
]
|
||||
|
||||
# Chat elements
|
||||
|
|
|
@ -86,6 +86,12 @@ def create_ui():
|
|||
with gr.Row():
|
||||
shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
|
||||
|
||||
with gr.Row():
|
||||
shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search')
|
||||
|
||||
with gr.Row(visible=shared.settings.get('enable_web_search', False)) as shared.gradio['web_search_row']:
|
||||
shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label='Number of pages to download', minimum=1, maximum=10)
|
||||
|
||||
with gr.Row():
|
||||
shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
|
||||
|
||||
|
@ -369,3 +375,9 @@ def create_event_handlers():
|
|||
shared.gradio['count_tokens'].click(
|
||||
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
|
||||
chat.count_prompt_tokens, gradio('textbox', 'interface_state'), gradio('token_display'), show_progress=False)
|
||||
|
||||
shared.gradio['enable_web_search'].change(
|
||||
lambda x: gr.update(visible=x),
|
||||
gradio('enable_web_search'),
|
||||
gradio('web_search_row')
|
||||
)
|
||||
|
|
125
modules/web_search.py
Normal file
125
modules/web_search.py
Normal file
|
@ -0,0 +1,125 @@
|
|||
from datetime import datetime
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from duckduckgo_search import DDGS
|
||||
|
||||
from modules.logging_colors import logger
|
||||
from modules.text_generation import generate_reply
|
||||
|
||||
|
||||
def get_current_timestamp():
    """Return the current local time as e.g. 'Jun 07, 2025 14:30' (24-hour clock)."""
    return f"{datetime.now():%b %d, %Y %H:%M}"
|
||||
|
||||
|
||||
def generate_search_query(user_message, state):
    """Ask the LLM to condense *user_message* into a short web search query.

    Streams the model's reply and returns only the final (stripped) text;
    an empty string means the model produced nothing usable.
    """
    search_prompt = f"{user_message}\n\n=====\n\nPlease turn the message above into a short web search query in the same language as the message. Respond with only the search query, nothing else."

    # Clone the generation state so the temporary overrides below do not
    # leak back into the caller's settings.
    search_state = state.copy()
    search_state['max_new_tokens'] = 64  # a query should be short
    search_state['temperature'] = 0.1    # keep the query near-deterministic

    last_reply = ""
    for partial in generate_reply(search_prompt, search_state, stopping_strings=[], is_chat=False):
        last_reply = partial

    return last_reply.strip()
|
||||
|
||||
|
||||
def download_web_page(url, timeout=10):
    """Fetch *url* and return its visible text collapsed onto one line.

    Best-effort: on any failure a bracketed error string is returned
    instead of raising, so callers can still attach *something*.
    """
    # A browser-like UA avoids trivial bot blocking on many sites.
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        response = requests.get(url, headers=request_headers, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Drop non-visible content before extracting text.
        for tag in soup(["script", "style"]):
            tag.decompose()

        # Normalize whitespace: strip each line, split on spaces, then
        # re-join the non-empty pieces with single spaces.
        stripped_lines = (raw.strip() for raw in soup.get_text().splitlines())
        pieces = (part.strip() for raw in stripped_lines for part in raw.split(" "))
        return ' '.join(piece for piece in pieces if piece)
    except Exception as e:
        logger.error(f"Error downloading {url}: {e}")
        return f"[Error downloading content from {url}: {str(e)}]"
|
||||
|
||||
|
||||
def perform_web_search(query, num_pages=3):
    """Search DuckDuckGo for *query* and download up to *num_pages* results.

    Returns a list of dicts with 'title', 'url', and 'content' keys;
    an empty list on any failure (the error is logged, not raised).
    """
    try:
        with DDGS() as ddgs:
            raw_hits = list(ddgs.text(query, max_results=num_pages))

        collected = []
        for i, hit in enumerate(raw_hits):
            link = hit.get('href', '')
            collected.append({
                'title': hit.get('title', f'Search Result {i+1}'),
                'url': link,
                # Fetch the full page body for each hit up front.
                'content': download_web_page(link),
            })

        return collected
    except Exception as e:
        logger.error(f"Error performing web search: {e}")
        return []
|
||||
|
||||
|
||||
def add_web_search_attachments(history, row_idx, user_message, state):
    """Perform a web search for *user_message* and attach the results.

    No-op unless state['enable_web_search'] is truthy. On success, each
    downloaded page is appended as an attachment dict (name/type/content)
    under history['metadata'][f"user_{row_idx}"]["attachments"], creating
    the metadata entry if needed. All failures are logged and swallowed
    so a broken search never blocks chat generation.

    Args:
        history: chat history dict; must contain a 'metadata' dict.
        row_idx: index of the user message the attachments belong to.
        user_message: raw user text used to derive the search query.
        state: interface state dict ('enable_web_search', 'web_search_pages').
    """
    if not state.get('enable_web_search', False):
        return

    try:
        # Let the LLM turn the free-form message into a short query.
        search_query = generate_search_query(user_message, state)
        if not search_query:
            logger.warning("Failed to generate search query")
            return

        logger.info(f"Generated search query: {search_query}")

        num_pages = int(state.get('web_search_pages', 3))
        search_results = perform_web_search(search_query, num_pages)

        if not search_results:
            logger.warning("No search results found")
            return

        # Ensure the metadata slot for this user message exists before appending.
        key = f"user_{row_idx}"
        if key not in history['metadata']:
            history['metadata'][key] = {"timestamp": get_current_timestamp()}
        if "attachments" not in history['metadata'][key]:
            history['metadata'][key]["attachments"] = []

        for result in search_results:
            attachment = {
                "name": result['title'],  # was a redundant f-string wrapper
                "type": "text/html",
                "content": f"URL: {result['url']}\n\n{result['content']}"
            }
            history['metadata'][key]["attachments"].append(attachment)

        logger.info(f"Added {len(search_results)} web search results as attachments")

    except Exception as e:
        logger.error(f"Error in web search: {e}")
|
|
@ -1,7 +1,9 @@
|
|||
accelerate==1.5.*
|
||||
beautifulsoup4==4.13.4
|
||||
bitsandbytes==0.45.*
|
||||
colorama
|
||||
datasets
|
||||
duckduckgo_search==8.0.2
|
||||
einops
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
accelerate==1.5.*
|
||||
beautifulsoup4==4.13.4
|
||||
colorama
|
||||
datasets
|
||||
duckduckgo_search==8.0.2
|
||||
einops
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
accelerate==1.5.*
|
||||
beautifulsoup4==4.13.4
|
||||
colorama
|
||||
datasets
|
||||
duckduckgo_search==8.0.2
|
||||
einops
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
accelerate==1.5.*
|
||||
beautifulsoup4==4.13.4
|
||||
colorama
|
||||
datasets
|
||||
duckduckgo_search==8.0.2
|
||||
einops
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
accelerate==1.5.*
|
||||
beautifulsoup4==4.13.4
|
||||
colorama
|
||||
datasets
|
||||
duckduckgo_search==8.0.2
|
||||
einops
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
accelerate==1.5.*
|
||||
beautifulsoup4==4.13.4
|
||||
colorama
|
||||
datasets
|
||||
duckduckgo_search==8.0.2
|
||||
einops
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
accelerate==1.5.*
|
||||
beautifulsoup4==4.13.4
|
||||
colorama
|
||||
datasets
|
||||
duckduckgo_search==8.0.2
|
||||
einops
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
accelerate==1.5.*
|
||||
beautifulsoup4==4.13.4
|
||||
bitsandbytes==0.45.*
|
||||
colorama
|
||||
datasets
|
||||
duckduckgo_search==8.0.2
|
||||
einops
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
accelerate==1.5.*
|
||||
beautifulsoup4==4.13.4
|
||||
colorama
|
||||
datasets
|
||||
duckduckgo_search==8.0.2
|
||||
einops
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
beautifulsoup4==4.13.4
|
||||
duckduckgo_search==8.0.2
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
jinja2==3.1.6
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
beautifulsoup4==4.13.4
|
||||
duckduckgo_search==8.0.2
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
jinja2==3.1.6
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
beautifulsoup4==4.13.4
|
||||
duckduckgo_search==8.0.2
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
jinja2==3.1.6
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
beautifulsoup4==4.13.4
|
||||
duckduckgo_search==8.0.2
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
jinja2==3.1.6
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
beautifulsoup4==4.13.4
|
||||
duckduckgo_search==8.0.2
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
jinja2==3.1.6
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
beautifulsoup4==4.13.4
|
||||
duckduckgo_search==8.0.2
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
jinja2==3.1.6
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
beautifulsoup4==4.13.4
|
||||
duckduckgo_search==8.0.2
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
jinja2==3.1.6
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
beautifulsoup4==4.13.4
|
||||
duckduckgo_search==8.0.2
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
jinja2==3.1.6
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
beautifulsoup4==4.13.4
|
||||
duckduckgo_search==8.0.2
|
||||
fastapi==0.112.4
|
||||
gradio==4.37.*
|
||||
jinja2==3.1.6
|
||||
|
|
Loading…
Add table
Reference in a new issue