Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2025-06-09 07:07:16 -04:00)
Download fetched web search results in parallel
parent 7080a02252
commit 75d6cfd14d
1 changed file with 34 additions and 10 deletions
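
The change replaces the sequential download loop with a thread pool: each search-result URL is submitted to a concurrent.futures.ThreadPoolExecutor, finished downloads are collected with as_completed, and a list pre-allocated by result index keeps the output in the original search order even though pages finish out of order. A minimal standalone sketch of that pattern (the fetch() helper and the urls list are placeholders for illustration, not code from this repository):

import concurrent.futures
from concurrent.futures import as_completed


def fetch(url):
    # Placeholder for a real downloader such as download_web_page(url)
    return f"<content of {url}>"


urls = ["https://example.com/a", "https://example.com/b", "https://example.com/c"]
contents = [None] * len(urls)  # Pre-allocate so results keep the original order

with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    # Map each future back to the index of the URL it is fetching
    future_to_index = {executor.submit(fetch, url): i for i, url in enumerate(urls)}
    for future in as_completed(future_to_index):
        index = future_to_index[future]
        try:
            contents[index] = future.result()
        except Exception:
            contents[index] = ""  # Failed downloads fall back to empty content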
@@ -1,3 +1,5 @@
+import concurrent.futures
+from concurrent.futures import as_completed
 from datetime import datetime
 
 import requests
@@ -5,7 +7,6 @@ from bs4 import BeautifulSoup
 from duckduckgo_search import DDGS
 
 from modules.logging_colors import logger
-from modules.text_generation import generate_reply
 
 
 def get_current_timestamp():
@@ -40,27 +41,50 @@ def download_web_page(url, timeout=5):
         return f"[Error downloading content from {url}: {str(e)}]"
 
 
-def perform_web_search(query, num_pages=3):
+def perform_web_search(query, num_pages=3, max_workers=5):
     """Perform web search and return results with content"""
     try:
         with DDGS() as ddgs:
             results = list(ddgs.text(query, max_results=num_pages))
 
-        search_results = []
+        # Prepare download tasks
+        download_tasks = []
         for i, result in enumerate(results):
            url = result.get('href', '')
            title = result.get('title', f'Search Result {i+1}')
+            download_tasks.append((url, title, i))
 
-            # Download page content
-            content = download_web_page(url)
+        search_results = [None] * len(download_tasks)  # Pre-allocate to maintain order
 
-            search_results.append({
+        # Download pages in parallel
+        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+            # Submit all download tasks
+            future_to_task = {
+                executor.submit(download_web_page, task[0]): task
+                for task in download_tasks
+            }
+
+            # Collect results as they complete
+            for future in as_completed(future_to_task):
+                url, title, index = future_to_task[future]
+                try:
+                    content = future.result()
+                    search_results[index] = {
                'title': title,
                'url': url,
                'content': content
-            })
+                    }
+                except Exception as e:
+                    logger.error(f"Error downloading {url}: {e}")
+                    # Include failed downloads with empty content
+                    search_results[index] = {
+                        'title': title,
+                        'url': url,
+                        'content': ''
+                    }
+
         return search_results
 
     except Exception as e:
         logger.error(f"Error performing web search: {e}")
         return []
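
Because the results list is pre-allocated and filled by index as futures complete, the returned list still matches the order of the DuckDuckGo results even though downloads finish in arbitrary order, and a failed download yields an entry with empty content rather than dropping the result. For reference, a call site might look like the sketch below; the module path modules.web_search is an assumption (the diff view above does not name the file), and the query string is purely illustrative:

from modules.web_search import perform_web_search  # module path assumed, not shown in the diff

# num_pages controls how many DuckDuckGo results are requested;
# max_workers (new in this commit) caps the number of concurrent downloads.
results = perform_web_search("python 3.13 release notes", num_pages=3, max_workers=5)

for item in results:
    print(item['title'], item['url'], len(item['content']))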