Handle the WYSIWYG editor better

This commit is contained in:
Ryan Voots 2024-12-19 11:54:58 -05:00
parent 11ad767336
commit 264e3e48c8

View file

@@ -20,19 +20,30 @@ def fetch_page_content(page, url):
page.goto(url)
time.sleep(random.uniform(1, 3)) # Random delay to mimic human behavior
# Wait for the edit link to be visible
edit_link = page.query_selector('a[href*="?action=edit"]')
if not edit_link:
print(f"Edit link not found for {url}")
# Wait for the edit links to be visible
edit_links = page.query_selector_all('a[href*="?action=edit"]')
if not edit_links:
print(f"No edit links found for {url}")
return None
edit_href = edit_link.get_attribute('href')
if not edit_href:
print(f"Could not get edit URL for {url}")
return None
edit_source_link = None
edit_wysiwyg_link = None
# Construct the full edit URL
edit_url = urljoin(BASE_URL, edit_href)
for link in edit_links:
href = link.get_attribute('href')
if href and '?action=edit&mode=source' in href:
edit_source_link = urljoin(BASE_URL, href)
elif href and '?action=edit' in href:
edit_wysiwyg_link = urljoin(BASE_URL, href)
# Prioritize the edit source link
if edit_source_link:
edit_url = edit_source_link
elif edit_wysiwyg_link:
edit_url = edit_wysiwyg_link
else:
print(f"Could not find valid edit URL for {url}")
return None
page.goto(edit_url)
time.sleep(random.uniform(1, 3)) # Random delay to mimic human behavior