When parsing a multi-page site, it gives an error: StaleElementReferenceException, how to be?

from selenium import webdriver from selenium.webdriver.common.keys import Keys from selenium.webdriver.support.ui import WebDriverWait from selenium.common.exceptions import StaleElementReferenceException import time print("ENTER MAIL: ") mail = input() print("ENTER PASSWORD: ") pas = input() driver= webdriver.Firefox() driver.get('https://www.udemy.com') print("GOT URL\n") time.sleep(5) driver.find_element_by_xpath("//button[@data-purpose='header-login']").click() print("OPEN LOGIN FORM\n") time.sleep(5) webdriver.ActionChains(driver).move_by_offset(570, 295).click().send_keys(mail).perform() time.sleep(5) print("PRINT MAIL\n") webdriver.ActionChains(driver).move_by_offset(100, 65).click().send_keys(pas).perform() time.sleep(5) print("PRINT PASSWORD\n") webdriver.ActionChains(driver).move_by_offset(0, 60).click().perform() time.sleep(5) print("AUTORIZATION\n") driver.find_element_by_xpath("//a[@data-purpose='my-courses']").click() time.sleep(6) print("GO TO URL\n") #strongs = driver.find_elements_by_xpath("//strong[@class = 'details__name']") f = open('udemy_titles','w', encoding='utf8') f.write("USERNAME: ") f.write(mail + "\n") f.write("PASSWORD: ") f.write(pas + "\n") f.write("TOTAL COURSES: ") try: div = driver.find_element_by_xpath("//div[@class='pager-label']").text div_1 = div.split(" ")[-2] f.write(div_1 + "\n") hrf = driver.find_element_by_xpath("//ul[@class='pagination pagination-expanded']/li[7]").text y = 0 while int(hrf) > y: # strongs = driver.find_elements_by_xpath("//strong[@class = 'details__name']") # time.sleep(1) driver.implicitly_wait(10) strongs = driver.find_elements_by_xpath("//strong[@class = 'details__name']") for strong in strongs: print(strong.text) y += 1 f.write("COURSE " + "#") f.write(str(y) + ": ") f.write(strong.text + "\n") driver.find_element_by_xpath("//span[@class = 'pagination-next udi udi-next']").click() time.sleep(5) #driver.get('https://www.udemy.com/home/my-courses/learning/?p=8') except: print("ONE PAGE\n") selem = 0 for strong in strongs: print(strong.text) selem += 1 f.write("COURSE " + "#") f.write(str(selem) + ": ") f.write(strong.text + "\n") f.close() driver.close() time.sleep(2) 

Here is the site itself

The script works up to 8 pages, further knocks out an error, there are no any more ideas.

  • First, do not use time.sleep, use webdriverwait - danilshik instead
  • I can't check your code without login and password - danilshik pm
  • macparag@hotmail.com: mileage00 - Koplik 5:24 pm
  • so you have an error in the print line (strong.text). - danilshik
  • Thanks for the code, it seems to work. But there is no error like in “print (strong.text)”. (At least with me) - Koplik

1 answer 1

I usually go through url.

 from selenium import webdriver from selenium.webdriver.common.keys import Keys from selenium.webdriver.support.ui import WebDriverWait from selenium.common.exceptions import StaleElementReferenceException from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.common.by import By import time def parse_pagination(driver): #Находим элемент Ul у pagination ul_pagination = driver.find_element_by_css_selector("ul.pagination.pagination-expanded") #Находим li элементы, берем предпоследний элемент li_pagination = ul_pagination.find_elements_by_css_selector("li")[-2] #Определяем количество страниц count_page = int(li_pagination.find_element_by_css_selector("a").text) #Здесь будят хранятся ссылки urls = [] for i in range(2, count_page + 1): urls.append("https://www.udemy.com/home/my-courses/learning/?p=" + str(i)) return urls def parse_list(driver): #Набор карточек, ожидаем появления div_card_wrapper = WebDriverWait(driver, 10).until( EC.visibility_of_element_located((By.CSS_SELECTOR, "div.card-wrapper"))) # Карточки div_cards = div_card_wrapper.find_elements_by_css_selector("div.card.card--learning") for div_card in div_cards: #Ссылка a = div_card.find_element_by_css_selector("a.card--learning__details > div > strong") name = a.text print(name) print("ENTER MAIL: ") mail = "" print("ENTER PASSWORD: ") pas = "" driver= webdriver.Firefox() driver.get('https://www.udemy.com') print("GOT URL\n") time.sleep(5) driver.find_element_by_xpath("//button[@data-purpose='header-login']").click() print("OPEN LOGIN FORM\n") time.sleep(5) webdriver.ActionChains(driver).move_by_offset(570, 295).click().send_keys(mail).perform() time.sleep(5) print("PRINT MAIL\n") webdriver.ActionChains(driver).move_by_offset(100, 65).click().send_keys(pas).perform() time.sleep(5) print("PRINT PASSWORD\n") webdriver.ActionChains(driver).move_by_offset(0, 60).click().perform() time.sleep(5) print("AUTORIZATION\n") driver.find_element_by_xpath("//a[@data-purpose='my-courses']").click() time.sleep(6) print("GO TO URL\n") #strongs = driver.find_elements_by_xpath("//strong[@class = 'details__name']") f = open('udemy_titles','w', encoding='utf8') f.write("USERNAME: ") f.write(mail + "\n") f.write("PASSWORD: ") f.write(pas + "\n") f.write("TOTAL COURSES: ") parse_list(driver) urls = parse_pagination(driver) for url in urls: driver.get(url) parse_list(driver) driver.close() time.sleep(2) 

Add the rest yourself, that I deleted. And if possible, use Webdriver, this will increase the speed of parsing.