Mistake:
... for res in _socket.getaddrinfo (host, port, family, type, proto, flags):
socket.gaierror: [Errno 11001] getaddrinfo failed
During loading
Python Code:
# -*- coding: utf-8 -*- import urllib.request from lxml.html import parse WEBSITE = 'http://allrecipes.com' URL_PAGE = 'http://allrecipes.com/recipes/110/appetizers-and-snacks/deviled-eggs/?page=' START_PAGE = 1 END_PAGE = 5 def correct_str(s): return s.encode('utf-8').decode('ascii', 'ignore').strip() for i in range(START_PAGE, END_PAGE+1): URL = URL_PAGE + str(i) HTML = urllib.request.urlopen(URL) page = parse(HTML).getroot() # пропускаем видео for elem in page.xpath('//*[@id="grid"]/article[not(contains(@class, "video-card"))]/a[1]'): href = WEBSITE + elem.get('href') title = correct_str(elem.find('h3').text) recipe_page = parse(urllib.request.urlopen(href)).getroot() photo_url = recipe_page.xpath('//img[@class="rec-photo"]')[0].get('src') print('\nName: |', title) print('Photo: |', photo_url) Console - Results:
Traceback (most recent call last): Name: | Crab-Stuffed Deviled Eggs File "C:\Users\In\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 1240, in do_open Photo: | http://images.media-allrecipes.com/userphotos/720x405/1091564.jpg h.request(req.get_method(), req.selector, req.data, headers) File "C:\Users\In\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 1083, in request self._send_request(method, url, body, headers) File "C:\Users\In\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 1128, in _send_request self.endheaders(body) File "C:\Users\In\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 1079, in endheaders self._send_output(message_body) File "C:\Users\In\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 911, in _send_output self.send(msg) File "C:\Users\In\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 854, in send self.connect() File "C:\Users\In\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 826, in connect (self.host,self.port), self.timeout, self.source_address) File "C:\Users\In\AppData\Local\Programs\Python\Python35-32\lib\socket.py", line 693, in create_connection for res in getaddrinfo(host, port, 0, SOCK_STREAM): File "C:\Users\In\AppData\Local\Programs\Python\Python35-32\lib\socket.py", line 732, in getaddrinfo for res in _socket.getaddrinfo(host, port, family, type, proto, flags): socket.gaierror: [Errno 11001] getaddrinfo failed During handling of the above exception, another exception occurred: Traceback (most recent call last): File "C:/Users/In/Dropbox/parser/test.py", line 27, in <module> recipe_page = parse(urllib.request.urlopen(href)).getroot() File "C:\Users\In\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 162, in urlopen return opener.open(url, data, timeout) File "C:\Users\In\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 465, in open response = self._open(req, data) File "C:\Users\In\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 483, in _open '_open', req) File "C:\Users\In\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 443, in _call_chain result = func(*args) File "C:\Users\In\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 1268, in http_open return self.do_open(http.client.HTTPConnection, req) File "C:\Users\In\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 1242, in do_open raise URLError(err) urllib.error.URLError: <urlopen error [Errno 11001] getaddrinfo failed> Process finished with exit code 1