Hello everybody.
I try to download links and information on them from the main page of the habr. Pulling out the links themselves is obtained, but when I try to do the same with the text, I get:
list index out of range.
I tried to check the URL, so nothing at all. Looks weird, help me figure it out. Thank.
from urllib.request import urlopen from urllib.parse import urljoin from lxml.html import fromstring URL = 'https://habrahabr.ru' ITEM_PATH = '.post__header .post__title_link' DESCRIBE_PATH = '.content html_format' def parse_cources(): f = urlopen( URL ) list_html = f.read().decode( 'utf-8' ) list_doc = fromstring(list_html) for elem in list_doc.cssselect(ITEM_PATH): a = elem.cssselect('a')[0] href = a.get('href') name = a.text details_html = urlopen(URL).read().decode('utf-8') details_doc = fromstring(details_html) descr_elem = details_doc.cssselect(DESCRIBE_PATH)[0] ''' if URL in details_doc: descr_elem = details_doc.cssselect(DESCRIBE_PATH)[0] print(descr_elem.text_content()) else: continue ''' print(descr_elem.text_content()) def main(): parse_cources() if __name__ == '__main__': main()