Selenium爬取文章实例

一个葫芦瓢啊 / 2024-01-23 / 原文

这一篇主要解决一个问题:有些页面需要在浏览器中不断向下滚动,才会加载并显示更多的内容;下面用 Selenium 模拟下拉操作,再抓取文章的标题和链接。

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import time

# Maps article title -> article URL for every entry scraped below.
thepaper_dict = {}

web = webdriver.Chrome(service=Service('chromedriver.exe'))
try:
    # Let element lookups wait up to 10 seconds before failing.
    web.implicitly_wait(10)

    web.get('https://www.thepaper.cn')
    time.sleep(5)
    web.maximize_window()
    time.sleep(5)
    # Click the fifth entry of the navigation menu to open that channel.
    web.find_element(By.XPATH, '//*[@id="navMenu"]/ul/li[5]/a').click()
    time.sleep(10)

    # Scroll down repeatedly to load as much content as possible.
    # Scrolling to the *current* page height each time matters: a fixed
    # offset would leave every pass after the first at the same position,
    # so newly appended content would never be reached.
    for _ in range(5):
        web.execute_script('window.scrollTo(0, document.body.scrollHeight)')
        # Give the page time to load the next batch before scrolling again.
        time.sleep(5)

    # Each matching element is expected to hold an <h2> title and an <a> link.
    for item in web.find_elements(By.CLASS_NAME, "small_toplink__GmZhY"):
        title = item.find_element(By.TAG_NAME, 'h2').text
        url = item.find_element(By.TAG_NAME, 'a').get_attribute('href')
        thepaper_dict[title] = url
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and running it in `finally` releases them even if a step above raised.
    web.quit()