09通过正则表达式提取电影名称
import re
import sys
from time import sleep

import requests

# Target movie site: https://nnyy.best/ -- scrape movie titles as a test.
LIST_URL = 'https://nnyy.best/dianying'

# Inclusive range of listing pages to crawl.
FIRST_PAGE = 1
LAST_PAGE = 10

# Pause between consecutive page requests, in seconds.
PAGE_DELAY_SECONDS = 3

# Upper bound on how long a single request may block; without it a stalled
# connection would hang the script indefinitely.
REQUEST_TIMEOUT_SECONDS = 10

# Browser-like request headers sent with every page request.
HEADERS = {
    'authority': 'nnyy.best',
    'cache-control': 'max-age=0',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-user': '?1',
    'sec-fetch-dest': 'document',
    'referer': 'https://nnyy.best/dianying?page=2',
    'accept-language': 'zh-CN,zh;q=0.9',
}

# Earlier variant of the pattern, kept for reference:
# r'<a href="/movie/.*?truncate group-hover:.*?text-center">(.*?)</a>'
#
# Compiled once at module level so the page loop does not look it up again.
# re.S lets ".*?" span newlines inside the anchor tag.
TITLE_PATTERN = re.compile(
    r'<a href="/movie/.*?\[#ec2d7a] text-center">(.*?)</a>', re.S
)


def extract_titles(html):
    """Return the captured link text for every TITLE_PATTERN match in *html*.

    Args:
        html: Page markup as a string.

    Returns:
        A list of captured strings, in document order; empty if nothing matches.
    """
    return TITLE_PATTERN.findall(html)


def fetch_page(session, page):
    """Download one listing page and return its body text.

    Args:
        session: A ``requests.Session`` used to issue the request.
        page: Page number passed as the ``page`` query parameter.

    Returns:
        The response body decoded as text.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx HTTP status.
    """
    response = session.get(
        LIST_URL,
        params={'page': page},
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Fail loudly on error statuses instead of regex-scanning an error page.
    response.raise_for_status()
    return response.text


def main():
    """Crawl pages FIRST_PAGE..LAST_PAGE and print every extracted title."""
    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as session:
        for page in range(FIRST_PAGE, LAST_PAGE + 1):
            print(f"爬取第{page}页")
            try:
                html = fetch_page(session, page)
            except requests.RequestException as exc:
                # One bad page should not abort the rest of the crawl.
                print(f"page {page} failed: {exc}", file=sys.stderr)
            else:
                for title in extract_titles(html):
                    print(title)
            # Be polite between requests; no need to wait after the last page.
            if page != LAST_PAGE:
                sleep(PAGE_DELAY_SECONDS)


if __name__ == "__main__":
    main()