"""Scrape app listings from a Wandoujia category page.

Request notes recorded by the original author:

1. Request URL
       https://www.wandoujia.com/category/6001
2. Request method
       GET
3. Request headers
       User-Agent:
           Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36
       Cookie

The scraper follows the usual three steps: send the request, parse the
response, save the data.
"""
import re

# Category index page that the script scrapes when run directly.
CATEGORY_URL = 'https://www.wandoujia.com/category/6001'

# File that save_data() appends to by default.
OUTPUT_PATH = 'wandoujia.txt'

# save_data() needs exactly: detail url, app name, download count, app size.
FIELD_COUNT = 4

# Fragment the author noted while building the pattern: "(.*?)休闲益智".
#
# NOTE(review): the pattern text below is kept exactly as it appears in this
# file. It contains a single capture group, so findall() yields plain strings
# rather than the four-field tuples save_data() requires; it looks as if the
# HTML markup (and three capture groups) were lost from the literal. Restore
# the full pattern against the live page markup, or pass a four-group pattern
# to parse_index(), before relying on the default.
APP_PATTERN = re.compile(
    '捕鱼大作战\n'
    '捕鱼大作战,经典街机新体验.*?.*?(.*?)万人安装 ・ .*?MB',
    re.S,
)


# 1. Send the request
def get_page(url, headers=None, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the ``requests`` response.

    Args:
        url: Address of the page to download.
        headers: Optional mapping of extra request headers (for example the
            User-Agent listed in the module docstring). ``None`` sends the
            library defaults, as the original code did.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        The response object; callers read the body through ``.text``.
    """
    # Imported here so the parsing/saving helpers stay importable on machines
    # where the third-party ``requests`` package is not installed.
    import requests

    response = requests.get(url, headers=headers, timeout=timeout)
    return response


# 2. Parse the data
def parse_index(html, pattern=APP_PATTERN):
    """Return every match of ``pattern`` found in ``html``.

    Args:
        html: Text of the index page.
        pattern: Compiled pattern, or pattern string (compiled with
            ``re.S`` so ``.`` also matches newlines). Defaults to
            ``APP_PATTERN``.

    Returns:
        The list produced by ``findall``: tuples when the pattern has several
        capture groups, plain strings when it has one or none. An empty list
        when nothing matches.
    """
    if isinstance(pattern, str):
        pattern = re.compile(pattern, re.S)
    return pattern.findall(html)


# 3. Save the data
def save_data(movie, path=OUTPUT_PATH):
    """Print one app record and append it to ``path``.

    Args:
        movie: Iterable of exactly four fields, in order: detail page url,
            app name, download count, app size.
        path: File to append to (UTF-8). Defaults to ``OUTPUT_PATH``.

    Raises:
        ValueError: If ``movie`` is a string or does not hold four fields.
    """
    # A bare string is what findall() returns for a one-group pattern; a
    # 4-character string would otherwise unpack "successfully" into four
    # single characters and write garbage to the file.
    if isinstance(movie, (str, bytes)):
        raise ValueError(
            f'expected {FIELD_COUNT} fields per app, got a single string '
            f'{movie!r}; the parsing pattern needs {FIELD_COUNT} capture groups'
        )
    fields = tuple(movie)
    if len(fields) != FIELD_COUNT:
        raise ValueError(
            f'expected {FIELD_COUNT} fields per app, got {len(fields)}: {fields!r}'
        )
    detail_url, app_name, download_num, app_size = fields

    # One field per line, followed by blank lines separating records.
    data = (
        f'\n游戏名称:{app_name}\n'
        f'详情页url:{detail_url}\n'
        f'下载人数:{download_num}万人\n'
        f'app大小:{app_size}MB\n'
        '\n\n'
    )
    print(data)
    with open(path, 'a', encoding='utf-8') as f:
        f.write(data)


def main():
    """Download the category page, extract the apps and save each one."""
    # 1. Send the request to the index page.
    index_res = get_page(CATEGORY_URL)
    # 2. Parse the index page to get the app records.
    app_list = parse_index(index_res.text)
    # 3. Save every record.
    for app in app_list:
        save_data(app)


if __name__ == '__main__':
    main()