forked from daacheng/PythonBasic
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpythonForTouTiaoNews.py
More file actions
55 lines (49 loc) · 1.53 KB
/
pythonForTouTiaoNews.py
File metadata and controls
55 lines (49 loc) · 1.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from bs4 import BeautifulSoup
import requests
import json
import csv
# Fetch the scraped data and save it to a CSV file
def get_data(url):
    """Download a JSON document from *url* and append its news entries to a CSV file.

    The response body is decoded as UTF-8 and parsed as JSON. Every entry of
    its ``data`` list becomes one row appended to ``toutiao_news.csv`` with
    the columns: title, article URL, keywords, comment count.

    A field that is absent from an entry is written as an empty string; it
    never inherits the value of an earlier entry.

    Args:
        url: Address to request with HTTP GET.

    Raises:
        requests.RequestException: If the request fails or times out.
        ValueError: If the response body is not valid JSON.
    """
    # A timeout keeps one stalled connection from blocking the whole crawl.
    res = requests.get(url, timeout=10)
    res.encoding = 'utf-8'
    payload = json.loads(res.text)

    rows = []
    # Treat a missing or null 'data' field as "no results" instead of raising.
    for news in payload.get('data') or []:
        # The title is stored under extra.titles_terms; spaces are stripped
        # from it before it is written.
        extra = news.get('extra') or {}
        title = extra.get('titles_terms', '').replace(' ', '')
        rows.append([
            title,
            news.get('article_url', ''),
            news.get('keywords', ''),
            str(news.get('comments_count', '')),
        ])

    # Nothing to write: leave the file untouched (and uncreated).
    if not rows:
        return

    # Open the file once per page rather than once per row; the context
    # manager closes it, so no explicit close() is needed.
    with open('toutiao_news.csv', 'a', newline='', encoding='utf-8') as f:
        csv.writer(f).writerows(rows)
# Search terms to crawl, one per line of the literal below.
pkeywords = '''阿里巴巴
人工智能
AI
腾讯
王者荣耀
小米
马云
雷军
无人汽车
机器人'''.splitlines()
print(pkeywords)

# Request nine result pages per search term; the offset grows by 20
# (the page size passed as count=20) from one page to the next.
for keyword in pkeywords:
    for offset in range(0, 9 * 20, 20):
        url = (
            'https://www.toutiao.com/search_content/?offset=' + str(offset)
            + '&format=json&keyword=' + keyword
            + '&autoload=true&count=20&cur_tab=1&from=search_tab'
        )
        get_data(url)