# 用ChatGPT爬取CNN新闻并总结

import os
import datetime
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import openai
import time

# Configure the OpenAI API key. The OPENAI_API_KEY environment variable is
# preferred so that a real secret never has to be stored in this file; the
# literal below is the original placeholder, kept only as a fallback.
openai.api_key = os.environ.get(
    "OPENAI_API_KEY", 'sk-rCJsSuapC0JknFaaxxxxxxxxxxxxxxxxx'
)

# Today's date names the per-day output folder.
current_date = datetime.date.today()

# Build the output folder path and create it (no error if it already exists).
folder_path = os.path.join("C:/桌面/每日新闻", str(current_date))
os.makedirs(folder_path, exist_ok=True)

# Text file that receives the generated summaries.
file_path = os.path.join(folder_path, "CNN新闻.txt")

url = "https://edition.cnn.com/"

# Fetch the homepage. The timeout keeps a stalled connection from hanging the
# script forever, and raise_for_status() stops the run on an HTTP error
# instead of silently parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
html_content = response.content

soup = BeautifulSoup(html_content, "html.parser")

# Look up the element carrying this class string; `container` is None when
# the page has no such element.
# NOTE(review): presumably this is the navigation ribbon of section links --
# confirm against the live page markup.
container = soup.find(class_="container__field-links container_ribbon__field-links")

# Maximum number of OpenAI requests per minute. The original comment stated a
# 3 RPM budget while the code slept 60 / 30 = 2 s (30 RPM); the delay now
# matches the documented limit. Raise this value if the account allows more.
REQUESTS_PER_MINUTE = 3
time_between_requests = 60 / REQUESTS_PER_MINUTE

# Seconds to wait for a page before giving up, so that a stalled connection
# cannot hang the whole run.
REQUEST_TIMEOUT_SECONDS = 30

if container:
    links = container.find_all("a")

    # Open the output file once and write one summary per page.
    with open(file_path, "w", encoding="utf-8") as file:
        # Visit every link found in the container.
        for link in links:
            href = link.get("href")
            if not href:
                # An anchor without href would make urljoin() return the base
                # URL, i.e. the homepage would be fetched again.
                continue
            full_link = urljoin(url, href)

            # Only the network call sits in the try body, so unrelated
            # errors are not misreported as request failures.
            try:
                response = requests.get(full_link, timeout=REQUEST_TIMEOUT_SECONDS)
                response.raise_for_status()  # raise on 4xx / 5xx status codes
            except requests.RequestException as e:
                print(f"请求出错:{str(e)}")
                continue

            html = BeautifulSoup(response.content, "html.parser")
            articles = html.find_all(class_="article__content")

            # find_all() returns an empty list, never None, when nothing
            # matches; skip such pages instead of sending an empty prompt.
            if not articles:
                continue

            content = ' '.join(article.get_text() for article in articles)

            user_input = f"摘要以下文章内容:\n{content}\n摘要:"

            # Throttle the request rate to stay within the API limit.
            time.sleep(time_between_requests)

            # A failed completion (rate limit, outage, oversized prompt, ...)
            # skips this page rather than aborting the remaining ones.
            try:
                summary_response = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=[
                        {"role": "system", "content": "You are a helpful assistant."},
                        {"role": "user", "content": user_input},
                    ],
                    temperature=1,
                    max_tokens=256,
                )
            except openai.OpenAIError as e:
                print(f"摘要生成出错:{e}")
                continue

            summary = summary_response.choices[0].message['content'].strip()

            # Persist the summary, then echo it to the console.
            file.write(summary + "\n\n")
            print(summary)
            print('---------------------------------------------------------------------------------')

    print("文件写入完成!")
else:
    # Without the container there is nothing to crawl; say so explicitly
    # instead of exiting silently.
    print("未找到新闻链接容器,未写入任何内容。")