修改获取网页内容代码

This commit is contained in:
太一 2025-04-13 16:34:44 +08:00
parent bd0c6a6ff0
commit b9c1a797cb
3 changed files with 58 additions and 7 deletions

View File

@ -35,3 +35,44 @@ def call_dify_workflow(input_data):
# print("article:", article)
return article
# ==========================调用coze工作流==========================
def call_coze_workflow(workflow_id, access_token, parameters, is_async=False,
                       app_id=None, timeout=None):
    """
    Run a Coze workflow through the Coze open API (POST /v1/workflow/run).

    :param workflow_id: ID of the Coze workflow to run
    :param access_token: personal access token, sent as a Bearer token
    :param parameters: input parameters passed to the workflow (dict)
    :param is_async: whether to run the workflow asynchronously, default False
    :param app_id: optional application ID; added to the request body only
        when it is not None
    :param timeout: optional timeout in seconds handed to ``requests.post``;
        the default None waits indefinitely, as before
    :return: the raw response body text when the HTTP status is 200
        (not parsed here, the caller decodes the JSON); otherwise a dict
        with the keys "error" and "detail"
    """
    url = "https://api.coze.cn/v1/workflow/run"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json",
    }
    data = {
        "workflow_id": workflow_id,
        "parameters": parameters,
        "is_async": is_async,
    }
    # Only send app_id when the caller supplied one, so the default request
    # body stays identical to what existing callers already send.
    if app_id is not None:
        data["app_id"] = app_id

    response = requests.post(url, json=data, headers=headers, timeout=timeout)
    if response.status_code == 200:
        return response.text

    # Non-200: hand back the status code and the raw body for diagnosis.
    return {
        "error": f"请求失败,状态码:{response.status_code}",
        "detail": response.text,
    }

View File

@ -20,15 +20,17 @@ def process_link(link):
处理单个链接
"""
try:
title_text, article_text, img_urls = "","",[]
if str(link).startswith("https://www.toutiao.com/w"):
if link.startswith("https://www.toutiao.com"):
title_text, article_text, img_urls = toutiao_w_extract_content(link)
elif str(link).startswith("https://www.toutiao.com/article/"):
if title_text == "":
title_text, article_text, img_urls = toutiao_extract_content(link)
elif link.startswith("https://mp.weixin.qq.co"):
title_text, article_text, img_urls = wechat_extract_content(link)
else:
title_text, article_text, img_urls = "", "", []
if title_text == "":
return
# 获取数据库配置
host = CONFIG['Database']['host']
@ -162,6 +164,7 @@ def worker():
# 处理链接
try:
logger.info(f"开始处理链接:{link}")
process_link(link)
result_queue.put((link, True, None)) # 成功
except Exception as e:
@ -181,6 +184,12 @@ def process_links_with_threads(links, num_threads=None):
else:
num_threads = min(num_threads, MAX_THREADS, len(links))
# 清空任务队列和结果队列
while not task_queue.empty():
task_queue.get()
while not result_queue.empty():
result_queue.get()
# 创建工作线程
threads = []
for _ in range(num_threads):

View File

@ -1,7 +1,8 @@
from get_web_content import toutiao_extract_content
from get_web_content import wechat_extract_content,toutiao_w_extract_content,toutiao_extract_content
title,article,imgs = toutiao_extract_content("https://www.toutiao.com/article/7491890368917602825/?log_from=ab01481cf63ba_1744526333347")
title,article,imgs = wechat_extract_content("https://mp.weixin.qq.com/s/3KejJOMuY2y6LA5k1tNwcg")
# title,article,imgs = toutiao_extract_content("https://www.toutiao.com/article/7491890368917602825/?log_from=ab01481cf63ba_1744526333347")
print("title:",title)