修改获取网页内容代码
This commit is contained in:
parent
bd0c6a6ff0
commit
b9c1a797cb
@ -35,3 +35,44 @@ def call_dify_workflow(input_data):
|
||||
# print("article:", article)
|
||||
return article
|
||||
|
||||
|
||||
|
||||
# ==========================调用coze工作流==========================
|
||||
|
||||
|
||||
def call_coze_workflow(workflow_id, access_token, parameters, is_async=False, timeout=None):
    """Run a Coze workflow over HTTP and return the raw result.

    Sends a POST request with a JSON body to the Coze workflow-run endpoint.

    :param workflow_id: ID of the Coze workflow to run.
    :param access_token: personal access token, sent as a Bearer token.
    :param parameters: input parameters passed to the workflow (dict).
    :param is_async: whether to run the workflow asynchronously
        (default False); forwarded as the ``is_async`` field.
    :param timeout: optional timeout forwarded to ``requests.post``
        (seconds, or a ``(connect, read)`` tuple). The default ``None``
        keeps the previous behavior of waiting indefinitely; pass a value
        so a stalled connection cannot block the caller forever.
    :return: the response body as text when the HTTP status is 200;
        otherwise a dict with ``error`` and ``detail`` keys.
    """
    url = "https://api.coze.cn/v1/workflow/run"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json",
    }
    payload = {
        "workflow_id": workflow_id,
        "parameters": parameters,
        "is_async": is_async,
    }

    response = requests.post(url, json=payload, headers=headers, timeout=timeout)

    if response.status_code != 200:
        return {
            "error": f"请求失败,状态码:{response.status_code}",
            "detail": response.text,
        }

    # The body is returned unparsed; callers decode the JSON themselves.
    return response.text
|
||||
|
||||
|
||||
@ -20,15 +20,17 @@ def process_link(link):
|
||||
处理单个链接
|
||||
"""
|
||||
try:
|
||||
title_text, article_text, img_urls = "","",[]
|
||||
if str(link).startswith("https://www.toutiao.com/w"):
|
||||
if link.startswith("https://www.toutiao.com"):
|
||||
title_text, article_text, img_urls = toutiao_w_extract_content(link)
|
||||
elif str(link).startswith("https://www.toutiao.com/article/"):
|
||||
title_text, article_text, img_urls = toutiao_extract_content(link)
|
||||
if title_text == "":
|
||||
title_text, article_text, img_urls = toutiao_extract_content(link)
|
||||
elif link.startswith("https://mp.weixin.qq.co"):
|
||||
title_text, article_text, img_urls = wechat_extract_content(link)
|
||||
else:
|
||||
title_text, article_text, img_urls = "", "", []
|
||||
|
||||
|
||||
if title_text == "":
|
||||
return
|
||||
|
||||
# 获取数据库配置
|
||||
host = CONFIG['Database']['host']
|
||||
@ -162,6 +164,7 @@ def worker():
|
||||
|
||||
# 处理链接
|
||||
try:
|
||||
logger.info(f"开始处理链接:{link}")
|
||||
process_link(link)
|
||||
result_queue.put((link, True, None)) # 成功
|
||||
except Exception as e:
|
||||
@ -180,6 +183,12 @@ def process_links_with_threads(links, num_threads=None):
|
||||
num_threads = min(MAX_THREADS, len(links))
|
||||
else:
|
||||
num_threads = min(num_threads, MAX_THREADS, len(links))
|
||||
|
||||
# 清空任务队列和结果队列
|
||||
while not task_queue.empty():
|
||||
task_queue.get()
|
||||
while not result_queue.empty():
|
||||
result_queue.get()
|
||||
|
||||
# 创建工作线程
|
||||
threads = []
|
||||
|
||||
@ -1,7 +1,8 @@
|
||||
from get_web_content import toutiao_extract_content
|
||||
from get_web_content import wechat_extract_content,toutiao_w_extract_content,toutiao_extract_content
|
||||
|
||||
|
||||
title,article,imgs = toutiao_extract_content("https://www.toutiao.com/article/7491890368917602825/?log_from=ab01481cf63ba_1744526333347")
|
||||
title,article,imgs = wechat_extract_content("https://mp.weixin.qq.com/s/3KejJOMuY2y6LA5k1tNwcg")
|
||||
# title,article,imgs = toutiao_extract_content("https://www.toutiao.com/article/7491890368917602825/?log_from=ab01481cf63ba_1744526333347")
|
||||
|
||||
print("title:",title)
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user