修改获取网页内容代码
This commit is contained in:
parent
bd0c6a6ff0
commit
b9c1a797cb
@ -35,3 +35,44 @@ def call_dify_workflow(input_data):
|
|||||||
# print("article:", article)
|
# print("article:", article)
|
||||||
return article
|
return article
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# ==========================调用coze工作流==========================
|
||||||
|
|
||||||
|
|
||||||
|
def call_coze_workflow(workflow_id, access_token, parameters, is_async=False, timeout=None):
    """
    Run a Coze workflow through the HTTP API.

    :param workflow_id: ID of the Coze workflow to run
    :param access_token: personal access token, sent as a Bearer token
    :param parameters: input parameters passed to the workflow (dict)
    :param is_async: whether to run the workflow asynchronously (default False)
    :param timeout: optional timeout in seconds forwarded to ``requests.post``;
        the default None keeps the previous behavior of waiting indefinitely
    :return: the raw response body (str) when the HTTP status is 200;
        otherwise a dict with "error" and "detail" keys
    """
    url = "https://api.coze.cn/v1/workflow/run"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json",
    }
    payload = {
        "workflow_id": workflow_id,
        "parameters": parameters,
        "is_async": is_async,
    }

    response = requests.post(url, json=payload, headers=headers, timeout=timeout)

    # Any non-200 status is reported as an error dict rather than raised.
    if response.status_code != 200:
        return {
            "error": f"请求失败,状态码:{response.status_code}",
            "detail": response.text,
        }

    # The body is returned as raw, unparsed text.
    return response.text
|
||||||
|
|
||||||
|
|||||||
@ -20,15 +20,17 @@ def process_link(link):
|
|||||||
处理单个链接
|
处理单个链接
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
title_text, article_text, img_urls = "","",[]
|
if link.startswith("https://www.toutiao.com"):
|
||||||
if str(link).startswith("https://www.toutiao.com/w"):
|
|
||||||
title_text, article_text, img_urls = toutiao_w_extract_content(link)
|
title_text, article_text, img_urls = toutiao_w_extract_content(link)
|
||||||
elif str(link).startswith("https://www.toutiao.com/article/"):
|
if title_text == "":
|
||||||
title_text, article_text, img_urls = toutiao_extract_content(link)
|
title_text, article_text, img_urls = toutiao_extract_content(link)
|
||||||
|
elif link.startswith("https://mp.weixin.qq.co"):
|
||||||
|
title_text, article_text, img_urls = wechat_extract_content(link)
|
||||||
else:
|
else:
|
||||||
title_text, article_text, img_urls = "", "", []
|
title_text, article_text, img_urls = "", "", []
|
||||||
|
|
||||||
|
if title_text == "":
|
||||||
|
return
|
||||||
|
|
||||||
# 获取数据库配置
|
# 获取数据库配置
|
||||||
host = CONFIG['Database']['host']
|
host = CONFIG['Database']['host']
|
||||||
@ -162,6 +164,7 @@ def worker():
|
|||||||
|
|
||||||
# 处理链接
|
# 处理链接
|
||||||
try:
|
try:
|
||||||
|
logger.info(f"开始处理链接:{link}")
|
||||||
process_link(link)
|
process_link(link)
|
||||||
result_queue.put((link, True, None)) # 成功
|
result_queue.put((link, True, None)) # 成功
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -180,6 +183,12 @@ def process_links_with_threads(links, num_threads=None):
|
|||||||
num_threads = min(MAX_THREADS, len(links))
|
num_threads = min(MAX_THREADS, len(links))
|
||||||
else:
|
else:
|
||||||
num_threads = min(num_threads, MAX_THREADS, len(links))
|
num_threads = min(num_threads, MAX_THREADS, len(links))
|
||||||
|
|
||||||
|
# 清空任务队列和结果队列
|
||||||
|
while not task_queue.empty():
|
||||||
|
task_queue.get()
|
||||||
|
while not result_queue.empty():
|
||||||
|
result_queue.get()
|
||||||
|
|
||||||
# 创建工作线程
|
# 创建工作线程
|
||||||
threads = []
|
threads = []
|
||||||
|
|||||||
@ -1,7 +1,8 @@
|
|||||||
from get_web_content import toutiao_extract_content
|
from get_web_content import wechat_extract_content,toutiao_w_extract_content,toutiao_extract_content
|
||||||
|
|
||||||
|
|
||||||
title,article,imgs = toutiao_extract_content("https://www.toutiao.com/article/7491890368917602825/?log_from=ab01481cf63ba_1744526333347")
|
title,article,imgs = wechat_extract_content("https://mp.weixin.qq.com/s/3KejJOMuY2y6LA5k1tNwcg")
|
||||||
|
# title,article,imgs = toutiao_extract_content("https://www.toutiao.com/article/7491890368917602825/?log_from=ab01481cf63ba_1744526333347")
|
||||||
|
|
||||||
print("title:",title)
|
print("title:",title)
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user