修改获取网页内容代码
This commit is contained in:
parent
65618b2c0b
commit
2d377de6fd
@ -13,6 +13,7 @@ def call_dify_workflow(input_data):
|
|||||||
:param input_data: 传递给工作流的输入数据
|
:param input_data: 传递给工作流的输入数据
|
||||||
:return: 工作流的输出结果
|
:return: 工作流的输出结果
|
||||||
"""
|
"""
|
||||||
|
logger.info("Dify开始工作。。。")
|
||||||
api_key = CONFIG['Dify']['api_key']
|
api_key = CONFIG['Dify']['api_key']
|
||||||
user_id = CONFIG['Dify']['user_id']
|
user_id = CONFIG['Dify']['user_id']
|
||||||
url = CONFIG['Dify']['url']
|
url = CONFIG['Dify']['url']
|
||||||
@ -40,17 +41,18 @@ def call_dify_workflow(input_data):
|
|||||||
# ==========================调用coze工作流==========================
|
# ==========================调用coze工作流==========================
|
||||||
|
|
||||||
|
|
||||||
def call_coze_workflow(workflow_id, access_token, parameters,is_async=False):
|
def call_coze_workflow(parameters):
|
||||||
"""
|
"""
|
||||||
调用 Coze 工作流的函数
|
调用 Coze 工作流的函数
|
||||||
|
|
||||||
:param workflow_id: Coze 工作流 ID
|
|
||||||
:param access_token: 个人访问令牌(Access Token)
|
|
||||||
:param parameters: 传递给工作流的输入参数(字典格式)
|
:param parameters: 传递给工作流的输入参数(字典格式)
|
||||||
:param app_id: 应用 ID(可选)
|
|
||||||
:param is_async: 是否异步执行(默认 False)
|
|
||||||
:return: 工作流的执行结果
|
:return: 工作流的执行结果
|
||||||
"""
|
"""
|
||||||
|
logger.info("Coze开始工作。。。。")
|
||||||
|
workflow_id = CONFIG['Coze']['workflow_id']
|
||||||
|
access_token = CONFIG['Coze']['access_token']
|
||||||
|
is_async = CONFIG['Coze']['is_async'].lower() == 'true'
|
||||||
|
|
||||||
url = "https://api.coze.cn/v1/workflow/run"
|
url = "https://api.coze.cn/v1/workflow/run"
|
||||||
headers = {
|
headers = {
|
||||||
"Authorization": f"Bearer {access_token}",
|
"Authorization": f"Bearer {access_token}",
|
||||||
|
|||||||
@ -15,6 +15,11 @@ DEFAULT_CONFIG = {
|
|||||||
"title_file": "文章链接.xlsx",
|
"title_file": "文章链接.xlsx",
|
||||||
"max_threads": "3"
|
"max_threads": "3"
|
||||||
},
|
},
|
||||||
|
"Coze": {
|
||||||
|
"workflow_id": "",
|
||||||
|
"access_token": "",
|
||||||
|
"is_async": "false"
|
||||||
|
},
|
||||||
"Database": {
|
"Database": {
|
||||||
"host": "27.106.125.150",
|
"host": "27.106.125.150",
|
||||||
"user": "root",
|
"user": "root",
|
||||||
|
|||||||
@ -1,19 +1,18 @@
|
|||||||
|
|
||||||
import threading
|
import threading
|
||||||
import queue
|
import queue
|
||||||
|
import json # 导入 json 模块
|
||||||
|
|
||||||
from ai_studio import call_dify_workflow, call_coze_workflow
|
from ai_studio import call_dify_workflow, call_coze_workflow
|
||||||
from databases import *
|
from databases import *
|
||||||
|
|
||||||
|
|
||||||
from images_edit import download_and_process_images
|
from images_edit import download_and_process_images
|
||||||
from utils import *
|
from utils import *
|
||||||
from get_web_content import *
|
from get_web_content import *
|
||||||
from config import *
|
from config import *
|
||||||
|
|
||||||
|
|
||||||
# ==============================主程序===========================
|
# ==============================主程序===========================
|
||||||
def process_link(link, ai_service="dify"):
|
def process_link(link, ai_service):
|
||||||
"""
|
"""
|
||||||
处理单个链接
|
处理单个链接
|
||||||
:param link: 要处理的链接
|
:param link: 要处理的链接
|
||||||
@ -31,8 +30,10 @@ def process_link(link, ai_service="dify"):
|
|||||||
|
|
||||||
if title_text == "":
|
if title_text == "":
|
||||||
return
|
return
|
||||||
|
elif len(title_text) > 100:
|
||||||
|
return
|
||||||
|
|
||||||
# 获取数据库配置
|
# 获取数据库配置
|
||||||
host = CONFIG['Database']['host']
|
host = CONFIG['Database']['host']
|
||||||
user = CONFIG['Database']['user']
|
user = CONFIG['Database']['user']
|
||||||
password = CONFIG['Database']['password']
|
password = CONFIG['Database']['password']
|
||||||
@ -41,11 +42,6 @@ def process_link(link, ai_service="dify"):
|
|||||||
# 判断文章内容是否有违禁词
|
# 判断文章内容是否有违禁词
|
||||||
check_keywords = check_keywords_in_text(title_text)
|
check_keywords = check_keywords_in_text(title_text)
|
||||||
|
|
||||||
if check_keywords:
|
|
||||||
print("文章中有违禁词!")
|
|
||||||
check_link_insert(host, user, password, database, link)
|
|
||||||
return
|
|
||||||
|
|
||||||
title = extract_content_until_punctuation(article_text).replace("正文:", "")
|
title = extract_content_until_punctuation(article_text).replace("正文:", "")
|
||||||
|
|
||||||
print(title)
|
print(title)
|
||||||
@ -59,20 +55,55 @@ def process_link(link, ai_service="dify"):
|
|||||||
# 打印当前时间
|
# 打印当前时间
|
||||||
print("当前时间:", current_time)
|
print("当前时间:", current_time)
|
||||||
|
|
||||||
input_data = {
|
|
||||||
"old_article": article_text
|
|
||||||
}
|
|
||||||
|
|
||||||
if ai_service == "dify":
|
if ai_service == "dify":
|
||||||
input_data = {
|
if check_keywords:
|
||||||
"old_article": article_text
|
print("文章中有违禁词!")
|
||||||
}
|
check_link_insert(host, user, password, database, link)
|
||||||
|
return
|
||||||
|
# 从配置加载 input_data 模板
|
||||||
|
input_data_template_str = CONFIG['Dify'].get('input_data_template', '{{"old_article": "{article_text}"}}')
|
||||||
|
try:
|
||||||
|
# 解析模板字符串为字典
|
||||||
|
input_data_template = json.loads(input_data_template_str)
|
||||||
|
# 使用实际变量格式化模板
|
||||||
|
input_data = {k: v.format(article_text=article_text) for k, v in input_data_template.items()}
|
||||||
|
except (json.JSONDecodeError, KeyError, AttributeError) as e:
|
||||||
|
logger.error(f"处理 Dify input_data 模板时出错: {e}. 使用默认模板.")
|
||||||
|
input_data = {
|
||||||
|
"old_article": article_text
|
||||||
|
}
|
||||||
|
|
||||||
|
# input_data = {
|
||||||
|
# "old_article": article_text
|
||||||
|
# }
|
||||||
message_content = call_dify_workflow(input_data)
|
message_content = call_dify_workflow(input_data)
|
||||||
elif ai_service == "coze":
|
elif ai_service == "coze":
|
||||||
input_data = {
|
logger.info("coze正在处理")
|
||||||
"old_article": article_text
|
weijin = ""
|
||||||
}
|
if check_keywords:
|
||||||
message_content = call_coze_workflow(input_data)
|
weijin = "违禁"
|
||||||
|
# 从配置加载 Coze input_data 模板
|
||||||
|
input_data_template_str = CONFIG['Coze'].get('input_data_template',
|
||||||
|
'{{"article": "{article_text}", "link":"{link}", "weijin":"{weijin}"}}')
|
||||||
|
try:
|
||||||
|
# 解析模板字符串为字典
|
||||||
|
input_data_template = json.loads(input_data_template_str)
|
||||||
|
# 使用实际变量格式化模板
|
||||||
|
input_data = {k: v.format(article_text=article_text, link=link, weijin=weijin) for k, v in
|
||||||
|
input_data_template.items()}
|
||||||
|
except (json.JSONDecodeError, KeyError, AttributeError) as e:
|
||||||
|
logger.error(f"处理 Coze input_data 模板时出错: {e}. 使用默认模板.")
|
||||||
|
input_data = {
|
||||||
|
"article": article_text,
|
||||||
|
"link": link,
|
||||||
|
"weijin": weijin
|
||||||
|
}
|
||||||
|
|
||||||
|
msg = call_coze_workflow(input_data)
|
||||||
|
message_content = msg['article']
|
||||||
|
result = msg['result']
|
||||||
|
if result == "已经创作过":
|
||||||
|
return
|
||||||
# 获取当前时间并格式化
|
# 获取当前时间并格式化
|
||||||
current_time = datetime.now().strftime("%H:%M:%S")
|
current_time = datetime.now().strftime("%H:%M:%S")
|
||||||
|
|
||||||
@ -117,7 +148,6 @@ def process_link(link, ai_service="dify"):
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def link_to_text(prompt1=None, prompt2=None, num_threads=None, ai_service="dify"):
|
def link_to_text(prompt1=None, prompt2=None, num_threads=None, ai_service="dify"):
|
||||||
use_link_path = 'use_link_path.txt'
|
use_link_path = 'use_link_path.txt'
|
||||||
|
|
||||||
@ -139,14 +169,14 @@ def link_to_text(prompt1=None, prompt2=None, num_threads=None, ai_service="dify"
|
|||||||
else:
|
else:
|
||||||
filtered_links.append(link)
|
filtered_links.append(link)
|
||||||
logger.info(f"链接不存在: {link}")
|
logger.info(f"链接不存在: {link}")
|
||||||
print("链接不存在,存储到过滤器中:",link)
|
print("链接不存在,存储到过滤器中:", link)
|
||||||
|
|
||||||
if not filtered_links:
|
if not filtered_links:
|
||||||
logger.info("没有新链接需要处理")
|
logger.info("没有新链接需要处理")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# 使用多线程处理链接
|
# 使用多线程处理链接
|
||||||
results = process_links_with_threads(filtered_links, num_threads)
|
results = process_links_with_threads(filtered_links, num_threads, ai_service)
|
||||||
|
|
||||||
# 记录已处理的链接
|
# 记录已处理的链接
|
||||||
with open(use_link_path, 'a+', encoding='utf-8') as f:
|
with open(use_link_path, 'a+', encoding='utf-8') as f:
|
||||||
@ -163,7 +193,7 @@ result_queue = queue.Queue()
|
|||||||
|
|
||||||
|
|
||||||
# 工作线程函数
|
# 工作线程函数
|
||||||
def worker():
|
def worker(ai_service):
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
# 从队列中获取任务
|
# 从队列中获取任务
|
||||||
@ -187,12 +217,12 @@ def worker():
|
|||||||
|
|
||||||
|
|
||||||
# 多线程处理链接
|
# 多线程处理链接
|
||||||
def process_links_with_threads(links, num_threads=None):
|
def process_links_with_threads(links, num_threads=None, ai_service="dify"):
|
||||||
if num_threads is None:
|
if num_threads is None:
|
||||||
num_threads = min(MAX_THREADS, len(links))
|
num_threads = min(MAX_THREADS, len(links))
|
||||||
else:
|
else:
|
||||||
num_threads = min(num_threads, MAX_THREADS, len(links))
|
num_threads = min(num_threads, MAX_THREADS, len(links))
|
||||||
|
|
||||||
# 清空任务队列和结果队列
|
# 清空任务队列和结果队列
|
||||||
while not task_queue.empty():
|
while not task_queue.empty():
|
||||||
task_queue.get()
|
task_queue.get()
|
||||||
@ -201,8 +231,10 @@ def process_links_with_threads(links, num_threads=None):
|
|||||||
|
|
||||||
# 创建工作线程
|
# 创建工作线程
|
||||||
threads = []
|
threads = []
|
||||||
|
|
||||||
|
# 将AI服务选择传递给worker函数
|
||||||
for _ in range(num_threads):
|
for _ in range(num_threads):
|
||||||
t = threading.Thread(target=worker)
|
t = threading.Thread(target=worker, args=(ai_service,))
|
||||||
t.daemon = True
|
t.daemon = True
|
||||||
t.start()
|
t.start()
|
||||||
threads.append(t)
|
threads.append(t)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user