修改获取网页内容代码

This commit is contained in:
太一 2025-04-22 09:15:49 +08:00
parent 65618b2c0b
commit 2d377de6fd
3 changed files with 73 additions and 34 deletions

View File

@@ -13,6 +13,7 @@ def call_dify_workflow(input_data):
:param input_data: 传递给工作流的输入数据 :param input_data: 传递给工作流的输入数据
:return: 工作流的输出结果 :return: 工作流的输出结果
""" """
logger.info("Dify开始工作。。。")
api_key = CONFIG['Dify']['api_key'] api_key = CONFIG['Dify']['api_key']
user_id = CONFIG['Dify']['user_id'] user_id = CONFIG['Dify']['user_id']
url = CONFIG['Dify']['url'] url = CONFIG['Dify']['url']
@@ -40,17 +41,18 @@ def call_dify_workflow(input_data):
# ==========================调用coze工作流========================== # ==========================调用coze工作流==========================
def call_coze_workflow(workflow_id, access_token, parameters,is_async=False): def call_coze_workflow(parameters):
""" """
调用 Coze 工作流的函数 调用 Coze 工作流的函数
:param workflow_id: Coze 工作流 ID
:param access_token: 个人访问令牌Access Token
:param parameters: 传递给工作流的输入参数字典格式 :param parameters: 传递给工作流的输入参数字典格式
:param app_id: 应用 ID可选
:param is_async: 是否异步执行默认 False
:return: 工作流的执行结果 :return: 工作流的执行结果
""" """
logger.info("Coze开始工作。。。。")
workflow_id = CONFIG['Coze']['workflow_id']
access_token = CONFIG['Coze']['access_token']
is_async = CONFIG['Coze']['is_async'].lower() == 'true'
url = "https://api.coze.cn/v1/workflow/run" url = "https://api.coze.cn/v1/workflow/run"
headers = { headers = {
"Authorization": f"Bearer {access_token}", "Authorization": f"Bearer {access_token}",

View File

@@ -15,6 +15,11 @@ DEFAULT_CONFIG = {
"title_file": "文章链接.xlsx", "title_file": "文章链接.xlsx",
"max_threads": "3" "max_threads": "3"
}, },
"Coze": {
"workflow_id": "",
"access_token": "",
"is_async": "false"
},
"Database": { "Database": {
"host": "27.106.125.150", "host": "27.106.125.150",
"user": "root", "user": "root",

View File

@@ -1,19 +1,18 @@
import threading import threading
import queue import queue
import json # 导入 json 模块
from ai_studio import call_dify_workflow, call_coze_workflow from ai_studio import call_dify_workflow, call_coze_workflow
from databases import * from databases import *
from images_edit import download_and_process_images from images_edit import download_and_process_images
from utils import * from utils import *
from get_web_content import * from get_web_content import *
from config import * from config import *
# ==============================主程序=========================== # ==============================主程序===========================
def process_link(link, ai_service="dify"): def process_link(link, ai_service):
""" """
处理单个链接 处理单个链接
:param link: 要处理的链接 :param link: 要处理的链接
@@ -31,8 +30,10 @@ def process_link(link, ai_service="dify"):
if title_text == "": if title_text == "":
return return
elif len(title_text) > 100:
return
# 获取数据库配置 # 获取数据库配置
host = CONFIG['Database']['host'] host = CONFIG['Database']['host']
user = CONFIG['Database']['user'] user = CONFIG['Database']['user']
password = CONFIG['Database']['password'] password = CONFIG['Database']['password']
@@ -41,11 +42,6 @@ def process_link(link, ai_service="dify"):
# 判断文章内容是否有违禁词 # 判断文章内容是否有违禁词
check_keywords = check_keywords_in_text(title_text) check_keywords = check_keywords_in_text(title_text)
if check_keywords:
print("文章中有违禁词!")
check_link_insert(host, user, password, database, link)
return
title = extract_content_until_punctuation(article_text).replace("正文:", "") title = extract_content_until_punctuation(article_text).replace("正文:", "")
print(title) print(title)
@@ -59,20 +55,55 @@ def process_link(link, ai_service="dify"):
# 打印当前时间 # 打印当前时间
print("当前时间:", current_time) print("当前时间:", current_time)
input_data = {
"old_article": article_text
}
if ai_service == "dify": if ai_service == "dify":
input_data = { if check_keywords:
"old_article": article_text print("文章中有违禁词!")
} check_link_insert(host, user, password, database, link)
return
# 从配置加载 input_data 模板
input_data_template_str = CONFIG['Dify'].get('input_data_template', '{{"old_article": "{article_text}"}}')
try:
# 解析模板字符串为字典
input_data_template = json.loads(input_data_template_str)
# 使用实际变量格式化模板
input_data = {k: v.format(article_text=article_text) for k, v in input_data_template.items()}
except (json.JSONDecodeError, KeyError, AttributeError) as e:
logger.error(f"处理 Dify input_data 模板时出错: {e}. 使用默认模板.")
input_data = {
"old_article": article_text
}
# input_data = {
# "old_article": article_text
# }
message_content = call_dify_workflow(input_data) message_content = call_dify_workflow(input_data)
elif ai_service == "coze": elif ai_service == "coze":
input_data = { logger.info("coze正在处理")
"old_article": article_text weijin = ""
} if check_keywords:
message_content = call_coze_workflow(input_data) weijin = "违禁"
# 从配置加载 Coze input_data 模板
input_data_template_str = CONFIG['Coze'].get('input_data_template',
'{{"article": "{article_text}", "link":"{link}", "weijin":"{weijin}"}}')
try:
# 解析模板字符串为字典
input_data_template = json.loads(input_data_template_str)
# 使用实际变量格式化模板
input_data = {k: v.format(article_text=article_text, link=link, weijin=weijin) for k, v in
input_data_template.items()}
except (json.JSONDecodeError, KeyError, AttributeError) as e:
logger.error(f"处理 Coze input_data 模板时出错: {e}. 使用默认模板.")
input_data = {
"article": article_text,
"link": link,
"weijin": weijin
}
msg = call_coze_workflow(input_data)
message_content = msg['article']
result = msg['result']
if result == "已经创作过":
return
# 获取当前时间并格式化 # 获取当前时间并格式化
current_time = datetime.now().strftime("%H:%M:%S") current_time = datetime.now().strftime("%H:%M:%S")
@@ -117,7 +148,6 @@ def process_link(link, ai_service="dify"):
raise raise
def link_to_text(prompt1=None, prompt2=None, num_threads=None, ai_service="dify"): def link_to_text(prompt1=None, prompt2=None, num_threads=None, ai_service="dify"):
use_link_path = 'use_link_path.txt' use_link_path = 'use_link_path.txt'
@@ -139,14 +169,14 @@ def link_to_text(prompt1=None, prompt2=None, num_threads=None, ai_service="dify"
else: else:
filtered_links.append(link) filtered_links.append(link)
logger.info(f"链接不存在: {link}") logger.info(f"链接不存在: {link}")
print("链接不存在,存储到过滤器中:",link) print("链接不存在,存储到过滤器中:", link)
if not filtered_links: if not filtered_links:
logger.info("没有新链接需要处理") logger.info("没有新链接需要处理")
return [] return []
# 使用多线程处理链接 # 使用多线程处理链接
results = process_links_with_threads(filtered_links, num_threads) results = process_links_with_threads(filtered_links, num_threads, ai_service)
# 记录已处理的链接 # 记录已处理的链接
with open(use_link_path, 'a+', encoding='utf-8') as f: with open(use_link_path, 'a+', encoding='utf-8') as f:
@@ -163,7 +193,7 @@ result_queue = queue.Queue()
# 工作线程函数 # 工作线程函数
def worker(): def worker(ai_service):
while True: while True:
try: try:
# 从队列中获取任务 # 从队列中获取任务
@@ -187,12 +217,12 @@ def worker():
# 多线程处理链接 # 多线程处理链接
def process_links_with_threads(links, num_threads=None): def process_links_with_threads(links, num_threads=None, ai_service="dify"):
if num_threads is None: if num_threads is None:
num_threads = min(MAX_THREADS, len(links)) num_threads = min(MAX_THREADS, len(links))
else: else:
num_threads = min(num_threads, MAX_THREADS, len(links)) num_threads = min(num_threads, MAX_THREADS, len(links))
# 清空任务队列和结果队列 # 清空任务队列和结果队列
while not task_queue.empty(): while not task_queue.empty():
task_queue.get() task_queue.get()
@@ -201,8 +231,10 @@ def process_links_with_threads(links, num_threads=None):
# 创建工作线程 # 创建工作线程
threads = [] threads = []
# 将AI服务选择传递给worker函数
for _ in range(num_threads): for _ in range(num_threads):
t = threading.Thread(target=worker) t = threading.Thread(target=worker, args=(ai_service,))
t.daemon = True t.daemon = True
t.start() t.start()
threads.append(t) threads.append(t)