修改获取网页内容代码
This commit is contained in:
parent
0792027bea
commit
42fc2e661f
@ -29,7 +29,7 @@ class ArticleReplaceApp(tk.Tk):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
self.title("文章采集与处理工具")
|
||||
self.title("文章工作流调用工具(软件仅供交流使用)")
|
||||
self.geometry("900x600")
|
||||
|
||||
# 创建标签页控件
|
||||
@ -44,6 +44,11 @@ class ArticleReplaceApp(tk.Tk):
|
||||
self.config_frame = ttk.Frame(self.notebook)
|
||||
self.notebook.add(self.config_frame, text="配置")
|
||||
|
||||
# 创建免责声明页面
|
||||
self.disclaimer_frame = ttk.Frame(self.notebook)
|
||||
self.notebook.add(self.disclaimer_frame, text="免责声明")
|
||||
|
||||
|
||||
# 初始化变量
|
||||
self.running = False
|
||||
self.thread = None
|
||||
@ -67,6 +72,8 @@ class ArticleReplaceApp(tk.Tk):
|
||||
self.init_main_frame()
|
||||
# 初始化配置页面
|
||||
self.init_config_frame()
|
||||
# 初始化免责声明页面
|
||||
self.init_disclaimer_frame()
|
||||
|
||||
# 设置关闭窗口事件
|
||||
self.protocol("WM_DELETE_WINDOW", self.on_close)
|
||||
@ -407,6 +414,54 @@ class ArticleReplaceApp(tk.Tk):
|
||||
parent.columnconfigure(0, weight=1)
|
||||
parent.rowconfigure(1, weight=1)
|
||||
|
||||
def init_disclaimer_frame(self):
    """Build the disclaimer tab: a heading, a read-only notice and a confirm button."""
    # Full disclaimer shown to the user; kept verbatim because it is runtime text.
    notice = """
软件使用免责声明

1. 合法使用声明
本软件仅供合法、正当用途使用。用户应当遵守中华人民共和国相关法律法规,不得将本软件用于任何违法犯罪活动。

2. 内容责任声明
用户通过本软件生成、处理或发布的所有内容,其版权归属、合法性及内容真实性由用户自行负责。本软件开发者不对用户使用本软件处理的内容承担任何法律责任。

3. 使用风险声明
用户应自行承担使用本软件的风险。本软件按"现状"提供,不提供任何明示或暗示的保证,包括但不限于适销性、特定用途适用性和非侵权性的保证。

4. 禁止用途
严禁将本软件用于以下活动:
- 违反国家法律法规的活动
- 侵犯他人知识产权或其他合法权益的活动
- 传播虚假、欺诈或误导性信息的活动
- 从事任何可能危害国家安全、社会稳定的活动
- 其他违背社会公德、商业道德的活动

5. 责任限制
在法律允许的最大范围内,对于因使用或无法使用本软件而导致的任何直接、间接、偶然、特殊、惩罚性或后果性损害,本软件开发者不承担任何责任。

6. 协议更新
本免责声明可能会不定期更新,更新后的内容将在软件中公布,不再另行通知。用户继续使用本软件即表示接受修改后的免责声明。

7. 最终解释
本免责声明的最终解释权归本软件开发者所有。
"""

    def _back_to_first_tab():
        # Acknowledging the notice switches the notebook to its first tab.
        self.notebook.select(0)

    # Padded container that fills the whole disclaimer tab.
    container = ttk.Frame(self.disclaimer_frame)
    container.pack(fill=tk.BOTH, expand=True, padx=20, pady=20)

    # Heading.
    heading = ttk.Label(
        container,
        text="免责声明",
        font=("Arial", 16, "bold"),
    )
    heading.pack(pady=10)

    # Scrollable text area holding the notice.
    text_box = ScrolledText(container, width=80, height=20, wrap=tk.WORD)
    text_box.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)
    text_box.insert(tk.END, notice)
    # Disable the widget after inserting so the notice is read-only.
    text_box.config(state=tk.DISABLED)

    # Confirmation button in its own row.
    button_row = ttk.Frame(container)
    button_row.pack(pady=10)
    confirm_button = ttk.Button(
        button_row,
        text="我已阅读并同意以上声明",
        command=_back_to_first_tab,
    )
    confirm_button.pack()
|
||||
|
||||
def save_banned_words(self):
|
||||
# 处理文本,将换行符替换为逗号
|
||||
words = self.banned_words_text.get(1.0, tk.END).strip().replace('\n', ',')
|
||||
@ -694,7 +749,7 @@ class ArticleReplaceApp(tk.Tk):
|
||||
# 创建模板选择对话框
|
||||
dialog = tk.Toplevel(self)
|
||||
dialog.title("选择模板")
|
||||
dialog.geometry("400x300")
|
||||
dialog.geometry("400x400")
|
||||
dialog.transient(self) # 设置为应用程序的子窗口
|
||||
dialog.grab_set() # 模态对话框
|
||||
dialog.resizable(False, False)
|
||||
@ -1152,7 +1207,7 @@ class ArticleReplaceApp(tk.Tk):
|
||||
logger.info(f"开始处理链接,使用 {num_threads} 个线程,生成类型: {generation_type}")
|
||||
if current_template:
|
||||
logger.info(f"使用模板: {current_template.get('name', '未命名')}")
|
||||
results = link_to_text(num_threads=num_threads, ai_service=ai_service, current_template=current_template)
|
||||
results = link_to_text(num_threads=num_threads, ai_service=ai_service, current_template=current_template, generation_type=generation_type)
|
||||
|
||||
# 计算处理结果
|
||||
total_links = len(results)
|
||||
@ -1172,7 +1227,8 @@ class ArticleReplaceApp(tk.Tk):
|
||||
f"共处理 {total_links} 个链接\n成功: {success_links} 个\n失败: {total_links - success_links} 个\n总耗时: {elapsed_time:.2f} 秒"))
|
||||
except Exception as e:
|
||||
logger.error(f"处理任务出错: {e}")
|
||||
self.after(0, lambda e=e: messagebox.showerror("处理错误", f"处理任务出错: {e}"))
|
||||
self.after(0, lambda: messagebox.showerror("处理错误", f"处理任务出错: {e}"))
|
||||
# self.after(0, lambda e=e: messagebox.showerror("处理错误", f"处理任务出错: {e}"))
|
||||
finally:
|
||||
# 恢复原始配置(如果有的话)
|
||||
if original_config is not None:
|
||||
|
||||
@ -124,3 +124,51 @@ def call_coze_article_workflow(parameters):
|
||||
"error": f"请求失败,状态码:{response.status_code}",
|
||||
"detail": response.text
|
||||
}
|
||||
|
||||
|
||||
def call_coze_chang_article_workflow(parameters):
    """
    Run the configured Coze workflow and return the generated title and article.

    The workflow id, access token and async flag are read from
    ``CONFIG['Coze']``; they are not parameters of this function.

    :param parameters: input parameters passed to the workflow (dict)
    :return: ``(title, article)`` when the HTTP status is 200; otherwise a
        dict with ``error`` and ``detail`` keys
    :raises ValueError: if the response body or its ``data`` field is not valid JSON
    :raises KeyError: if ``data``, ``title`` or ``article`` is missing
    """
    # Function-scope import, matching how this function imported its parser before.
    import json

    coze_config = CONFIG['Coze']
    workflow_id = coze_config['workflow_id']
    access_token = coze_config['access_token']
    # The config value is a string; only the text "true" (any case) enables async.
    is_async = coze_config['is_async'].lower() == 'true'

    url = "https://api.coze.cn/v1/workflow/run"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json"
    }
    data = {
        "workflow_id": workflow_id,
        "parameters": parameters,
        "is_async": is_async
    }

    response = requests.post(url, json=data, headers=headers)

    if response.status_code != 200:
        # NOTE(review): the failure value is a dict, not a 2-tuple. Callers that
        # tuple-unpack the result should check its type first, otherwise they
        # receive the dict keys instead of a title and an article.
        return {
            "error": f"请求失败,状态码:{response.status_code}",
            "detail": response.text
        }

    # The body is JSON, so parse it as JSON. A Python-literal parser rejects
    # true/false/null and mis-decodes JSON escapes such as "\/" and
    # surrogate-pair "\uXXXX" sequences (e.g. emoji in the article text).
    result_dict = json.loads(response.text)

    # `data` is itself a serialized string and needs a second parse
    # (the previous implementation also parsed it twice).
    data_dict = json.loads(result_dict['data'])

    return data_dict['title'], data_dict['article']
|
||||
|
||||
@ -0,0 +1,12 @@
|
||||
|
||||
|
||||
你绝对想不到!江苏王女士最近收到电费单时惊了,夏天每月电费突然涨到800元。她翻出家里所有电器,连路由器都拔了,结果第二个月电费反而涨到900块!
|
||||
|
||||
据《现代快报》报道,供电局工作人员上门检查后才发现,罪魁祸首是待机状态的空调。王女士家3台空调插头都没拔,每月能白白耗掉200多度电。这事让不少网友直呼"活久见",有人留言:"我家电视常年插着电源,难怪电费总降不下来!"
|
||||
|
||||
其实国家电网早做过测试,普通家电待机功率在13瓦之间。按每天待机20小时算,光机顶盒一年就能吃掉30度电。更扎心的是,很多家庭至少有5台电器长期插着电,一年下来相当于白交三百块!
|
||||
|
||||
我特意翻出家里老电表,发现拔掉所有插头后,电表真的转得慢了。现在我家冰箱外的电器用完就拔,这个月省了五十多电费。你家电表跑得快吗?赶紧试试拔插头吧!
|
||||
|
||||
生活窍门 家庭用电 省电妙招 居家过日子
|
||||
你家最近电费有变化吗?评论区聊聊你的省电妙招吧!
|
||||
@ -0,0 +1,11 @@
|
||||
|
||||
|
||||
上海垃圾分类新规实施半个月,罚款总额突破200万!据东方网报道,光是黄浦区就开出了2.3万张罚单,平均每分钟都有居民被处罚。我家楼下王阿姨前天刚被罚了50块,就因为在垃圾站门口多站了半分钟。
|
||||
|
||||
可你绝对想不到,全市60%的罚款都集中在3个高档小区。这些小区明明配置了智能分类设备,还有专人指导,结果反而成了"重灾区"。隔壁张叔气得直拍大腿:"我天天在家分拣半小时,最后还因为垃圾袋颜色不对被罚!"
|
||||
|
||||
据环保局数据显示,新规实施后厨余垃圾分拣正确率反而下降了5%。这事真不能全怪老百姓,有些小区督导员自己都搞不清分类标准。我亲眼见过督导员把干电池扔进有害垃圾箱,那可是要扣分的啊!
|
||||
|
||||
不过话说回来,垃圾分类确实是利国利民的好事。关键是不能"一刀切",得给大伙儿适应时间。听说杭州试点"三次提醒再罚款"的模式,效果反而更好。这事您怎么看?您家小区垃圾分类顺利吗?
|
||||
|
||||
垃圾分类新规 罚款争议 上海热点 社区管理 民生政策
|
||||
@ -2,7 +2,7 @@ import threading
|
||||
import queue
|
||||
import json # 导入 json 模块
|
||||
|
||||
from ai_studio import call_dify_workflow, call_coze_workflow,call_coze_article_workflow
|
||||
from ai_studio import call_dify_workflow, call_coze_workflow,call_coze_article_workflow,call_coze_chang_article_workflow
|
||||
from databases import *
|
||||
|
||||
from images_edit import download_and_process_images
|
||||
@ -12,7 +12,7 @@ from config import *
|
||||
|
||||
|
||||
# ==============================主程序===========================
|
||||
def process_link(link_info, ai_service, current_template=None):
|
||||
def process_link(link_info, ai_service, current_template=None,generation_type=None):
|
||||
link, article_type = link_info # 解包链接和类型信息
|
||||
"""
|
||||
处理单个链接
|
||||
@ -48,8 +48,6 @@ def process_link(link_info, ai_service, current_template=None):
|
||||
|
||||
title = extract_content_until_punctuation(article_text).replace("正文:", "")
|
||||
|
||||
print(img_urls)
|
||||
print(article_text)
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
@ -83,7 +81,7 @@ def process_link(link_info, ai_service, current_template=None):
|
||||
message_content = call_dify_workflow(input_data)
|
||||
elif ai_service == "coze":
|
||||
logger.info("coze正在处理")
|
||||
|
||||
logger.info(f"正在处理的文章类型为:{generation_type}")
|
||||
# 如果有模板配置,临时更新CONFIG
|
||||
original_config = None
|
||||
if current_template:
|
||||
@ -120,7 +118,13 @@ def process_link(link_info, ai_service, current_template=None):
|
||||
}
|
||||
|
||||
try:
|
||||
message_content = call_coze_article_workflow(input_data)
|
||||
title = ""
|
||||
if generation_type == "短篇":
|
||||
message_content = call_coze_article_workflow(input_data)
|
||||
elif generation_type == "文章":
|
||||
title, message_content = call_coze_chang_article_workflow(input_data)
|
||||
|
||||
|
||||
finally:
|
||||
# 恢复原始配置(如果有的话)
|
||||
if original_config is not None:
|
||||
@ -134,9 +138,11 @@ def process_link(link_info, ai_service, current_template=None):
|
||||
|
||||
# 打印当前时间
|
||||
print("当前时间:", current_time)
|
||||
|
||||
|
||||
file_name = handle_duplicate_files_advanced(ARTICLES_BASE_PATH,title_text)[0]
|
||||
file_name = ""
|
||||
if generation_type == '短篇':
|
||||
file_name = handle_duplicate_files_advanced(ARTICLES_BASE_PATH,title_text)[0]
|
||||
elif generation_type == "文章":
|
||||
file_name = handle_duplicate_files_advanced(ARTICLES_BASE_PATH,title)[0]
|
||||
|
||||
|
||||
# 创建类型目录
|
||||
@ -146,22 +152,11 @@ def process_link(link_info, ai_service, current_template=None):
|
||||
# 在类型目录下保存文章
|
||||
article_save_path = os.path.join(type_dir, f"{file_name}.txt")
|
||||
|
||||
if '*' in message_content or '#' in message_content or "-" in message_content:
|
||||
# 使用正则表达式一次性替换多个字符
|
||||
old_content = re.sub(r'[*#-]', '', message_content)
|
||||
else:
|
||||
# 如果不需要替换,直接使用原内容
|
||||
old_content = message_content
|
||||
|
||||
print("改写完成的文章:" + old_content)
|
||||
|
||||
# 删除AI词汇
|
||||
content = old_content
|
||||
|
||||
|
||||
|
||||
# 判断文章合规度
|
||||
if text_detection(content) == "合规":
|
||||
if text_detection(message_content) == "合规":
|
||||
print("文章合规")
|
||||
pass
|
||||
else:
|
||||
@ -169,7 +164,7 @@ def process_link(link_info, ai_service, current_template=None):
|
||||
return
|
||||
|
||||
with open(article_save_path, 'w', encoding='utf-8') as f:
|
||||
f.write(content)
|
||||
f.write(message_content)
|
||||
logging.info('文本已经保存')
|
||||
|
||||
if img_urls:
|
||||
@ -183,7 +178,7 @@ def process_link(link_info, ai_service, current_template=None):
|
||||
raise
|
||||
|
||||
|
||||
def link_to_text(num_threads=None, ai_service="dify", current_template=None):
|
||||
def link_to_text(num_threads=None, ai_service="dify", current_template=None, generation_type=None):
|
||||
use_link_path = 'use_link_path.txt'
|
||||
|
||||
# 读取链接
|
||||
@ -198,7 +193,8 @@ def link_to_text(num_threads=None, ai_service="dify", current_template=None):
|
||||
|
||||
for link_info in links:
|
||||
link = link_info[0].strip() # 获取链接并去除空白字符
|
||||
article_type = link_info[1].strip() # 获取类型并去除空白字符
|
||||
# 如果Excel中有类型,使用Excel中的类型,否则使用传入的generation_type
|
||||
article_type = link_info[1].strip() if len(link_info) > 1 and link_info[1].strip() else generation_type
|
||||
logging.info(f"总共{len(links)}个链接")
|
||||
# if check_link_exists(host, user, password, database, link):
|
||||
# logger.info(f"链接已存在: {link}")
|
||||
@ -213,7 +209,7 @@ def link_to_text(num_threads=None, ai_service="dify", current_template=None):
|
||||
return []
|
||||
|
||||
# 使用多线程处理链接
|
||||
results = process_links_with_threads(filtered_links, num_threads, ai_service, current_template)
|
||||
results = process_links_with_threads(filtered_links, num_threads, ai_service, current_template,generation_type)
|
||||
|
||||
# 记录已处理的链接
|
||||
with open(use_link_path, 'a+', encoding='utf-8') as f:
|
||||
@ -230,7 +226,7 @@ result_queue = queue.Queue()
|
||||
|
||||
|
||||
# 工作线程函数
|
||||
def worker(ai_service, current_template=None):
|
||||
def worker(ai_service, current_template=None,generation_type=None):
|
||||
while True:
|
||||
try:
|
||||
# 从队列中获取任务
|
||||
@ -241,7 +237,7 @@ def worker(ai_service, current_template=None):
|
||||
# 处理链接
|
||||
try:
|
||||
logger.info(f"开始处理链接:{link}")
|
||||
process_link(link, ai_service, current_template)
|
||||
process_link(link, ai_service, current_template,generation_type)
|
||||
result_queue.put((link, True, None)) # 成功
|
||||
except Exception as e:
|
||||
result_queue.put((link, False, str(e))) # 失败
|
||||
@ -254,7 +250,7 @@ def worker(ai_service, current_template=None):
|
||||
|
||||
|
||||
# 多线程处理链接
|
||||
def process_links_with_threads(links, num_threads=None, ai_service="dify", current_template=None):
|
||||
def process_links_with_threads(links, num_threads=None, ai_service="dify", current_template=None,generation_type=None):
|
||||
if num_threads is None:
|
||||
num_threads = min(MAX_THREADS, len(links))
|
||||
else:
|
||||
@ -271,7 +267,7 @@ def process_links_with_threads(links, num_threads=None, ai_service="dify", curre
|
||||
|
||||
# 将AI服务选择和模板配置传递给worker函数
|
||||
for _ in range(num_threads):
|
||||
t = threading.Thread(target=worker, args=(ai_service, current_template))
|
||||
t = threading.Thread(target=worker, args=(ai_service, current_template,generation_type))
|
||||
t.daemon = True
|
||||
t.start()
|
||||
threads.append(t)
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 120 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 28 KiB |
@ -1,27 +1,56 @@
|
||||
import json
|
||||
|
||||
import requests
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from get_web_content import wechat_extract_content, toutiao_w_extract_content, toutiao_extract_content,wangyi_extract_content,souhu_extract_content
|
||||
def call_coze_article_workflow(workflow_id,access_token,parameters,is_async=False):
|
||||
"""
|
||||
调用 Coze 工作流的函数
|
||||
|
||||
from utils import handle_duplicate_files_advanced
|
||||
from images_edit import download_and_process_images
|
||||
:param parameters: 传递给工作流的输入参数(字典格式)
|
||||
:param is_async: 是否异步执行(默认 False)
|
||||
:return: 工作流的执行结果
|
||||
"""
|
||||
|
||||
# title,article,imgs = wechat_extract_content("https://mp.weixin.qq.com/s/3KejJOMuY2y6LA5k1tNwcg")
|
||||
# title,article,imgs = toutiao_w_extract_content("https://www.t outiao.com/w/1830082267985932/")
|
||||
# title,article,imgs = toutiao_extract_content("https://www.toutiao.com/article/7496132108239356479/")
|
||||
# title,article,imgs = wangyi_extract_content("https://www.163.com/dy/article/JV4K9D020553VRO2.html")
|
||||
title,article,imgs = souhu_extract_content("https://www.sohu.com/a/893588175_115479?scm=")
|
||||
|
||||
print(title)
|
||||
print(article)
|
||||
print(imgs)
|
||||
print(type(imgs))
|
||||
#
|
||||
# download_and_process_images(imgs,"1")
|
||||
url = "https://api.coze.cn/v1/workflow/run"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {access_token}",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
data = {
|
||||
"workflow_id": workflow_id,
|
||||
"parameters": parameters,
|
||||
"is_async": is_async
|
||||
}
|
||||
|
||||
#
|
||||
# name = handle_duplicate_files_advanced(r"F:\work\code\python\ArticleReplaceBatch\articles","exeample.txt")
|
||||
# print(name[0])
|
||||
response = requests.post(url, json=data, headers=headers)
|
||||
|
||||
if response.status_code == 200:
|
||||
# data = json.loads(response.text)['data']
|
||||
# print("data:",data['output'])
|
||||
import ast
|
||||
|
||||
# 直接解析整个result字符串
|
||||
result_dict = ast.literal_eval(response.text)
|
||||
|
||||
# 解析data字段
|
||||
data_dict = ast.literal_eval(result_dict['data'])
|
||||
|
||||
# 获取output的值
|
||||
title = data_dict['title']
|
||||
article = data_dict['article']
|
||||
return title, article
|
||||
else:
|
||||
return {
|
||||
"error": f"请求失败,状态码:{response.status_code}",
|
||||
"detail": response.text
|
||||
}
|
||||
|
||||
|
||||
# Manual smoke test for call_coze_article_workflow.
import os

workflow_id = "7509764025128845366"

# Credentials must not be committed to the repository; read the personal
# access token from the environment instead of hard-coding it here.
access_token = os.environ.get("COZE_ACCESS_TOKEN")
if not access_token:
    raise SystemExit("Set the COZE_ACCESS_TOKEN environment variable before running this script.")

parameters = {
    "title": "1",
    "article": "1"
}

result = call_coze_article_workflow(workflow_id, access_token, parameters)

# On a non-200 response the function returns an error dict. Tuple-unpacking
# that dict would silently yield its keys, so handle it explicitly.
if isinstance(result, dict):
    print(result)
else:
    title, article = result
    print(title, article)
|
||||
Loading…
Reference in New Issue
Block a user