From b9c1a797cbc6fc37c69c1242f32b8caacb7b08e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A4=AA=E4=B8=80?= <2339117167@qq.com> Date: Sun, 13 Apr 2025 16:34:44 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=8E=B7=E5=8F=96=E7=BD=91?= =?UTF-8?q?=E9=A1=B5=E5=86=85=E5=AE=B9=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ArticleReplaceBatch/ai_studio.py | 41 +++++++++++++++++++++++++++++ ArticleReplaceBatch/main_process.py | 19 +++++++++---- ArticleReplaceBatch/test.py | 5 ++-- 3 files changed, 58 insertions(+), 7 deletions(-) diff --git a/ArticleReplaceBatch/ai_studio.py b/ArticleReplaceBatch/ai_studio.py index bd90ba4..913c285 100644 --- a/ArticleReplaceBatch/ai_studio.py +++ b/ArticleReplaceBatch/ai_studio.py @@ -35,3 +35,44 @@ def call_dify_workflow(input_data): # print("article:", article) return article + + +# ==========================调用coze工作流========================== + + +def call_coze_workflow(workflow_id, access_token, parameters,is_async=False): + """ + 调用 Coze 工作流的函数 + + :param workflow_id: Coze 工作流 ID + :param access_token: 个人访问令牌(Access Token) + :param parameters: 传递给工作流的输入参数(字典格式) + :param app_id: 应用 ID(可选) + :param is_async: 是否异步执行(默认 False) + :return: 工作流的执行结果 + """ + url = "https://api.coze.cn/v1/workflow/run" + headers = { + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/json" + } + data = { + "workflow_id": workflow_id, + "parameters": parameters, + "is_async": is_async + } + + + response = requests.post(url, json=data, headers=headers) + + if response.status_code == 200: + # data = json.loads(response.text)['data'] + # print("data:",data['output']) + + return response.text + else: + return { + "error": f"请求失败,状态码:{response.status_code}", + "detail": response.text + } + diff --git a/ArticleReplaceBatch/main_process.py b/ArticleReplaceBatch/main_process.py index bf34cd2..2874ab1 100644 --- a/ArticleReplaceBatch/main_process.py +++ b/ArticleReplaceBatch/main_process.py @@ -20,15 +20,17 @@ def process_link(link): 处理单个链接 """ try: - title_text, article_text, img_urls = "","",[] - if str(link).startswith("https://www.toutiao.com/w"): + if link.startswith("https://www.toutiao.com"): title_text, article_text, img_urls = toutiao_w_extract_content(link) - elif str(link).startswith("https://www.toutiao.com/article/"): - title_text, article_text, img_urls = toutiao_extract_content(link) + if title_text == "": + title_text, article_text, img_urls = toutiao_extract_content(link) + elif link.startswith("https://mp.weixin.qq.co"): + title_text, article_text, img_urls = wechat_extract_content(link) else: title_text, article_text, img_urls = "", "", [] - + if title_text == "": + return # 获取数据库配置 host = CONFIG['Database']['host'] @@ -162,6 +164,7 @@ def worker(): # 处理链接 try: + logger.info(f"开始处理链接:{link}") process_link(link) result_queue.put((link, True, None)) # 成功 except Exception as e: @@ -180,6 +183,12 @@ def process_links_with_threads(links, num_threads=None): num_threads = min(MAX_THREADS, len(links)) else: num_threads = min(num_threads, MAX_THREADS, len(links)) + + # 清空任务队列和结果队列 + while not task_queue.empty(): + task_queue.get() + while not result_queue.empty(): + result_queue.get() # 创建工作线程 threads = [] diff --git a/ArticleReplaceBatch/test.py b/ArticleReplaceBatch/test.py index 61a9fe8..9ab9351 100644 --- a/ArticleReplaceBatch/test.py +++ b/ArticleReplaceBatch/test.py @@ -1,7 +1,8 @@ -from get_web_content import toutiao_extract_content +from get_web_content import wechat_extract_content,toutiao_w_extract_content,toutiao_extract_content -title,article,imgs = toutiao_extract_content("https://www.toutiao.com/article/7491890368917602825/?log_from=ab01481cf63ba_1744526333347") +title,article,imgs = wechat_extract_content("https://mp.weixin.qq.com/s/3KejJOMuY2y6LA5k1tNwcg") +# title,article,imgs = toutiao_extract_content("https://www.toutiao.com/article/7491890368917602825/?log_from=ab01481cf63ba_1744526333347") print("title:",title)