修改获取网页内容代码

2025-04-13 16:34:44 +08:00 · 2025-04-13 16:34:44 +08:00 · b9c1a797cb
commit b9c1a797cb
parent bd0c6a6ff0
3 changed files with 58 additions and 7 deletions
--- a/ArticleReplaceBatch/ai_studio.py
+++ b/ArticleReplaceBatch/ai_studio.py
@ -35,3 +35,44 @@ def call_dify_workflow(input_data):
    # print("article:", article)
    return article

+
+
+# ==========================调用coze工作流==========================
+
+
+def call_coze_workflow(workflow_id, access_token, parameters,is_async=False):
+    """
+    Call a Coze workflow via an HTTP POST to the workflow-run endpoint.
+
+    :param workflow_id: ID of the Coze workflow to run
+    :param access_token: personal access token, sent as a Bearer credential
+    :param parameters: input parameters passed to the workflow (a dict)
+    :param is_async: whether to run asynchronously (default False)
+    :return: raw response body text (str) on HTTP 200; otherwise a dict
+        with "error" and "detail" keys describing the failed request
+    """
+    url = "https://api.coze.cn/v1/workflow/run"
+    headers = {
+        "Authorization": f"Bearer {access_token}",
+        "Content-Type": "application/json"
+    }
+    data = {
+        "workflow_id": workflow_id,
+        "parameters": parameters,
+        "is_async": is_async
+    }
+
+
+    response = requests.post(url, json=data, headers=headers)
+
+    if response.status_code == 200:
+        # The body is returned as raw text and is not parsed here.
+        # NOTE(review): payload presumably JSON with a 'data' field — confirm.
+
+        return response.text
+    else:
+        return {
+            "error": f"请求失败，状态码：{response.status_code}",
+            "detail": response.text
+        }
+
--- a/ArticleReplaceBatch/main_process.py
+++ b/ArticleReplaceBatch/main_process.py
@ -20,15 +20,17 @@ def process_link(link):
    处理单个链接
    """
    try:
-        title_text, article_text, img_urls = "","",[]
-        if str(link).startswith("https://www.toutiao.com/w"):
+        if link.startswith("https://www.toutiao.com"):
            title_text, article_text, img_urls = toutiao_w_extract_content(link)
-        elif str(link).startswith("https://www.toutiao.com/article/"):
+            if title_text == "":
                title_text, article_text, img_urls = toutiao_extract_content(link)
+        elif link.startswith("https://mp.weixin.qq.co"):
+            title_text, article_text, img_urls = wechat_extract_content(link)
        else:
            title_text, article_text, img_urls = "", "", []

-
+        if title_text == "":
+            return

        # 获取数据库配置
        host = CONFIG['Database']['host']
@ -162,6 +164,7 @@ def worker():

            # 处理链接
            try:
+                logger.info(f"开始处理链接：{link}")
                process_link(link)
                result_queue.put((link, True, None))  # 成功
            except Exception as e:
@ -181,6 +184,12 @@ def process_links_with_threads(links, num_threads=None):
    else:
        num_threads = min(num_threads, MAX_THREADS, len(links))
    
+    # 清空任务队列和结果队列
+    while not task_queue.empty():
+        task_queue.get()
+    while not result_queue.empty():
+        result_queue.get()
+
    # 创建工作线程
    threads = []
    for _ in range(num_threads):
--- a/ArticleReplaceBatch/test.py
+++ b/ArticleReplaceBatch/test.py
@ -1,7 +1,8 @@
-from get_web_content import toutiao_extract_content
+from get_web_content import wechat_extract_content,toutiao_w_extract_content,toutiao_extract_content


-title,article,imgs = toutiao_extract_content("https://www.toutiao.com/article/7491890368917602825/?log_from=ab01481cf63ba_1744526333347")
+title,article,imgs = wechat_extract_content("https://mp.weixin.qq.com/s/3KejJOMuY2y6LA5k1tNwcg")
+# title,article,imgs = toutiao_extract_content("https://www.toutiao.com/article/7491890368917602825/?log_from=ab01481cf63ba_1744526333347")

 print("title:",title)