def call_dify_workflow(input_data):
    """
    Run the configured Dify workflow in blocking mode and return its article.

    The endpoint, API key and user id are read from the ``Dify`` section of
    ``CONFIG``.

    :param input_data: mapping sent as the ``inputs`` field of the request
    :return: the value found at ``data.outputs.article`` in the JSON response
    :raises requests.RequestException: on connection errors, timeouts or a
        4xx/5xx answer from the server
    :raises RuntimeError: if the response has no ``data.outputs.article``
        (for example when the workflow itself reported a failure)
    """
    dify_cfg = CONFIG['Dify']

    headers = {
        'Authorization': f"Bearer {dify_cfg['api_key']}",
        'Content-Type': 'application/json',
    }
    data = {
        "inputs": input_data,
        "response_mode": "blocking",
        "user": dify_cfg['user_id'],
    }

    # Without a timeout a stalled connection would block the calling worker
    # thread forever.  The read timeout is generous because a blocking
    # workflow only answers once the whole article has been generated.
    response = requests.post(
        dify_cfg['url'],
        headers=headers,
        data=json.dumps(data),
        timeout=(10, 600),
    )
    # Surface HTTP errors as such instead of as a confusing KeyError below.
    response.raise_for_status()

    json_data = response.json()
    print("json_data:", json_data)

    try:
        return json_data['data']['outputs']['article']
    except (KeyError, TypeError) as exc:
        # TypeError covers ``outputs`` being null in the response.
        raise RuntimeError(
            f"Dify response does not contain data.outputs.article: {json_data!r}"
        ) from exc
def load_config():
    """
    Load ``CONFIG_FILE`` and complete it with any missing default entries.

    Every section/option present in ``DEFAULT_CONFIG`` but absent from the
    file is added with its default value.  The file is written back only when
    something was actually added (which includes the first run, when the file
    does not exist yet), so an up-to-date, hand-edited config file is left
    untouched.

    :return: the populated ``configparser.ConfigParser`` instance
    """
    config = configparser.ConfigParser()
    # ConfigParser.read() skips files that do not exist, so a missing config
    # file simply leaves the parser empty and every default gets filled in.
    config.read(CONFIG_FILE, encoding='utf-8')

    changed = False
    for section, options in DEFAULT_CONFIG.items():
        if not config.has_section(section):
            config.add_section(section)
            changed = True

        for option, value in options.items():
            if not config.has_option(section, option):
                config.set(section, option, value)
                changed = True

    # Persist only real changes; rewriting on every import would drop any
    # comments the user added to the file.
    if changed:
        with open(CONFIG_FILE, 'w', encoding='utf-8') as f:
            config.write(f)

    return config
def check_link_exists(host, user, password, database, link):
    """
    Report whether ``link`` is already stored in the ``links`` table.

    This function only reads; it never inserts the link.

    :param host: MySQL server host
    :param user: MySQL user name
    :param password: MySQL password
    :param database: database name
    :param link: the link to look up
    :return: True if a row with this link exists; False if it does not, and
        also False when a database error occurred (the error is printed)
    """
    connection = None  # lets the finally block tell whether connect() succeeded

    try:
        connection = pymysql.connect(
            host=host,
            user=user,
            password=password,
            database=database
        )

        with connection.cursor() as cursor:
            cursor.execute("SELECT 1 FROM links WHERE link = %s", (link,))
            # fetchone() yields a row when the link exists, otherwise None.
            return cursor.fetchone() is not None

    except pymysql.MySQLError as e:
        print(f"数据库错误: {e}")
        return False
    finally:
        # Always release the connection, including on the error path.
        if connection:
            connection.close()
def crop_and_replace_images(folder_path):
    """
    Cut the bottom 10% off every ``.jpg`` in a folder and stretch it back.

    Each matching file is cropped, resized to its original dimensions and
    written back over itself as JPEG.

    :param folder_path: directory whose ``.jpg`` files are rewritten in place
    :return: None
    """
    print("开始处理图片。。。。")
    for filename in os.listdir(folder_path):
        # Only files named *.jpg (case-insensitive) are touched.
        if not filename.lower().endswith('.jpg'):
            continue

        file_path = os.path.join(folder_path, filename)
        print("文件夹路径:" + folder_path)
        print("文件路径:" + file_path)

        with Image.open(file_path) as img:
            width, height = img.size
            print("裁剪图片。。。")
            # Remove the lowest 10% of the picture; the crop box is given in
            # whole pixels.
            cropped_img = img.crop((0, 0, width, height - int(height * 0.1)))
            # Stretch back so the output keeps the original dimensions.
            resized_img = cropped_img.resize((width, height))

        # The file name says .jpg but the content may be PNG/WebP with an
        # alpha channel or palette, which the JPEG writer rejects.
        if resized_img.mode not in ('RGB', 'L', 'CMYK'):
            resized_img = resized_img.convert('RGB')

        # Written after the source file has been closed, so the file is not
        # overwritten while it is still open for reading.
        resized_img.save(file_path, 'JPEG')
def download_and_process_images(img_urls, article_title):
    """
    Download an article's images into its own folder, then process them once.

    All images are downloaded first; cropping and de-duplication then run a
    single time over the folder.  Running them inside the download loop would
    re-crop, re-rotate and re-watermark every earlier image on each iteration.

    :param img_urls: iterable of image URLs (protocol-relative ``//host/...``
        URLs get an ``https:`` scheme, others are used unchanged)
    :param article_title: name of the sub-folder created under IMGS_BASE_PATH
    :return: None; failures are logged, not raised
    """
    img_dir_path = os.path.join(IMGS_BASE_PATH, article_title)
    safe_open_directory(img_dir_path)

    if not img_urls:
        return

    for i, img_url in enumerate(img_urls):
        img_path = os.path.join(img_dir_path, f"图片{i}.jpg")
        try:
            # Only protocol-relative URLs need a scheme; prefixing a URL that
            # already has one would produce an invalid address.
            imgurl = "https:" + img_url if img_url.startswith("//") else img_url
            download_image(imgurl, img_path)
        except Exception as e:
            # One broken image must not stop the remaining downloads.
            logging.error(f"处理图片失败: {e}")

    try:
        crop_and_replace_images(img_dir_path)
        deduplicate_images(img_dir_path)
    except Exception as e:
        logging.error(f"处理图片失败: {e}")
def process_link(link):
    """
    Fetch one Toutiao link, rewrite its article via Dify and save the result.

    Steps: extract title/article/images, skip articles whose title contains a
    banned word, send the article through the Dify workflow, strip markdown
    characters from the answer, record the link in the database, run the Baidu
    text check and, if the text is compliant, write it to ARTICLES_BASE_PATH
    and download the images.

    :param link: article URL; only ``https://www.toutiao.com/w...`` and
        ``https://www.toutiao.com/article/...`` links are supported
    :return: None
    :raises ValueError: for an unsupported link or when no article text could
        be extracted
    :raises Exception: any other failure is logged and re-raised
    """
    from datetime import datetime

    try:
        link_str = str(link)
        if link_str.startswith("https://www.toutiao.com/w"):
            title_text, article_text, img_urls = toutiao_w_extract_content(link)
        elif link_str.startswith("https://www.toutiao.com/article/"):
            title_text, article_text, img_urls = toutiao_extract_content(link)
        else:
            # Previously such links continued with empty content and were
            # still sent to the workflow and written to disk.
            raise ValueError(f"unsupported link: {link}")

        if not article_text:
            raise ValueError(f"no article text extracted from: {link}")

        # Database connection settings
        host = CONFIG['Database']['host']
        user = CONFIG['Database']['user']
        password = CONFIG['Database']['password']
        database = CONFIG['Database']['database']

        # Banned-word check (applied to the title only).
        if check_keywords_in_text(title_text):
            print("文章中有违禁词!")
            check_link_insert(host, user, password, database, link)
            return

        title = extract_content_until_punctuation(article_text).replace("正文:", "")
        # The title becomes a file and folder name, so drop characters that
        # are path separators or not allowed in Windows file names.
        title = re.sub(r'[\\/:*?"<>|\r\n]', '', title).strip()

        print(title)
        print(article_text)

        print("当前时间:", datetime.now().strftime("%H:%M:%S"))
        message_content = call_dify_workflow({"old_article": article_text})
        print("当前时间:", datetime.now().strftime("%H:%M:%S"))

        finally_article = message_content.replace("正文:", "") + "\n"

        # Strip markdown markers.  This must work on finally_article, not on
        # the raw answer, otherwise the "正文:" removal above is lost.
        content = re.sub(r'[*#-]', '', finally_article)

        print("改写完成的文章:" + content)

        check_link_insert(host, user, password, database, link)

        # Compliance check; non-compliant text is not saved.
        if text_detection(content) != "合规":
            print("文章不合规")
            return
        print("文章合规")

        article_save_path = os.path.join(ARTICLES_BASE_PATH, f"{title}.txt")
        with open(article_save_path, 'w', encoding='utf-8') as f:
            f.write(content)
        logging.info('文本已经保存')

        if img_urls:
            download_and_process_images(img_urls, title)

    except Exception as e:
        logging.error(f"处理链接 {link} 时出错: {e}")
        raise
def process_links_with_threads(links, num_threads=None):
    """
    Process links concurrently with worker threads and collect the results.

    Links are pushed onto the module-level ``task_queue``; each worker thread
    runs ``worker`` until it receives a ``None`` sentinel.  Results are then
    drained from the module-level ``result_queue``.

    :param links: links to process; an empty sequence returns ``[]`` at once
    :param num_threads: desired number of threads; ``None`` means MAX_THREADS.
        The effective count is capped by MAX_THREADS and ``len(links)`` and is
        never lower than 1
    :return: list of the tuples the workers put on ``result_queue``
    """
    if not links:
        return []

    requested = MAX_THREADS if num_threads is None else num_threads
    # At least one worker: with zero workers the queued links would never be
    # consumed and would leak into the next call through the shared queue.
    num_threads = max(1, min(requested, MAX_THREADS, len(links)))

    # Start the workers
    threads = []
    for _ in range(num_threads):
        t = threading.Thread(target=worker)
        t.daemon = True
        t.start()
        threads.append(t)

    # Queue the work
    for link in links:
        task_queue.put(link)

    # One stop sentinel per worker
    for _ in range(num_threads):
        task_queue.put(None)

    # Wait until every worker has seen its sentinel
    for t in threads:
        t.join()

    # Collect what the workers reported
    results = []
    while not result_queue.empty():
        results.append(result_queue.get())

    return results
def extract_content_until_punctuation(text, punctuations=r'[,。!?;]'):
    """
    Return the leading part of ``text`` that precedes its first punctuation.

    The punctuation mark itself is not included, so the result can be used
    as a title or file name.

    :param text: input text; ``None`` or an empty string yields ``""``
    :param punctuations: regular expression matching the punctuation marks
        that end the excerpt
    :return: the stripped text before the first match, or the whole stripped
        text when nothing matches
    """
    if not text:
        return ""

    match = re.search(punctuations, text)
    if match is None:
        # No punctuation at all: the whole text is the excerpt.
        return text.strip()

    # Cut *before* the punctuation mark (match.start(), not match.end()).
    return text[:match.start()].strip()
sys.setrecursionlimit(5000)

# Chrome user-data directory of the current Windows user.  Every backslash is
# escaped explicitly; sequences such as "\L" or "\G" are invalid escapes.
USER_DIR_PATH = f"C:\\Users\\{getpass.getuser()}\\AppData\\Local\\Google\\Chrome\\User Data"
# Folder the rewritten articles are written to
ARTICLES_BASE_PATH = r"articles"
# Folder the downloaded images are written to
IMGS_BASE_PATH = r"picture"
# Excel file holding the article links
TITLE_BASE_PATH = r"文章链接.xlsx"

# Create the output folders when they are missing.  os.makedirs is required
# here: os.chdir on a folder that does not exist raises FileNotFoundError.
if not os.path.exists(ARTICLES_BASE_PATH):
    os.makedirs(ARTICLES_BASE_PATH)
    os.chmod(ARTICLES_BASE_PATH, 0o777)
if not os.path.exists(IMGS_BASE_PATH):
    os.makedirs(IMGS_BASE_PATH)
    os.chmod(IMGS_BASE_PATH, 0o777)

logging.basicConfig(level=logging.INFO)

# Log file path
LOG_FILE = "article_replace.log"
def get_deepseek_chat_completion(prompt, model="deepseek-reasoner"):
    """
    Ask the DeepSeek chat API a question and return the answer text.

    The API key is taken from the ``DEEPSEEK_API_KEY`` environment variable
    when it is set; otherwise the key that was committed with this file is
    used so existing setups keep working.

    :param prompt: user prompt sent to the model
    :param model: model name; the default ``deepseek-reasoner`` is the
        reasoning model
    :return: content of the first choice of the completion
    """
    import os

    # SECURITY: this fallback key is stored in version control and must be
    # considered leaked.  TODO: rotate it and drop the fallback once every
    # deployment provides DEEPSEEK_API_KEY.
    api_key = os.environ.get(
        "DEEPSEEK_API_KEY", "sk-b37c17c924044f73996a77b3ef74904c"
    )
    client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com")

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "你是一个十万粉丝的爆款文章创作者!"},
            {"role": "user", "content": prompt},
        ],
        stream=False
    )

    answer = response.choices[0].message.content
    print(answer)
    return answer
import tkinter as tk
from GzhArticleReplace import link_to_text


def start():
    """Read both prompt fields and run the article pipeline with them."""
    print("开始按钮被点击了")
    prompt1 = entry_prompt1.get()
    prompt2 = entry_prompt2.get()
    link_to_text(prompt1, prompt2)


# Main window with two prompt inputs and a start button.
windows = tk.Tk()
windows.title("公众号文章创作")
windows.geometry("300x200")

frame_form = tk.Frame(windows)
frame_form.pack()

label_prompt1 = tk.Label(frame_form, text='提示词一:')
label_prompt1.pack()

entry_prompt1 = tk.Entry(frame_form, width=100)
entry_prompt1.pack()

label_prompt2 = tk.Label(frame_form, text='提示词二:')
label_prompt2.pack()

entry_prompt2 = tk.Entry(frame_form, width=100)
entry_prompt2.pack()

button_submit = tk.Button(frame_form, text='开始', command=start)
button_submit.pack()
label_result = tk.Label(windows, text='')
label_result.pack()


if __name__ == '__main__':
    # The event loop runs only when the file is executed as a script.  The
    # former unconditional start() call after mainloop() ran once the window
    # had already been destroyed and failed when reading the entry widgets.
    windows.mainloop()