ArticleReplaceBatch/ArticleReplaceBatch/txt2docx.py
2025-05-12 14:56:51 +08:00

354 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import PySimpleGUI as sg
import json
import os
import random
import re
from docx.shared import Pt, RGBColor
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_UNDERLINE
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.enum.style import WD_STYLE_TYPE
from docx import Document
from docx.shared import Inches
from PIL import Image
# JSON file in which the chosen folder paths and options are persisted between runs
SETTINGS_FILE = 'settings.json'
def set_picture_wrapping(paragraph):
    """
    Attach frame properties (``w:framePr``) to a paragraph.

    The element is written with ``w:wrap="around"`` and both the vertical
    and horizontal anchors set to ``text``. An existing ``w:framePr`` is
    reused, so calling this function repeatedly never creates duplicates.

    :param paragraph: python-docx paragraph whose properties are modified
        in place.
    :return: None
    """
    # NOTE(review): the original comment described "top and bottom" wrapping,
    # but the value written is 'around' -- confirm which one is intended.
    pPr = paragraph._element.get_or_add_pPr()
    framePr = pPr.find(qn('w:framePr'))
    if framePr is None:
        framePr = OxmlElement('w:framePr')
        # The children of w:pPr have a fixed order in WordprocessingML and
        # only these four elements may precede w:framePr. Appending at the
        # end would place it after e.g. w:spacing or w:jc.
        leading_tags = {
            qn(tag)
            for tag in ('w:pStyle', 'w:keepNext', 'w:keepLines', 'w:pageBreakBefore')
        }
        position = 0
        for child in pPr:
            if child.tag not in leading_tags:
                break
            position += 1
        pPr.insert(position, framePr)
    framePr.set(qn('w:wrap'), 'around')
    framePr.set(qn('w:vAnchor'), 'text')
    framePr.set(qn('w:hAnchor'), 'text')
def format_word_document(input_filename, output_filename):
    """
    Restyle an existing .docx file and save the result.

    Two paragraph styles, ``CustomHeading`` and ``CustomBody``, are created
    (or updated when they already exist, so a document can be formatted more
    than once). Paragraphs whose style name starts with ``Heading`` -- or
    that already use ``CustomHeading`` -- get the heading style; every other
    paragraph gets the body style. Paragraphs that contain a picture are then
    given frame properties, centred, and given 12 pt spacing before and after.

    :param input_filename: path of the .docx file to read.
    :param output_filename: path the formatted document is written to; may
        be the same as ``input_filename``.
    :return: None
    """
    doc = Document(input_filename)

    # Heading style: 22 pt, blue, 12 pt of space after the paragraph.
    heading_style = _get_or_add_paragraph_style(doc, 'CustomHeading')
    _set_style_font_name(heading_style, '黑体')
    heading_style.font.size = Pt(22)
    heading_style.font.color.rgb = RGBColor(0, 0, 255)
    heading_style.paragraph_format.space_after = Pt(12)

    # Body style: 14 pt, 20 pt first-line indent, left aligned,
    # 1.5 line spacing, 6 pt of space before and after.
    body_style = _get_or_add_paragraph_style(doc, 'CustomBody')
    _set_style_font_name(body_style, '仿宋')
    body_style.font.size = Pt(14)
    body_style.paragraph_format.first_line_indent = Pt(20)
    body_style.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
    body_style.paragraph_format.line_spacing = 1.5
    body_style.paragraph_format.space_before = Pt(6)
    body_style.paragraph_format.space_after = Pt(6)

    for paragraph in doc.paragraphs:
        style_name = paragraph.style.name
        # 'CustomHeading' is accepted too so that headings survive a second
        # formatting pass instead of being demoted to body text.
        if style_name.startswith('Heading') or style_name == 'CustomHeading':
            paragraph.style = heading_style
        else:
            paragraph.style = body_style

    # A single pass over the paragraphs is enough; the scan does not depend
    # on how many image relationships the document part holds.
    for paragraph in doc.paragraphs:
        if not _paragraph_has_picture(paragraph):
            continue
        # The frame properties are added before alignment and spacing so
        # that w:framePr ends up ahead of w:spacing / w:jc inside w:pPr.
        pPr = paragraph._element.get_or_add_pPr()
        if pPr.find(qn('w:framePr')) is None:
            set_picture_wrapping(paragraph)
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        paragraph.paragraph_format.space_before = Pt(12)
        paragraph.paragraph_format.space_after = Pt(12)

    doc.save(output_filename)


def _get_or_add_paragraph_style(doc, name):
    """Return the paragraph style called ``name``, creating it when absent."""
    if name in doc.styles:
        return doc.styles[name]
    return doc.styles.add_style(name, WD_STYLE_TYPE.PARAGRAPH)


def _set_style_font_name(style, font_name):
    """Set ``font_name`` on ``style`` for both Latin and East-Asian text."""
    style.font.name = font_name
    # font.name does not fill the East-Asian slot of w:rFonts, which is the
    # one Word consults for Chinese characters.
    style.element.rPr.rFonts.set(qn('w:eastAsia'), font_name)


def _paragraph_has_picture(paragraph):
    """Tell whether any run of ``paragraph`` holds a drawing or VML picture."""
    # Pictures are descendants of the run element (w:r); the run's own tag
    # is never 'pict', so the descendants have to be searched.
    return any(
        run._element.xpath('.//w:drawing | .//w:pict')
        for run in paragraph.runs
    )
def crop_and_replace_images(folder_path, crop_ratio=0.2):
    """
    Crop the bottom strip off every .jpg/.png image in a folder.

    Each cropped image is written as a PNG next to the source file, using
    the source file's name with the extension replaced by ``.png`` (so a
    ``.png`` source is overwritten and a ``.jpg`` source gains a sibling
    ``.png``). When the folder does not exist it is created and nothing
    else happens.

    :param folder_path: folder to process; surrounding whitespace is ignored.
    :param crop_ratio: fraction of the image height removed from the bottom.
        Defaults to 0.2, the value that used to be hard-coded.
    :raises ValueError: if ``crop_ratio`` is not in the range ``[0, 1)``.
    :return: None
    """
    if not 0 <= crop_ratio < 1:
        raise ValueError(f"crop_ratio must be in [0, 1), got {crop_ratio!r}")

    folder_path = os.fspath(folder_path).strip()
    if not os.path.exists(folder_path):
        # makedirs also copes with missing parent folders.
        os.makedirs(folder_path, exist_ok=True)
        return

    for filename in os.listdir(folder_path):
        if not filename.lower().endswith(('.jpg', '.png')):
            continue
        file_path = os.path.join(folder_path, filename)
        # The check has to use the joined path: the bare file name would be
        # resolved against the current working directory.
        if not os.path.isfile(file_path):
            continue
        print("文件夹路径:" + folder_path)
        print("文件路径:" + file_path)
        with Image.open(file_path) as img:
            width, height = img.size
            # The crop box must hold whole pixels.
            kept_height = round(height - height * crop_ratio)
            cropped_img = img.crop((0, 0, width, kept_height))
        # splitext only touches the final extension; searching for the first
        # '.' would cut paths such as './pics/a.jpg' in the wrong place.
        output_path = os.path.splitext(file_path)[0] + '.png'
        # Saved after the source handle is closed so that a .png source can
        # be overwritten safely.
        cropped_img.save(output_path, 'PNG')
def split_text_into_paragraphs(text):
    """
    Break text into paragraphs separated by empty-string entries.

    The text is cut at every blank line (two consecutive newlines). Pieces
    that are empty or contain only whitespace are discarded, and an empty
    string is placed between each pair of remaining pieces.

    :param text: the text to split.
    :return: list alternating paragraph, '', paragraph, ... with no
        trailing empty entry; an empty list when no paragraph remains.
    """
    kept = [piece for piece in text.split('\n\n') if piece.strip()]
    result = []
    for position, piece in enumerate(kept):
        # A separator goes in front of every paragraph except the first.
        if position:
            result.append('')
        result.append(piece)
    return result
def insert_images_into_paragraphs(paragraphs, image_folder, doc, title):
    """
    Append paragraphs to a document, following each one with a picture.

    Files in ``image_folder`` whose lower-cased name ends in ``jpg`` are
    used in sorted order, one per paragraph, until they run out. A picture
    that is missing or cannot be loaded is reported and skipped, so the
    next paragraph continues with the next picture instead of retrying the
    broken one.

    :param paragraphs: paragraph strings; the marker ``正文:`` is removed
        from each before it is added.
    :param image_folder: folder holding the pictures; may not exist, in
        which case only text is added.
    :param doc: python-docx document that receives paragraphs and pictures.
    :param title: not used by this function; kept so existing callers keep
        working.
    :return: None
    """
    if os.path.exists(image_folder):
        images = sorted(
            os.path.join(image_folder, name)
            for name in os.listdir(image_folder)
            if name.lower().endswith('jpg')
        )
    else:
        images = []

    image_index = 0
    for paragraph in paragraphs:
        doc.add_paragraph(paragraph.replace("正文:", ''))
        if image_index >= len(images):
            continue
        img_path = images[image_index]
        # The picture is consumed whether or not it can be inserted;
        # otherwise one unreadable file would block all the pictures after it.
        image_index += 1
        if not os.path.exists(img_path):
            print(f"图片路径无效: {img_path}")
            continue
        try:
            with Image.open(img_path) as img:
                width, height = img.size
            # The width is derived from the aspect ratio so that the
            # picture is rendered 1.5 inches tall.
            doc.add_picture(img_path, width=Inches(width / height * 1.5))
        except Exception as e:
            # Best effort: a broken picture must not abort the document.
            print(f"无法识别图像: {img_path}, 错误: {e}")
def create_word_document(text, image_folder, output_path, title):
    """
    Build a .docx file from text and a folder of pictures.

    The text is split into paragraphs, the paragraphs and pictures are
    added to a new document, the document is saved to ``output_path`` and
    that file is then reformatted in place.

    :param text: source text of the document.
    :param image_folder: folder the pictures are taken from.
    :param output_path: path of the .docx file to write.
    :param title: passed through to ``insert_images_into_paragraphs``.
    :return: None
    """
    document = Document()
    insert_images_into_paragraphs(
        split_text_into_paragraphs(text), image_folder, document, title
    )
    # The file is written first and then reopened by the formatter.
    document.save(output_path)
    format_word_document(input_filename=output_path, output_filename=output_path)
    print(f'文档已保存到: {output_path}')
# Read the content of a text file
def read_text_file(file_path):
    """
    Return the full content of a UTF-8 text file.

    :param file_path: path of the file to read.
    :return: the decoded text; a leading byte-order mark, if present, is
        not part of the result.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged and strips a BOM when there is
    # one, so the BOM cannot end up in the text.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        return file.read()
def get_file_name(file_path):
    """
    Return the last component of a path.

    :param file_path: path to take the file name from.
    :return: everything after the final path separator; an empty string
        when the path ends with a separator.
    """
    _directory, file_name = os.path.split(file_path)
    return file_name
def apply_random_style(paragraph):
    """
    Give every run of a paragraph one randomly chosen decoration.

    For each run one of bold, italic, single underline, a font colour or a
    background shading is picked at random and applied.

    :param paragraph: python-docx paragraph whose runs are modified in place.
    :return: None
    """
    # Font colours to pick from: red, orange, purple.
    predefined_font_colors = [
        RGBColor(255, 0, 0),
        RGBColor(255, 165, 0),
        RGBColor(128, 0, 128),
    ]
    # Light background colours: grey, yellow, green, cyan, pink, blue.
    predefined_bg_colors = [
        RGBColor(240, 240, 240),
        RGBColor(255, 255, 224),
        RGBColor(224, 255, 224),
        RGBColor(224, 255, 255),
        RGBColor(255, 228, 225),
        RGBColor(240, 248, 255),
    ]
    for run in paragraph.runs:
        style_choice = random.choice(['bold', 'italic', 'underline', 'color', 'background'])
        if style_choice == 'bold':
            run.bold = True
        elif style_choice == 'italic':
            run.italic = True
        elif style_choice == 'underline':
            run.underline = WD_UNDERLINE.SINGLE
        elif style_choice == 'color':
            run.font.color.rgb = random.choice(predefined_font_colors)
        elif style_choice == 'background':
            # python-docx has no RGB background property: font.color has no
            # highlight_color, and font.highlight_color only accepts the
            # fixed WD_COLOR_INDEX palette. Shading takes any RGB value.
            _set_run_shading(run, random.choice(predefined_bg_colors))


def _set_run_shading(run, color):
    """Set the background of ``run`` to ``color`` through a ``w:shd`` element."""
    rPr = run._element.get_or_add_rPr()
    shd = rPr.find(qn('w:shd'))
    if shd is None:
        shd = OxmlElement('w:shd')
        # w:shd has to come before these siblings inside w:rPr.
        rPr.insert_element_before(
            shd,
            'w:fitText', 'w:vertAlign', 'w:rtl', 'w:cs', 'w:em', 'w:lang',
            'w:eastAsianLayout', 'w:specVanish', 'w:oMath',
        )
    shd.set(qn('w:val'), 'clear')
    shd.set(qn('w:color'), 'auto')
    # str() of an RGBColor is its six-digit hexadecimal form.
    shd.set(qn('w:fill'), str(color))
def txt2docx(txt_path, image_path, keep_txt=True):
    """
    Convert every .txt file of a folder into a .docx file.

    For each text file the content after the first ``正文:`` marker (or the
    whole content when the marker is absent) is used, with Markdown code
    fences removed. Pictures are taken from the sub-folder of
    ``image_path`` named after the text file, and the document is written
    next to the text file with the extension ``.docx``.

    :param txt_path: folder containing the .txt files.
    :param image_path: folder containing one picture sub-folder per file.
    :param keep_txt: when false, each .txt file is deleted after it has
        been converted.
    :return: None
    """
    # Local import: pathlib is not among the module-level imports.
    from pathlib import Path

    marker = "正文:"
    image_root = Path(image_path)
    txts = sorted(
        os.path.join(txt_path, name)
        for name in os.listdir(txt_path)
        if name.lower().endswith('.txt')
    )
    for txt in txts:
        print("正在修改:" + txt)
        text = read_text_file(txt)
        # splitext removes only the final extension, whatever its case.
        # A case-sensitive replace of ".txt" leaves "A.TXT" unchanged (the
        # .docx would then overwrite the source file) and also rewrites
        # ".txt" inside directory names.
        title_name = os.path.splitext(get_file_name(txt))[0]
        print(title_name)
        if marker in text:
            # Everything after the first marker is kept, including text that
            # follows any later marker.
            text = text.partition(marker)[2]
        content = text.replace("```markdown", "").replace("```", "")
        # Trailing dots are dropped from the folder name.
        image_folder = image_root / title_name.rstrip(".")
        output_path = os.path.splitext(txt)[0] + ".docx"
        create_word_document(content, image_folder, output_path, title_name)
        if not keep_txt:
            os.remove(txt)
            print(f"已删除原始文件: {txt}")
        else:
            print(f"保留原始文件: {txt}")
# Load the settings
def load_settings():
    """
    Read the saved settings from ``SETTINGS_FILE``.

    :return: dict that always contains ``folder1`` and ``folder2`` (empty
        strings by default) plus every key stored in the file. The defaults
        alone are returned when the file is missing, unreadable, not valid
        JSON, or does not hold a JSON object.
    """
    defaults = {'folder1': '', 'folder2': ''}
    try:
        with open(SETTINGS_FILE, 'r') as f:
            stored = json.load(f)
    except FileNotFoundError:
        # First run: nothing has been saved yet.
        return defaults
    except (OSError, ValueError) as exc:
        # ValueError covers malformed JSON and undecodable bytes. A damaged
        # settings file should not prevent the program from starting.
        print(f"无法读取设置文件 {SETTINGS_FILE}: {exc}")
        return defaults
    if not isinstance(stored, dict):
        return defaults
    # Stored values win; the defaults fill in any key the file lacks.
    return {**defaults, **stored}
# Save the settings
def save_settings(settings):
    """
    Write the settings to ``SETTINGS_FILE`` as JSON, replacing its content.

    :param settings: JSON-serialisable object to store.
    :return: None
    """
    with open(SETTINGS_FILE, mode='w') as handle:
        json.dump(settings, handle)
# Handle the folders chosen by the user
def process_folders(folder1, folder2, keep_txt=True):
    """
    Run the text-to-docx conversion for the selected folders.

    :param folder1: folder containing the .txt files.
    :param folder2: folder containing the pictures.
    :param keep_txt: forwarded to ``txt2docx``.
    :return: None
    """
    txt2docx(txt_path=folder1, image_path=folder2, keep_txt=keep_txt)
# NOTE(review): everything below runs at import time. Confirm that no other
# module imports this file before moving it behind a __main__ guard.

# Restore the choices saved by the previous run.
settings = load_settings()
settings.setdefault('keep_txt', True)

# Window layout: two folder pickers, the keep-txt option, confirm / cancel.
# .get() is used so that a settings file lacking a key cannot raise KeyError.
layout = [
    [sg.Text('文章文件夹:'), sg.Input(default_text=settings.get('folder1', '')), sg.FolderBrowse()],
    [sg.Text('图片文件夹:'), sg.Input(default_text=settings.get('folder2', '')), sg.FolderBrowse()],
    [sg.Checkbox('保留原始txt文件', default=settings['keep_txt'], key='keep_txt')],
    [sg.Button('确认'), sg.Button('取消')]
]

window = sg.Window('文件夹选择窗口', layout)

# Event loop
while True:
    event, values = window.read()
    # Closing the window or pressing cancel ends the program.
    if event == sg.WIN_CLOSED or event == '取消':
        break
    if event != '确认':
        continue

    # The two Input elements have no explicit key and are read as 0 and 1.
    folder1 = values[0]
    folder2 = values[1]
    keep_txt = values['keep_txt']

    # An empty or mistyped article folder would otherwise raise inside
    # os.listdir and take the window down with it.
    if not os.path.isdir(folder1):
        sg.popup_error('请选择有效的文章文件夹')
        continue

    try:
        process_folders(folder1, folder2, keep_txt)
    except Exception as exc:
        # Top-level boundary: report the failure and keep the window open.
        sg.popup_error(f'处理失败: {exc}')
        continue

    # Remember the choices only after a successful run.
    settings['folder1'] = folder1
    settings['folder2'] = folder2
    settings['keep_txt'] = keep_txt
    save_settings(settings)

window.close()