1007 lines
37 KiB
Python
1007 lines
37 KiB
Python
import os
|
||
import sys
|
||
import glob
|
||
from PIL import Image
|
||
from docx import Document
|
||
from docx.shared import Inches, Pt
|
||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||
import PySimpleGUI as sg
|
||
|
||
|
||
# 配置设置
|
||
class Config:
    """Mutable holder for every user-tunable conversion setting.

    All attributes are plain values assigned in ``__init__``; other code
    reads and overwrites them directly.
    """

    def __init__(self):
        # --- TXT reading and TXT/folder matching ---
        self.txt_encoding = "utf-8"
        # "exact": names equal, "prefix": folder starts with the TXT name,
        # "contains": folder name contains the TXT name.
        self.match_pattern = "exact"
        # "txt_folder" or "custom".
        self.output_location = "txt_folder"

        # --- image handling ---
        # "name" or "time".
        self.image_sort_by = "name"
        # "none" or "width".
        self.image_resize = "none"
        # Target picture width, in inches.
        self.image_width = 6
        # "left", "center" or "right".
        self.image_alignment = "center"
        # "cycle", "truncate" or "repeat_last".
        self.image_strategy = "cycle"

        # --- document formatting ---
        self.line_spacing = 1.5
        # Deepest heading level that is recognised.
        self.title_levels = 6

        # --- output ---
        # Name of the sub-folder that receives the generated files.
        self.output_subfolder = "converted_docs"
|
||
|
||
|
||
# 全局配置实例
|
||
# Module-wide settings object; the classes and windows in this file read it
# through the global name ``config``.
config = Config()
|
||
|
||
|
||
# 文件处理模块 - 增强文件夹和匹配处理
|
||
class FileHandler:
    """Filesystem helpers: find TXT files, pair them with image folders, read text, pick output paths."""

    # Lower-case file extensions treated as insertable pictures.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

    @staticmethod
    def scan_txt_files(folder_path):
        """Recursively collect every ``.txt`` file below *folder_path*.

        Returns a list of dicts with keys ``path``, ``name`` (file name
        without extension), ``relative_path`` (relative to *folder_path*)
        and ``folder`` (directory containing the file), sorted by
        ``relative_path``.

        Raises:
            Exception: if the folder does not exist or holds no TXT file.
        """
        if not os.path.isdir(folder_path):
            raise Exception(f"TXT文件夹不存在: {folder_path}")

        txt_files = []
        for root, _dirs, files in os.walk(folder_path):
            for file_name in files:
                if not file_name.lower().endswith(".txt"):
                    continue
                txt_path = os.path.join(root, file_name)
                txt_files.append({
                    "path": txt_path,
                    "name": os.path.splitext(file_name)[0],
                    "relative_path": os.path.relpath(txt_path, folder_path),
                    "folder": root,
                })

        if not txt_files:
            raise Exception(f"在 {folder_path} 中未找到任何TXT文件")

        return sorted(txt_files, key=lambda x: x["relative_path"])

    @staticmethod
    def find_matching_image_folders(txt_files, images_root):
        """Pair each TXT entry with an image folder found below *images_root*.

        Folder and TXT names are compared case-insensitively according to
        ``config.match_pattern``. When several folders match, the one with
        the shortest relative path is chosen.

        Returns one dict per TXT entry with keys ``txt``, ``image_folder``
        (a folder dict or ``None``) and ``all_matches``.

        Raises:
            Exception: if *images_root* is not a directory.
        """
        if not os.path.isdir(images_root):
            raise Exception(f"图片根文件夹不存在: {images_root}")

        all_image_folders = []
        for root, dirs, _ in os.walk(images_root):
            for dir_name in dirs:
                folder_path = os.path.join(root, dir_name)
                all_image_folders.append({
                    "path": folder_path,
                    "name": dir_name,
                    "relative_path": os.path.relpath(folder_path, images_root),
                })

        # One predicate per supported mode; an unknown mode matches nothing.
        predicates = {
            "exact": lambda txt_name, folder_name: txt_name == folder_name,
            "prefix": lambda txt_name, folder_name: folder_name.startswith(txt_name),
            "contains": lambda txt_name, folder_name: txt_name in folder_name,
        }
        is_match = predicates.get(config.match_pattern)

        matched_pairs = []
        for txt in txt_files:
            txt_name = txt["name"].lower()
            matches = []
            if is_match is not None:
                matches = [
                    img_folder for img_folder in all_image_folders
                    if is_match(txt_name, img_folder["name"].lower())
                ]
            # Stable sort: among equally short paths the walk order decides.
            matches.sort(key=lambda x: len(x["relative_path"]))
            matched_pairs.append({
                "txt": txt,
                "image_folder": matches[0] if matches else None,
                "all_matches": matches,
            })

        return matched_pairs

    @staticmethod
    def get_image_files(folder_path):
        """Return the image files directly inside *folder_path*.

        Extensions are compared case-insensitively and the directory is
        listed directly rather than through ``glob``, so folder names that
        contain glob metacharacters (``[``, ``]``, ``*``, ``?``) work.
        Dot-files are skipped, as a ``*.ext`` glob pattern would skip them.

        The result is ordered by path, or by modification time when
        ``config.image_sort_by`` is ``"time"``. An empty list is returned
        for a missing or empty *folder_path*.
        """
        if not folder_path or not os.path.isdir(folder_path):
            return []

        image_files = []
        for entry in os.listdir(folder_path):
            if entry.startswith('.'):
                continue
            if not entry.lower().endswith(FileHandler.IMAGE_EXTENSIONS):
                continue
            full_path = os.path.join(folder_path, entry)
            if os.path.isfile(full_path):
                image_files.append(full_path)

        # Name order first so that the result is deterministic; the time
        # sort is stable and therefore keeps name order among equal mtimes.
        image_files.sort()
        if config.image_sort_by == "time":
            image_files.sort(key=os.path.getmtime)

        return image_files

    @staticmethod
    def read_markdown_txt(file_path):
        """Read a TXT file and return its text with ``\\n`` line endings.

        ``config.txt_encoding`` is tried first, then gbk, utf-16 and
        iso-8859-1. A leading byte-order mark is removed so that a heading
        on the first line is still recognised.

        Raises:
            Exception: if the file does not exist or no encoding decodes it.
        """
        if not os.path.exists(file_path):
            raise Exception(f"TXT文件不存在: {file_path}")

        encodings = [config.txt_encoding, "gbk", "utf-16", "iso-8859-1"]
        for encoding in encodings:
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    content = f.read()
            except (UnicodeError, LookupError):
                # LookupError: the configured codec name is unknown; fall
                # through to the next candidate instead of aborting.
                continue
            if content.startswith('\ufeff'):
                content = content[1:]
            return content.replace("\r\n", "\n").replace("\r", "\n")

        raise Exception(f"无法解析TXT文件(编码问题): {file_path}")

    @staticmethod
    def prepare_output_path(txt_info, images_root, output_root):
        """Return a free ``.docx`` path for *txt_info*, creating the output folder.

        The base folder is the TXT file's own folder when
        ``config.output_location`` is ``"txt_folder"``, otherwise
        *output_root*; the file goes into ``config.output_subfolder`` below
        it. If the name is taken, ``_1``, ``_2``, ... is appended.
        *images_root* is accepted for call compatibility and is not used.
        """
        if config.output_location == "txt_folder":
            base_folder = txt_info["folder"]
        else:
            base_folder = output_root

        output_folder = os.path.join(base_folder, config.output_subfolder)
        os.makedirs(output_folder, exist_ok=True)

        txt_name = txt_info["name"]
        output_path = os.path.join(output_folder, f"{txt_name}.docx")

        counter = 1
        while os.path.exists(output_path):
            output_path = os.path.join(output_folder, f"{txt_name}_{counter}.docx")
            counter += 1

        return output_path
|
||
|
||
|
||
# Markdown解析模块
|
||
class MarkdownParser:
    """Line-oriented reader for the Markdown subset used in the TXT files."""

    @staticmethod
    def parse(txt_content):
        """Split *txt_content* into heading sections.

        Returns a list of dicts ``{'level', 'content', 'paragraphs'}``.
        Text that precedes the first heading is collected under a level-0
        section titled '前置内容'; that section is only created once a
        non-blank line is seen, so leading blank lines do not produce an
        empty section. Lines inside ``` fences are skipped.

        Each paragraph is a dict ``{'type', 'content', 'is_first',
        'formatting'}`` where ``type`` is one of ``text``,
        ``unordered_list``, ``ordered_list``, ``quote`` or ``empty``,
        ``content`` is the raw line, and ``formatting`` holds spans
        relative to the paragraph's display text (the line with its
        list/quote marker and surrounding whitespace removed), which is the
        text the spans are later applied to.
        """
        headings = []
        current_heading = None
        current_paragraphs = []
        in_code_block = False

        for raw_line in txt_content.split('\n'):
            line = raw_line.rstrip('\r')

            # A fence line toggles code mode; fenced content is not kept.
            if line.startswith('```'):
                in_code_block = not in_code_block
                continue
            if in_code_block:
                continue

            stripped_line = line.lstrip()

            if stripped_line.startswith('#'):
                level = 0
                while (level < len(stripped_line)
                       and stripped_line[level] == '#'
                       and level < config.title_levels):
                    level += 1

                # A heading needs whitespace (or nothing) after the hashes;
                # otherwise the line falls through and is treated as text.
                if level > 0 and (len(stripped_line) <= level
                                  or stripped_line[level] in (' ', '\t')):
                    if current_heading:
                        current_heading['paragraphs'] = current_paragraphs
                        headings.append(current_heading)
                    current_heading = {
                        'level': level,
                        'content': stripped_line[level:].lstrip(),
                        'paragraphs': [],
                    }
                    current_paragraphs = []
                    continue

            if not stripped_line:
                # Collapse runs of blank lines and drop leading ones.
                if current_paragraphs and current_paragraphs[-1]['content'].strip() != '':
                    current_paragraphs.append({
                        'type': 'empty',
                        'content': '',
                        'is_first': False,
                        'formatting': {},
                    })
                continue

            if current_heading is None:
                current_heading = {
                    'level': 0,
                    'content': '前置内容',
                    'paragraphs': [],
                }

            para_type = MarkdownParser._classify(line)
            display_text = MarkdownParser._display_text(line, para_type)
            current_paragraphs.append({
                'type': para_type,
                'content': line,
                'is_first': not current_paragraphs,
                'formatting': MarkdownParser.extract_formatting(display_text),
            })

        if current_heading:
            current_heading['paragraphs'] = current_paragraphs
            headings.append(current_heading)

        return headings

    @staticmethod
    def _ordered_marker_length(stripped):
        """Return the length of a leading ``12. `` / ``3) `` marker, or 0 if there is none."""
        digits = 0
        while digits < len(stripped) and stripped[digits].isdigit():
            digits += 1
        if digits and stripped[digits:digits + 2] in ('. ', ') '):
            return digits + 2
        return 0

    @staticmethod
    def _classify(line):
        """Return the paragraph type of a non-blank, non-heading line."""
        if line.startswith(('- ', '* ')):
            return 'unordered_list'
        if MarkdownParser._ordered_marker_length(line.lstrip()):
            return 'ordered_list'
        if line.startswith('> '):
            return 'quote'
        return 'text'

    @staticmethod
    def _display_text(line, para_type):
        """Return *line* without its list/quote marker and surrounding whitespace."""
        if para_type in ('unordered_list', 'quote'):
            return line[2:].strip()
        if para_type == 'ordered_list':
            stripped = line.lstrip()
            return stripped[MarkdownParser._ordered_marker_length(stripped):].strip()
        return line.strip()

    @staticmethod
    def extract_formatting(text):
        """Locate inline markup in *text*.

        Returns ``{'bold': [...], 'italic': [...], 'code': [...]}`` where
        every item is a ``(start, end)`` slice of *text* that includes the
        delimiters (``**`` for bold, ``*`` for italic, a backtick for
        code). The text is scanned once from left to right, so spans never
        overlap: a ``**`` pair is not also reported as italic, and
        asterisks inside a code span are left alone.
        """
        formatting = {
            'bold': [],
            'italic': [],
            'code': [],
        }

        pos = 0
        length = len(text)
        while pos < length:
            char = text[pos]
            if char == '`':
                closing = text.find('`', pos + 1)
                if closing != -1:
                    formatting['code'].append((pos, closing + 1))
                    pos = closing + 1
                    continue
            elif text.startswith('**', pos):
                closing = text.find('**', pos + 2)
                if closing != -1:
                    formatting['bold'].append((pos, closing + 2))
                    pos = closing + 2
                    continue
                # Unmatched '**': skip both stars so that the second one is
                # not mistaken for an italic opener.
                pos += 2
                continue
            elif char == '*':
                closing = text.find('*', pos + 1)
                if closing != -1:
                    formatting['italic'].append((pos, closing + 1))
                    pos = closing + 1
                    continue
            pos += 1

        return formatting
|
||
|
||
|
||
# 图片处理模块
|
||
class ImageProcessor:
    """Prepares pictures for insertion into the document."""

    # EXIF tag id of the orientation field.
    _ORIENTATION_TAG = 274
    # Counter-clockwise rotation that undoes each handled EXIF orientation.
    _ROTATION_FOR_ORIENTATION = {3: 180, 6: 270, 8: 90}

    @staticmethod
    def process_image(image_path):
        """Open *image_path*, undo EXIF rotation and optionally shrink it.

        Returns ``(image, width_in_inches)``. When ``config.image_resize``
        is ``"width"`` (and the configured width is positive) the reported
        width is ``config.image_width`` and wider images are scaled down
        to it at 96 px per inch; otherwise the width is the pixel width
        divided by 96.

        The pixel data is loaded before the file is closed, so the
        returned image can still be saved by the caller.

        Raises:
            Exception: wrapping any error raised while reading or
                transforming the image.
        """
        try:
            with Image.open(image_path) as img:
                # Image.open is lazy; leaving the ``with`` block closes the
                # file, so read the pixels now or a later save() fails.
                img.load()

                if hasattr(img, '_getexif'):
                    exif = img._getexif()
                    if exif:
                        angle = ImageProcessor._ROTATION_FOR_ORIENTATION.get(
                            exif.get(ImageProcessor._ORIENTATION_TAG))
                        if angle:
                            img = img.rotate(angle, expand=True)

                if config.image_resize == "width" and config.image_width > 0:
                    target_width_px = int(config.image_width * 96)
                    width, height = img.size

                    if width > target_width_px:
                        ratio = target_width_px / width
                        new_height = int(height * ratio)
                        img = img.resize((target_width_px, new_height), Image.LANCZOS)

                    return img, config.image_width

                return img, img.width / 96
        except Exception as e:
            raise Exception(f"处理图片失败 {image_path}: {str(e)}") from e

    @staticmethod
    def get_image_alignment():
        """Map ``config.image_alignment`` to a paragraph alignment; anything but left/right is centred."""
        alignments = {
            "left": WD_ALIGN_PARAGRAPH.LEFT,
            "right": WD_ALIGN_PARAGRAPH.RIGHT,
        }
        return alignments.get(config.image_alignment, WD_ALIGN_PARAGRAPH.CENTER)
|
||
|
||
|
||
# DOCX生成模块
|
||
class DocxGenerator:
    """Writes parsed sections and their pictures into a DOCX file."""

    # Delimiter width of each inline markup kind.
    _MARKER_WIDTH = {'bold': 2, 'italic': 1, 'code': 1}

    @staticmethod
    def generate(headings, image_files, output_path, progress_callback=None):
        """Build the document and save it to *output_path*.

        For every section the heading is written, then the first
        paragraph, then one picture (while pictures are available), then
        the remaining paragraphs. When the pictures run out,
        ``config.image_strategy`` decides what happens: ``cycle`` starts
        over, ``repeat_last`` keeps using the last picture, anything else
        stops inserting pictures.

        A picture that cannot be inserted is replaced by a note in the
        document and the next section moves on to the next picture.

        *progress_callback*, if given, is called as ``callback(percent, text)``.

        Returns ``True``.

        Raises:
            Exception: if the document cannot be saved.
        """
        doc = Document()
        total_headings = len(headings)
        image_count = len(image_files)
        image_index = 0

        for i, heading in enumerate(headings):
            if progress_callback:
                progress = int((i / total_headings) * 100)
                progress_callback(progress, f"处理标题: {heading['content'][:30]}...")

            if 0 < heading['level'] <= config.title_levels:
                doc.add_heading(heading['content'], level=heading['level'])
            else:
                para = doc.add_paragraph(heading['content'])
                if para.runs:  # no run exists when the title text is empty
                    title_run = para.runs[0]
                    title_run.font.size = Pt(14)
                    title_run.font.bold = True
                # Spacing lives on paragraph_format, not on the paragraph.
                para.paragraph_format.space_after = Pt(12)

            paragraphs = heading['paragraphs']
            if not paragraphs:
                continue

            DocxGenerator.add_formatted_paragraph(doc, paragraphs[0])

            if image_index < image_count:
                try:
                    DocxGenerator._insert_image(doc, image_files[image_index])
                except Exception as e:
                    doc.add_paragraph(f"[图片插入失败: {str(e)}]")
                # Advance even after a failure so one unreadable picture is
                # not retried for every following section.
                image_index = DocxGenerator._next_image_index(image_index, image_count)

            for para in paragraphs[1:]:
                DocxGenerator.add_formatted_paragraph(doc, para)

        try:
            doc.save(output_path)
        except Exception as e:
            raise Exception(f"保存DOCX失败: {str(e)}") from e

        if progress_callback:
            progress_callback(100, "转换完成!")
        return True

    @staticmethod
    def _next_image_index(current, image_count):
        """Return the index of the picture to use after *current* per ``config.image_strategy``."""
        following = current + 1
        if following < image_count:
            return following
        if config.image_strategy == "cycle":
            return 0
        if config.image_strategy == "repeat_last":
            return image_count - 1
        # "truncate": park the index past the end so nothing more is inserted.
        return image_count

    @staticmethod
    def _insert_image(doc, image_path):
        """Append *image_path* as a picture paragraph with the configured alignment."""
        import io

        img, width = ImageProcessor.process_image(image_path)

        if config.image_resize == "width":
            # Hand the processed pixels over in memory; no temporary file
            # has to be created or cleaned up.
            source = io.BytesIO()
            img.save(source, format="PNG")
            source.seek(0)
        else:
            source = image_path

        doc.add_picture(source, width=Inches(width))
        # add_picture returns the inline shape; alignment belongs to the
        # paragraph it was placed in, which is now the last one.
        doc.paragraphs[-1].alignment = ImageProcessor.get_image_alignment()

    @staticmethod
    def _strip_ordered_marker(content):
        """Return *content* without a leading ``1.`` / ``12)`` style marker."""
        stripped = content.lstrip()
        digits = 0
        while digits < len(stripped) and stripped[digits].isdigit():
            digits += 1
        if digits and stripped[digits:digits + 1] in ('.', ')'):
            return stripped[digits + 1:].strip()
        return content.strip()

    @staticmethod
    def add_formatted_paragraph(doc, paragraph_data):
        """Append one parsed paragraph to *doc* using the style for its type.

        Inline markup is located on the exact text that is written (the
        content with its list/quote marker removed), so the span offsets
        always match that text regardless of what the stored
        ``formatting`` entry was computed from.
        """
        content = paragraph_data['content']
        para_type = paragraph_data['type']

        if para_type == 'empty':
            doc.add_paragraph()
            return

        if para_type == 'unordered_list':
            para = doc.add_paragraph(style='List Bullet')
            text = content[2:].strip()
        elif para_type == 'ordered_list':
            para = doc.add_paragraph(style='List Number')
            text = DocxGenerator._strip_ordered_marker(content)
        elif para_type == 'quote':
            para = doc.add_paragraph(style='Intense Quote')
            text = content[2:].strip()
        else:
            para = doc.add_paragraph()
            text = content.strip()

        formatting = MarkdownParser.extract_formatting(text)
        DocxGenerator.apply_formatting(para, text, formatting)

        if config.line_spacing > 0:
            para.paragraph_format.line_spacing = config.line_spacing

    @staticmethod
    def apply_formatting(paragraph, text, formatting):
        """Write *text* into *paragraph* as runs, styling the marked spans.

        *formatting* maps ``bold`` / ``italic`` / ``code`` to lists of
        ``(start, end)`` slices of *text* that include the delimiters; the
        delimiters are dropped from the output. Spans that overlap an
        already written span, run past the end of *text* or are too short
        to hold their delimiters are ignored, so no text is emitted twice.
        """
        spans = [
            (start, end, kind)
            for kind in ('bold', 'italic', 'code')
            for start, end in formatting.get(kind, ())
        ]
        # Earliest span first; at the same start the longest one wins.
        spans.sort(key=lambda span: (span[0], -span[1]))

        current_pos = 0
        for start, end, kind in spans:
            marker = DocxGenerator._MARKER_WIDTH[kind]
            if start < current_pos or end > len(text) or end - start < 2 * marker:
                continue

            if start > current_pos:
                paragraph.add_run(text[current_pos:start])

            run = paragraph.add_run(text[start + marker:end - marker])
            if kind == 'bold':
                run.bold = True
            elif kind == 'italic':
                run.italic = True
            else:
                run.font.name = 'Courier New'
                run.font.size = Pt(10)

            current_pos = end

        if current_pos < len(text):
            paragraph.add_run(text[current_pos:])
|
||
|
||
|
||
# 批量处理模块
|
||
class BatchProcessor:
    """Runs the TXT -> DOCX conversion for a list of matched pairs."""

    @staticmethod
    def process_batch(matched_pairs, output_root, progress_callback=None):
        """Convert every pair and collect the outcome.

        Each pair is handled independently: a failure is recorded in
        ``failed_items`` and the batch continues with the next pair.

        *progress_callback*, if given, is called as ``callback(percent, text)``
        with the overall progress across the whole batch.

        Returns a dict with keys ``total``, ``success``, ``failed``,
        ``failed_items`` (dicts with ``name`` and ``error``) and
        ``main_output_folder`` — the folder of the first document that was
        actually written, or ``""`` when nothing succeeded.
        """
        total = len(matched_pairs)
        success_count = 0
        failed_items = []
        main_output_folder = ""

        for i, pair in enumerate(matched_pairs):
            txt_info = pair['txt']
            image_folder = pair['image_folder']
            try:
                if progress_callback:
                    overall_progress = int((i / total) * 100)
                    progress_callback(overall_progress,
                                      f"处理 {i + 1}/{total}: {txt_info['name']}")

                output_path = FileHandler.prepare_output_path(
                    txt_info,
                    image_folder['path'] if image_folder else "",
                    output_root
                )

                txt_content = FileHandler.read_markdown_txt(txt_info['path'])
                headings = MarkdownParser.parse(txt_content)
                if not headings:
                    raise Exception("未解析到有效内容")

                image_files = []
                if image_folder:
                    image_files = FileHandler.get_image_files(image_folder['path'])

                def update_file_progress(progress, text):
                    # Scale this file's 0-100 progress into its slice of
                    # the overall bar. Called synchronously, so ``i`` and
                    # ``txt_info`` still belong to the current iteration.
                    if progress_callback:
                        sub_progress = int((i + progress / 100) / total * 100)
                        progress_callback(sub_progress, f"{txt_info['name']}: {text}")

                DocxGenerator.generate(headings, image_files, output_path, update_file_progress)
                success_count += 1

                # Remember where the first real output went instead of
                # asking for a fresh path afterwards, which would create
                # folders and could name a place that received nothing.
                if not main_output_folder:
                    main_output_folder = os.path.dirname(output_path)

            except Exception as e:
                failed_items.append({
                    "name": txt_info['name'],
                    "error": str(e)
                })

        return {
            "total": total,
            "success": success_count,
            "failed": len(failed_items),
            "failed_items": failed_items,
            "main_output_folder": main_output_folder
        }
|
||
|
||
|
||
# 配置窗口
|
||
def show_config_window():
    """Show the modal settings dialog and store the choices in ``config``.

    Nothing is saved when the dialog is cancelled or closed. On '确定' the
    width field is validated first: if resizing by width is selected and
    the field is not a positive number, an error is shown and the dialog
    stays open with ``config`` untouched.
    """
    # Tk font tuples are (family, size, style); 'bold' is a style, not a family.
    section_font = ('Helvetica', 12, 'bold')

    layout = [
        [sg.Text('文件匹配设置', font=section_font)],
        [sg.Text('TXT编码:'),
         # Read-only so that only codec names from the list can be chosen.
         sg.Combo(['utf-8', 'gbk', 'utf-16'],
                  default_value=config.txt_encoding, key='txt_encoding', readonly=True)],
        [sg.Text('匹配模式:'),
         sg.Radio('完全匹配(文件名与文件夹名相同)', 'match',
                  default=config.match_pattern == "exact", key='match_exact'),
         sg.Radio('前缀匹配', 'match',
                  default=config.match_pattern == "prefix", key='match_prefix'),
         sg.Radio('包含匹配', 'match',
                  default=config.match_pattern == "contains", key='match_contains')],
        [sg.HSeparator()],
        [sg.Text('输出位置设置', font=section_font)],
        [sg.Radio('输出到TXT文件所在文件夹', 'output_loc',
                  default=config.output_location == "txt_folder", key='output_txt_folder'),
         sg.Radio('输出到指定文件夹', 'output_loc',
                  default=config.output_location == "custom", key='output_custom')],
        [sg.Text('输出子文件夹名称:'),
         sg.InputText(config.output_subfolder, key='output_subfolder')],
        [sg.HSeparator()],
        [sg.Text('图片处理设置', font=section_font)],
        [sg.Text('图片排序方式:'),
         sg.Radio('按名称', 'sort', default=config.image_sort_by == "name", key='sort_name'),
         sg.Radio('按修改时间', 'sort', default=config.image_sort_by == "time", key='sort_time')],
        [sg.Text('图片尺寸调整:'),
         sg.Radio('不调整', 'resize', default=config.image_resize == "none", key='resize_none'),
         sg.Radio('按宽度:', 'resize', default=config.image_resize == "width", key='resize_width'),
         sg.InputText(str(config.image_width), size=(5, 1), key='image_width'),
         sg.Text('英寸')],
        [sg.Text('图片对齐方式:'),
         sg.Radio('左对齐', 'align', default=config.image_alignment == "left", key='align_left'),
         sg.Radio('居中', 'align', default=config.image_alignment == "center", key='align_center'),
         sg.Radio('右对齐', 'align', default=config.image_alignment == "right", key='align_right')],
        [sg.HSeparator()],
        [sg.Text('图片不足时策略', font=section_font)],
        [sg.Radio('循环使用', 'strategy', default=config.image_strategy == "cycle", key='strategy_cycle'),
         sg.Radio('忽略多余标题', 'strategy', default=config.image_strategy == "truncate", key='strategy_truncate'),
         sg.Radio('重复最后一张', 'strategy', default=config.image_strategy == "repeat_last", key='strategy_repeat')],
        [sg.HSeparator()],
        [sg.Button('确定'), sg.Button('取消')]
    ]

    window = sg.Window('转换设置', layout, modal=True, resizable=True)

    while True:
        event, values = window.read()
        if event in (sg.WIN_CLOSED, '取消'):
            break
        if event != '确定':
            continue

        # Validate before touching config so a rejected dialog saves nothing.
        try:
            new_width = float(values['image_width'])
        except (TypeError, ValueError):
            new_width = None
        width_is_valid = new_width is not None and new_width > 0
        if values['resize_width'] and not width_is_valid:
            sg.popup_error('图片宽度必须是大于0的数字')
            continue

        if values['txt_encoding']:
            config.txt_encoding = values['txt_encoding']

        if values['match_exact']:
            config.match_pattern = "exact"
        elif values['match_prefix']:
            config.match_pattern = "prefix"
        else:
            config.match_pattern = "contains"

        config.output_location = "txt_folder" if values['output_txt_folder'] else "custom"

        subfolder = values['output_subfolder'].strip()
        if subfolder:
            config.output_subfolder = subfolder

        config.image_sort_by = "name" if values['sort_name'] else "time"
        config.image_resize = "none" if values['resize_none'] else "width"

        # An unusable width is ignored when resizing is off; the previous
        # value is kept.
        if width_is_valid:
            config.image_width = new_width

        if values['align_left']:
            config.image_alignment = "left"
        elif values['align_right']:
            config.image_alignment = "right"
        else:
            config.image_alignment = "center"

        if values['strategy_cycle']:
            config.image_strategy = "cycle"
        elif values['strategy_truncate']:
            config.image_strategy = "truncate"
        else:
            config.image_strategy = "repeat_last"

        break

    window.close()
|
||
|
||
|
||
# 匹配编辑窗口
|
||
def show_matching_editor(matched_pairs, images_root):
    """Let the user reassign or clear the image folder of each TXT file.

    The table lists every pair; the list box offers every folder found
    below *images_root*. *matched_pairs* is modified in place as soon as a
    button is pressed and the same list is returned when the window is
    closed or '应用所有' is clicked.
    """
    # (absolute path, path relative to images_root) of every sub-folder.
    all_image_folders = []
    if os.path.isdir(images_root):
        for root, dirs, _ in os.walk(images_root):
            for dir_name in dirs:
                folder_path = os.path.join(root, dir_name)
                rel_path = os.path.relpath(folder_path, images_root)
                all_image_folders.append((folder_path, rel_path))

    # The list box shows relative paths; map them back to absolute ones.
    path_by_rel = {rel_path: folder_path for folder_path, rel_path in all_image_folders}

    table_data = []
    for i, pair in enumerate(matched_pairs):
        img_folder = pair['image_folder']['relative_path'] if pair['image_folder'] else "无匹配"
        table_data.append([i, pair['txt']['name'], img_folder])

    layout = [
        # Tk font tuples are (family, size, style).
        [sg.Text('文件匹配编辑', font=('Helvetica', 14, 'bold'))],
        [sg.Text('选择要修改的项目,然后从右侧选择图片文件夹')],
        [
            sg.Table(
                values=table_data,
                headings=['序号', 'TXT文件名', '匹配的图片文件夹'],
                key='-TABLE-',
                select_mode=sg.TABLE_SELECT_MODE_BROWSE,
                enable_events=True,
                justification='left',
                size=(None, 15)
            ),
            sg.VSeparator(),
            sg.Listbox(
                values=[rel_path for _, rel_path in all_image_folders],
                key='-FOLDERS-',
                size=(40, 15),
                enable_events=True
            )
        ],
        [sg.Button('设置选中项'), sg.Button('清除选中项'), sg.Button('应用所有')]
    ]

    window = sg.Window('匹配编辑', layout, resizable=True)
    selected_row = None

    while True:
        event, values = window.read()

        if event in (sg.WIN_CLOSED, '应用所有'):
            break

        if event == '-TABLE-':
            if values['-TABLE-']:
                selected_row = values['-TABLE-'][0]

        elif event == '设置选中项' and selected_row is not None and values['-FOLDERS-']:
            folder_rel = values['-FOLDERS-'][0]
            folder_path = path_by_rel[folder_rel]

            matched_pairs[selected_row]['image_folder'] = {
                "path": folder_path,
                "name": os.path.basename(folder_path),
                "relative_path": folder_rel
            }
            table_data[selected_row][2] = folder_rel
            # Re-select the row so the highlight survives the refresh.
            window['-TABLE-'].update(values=table_data, select_rows=[selected_row])

        elif event == '清除选中项' and selected_row is not None:
            matched_pairs[selected_row]['image_folder'] = None
            table_data[selected_row][2] = "无匹配"
            window['-TABLE-'].update(values=table_data, select_rows=[selected_row])

    window.close()
    return matched_pairs
|
||
|
||
|
||
# 帮助窗口
|
||
def show_help_window():
    """Open a scrollable popup containing the usage instructions."""
    # One entry per displayed line; the empty first and last entries give
    # the text its leading and trailing newline.
    help_lines = [
        '',
        '批量Markdown TXT转DOCX工具使用说明:',
        '',
        '1. 选择包含Markdown内容的TXT文件所在文件夹',
        '2. 选择图片文件夹的根目录(程序会自动查找子文件夹)',
        '3. 选择输出文件的保存根目录(当选择"输出到指定文件夹"时有效)',
        '4. 点击"扫描文件"按钮,程序会自动匹配TXT文件和图片文件夹',
        '5. 查看匹配结果,可点击"编辑匹配"调整匹配关系',
        '6. 点击"开始批量转换"生成DOCX文件',
        '',
        '输出路径选择:',
        '- 输出到TXT文件所在文件夹: 每个DOCX文件会保存在对应TXT文件所在文件夹的子目录中',
        '- 输出到指定文件夹: 所有DOCX文件会保存在您指定的文件夹的子目录中',
        '',
        '匹配规则:',
        '- 完全匹配: TXT文件名(不含扩展名)与图片文件夹名完全相同',
        '- 前缀匹配: 图片文件夹名以前缀形式包含TXT文件名',
        '- 包含匹配: 图片文件夹名中包含TXT文件名',
        '',
        '转换规则:',
        '- 每个小标题的第一段后会插入一张图片',
        '- 支持Markdown格式: 标题、列表、粗体、斜体、代码等',
        '',
    ]
    help_text = "\n".join(help_lines)
    sg.popup_scrolled('使用帮助', help_text, size=(60, 20))
|
||
|
||
|
||
# 结果窗口
|
||
def show_results_window(results):
    """Show the batch summary and offer to open the output folder.

    *results* is the dict returned by ``BatchProcessor.process_batch``.
    A plain popup is used when everything succeeded, a scrollable one
    listing the failures otherwise.
    """
    output_folder = results['main_output_folder']

    if results['failed'] == 0:
        message = f"全部成功!\n共处理 {results['total']} 个文件,全部转换成功。"
        if output_folder:
            message += f"\n主要输出文件夹: {output_folder}"
        sg.popup('处理完成', message)
    else:
        failed_text = "\n".join(
            f"- {item['name']}: {item['error']}" for item in results['failed_items'])
        message = (f"处理完成!\n共处理 {results['total']} 个文件,"
                   f"{results['success']} 个成功,{results['failed']} 个失败。\n\n"
                   f"失败项:\n{failed_text}")
        if output_folder:
            message += f"\n主要输出文件夹: {output_folder}"
        sg.popup_scrolled('处理完成', message, size=(60, 20))

    if not (output_folder and os.path.exists(output_folder)):
        return
    if sg.popup_yes_no('是否打开主要输出文件夹?') != 'Yes':
        return

    if sys.platform.startswith('win'):
        os.startfile(output_folder)
        return

    import subprocess

    opener = 'open' if sys.platform.startswith('darwin') else 'xdg-open'
    try:
        # Argument list instead of a shell string: the path is passed
        # verbatim, so quotes, spaces or '$' in it cannot break the call.
        subprocess.run([opener, output_folder], check=False)
    except OSError as e:
        sg.popup_error(f'无法打开文件夹: {str(e)}')
|
||
|
||
|
||
# 主界面
|
||
def main_window():
    """Run the main window and its event loop until the user quits.

    The window lets the user choose the TXT folder, the image root and
    the output root, scan and preview the TXT/folder pairs, edit them,
    change the settings and start the batch conversion.
    """
    sg.theme('BlueMono')

    matched_pairs = []

    def preview_rows(pairs):
        """Return the preview-table rows for *pairs*."""
        rows = []
        for pair in pairs:
            img_folder = pair['image_folder']['relative_path'] if pair['image_folder'] else "无匹配"
            rows.append([
                pair['txt']['name'],
                pair['txt']['relative_path'],
                img_folder
            ])
        return rows

    layout = [
        # Tk font tuples are (family, size, style); 'bold' is a style.
        [sg.Text('批量Markdown TXT转DOCX工具', font=('Helvetica', 16, 'bold'))],
        [sg.Text('(按文件名匹配TXT文件和图片文件夹)', text_color='gray')],
        [sg.HSeparator()],
        [sg.Text('TXT文件文件夹:', size=(15, 1)),
         sg.InputText(key='txt_folder', enable_events=True),
         sg.FolderBrowse('浏览')],
        [sg.Text('图片根文件夹:', size=(15, 1)),
         sg.InputText(key='images_root', enable_events=True),
         sg.FolderBrowse('浏览')],
        [sg.Text('输出根文件夹:', size=(15, 1)),
         sg.InputText(key='output_root', enable_events=True),
         sg.FolderBrowse('浏览'),
         sg.Text('(当选择"输出到指定文件夹"时有效)', text_color='gray')],
        [sg.Button('扫描文件', size=(12, 1)),
         sg.Button('编辑匹配', size=(12, 1), disabled=True),
         sg.Button('转换设置', size=(12, 1)),
         sg.Button('帮助', size=(8, 1))],
        [sg.HSeparator()],
        [sg.Text('匹配结果预览:', font=('Helvetica', 10, 'bold'))],
        [sg.Table(
            values=[],
            headings=['TXT文件名', '相对路径', '匹配的图片文件夹'],
            key='-PREVIEW_TABLE-',
            auto_size_columns=False,
            col_widths=[20, 30, 30],
            justification='left',
            size=(None, 10)
        )],
        [sg.ProgressBar(100, orientation='h', size=(80, 20), key='progress_bar', visible=False)],
        [sg.Text('状态: 就绪', key='status_text', size=(80, 1))],
        [sg.Button('开始批量转换', size=(15, 1), disabled=True), sg.Button('退出')]
    ]

    window = sg.Window('批量Markdown TXT转DOCX工具', layout, resizable=True)
    progress_bar = window['progress_bar']
    status_text = window['status_text']
    preview_table = window['-PREVIEW_TABLE-']
    output_root_input = window['output_root']

    def update_output_root_state():
        """Enable the output-root field only when output goes to a custom folder."""
        if config.output_location == "custom":
            output_root_input.update(disabled=False)
            output_root_input.Widget.configure(foreground='black')
        else:
            output_root_input.update(disabled=True)
            output_root_input.Widget.configure(foreground='gray')

    # The window is not created with finalize=True, so one short read is
    # performed first; elements can only be updated after that.
    window.read(timeout=1)
    update_output_root_state()

    while True:
        event, values = window.read()

        if event in (sg.WIN_CLOSED, '退出'):
            break

        if event == '转换设置':
            # Keep the typed output root across the settings dialog.
            current_output_root = values['output_root']
            show_config_window()
            update_output_root_state()
            window['output_root'].update(current_output_root)

        if event == '帮助':
            show_help_window()

        if event == '扫描文件':
            txt_folder = values['txt_folder']
            images_root = values['images_root']

            if not txt_folder:
                sg.popup_error('请选择TXT文件所在的文件夹')
                continue

            if not images_root:
                sg.popup_error('请选择图片根文件夹')
                continue

            try:
                status_text.update('正在扫描TXT文件...')
                window.refresh()
                txt_files = FileHandler.scan_txt_files(txt_folder)

                status_text.update('正在匹配图片文件夹...')
                window.refresh()
                matched_pairs = FileHandler.find_matching_image_folders(txt_files, images_root)

                preview_table.update(values=preview_rows(matched_pairs))
                status_text.update(f'扫描完成: 找到 {len(matched_pairs)} 个TXT文件')

                window['编辑匹配'].update(disabled=False)
                window['开始批量转换'].update(disabled=False)

            except Exception as e:
                sg.popup_error(f'扫描失败: {str(e)}')
                status_text.update('状态: 扫描失败')

        if event == '编辑匹配' and matched_pairs:
            images_root = values['images_root']
            if not images_root:
                sg.popup_error('请选择图片根文件夹')
                continue

            matched_pairs = show_matching_editor(matched_pairs, images_root)
            preview_table.update(values=preview_rows(matched_pairs))

        if event == '开始批量转换' and matched_pairs:
            # An output root is only required for the custom location.
            if config.output_location == "custom" and not values['output_root']:
                sg.popup_error('请选择输出根文件夹(在"转换设置"中选择了"输出到指定文件夹")')
                continue

            try:
                progress_bar.update(0, visible=True)
                status_text.update('开始批量转换...')
                window.refresh()

                def update_batch_progress(progress, text):
                    progress_bar.update(progress)
                    status_text.update(f'状态: {text}')
                    window.refresh()

                results = BatchProcessor.process_batch(matched_pairs, values['output_root'], update_batch_progress)

                show_results_window(results)
                status_text.update('状态: 批量转换完成')

            except Exception as e:
                sg.popup_error(f'批量处理失败: {str(e)}')
                status_text.update('状态: 批量转换失败')

        # Pre-fill the output root from the first folder the user provides.
        if (event == 'txt_folder' or event == 'images_root') and values[event] and not values['output_root']:
            default_output = values['txt_folder'] if values['txt_folder'] else values['images_root']
            window['output_root'].update(default_output)

    window.close()
|
||
|
||
|
||
# 程序入口
|
||
# Start the GUI only when this file is executed as a script.
if __name__ == "__main__":
    main_window()
|