ArticleReplaceBatch/md2txt/txt2md2docx.py

1007 lines
37 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import sys
import glob
from PIL import Image
from docx import Document
from docx.shared import Inches, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
import PySimpleGUI as sg
# Configuration settings
class Config:
    """Runtime settings for the TXT-to-DOCX conversion.

    A new instance carries the defaults assigned in ``__init__``; the
    settings dialog later overwrites individual attributes in place.
    """

    def __init__(self):
        # ---- File handling ----
        # Encoding tried first when reading TXT files.
        self.txt_encoding = "utf-8"
        # How a TXT name is compared with folder names:
        # "exact", "prefix" or "contains".
        self.match_pattern = "exact"
        # Where DOCX files are written: "txt_folder" or "custom".
        self.output_location = "txt_folder"

        # ---- Image handling ----
        # Image ordering inside a folder: "name" or "time".
        self.image_sort_by = "name"
        # Resize mode: "none" or "width".
        self.image_resize = "none"
        # Target picture width, in inches.
        self.image_width = 6
        # Picture alignment: "left", "center" or "right".
        self.image_alignment = "center"
        # What to do when images run out: "cycle", "truncate" or "repeat_last".
        self.image_strategy = "cycle"

        # ---- Document formatting ----
        self.line_spacing = 1.5
        # Deepest heading level that is recognised.
        self.title_levels = 6

        # ---- Output ----
        # Name of the sub-folder that receives the generated files.
        self.output_subfolder = "converted_docs"
# Module-level configuration instance: read by the classes below and
# updated in place by the settings dialog.
config = Config()
# File handling module - folder scanning and TXT / image-folder matching
class FileHandler:
    """Filesystem helpers: find TXT files, pair them with image folders,
    list images, read TXT content and choose output paths."""

    # Extensions accepted by get_image_files (compared case-insensitively).
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

    @staticmethod
    def scan_txt_files(folder_path):
        """Recursively collect every ``.txt`` file below *folder_path*.

        Returns:
            A list of dicts with keys ``path``, ``name`` (file name without
            extension), ``relative_path`` (relative to *folder_path*) and
            ``folder`` (directory containing the file), sorted by
            ``relative_path``.

        Raises:
            Exception: if the folder does not exist or holds no TXT file.
        """
        if not os.path.isdir(folder_path):
            raise Exception(f"TXT文件夹不存在: {folder_path}")
        txt_files = []
        for root, _dirs, files in os.walk(folder_path):
            for file_name in files:
                if not file_name.lower().endswith(".txt"):
                    continue
                txt_path = os.path.join(root, file_name)
                txt_files.append({
                    "path": txt_path,
                    "name": os.path.splitext(file_name)[0],
                    "relative_path": os.path.relpath(txt_path, folder_path),
                    "folder": root,  # directory that contains the file
                })
        if not txt_files:
            raise Exception(f"{folder_path} 中未找到任何TXT文件")
        return sorted(txt_files, key=lambda item: item["relative_path"])

    @staticmethod
    def find_matching_image_folders(txt_files, images_root):
        """Pair each TXT entry with image folders found below *images_root*.

        Names are compared case-insensitively according to
        ``config.match_pattern`` ("exact", "prefix" or "contains").

        Returns:
            One dict per TXT entry with keys ``txt``, ``image_folder`` (the
            preferred match or ``None``) and ``all_matches``.

        Raises:
            Exception: if *images_root* is not a directory.
        """
        if not os.path.isdir(images_root):
            raise Exception(f"图片根文件夹不存在: {images_root}")

        # Collect every sub-folder of the image root.
        all_image_folders = []
        for root, dirs, _files in os.walk(images_root):
            for dir_name in dirs:
                folder_path = os.path.join(root, dir_name)
                all_image_folders.append({
                    "path": folder_path,
                    "name": dir_name,
                    "relative_path": os.path.relpath(folder_path, images_root),
                })

        def is_match(txt_name, folder_name):
            if config.match_pattern == "exact":
                return txt_name == folder_name
            if config.match_pattern == "prefix":
                return folder_name.startswith(txt_name)
            if config.match_pattern == "contains":
                return txt_name in folder_name
            return False

        matched_pairs = []
        for txt in txt_files:
            txt_name = txt["name"].lower()
            matches = [
                folder for folder in all_image_folders
                if is_match(txt_name, folder["name"].lower())
            ]
            # Prefer the shortest relative path; the path itself breaks ties
            # so the choice does not depend on directory listing order.
            matches.sort(key=lambda f: (len(f["relative_path"]), f["relative_path"]))
            matched_pairs.append({
                "txt": txt,
                "image_folder": matches[0] if matches else None,
                "all_matches": matches,
            })
        return matched_pairs

    @staticmethod
    def get_image_files(folder_path):
        """List the image files directly inside *folder_path*.

        The directory is listed and filtered by extension instead of using
        glob patterns, so folder names containing glob metacharacters
        (``[``, ``]``, ``*``, ``?``) work and extensions match regardless of
        case. Entries starting with a dot are skipped, as a ``*`` glob
        pattern would do.

        Returns:
            Full paths ordered by name, or by modification time when
            ``config.image_sort_by`` is "time". An empty list if the folder
            is missing.
        """
        if not folder_path or not os.path.isdir(folder_path):
            return []
        image_files = []
        for entry in os.listdir(folder_path):
            if entry.startswith('.'):
                continue
            if not entry.lower().endswith(FileHandler.IMAGE_EXTENSIONS):
                continue
            full_path = os.path.join(folder_path, entry)
            if os.path.isfile(full_path):
                image_files.append(full_path)
        # Name order is the baseline; the (stable) time sort keeps it for ties.
        image_files.sort()
        if config.image_sort_by == "time":
            image_files.sort(key=os.path.getmtime)
        return image_files

    @staticmethod
    def read_markdown_txt(file_path):
        """Read a TXT file containing Markdown and normalise its newlines.

        ``config.txt_encoding`` is tried first, then gbk, utf-16 and
        iso-8859-1. UTF-8 is decoded as ``utf-8-sig`` so a leading byte-order
        mark does not end up in the text (it would hide a first-line ``#``
        heading from the parser).

        Raises:
            Exception: if the file is missing or no encoding can decode it.
        """
        if not os.path.exists(file_path):
            raise Exception(f"TXT文件不存在: {file_path}")
        preferred = config.txt_encoding
        if preferred.lower().replace('_', '-') in ('utf-8', 'utf8'):
            preferred = 'utf-8-sig'
        # dict.fromkeys removes duplicates while keeping the order.
        encodings = list(dict.fromkeys([preferred, "gbk", "utf-16", "iso-8859-1"]))
        for encoding in encodings:
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    content = f.read()
            except (UnicodeError, LookupError):
                # Wrong encoding for this file, or an unknown encoding name.
                continue
            return content.replace("\r\n", "\n").replace("\r", "\n")
        raise Exception(f"无法解析TXT文件编码问题: {file_path}")

    @staticmethod
    def prepare_output_path(txt_info, images_root, output_root):
        """Return a non-existing ``.docx`` path for *txt_info*.

        The file goes into ``config.output_subfolder`` below either the TXT
        file's own folder (``config.output_location == "txt_folder"``) or
        *output_root*. The sub-folder is created if necessary and a numeric
        suffix is appended when the name is already taken.

        *images_root* is not used; it is kept for interface compatibility.
        """
        if config.output_location == "txt_folder":
            base_folder = txt_info["folder"]
        else:
            base_folder = output_root
        output_folder = os.path.join(base_folder, config.output_subfolder)
        os.makedirs(output_folder, exist_ok=True)

        txt_name = txt_info["name"]
        output_path = os.path.join(output_folder, f"{txt_name}.docx")
        # Resolve name clashes: name_1.docx, name_2.docx, ...
        counter = 1
        while os.path.exists(output_path):
            output_path = os.path.join(output_folder, f"{txt_name}_{counter}.docx")
            counter += 1
        return output_path
# Markdown parsing module
class MarkdownParser:
    """Line-based parser for the Markdown subset used in the TXT files."""

    @staticmethod
    def parse(txt_content):
        """Split Markdown text into heading sections.

        Returns:
            A list of dicts ``{'level', 'content', 'paragraphs'}``. Text that
            appears before the first heading is collected under a level-0
            section titled '前置内容'. Each paragraph is a dict with keys
            ``type`` ('text', 'unordered_list', 'ordered_list', 'quote' or
            'empty'), ``content`` (the raw line), ``is_first`` and
            ``formatting`` (spans relative to ``content``, see
            ``extract_formatting``).

        Lines inside fenced code blocks are skipped.
        """
        headings = []
        current_heading = None
        current_paragraphs = []
        in_code_block = False

        for raw_line in txt_content.split('\n'):
            line = raw_line.rstrip('\r')

            # Fenced code blocks: toggle on the fence, drop the content.
            if line.startswith('```'):
                in_code_block = not in_code_block
                continue
            if in_code_block:
                continue

            stripped_line = line.lstrip()

            # Headings: 1..title_levels '#' followed by blank or end of line.
            if stripped_line.startswith('#'):
                level = 0
                while (level < len(stripped_line)
                       and stripped_line[level] == '#'
                       and level < config.title_levels):
                    level += 1
                if level > 0 and (len(stripped_line) <= level
                                  or stripped_line[level] in (' ', '\t')):
                    if current_heading:
                        current_heading['paragraphs'] = current_paragraphs
                        headings.append(current_heading)
                    current_heading = {
                        'level': level,
                        'content': stripped_line[level:].lstrip(),
                        'paragraphs': []
                    }
                    current_paragraphs = []
                    continue

            # Blank line: keep at most one separator after real content.
            if not stripped_line:
                if current_paragraphs and current_paragraphs[-1]['content'].strip() != '':
                    current_paragraphs.append({
                        'type': 'empty',
                        'content': '',
                        'is_first': False,
                        'formatting': {}
                    })
                continue

            # Content before any heading goes into an implicit section. It is
            # created only for real content, so leading blank lines do not
            # produce an empty section.
            if current_heading is None:
                current_heading = {
                    'level': 0,
                    'content': '前置内容',
                    'paragraphs': []
                }

            if line.startswith(('- ', '* ')):
                para_type = 'unordered_list'
            elif MarkdownParser._is_ordered_item(stripped_line):
                para_type = 'ordered_list'
            elif line.startswith('> '):
                para_type = 'quote'
            else:
                para_type = 'text'

            current_paragraphs.append({
                'type': para_type,
                'content': line,
                'is_first': not current_paragraphs,
                'formatting': MarkdownParser.extract_formatting(line)
            })

        if current_heading:
            current_heading['paragraphs'] = current_paragraphs
            headings.append(current_heading)
        return headings

    @staticmethod
    def _is_ordered_item(stripped_line):
        """Return True for '<digits>. ' or '<digits>) ' at the line start."""
        digits = 0
        while digits < len(stripped_line) and stripped_line[digits].isdigit():
            digits += 1
        return digits > 0 and stripped_line[digits:digits + 2] in ('. ', ') ')

    @staticmethod
    def extract_formatting(text):
        """Locate inline code, bold and italic spans in *text*.

        Returns:
            ``{'bold': [...], 'italic': [...], 'code': [...]}`` where every
            item is a ``(start, end)`` pair: *start* is the index of the
            opening marker and *end* is one past the closing marker. The
            spans never overlap each other.

        Code spans are found first and shield their content from emphasis
        detection. A single ``*`` counts as an italic marker only when it is
        not part of ``**``. An opening emphasis marker must be followed by a
        non-blank character and a closing one preceded by one, so list
        markers and expressions like ``2 * 3`` are left alone.
        """
        formatting = {
            'bold': [],
            'italic': [],
            'code': []
        }
        claimed = []  # spans already assigned to some format

        def find_marker(marker, start, opening, emphasis):
            width = len(marker)
            pos = text.find(marker, start)
            while pos != -1:
                usable = not any(s <= pos < e for s, e in claimed)
                if usable and emphasis:
                    before = text[pos - 1:pos]
                    after = text[pos + width:pos + width + 1]
                    if width == 1 and (before == '*' or after == '*'):
                        usable = False  # this star belongs to a '**' marker
                    elif opening:
                        usable = after != '' and not after.isspace()
                    else:
                        usable = before != '' and not before.isspace()
                if usable:
                    return pos
                pos = text.find(marker, pos + 1)
            return -1

        def collect(kind, marker, emphasis):
            width = len(marker)
            cursor = 0
            while True:
                opener = find_marker(marker, cursor, True, emphasis)
                if opener == -1:
                    return
                closer = find_marker(marker, opener + width, False, emphasis)
                if closer == -1:
                    return
                span_end = closer + width
                # Drop a span that would wrap an already claimed one.
                if not any(opener < e and s < span_end for s, e in claimed):
                    formatting[kind].append((opener, span_end))
                    claimed.append((opener, span_end))
                cursor = span_end

        collect('code', '`', False)
        collect('bold', '**', True)
        collect('italic', '*', True)
        return formatting
# Image processing module
class ImageProcessor:
    """Prepare pictures for insertion into the document."""

    # EXIF tag id of the orientation field.
    _ORIENTATION_TAG = 274
    # Orientation value -> angle passed to Image.rotate to compensate for it.
    _EXIF_ROTATIONS = {3: 180, 6: 270, 8: 90}
    # Pixel density assumed when converting between pixels and inches.
    _PIXELS_PER_INCH = 96

    @staticmethod
    def process_image(image_path):
        """Open an image, apply EXIF rotation and the configured resize.

        Returns:
            ``(image, width_in_inches)``. With ``config.image_resize ==
            "width"`` the width is ``config.image_width`` and wider images
            are scaled down to it; otherwise it is the pixel width divided
            by 96.

        Raises:
            Exception: wrapping any error raised while processing.
        """
        try:
            with Image.open(image_path) as img:
                # Compensate for the EXIF orientation where available.
                exif = img._getexif() if hasattr(img, '_getexif') else None
                if exif:
                    degrees = ImageProcessor._EXIF_ROTATIONS.get(
                        exif.get(ImageProcessor._ORIENTATION_TAG))
                    if degrees:
                        img = img.rotate(degrees, expand=True)

                # Image.open is lazy. Read the pixel data while the file is
                # still open, otherwise an untouched image cannot be saved
                # by the caller once the with-block has closed the file.
                img.load()

                if config.image_resize == "width" and config.image_width > 0:
                    target_width_px = config.image_width * ImageProcessor._PIXELS_PER_INCH
                    width, height = img.size
                    if width > target_width_px:
                        ratio = target_width_px / width
                        new_height = int(height * ratio)
                        img = img.resize((int(target_width_px), new_height), Image.LANCZOS)
                    return img, config.image_width

                return img, img.width / ImageProcessor._PIXELS_PER_INCH
        except Exception as e:
            raise Exception(f"处理图片失败 {image_path}: {str(e)}") from e

    @staticmethod
    def get_image_alignment():
        """Map ``config.image_alignment`` to a paragraph alignment constant.

        Any value other than "left" or "right" yields centred alignment.
        """
        if config.image_alignment == "left":
            return WD_ALIGN_PARAGRAPH.LEFT
        if config.image_alignment == "right":
            return WD_ALIGN_PARAGRAPH.RIGHT
        return WD_ALIGN_PARAGRAPH.CENTER
# DOCX generation module
class DocxGenerator:
    """Build the DOCX document from parsed headings and a list of images."""

    # Inline marker text for every supported format.
    _MARKERS = {'bold': '**', 'italic': '*', 'code': '`'}

    @staticmethod
    def generate(headings, image_files, output_path, progress_callback=None):
        """Write *headings* to *output_path*, one image after each section's
        first paragraph.

        Args:
            headings: sections as produced by ``MarkdownParser.parse``.
            image_files: image paths, used in order. What happens once they
                run out is controlled by ``config.image_strategy``.
            output_path: destination ``.docx`` file.
            progress_callback: optional ``callback(percent, text)``.

        Returns:
            True on success.

        Raises:
            Exception: if the document cannot be saved.
        """
        doc = Document()
        total_headings = len(headings)
        image_index = 0
        image_count = len(image_files)

        for i, heading in enumerate(headings):
            if progress_callback:
                progress = int((i / total_headings) * 100)
                progress_callback(progress, f"处理标题: {heading['content'][:30]}...")

            # Heading, or an emphasised paragraph for level-0 sections.
            if 0 < heading['level'] <= config.title_levels:
                doc.add_heading(heading['content'], level=heading['level'])
            else:
                para = doc.add_paragraph()
                run = para.add_run(heading['content'])
                run.font.size = Pt(14)
                run.font.bold = True
                # Spacing belongs to paragraph_format; an attribute set on
                # the paragraph object itself is ignored.
                para.paragraph_format.space_after = Pt(12)

            paragraphs = heading['paragraphs']
            if not paragraphs:
                continue

            DocxGenerator.add_formatted_paragraph(doc, paragraphs[0])

            # One picture after the first paragraph of the section.
            if image_index < image_count:
                try:
                    DocxGenerator._insert_image(doc, image_files[image_index])
                except Exception as e:
                    doc.add_paragraph(f"[图片插入失败: {str(e)}]")
                # Advance even after a failure so that one unreadable image
                # does not block every following section.
                image_index = DocxGenerator._next_image_index(image_index, image_count)

            for para in paragraphs[1:]:
                DocxGenerator.add_formatted_paragraph(doc, para)

        try:
            doc.save(output_path)
        except Exception as e:
            raise Exception(f"保存DOCX失败: {str(e)}") from e
        if progress_callback:
            progress_callback(100, "转换完成!")
        return True

    @staticmethod
    def _insert_image(doc, image_path):
        """Append *image_path* as an aligned picture in its own paragraph."""
        import io  # local import: only needed for the in-memory PNG

        img, width = ImageProcessor.process_image(image_path)
        if config.image_resize == "width":
            # Hand the processed image over in memory; no temporary file has
            # to be created next to the output and cleaned up again.
            source = io.BytesIO()
            img.save(source, format='PNG')
            source.seek(0)
        else:
            source = image_path
        para = doc.add_paragraph()
        para.add_run().add_picture(source, width=Inches(width))
        # Alignment is a property of the paragraph holding the picture.
        para.alignment = ImageProcessor.get_image_alignment()

    @staticmethod
    def _next_image_index(image_index, image_count):
        """Return the index of the image to use for the next section."""
        image_index += 1
        if image_index < image_count:
            return image_index
        if config.image_strategy == "cycle":
            return 0
        if config.image_strategy == "repeat_last":
            return image_count - 1
        # "truncate": an index past the end means no further pictures.
        return image_count

    @staticmethod
    def _ordered_marker_length(content):
        """Length of the leading '<indent><digits>.' or '<digits>)' marker,
        or 0 when *content* does not start with one."""
        stripped = content.lstrip()
        indent = len(content) - len(stripped)
        digits = 0
        while digits < len(stripped) and stripped[digits].isdigit():
            digits += 1
        if digits and stripped[digits:digits + 1] in ('.', ')'):
            return indent + digits + 1
        return 0

    @staticmethod
    def add_formatted_paragraph(doc, paragraph_data):
        """Append one parsed paragraph to *doc* with its style and inline
        formatting.

        The spans in ``paragraph_data['formatting']`` refer to the raw
        ``content`` line. They are shifted by the number of characters
        removed in front of the visible text (list/quote marker and leading
        blanks) before being applied.
        """
        content = paragraph_data['content']
        para_type = paragraph_data['type']

        if para_type == 'empty':
            doc.add_paragraph()
            return

        style = None
        prefix_len = 0
        if para_type == 'unordered_list':
            style, prefix_len = 'List Bullet', 2
        elif para_type == 'ordered_list':
            style = 'List Number'
            prefix_len = DocxGenerator._ordered_marker_length(content)
        elif para_type == 'quote':
            style, prefix_len = 'Intense Quote', 2

        body = content[prefix_len:]
        text = body.strip()
        offset = prefix_len + len(body) - len(body.lstrip())

        shifted = {}
        for kind, spans in paragraph_data['formatting'].items():
            shifted[kind] = [
                (start - offset, end - offset)
                for start, end in spans
                if start >= offset
            ]

        para = doc.add_paragraph(style=style)
        DocxGenerator.apply_formatting(para, text, shifted)
        if config.line_spacing > 0:
            para.paragraph_format.line_spacing = config.line_spacing

    @staticmethod
    def apply_formatting(paragraph, text, formatting):
        """Add *text* to *paragraph* as runs, styling the given spans.

        Args:
            paragraph: target paragraph (needs ``add_run``).
            text: visible text, still containing the inline markers.
            formatting: ``{'bold'|'italic'|'code': [(start, end), ...]}``
                with offsets into *text*, markers included.

        Spans that overlap an earlier span, or whose ends do not sit on the
        expected markers, are ignored so that no text is duplicated or cut.
        """
        format_positions = []
        for fmt_type in ('bold', 'italic', 'code'):
            for start, end in formatting.get(fmt_type, ()):
                format_positions.append((start, end, fmt_type))
        format_positions.sort(key=lambda x: x[0])

        current_pos = 0
        for start, end, fmt_type in format_positions:
            marker = DocxGenerator._MARKERS[fmt_type]
            width = len(marker)
            if start < current_pos or end - start < 2 * width:
                continue
            if text[start:start + width] != marker or text[end - width:end] != marker:
                continue
            if start > current_pos:
                paragraph.add_run(text[current_pos:start])
            run = paragraph.add_run(text[start + width:end - width])
            if fmt_type == 'bold':
                run.bold = True
            elif fmt_type == 'italic':
                run.italic = True
            else:
                run.font.name = 'Courier New'
                run.font.size = Pt(10)
            current_pos = end

        if current_pos < len(text):
            paragraph.add_run(text[current_pos:])
# Batch processing module
class BatchProcessor:
    """Convert every matched TXT / image-folder pair in one run."""

    @staticmethod
    def process_batch(matched_pairs, output_root, progress_callback=None):
        """Convert all *matched_pairs* and summarise the outcome.

        Args:
            matched_pairs: items with keys ``txt`` and ``image_folder`` as
                produced by ``FileHandler.find_matching_image_folders``.
            output_root: root folder used when the output location is
                "custom".
            progress_callback: optional ``callback(percent, text)``.

        Returns:
            A dict with ``total``, ``success``, ``failed``, ``failed_items``
            (``name`` / ``error`` per failure) and ``main_output_folder``:
            the folder of the first successfully written document, or ""
            when nothing was written.
        """
        total = len(matched_pairs)
        success_count = 0
        failed_items = []
        main_output_folder = ""

        for i, pair in enumerate(matched_pairs):
            txt_info = pair['txt']
            image_folder = pair['image_folder']
            try:
                # Overall progress.
                if progress_callback:
                    overall_progress = int((i / total) * 100)
                    progress_callback(overall_progress,
                                      f"处理 {i + 1}/{total}: {txt_info['name']}")

                # Read and parse first, so no output folder is created for
                # a file that cannot be converted anyway.
                txt_content = FileHandler.read_markdown_txt(txt_info['path'])
                headings = MarkdownParser.parse(txt_content)
                if not headings:
                    raise Exception("未解析到有效内容")

                image_files = []
                if image_folder:
                    image_files = FileHandler.get_image_files(image_folder['path'])

                output_path = FileHandler.prepare_output_path(
                    txt_info,
                    image_folder['path'] if image_folder else "",
                    output_root
                )

                def update_file_progress(progress, text, index=i, name=txt_info['name']):
                    if progress_callback:
                        # Progress of this file mapped into the overall range.
                        sub_progress = int((index + progress / 100) / total * 100)
                        progress_callback(sub_progress, f"{name}: {text}")

                DocxGenerator.generate(headings, image_files, output_path, update_file_progress)
            except Exception as e:
                failed_items.append({
                    "name": txt_info['name'],
                    "error": str(e)
                })
            else:
                success_count += 1
                # Remember where the first document really went instead of
                # asking prepare_output_path again afterwards (that call
                # creates folders and may describe a failed item).
                if not main_output_folder:
                    main_output_folder = os.path.dirname(output_path)

        return {
            "total": total,
            "success": success_count,
            "failed": len(failed_items),
            "failed_items": failed_items,
            "main_output_folder": main_output_folder
        }
# Settings window
def show_config_window():
    """Show the modal settings dialog and store the choices in ``config``.

    Nothing is changed when the dialog is cancelled or closed. An empty
    encoding, an empty sub-folder name or an invalid / non-positive image
    width leaves the corresponding previous value untouched.
    """
    layout = [
        [sg.Text('文件匹配设置', font=('bold', 12))],
        [sg.Text('TXT编码:'),
         sg.Combo(['utf-8', 'gbk', 'utf-16'],
                  default_value=config.txt_encoding, key='txt_encoding')],
        [sg.Text('匹配模式:'),
         sg.Radio('完全匹配(文件名与文件夹名相同)', 'match',
                  default=config.match_pattern == "exact", key='match_exact'),
         sg.Radio('前缀匹配', 'match',
                  default=config.match_pattern == "prefix", key='match_prefix'),
         sg.Radio('包含匹配', 'match',
                  default=config.match_pattern == "contains", key='match_contains')],
        [sg.HSeparator()],
        [sg.Text('输出位置设置', font=('bold', 12))],
        [sg.Radio('输出到TXT文件所在文件夹', 'output_loc',
                  default=config.output_location == "txt_folder", key='output_txt_folder'),
         sg.Radio('输出到指定文件夹', 'output_loc',
                  default=config.output_location == "custom", key='output_custom')],
        [sg.Text('输出子文件夹名称:'),
         sg.InputText(config.output_subfolder, key='output_subfolder')],
        [sg.HSeparator()],
        [sg.Text('图片处理设置', font=('bold', 12))],
        [sg.Text('图片排序方式:'),
         sg.Radio('按名称', 'sort', default=config.image_sort_by == "name", key='sort_name'),
         sg.Radio('按修改时间', 'sort', default=config.image_sort_by == "time", key='sort_time')],
        [sg.Text('图片尺寸调整:'),
         sg.Radio('不调整', 'resize', default=config.image_resize == "none", key='resize_none'),
         sg.Radio('按宽度:', 'resize', default=config.image_resize == "width", key='resize_width'),
         sg.InputText(str(config.image_width), size=(5, 1), key='image_width'),
         sg.Text('英寸')],
        [sg.Text('图片对齐方式:'),
         sg.Radio('左对齐', 'align', default=config.image_alignment == "left", key='align_left'),
         sg.Radio('居中', 'align', default=config.image_alignment == "center", key='align_center'),
         sg.Radio('右对齐', 'align', default=config.image_alignment == "right", key='align_right')],
        [sg.HSeparator()],
        [sg.Text('图片不足时策略', font=('bold', 12))],
        [sg.Radio('循环使用', 'strategy', default=config.image_strategy == "cycle", key='strategy_cycle'),
         sg.Radio('忽略多余标题', 'strategy', default=config.image_strategy == "truncate", key='strategy_truncate'),
         sg.Radio('重复最后一张', 'strategy', default=config.image_strategy == "repeat_last", key='strategy_repeat')],
        [sg.HSeparator()],
        [sg.Button('确定'), sg.Button('取消')]
    ]
    window = sg.Window('转换设置', layout, modal=True, resizable=True)

    def pick(values, options, fallback):
        """Return the setting of the first selected radio key, else fallback."""
        for key, setting in options:
            if values[key]:
                return setting
        return fallback

    try:
        while True:
            event, values = window.read()
            if event in (sg.WIN_CLOSED, '取消'):
                break
            if event != '确定':
                continue

            # Store the settings.
            encoding = (values['txt_encoding'] or '').strip()
            if encoding:
                config.txt_encoding = encoding
            config.match_pattern = pick(
                values, [('match_exact', "exact"), ('match_prefix', "prefix")], "contains")

            config.output_location = "txt_folder" if values['output_txt_folder'] else "custom"
            subfolder = values['output_subfolder'].strip()
            if subfolder:
                config.output_subfolder = subfolder

            config.image_sort_by = "name" if values['sort_name'] else "time"
            config.image_resize = "none" if values['resize_none'] else "width"
            try:
                width = float(values['image_width'])
            except (TypeError, ValueError):
                width = None
            # The chained comparison also rejects NaN and infinity.
            if width is not None and 0 < width < float('inf'):
                config.image_width = width

            config.image_alignment = pick(
                values, [('align_left', "left"), ('align_right', "right")], "center")
            config.image_strategy = pick(
                values, [('strategy_cycle', "cycle"), ('strategy_truncate', "truncate")],
                "repeat_last")
            break
    finally:
        window.close()
# Match editor window
def show_matching_editor(matched_pairs, images_root):
    """Let the user reassign or clear the image folder of each TXT file.

    *matched_pairs* is modified in place and also returned. The window
    closes on '应用所有' or when it is dismissed.
    """
    # Every folder below the image root, as (full path, relative path).
    folder_choices = []
    if os.path.isdir(images_root):
        for parent, subdirs, _ in os.walk(images_root):
            for subdir in subdirs:
                full_path = os.path.join(parent, subdir)
                folder_choices.append((full_path, os.path.relpath(full_path, images_root)))

    # One table row per pair: index, TXT name, matched folder.
    rows = [
        [
            index,
            pair['txt']['name'],
            pair['image_folder']['relative_path'] if pair['image_folder'] else "无匹配",
        ]
        for index, pair in enumerate(matched_pairs)
    ]

    pair_table = sg.Table(
        values=rows,
        headings=['序号', 'TXT文件名', '匹配的图片文件夹'],
        key='-TABLE-',
        select_mode=sg.TABLE_SELECT_MODE_BROWSE,
        enable_events=True,
        justification='left',
        size=(None, 15)
    )
    separator = sg.VSeparator()
    folder_list = sg.Listbox(
        values=[choice[1] for choice in folder_choices],
        key='-FOLDERS-',
        size=(40, 15),
        enable_events=True
    )
    layout = [
        [sg.Text('文件匹配编辑', font=('bold', 14))],
        [sg.Text('选择要修改的项目,然后从右侧选择图片文件夹')],
        [pair_table, separator, folder_list],
        [sg.Button('设置选中项'), sg.Button('清除选中项'), sg.Button('应用所有')]
    ]
    window = sg.Window('匹配编辑', layout, resizable=True)

    active_row = None
    while True:
        event, values = window.read()
        if event in (sg.WIN_CLOSED, '应用所有'):
            break

        if event == '-TABLE-':
            # Remember the most recently selected table row.
            if values['-TABLE-']:
                active_row = values['-TABLE-'][0]
        elif event == '设置选中项':
            if active_row is not None and values['-FOLDERS-']:
                chosen = values['-FOLDERS-'][0]
                hits = [choice for choice in folder_choices if choice[1] == chosen]
                folder_path, folder_rel = hits[0]
                matched_pairs[active_row]['image_folder'] = {
                    "path": folder_path,
                    "name": os.path.basename(folder_path),
                    "relative_path": folder_rel
                }
                rows[active_row][2] = folder_rel
                window['-TABLE-'].update(values=rows)
        elif event == '清除选中项':
            if active_row is not None:
                matched_pairs[active_row]['image_folder'] = None
                rows[active_row][2] = "无匹配"
                window['-TABLE-'].update(values=rows)

    window.close()
    return matched_pairs
# Help window
def show_help_window():
    """Pop up a scrollable window containing the usage instructions."""
    usage_notes = """
批量Markdown TXT转DOCX工具使用说明:
1. 选择包含Markdown内容的TXT文件所在文件夹
2. 选择图片文件夹的根目录(程序会自动查找子文件夹)
3. 选择输出文件的保存根目录(当选择"输出到指定文件夹"时有效)
4. 点击"扫描文件"按钮程序会自动匹配TXT文件和图片文件夹
5. 查看匹配结果,可点击"编辑匹配"调整匹配关系
6. 点击"开始批量转换"生成DOCX文件
输出路径选择:
- 输出到TXT文件所在文件夹: 每个DOCX文件会保存在对应TXT文件所在文件夹的子目录中
- 输出到指定文件夹: 所有DOCX文件会保存在您指定的文件夹的子目录中
匹配规则:
- 完全匹配: TXT文件名不含扩展名与图片文件夹名完全相同
- 前缀匹配: 图片文件夹名以前缀形式包含TXT文件名
- 包含匹配: 图片文件夹名中包含TXT文件名
转换规则:
- 每个小标题的第一段后会插入一张图片
- 支持Markdown格式: 标题、列表、粗体、斜体、代码等
"""
    sg.popup_scrolled('使用帮助', usage_notes, size=(60, 20))
# Results window
def show_results_window(results):
    """Show the batch summary and offer to open the output folder.

    Args:
        results: the dict returned by ``BatchProcessor.process_batch``
            (keys ``total``, ``success``, ``failed``, ``failed_items`` and
            ``main_output_folder``).
    """
    import subprocess  # local import: only needed to launch the file manager

    output_folder = results['main_output_folder']

    if results['failed'] == 0:
        message = f"全部成功!\n共处理 {results['total']} 个文件,全部转换成功。"
        if output_folder:
            message += f"\n主要输出文件夹: {output_folder}"
        sg.popup('处理完成', message)
    else:
        failed_text = "\n".join(
            f"- {item['name']}: {item['error']}" for item in results['failed_items'])
        message = (f"处理完成!\n共处理 {results['total']} 个文件,"
                   f"{results['success']} 个成功,{results['failed']} 个失败。\n\n"
                   f"失败项:\n{failed_text}")
        if output_folder:
            message += f"\n主要输出文件夹: {output_folder}"
        sg.popup_scrolled('处理完成', message, size=(60, 20))

    # Offer to open the output folder.
    if not (output_folder and os.path.exists(output_folder)):
        return
    if sg.popup_yes_no('是否打开主要输出文件夹?') != 'Yes':
        return
    try:
        if sys.platform.startswith('win'):
            os.startfile(output_folder)
        else:
            opener = 'open' if sys.platform.startswith('darwin') else 'xdg-open'
            # Argument list instead of a shell string: a path containing
            # quotes, '$' or backticks is passed through unchanged.
            subprocess.run([opener, output_folder], check=False)
    except OSError as e:
        sg.popup_error(f'无法打开文件夹: {str(e)}')
# Main window
def _build_preview_rows(matched_pairs):
    """Return preview-table rows: TXT name, relative path, matched folder."""
    return [
        [
            pair['txt']['name'],
            pair['txt']['relative_path'],
            pair['image_folder']['relative_path'] if pair['image_folder'] else "无匹配",
        ]
        for pair in matched_pairs
    ]


def main_window():
    """Run the main window and its event loop until the user exits.

    Handles scanning, manual match editing, the settings dialog and the
    batch conversion. The window is closed even if the loop raises.
    """
    sg.theme('BlueMono')
    # Result of the most recent successful scan.
    matched_pairs = []
    layout = [
        [sg.Text('批量Markdown TXT转DOCX工具', font=('bold', 16))],
        [sg.Text('按文件名匹配TXT文件和图片文件夹', text_color='gray')],
        [sg.HSeparator()],
        [sg.Text('TXT文件文件夹:', size=(15, 1)),
         sg.InputText(key='txt_folder', enable_events=True),
         sg.FolderBrowse('浏览')],
        [sg.Text('图片根文件夹:', size=(15, 1)),
         sg.InputText(key='images_root', enable_events=True),
         sg.FolderBrowse('浏览')],
        [sg.Text('输出根文件夹:', size=(15, 1)),
         sg.InputText(key='output_root', enable_events=True),
         sg.FolderBrowse('浏览'),
         sg.Text('(当选择"输出到指定文件夹"时有效)', text_color='gray')],
        [sg.Button('扫描文件', size=(12, 1)),
         sg.Button('编辑匹配', size=(12, 1), disabled=True),
         sg.Button('转换设置', size=(12, 1)),
         sg.Button('帮助', size=(8, 1))],
        [sg.HSeparator()],
        [sg.Text('匹配结果预览:', font=('bold', 10))],
        [sg.Table(
            values=[],
            headings=['TXT文件名', '相对路径', '匹配的图片文件夹'],
            key='-PREVIEW_TABLE-',
            auto_size_columns=False,
            col_widths=[20, 30, 30],
            justification='left',
            size=(None, 10)
        )],
        [sg.ProgressBar(100, orientation='h', size=(80, 20), key='progress_bar', visible=False)],
        [sg.Text('状态: 就绪', key='status_text', size=(80, 1))],
        [sg.Button('开始批量转换', size=(15, 1), disabled=True), sg.Button('退出')]
    ]
    window = sg.Window('批量Markdown TXT转DOCX工具', layout, resizable=True)
    progress_bar = window['progress_bar']
    status_text = window['status_text']
    preview_table = window['-PREVIEW_TABLE-']
    output_root_input = window['output_root']

    def update_output_root_state():
        """Enable the output-root input only for the "custom" location."""
        if config.output_location == "custom":
            output_root_input.update(disabled=False)
            output_root_input.Widget.configure(foreground='black')
        else:
            output_root_input.update(disabled=True)
            output_root_input.Widget.configure(foreground='gray')

    def update_batch_progress(progress, text):
        progress_bar.update(progress)
        status_text.update(f'状态: {text}')
        window.refresh()

    # A first short read initialises the window so that the element
    # states can be updated before the event loop starts.
    window.read(timeout=1)
    update_output_root_state()

    try:
        while True:
            event, values = window.read()
            if event in (sg.WIN_CLOSED, '退出'):
                break

            if event == '转换设置':
                # Keep the typed output root across the state change.
                current_output_root = values['output_root']
                show_config_window()
                update_output_root_state()
                window['output_root'].update(current_output_root)

            elif event == '帮助':
                show_help_window()

            elif event == '扫描文件':
                txt_folder = values['txt_folder']
                images_root = values['images_root']
                if not txt_folder:
                    sg.popup_error('请选择TXT文件所在的文件夹')
                    continue
                if not images_root:
                    sg.popup_error('请选择图片根文件夹')
                    continue
                try:
                    status_text.update('正在扫描TXT文件...')
                    window.refresh()
                    txt_files = FileHandler.scan_txt_files(txt_folder)
                    status_text.update('正在匹配图片文件夹...')
                    window.refresh()
                    matched_pairs = FileHandler.find_matching_image_folders(txt_files, images_root)
                    preview_table.update(values=_build_preview_rows(matched_pairs))
                    status_text.update(f'扫描完成: 找到 {len(matched_pairs)} 个TXT文件')
                    # Results are available: enable the dependent buttons.
                    window['编辑匹配'].update(disabled=False)
                    window['开始批量转换'].update(disabled=False)
                except Exception as e:
                    sg.popup_error(f'扫描失败: {str(e)}')
                    status_text.update('状态: 扫描失败')

            elif event == '编辑匹配' and matched_pairs:
                images_root = values['images_root']
                if not images_root:
                    sg.popup_error('请选择图片根文件夹')
                    continue
                matched_pairs = show_matching_editor(matched_pairs, images_root)
                preview_table.update(values=_build_preview_rows(matched_pairs))

            elif event == '开始批量转换' and matched_pairs:
                # A custom output location needs an output root.
                if config.output_location == "custom" and not values['output_root']:
                    sg.popup_error('请选择输出根文件夹(在"转换设置"中选择了"输出到指定文件夹")')
                    continue
                try:
                    progress_bar.update(0, visible=True)
                    status_text.update('开始批量转换...')
                    window.refresh()
                    results = BatchProcessor.process_batch(
                        matched_pairs, values['output_root'], update_batch_progress)
                    show_results_window(results)
                    status_text.update('状态: 批量转换完成')
                except Exception as e:
                    sg.popup_error(f'批量处理失败: {str(e)}')
                    status_text.update('状态: 批量转换失败')

            elif event in ('txt_folder', 'images_root'):
                # Pre-fill the output root while it is still empty,
                # preferring the TXT folder.
                if values[event] and not values['output_root']:
                    default_output = values['txt_folder'] if values['txt_folder'] else values['images_root']
                    window['output_root'].update(default_output)
    finally:
        window.close()
# Program entry point: start the GUI when the file is run as a script
if __name__ == '__main__':
    main_window()