TxT2Docx/Txt2docx2.py

1635 lines
63 KiB
Python
Raw Normal View History

2025-09-10 10:35:03 +08:00
import os
import sys
import glob
import re
import random
import json
from typing import Tuple, List
from PIL import Image
from docx import Document
from docx.shared import Inches, Pt, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.style import WD_STYLE_TYPE
import PySimpleGUI as sg
from replacestr import replace_text
import configparser
# INI file in the user's home directory where the settings are persisted.
CONFIG_FILE_PATH = os.path.join(os.path.expanduser("~"), ".txt2md2docx.ini")
# 错别字处理功能集成
def load_error_chars(db_path: str = "data/error_chars.json") -> dict:
"""加载错别字库"""
# 检查文件夹是否存在,不存在则创建
dir_name = os.path.dirname(db_path)
if not os.path.exists(dir_name):
os.makedirs(dir_name)
# 检查文件是否存在,不存在则创建默认库
if not os.path.exists(db_path):
default_chars = {
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
"": "",
}
with open(db_path, 'w', encoding='utf-8') as f:
json.dump(default_chars, f, ensure_ascii=False, indent=2)
return default_chars
# 加载已存在的错别字库
with open(db_path, 'r', encoding='utf-8') as f:
return json.load(f)
def introduce_char_errors(text: str, intensity: float = 1.0, db_path: str = "data/error_chars.json") -> Tuple[
        str, int, List[str], List[str]]:
    """Replace characters of ``text`` using the typo-substitution table.

    Args:
        text: Text to process.
        intensity: Substitution probability applied to each character that
            has an entry in the table; it is compared against
            ``random.random()``.
        db_path: Path of the substitution table passed to
            ``load_error_chars``.

    Returns:
        A tuple ``(modified_text, replacement_count, original_sentences,
        modified_sentences)``.
    """
    substitutions = load_error_chars(db_path)
    delimiter_re = re.compile(r'([。!?;,.!?;])')

    def split_into_sentences(txt: str) -> List[str]:
        # re.split with a capture group yields body, delimiter, body, ...
        pieces = delimiter_re.split(txt)
        result = [
            body + mark
            for body, mark in zip(pieces[0::2], pieces[1::2])
            if body or mark
        ]
        # Trailing text that is not followed by a delimiter.
        if len(pieces) % 2 == 1 and pieces[-1]:
            result.append(pieces[-1])
        return result

    def introduce_errors_to_sentence(sentence: str) -> Tuple[str, int]:
        rebuilt = []
        hits = 0
        for ch in sentence:
            # random.random() is only drawn for characters present in the table.
            if ch in substitutions and random.random() <= intensity:
                rebuilt.append(substitutions[ch])
                hits += 1
            else:
                rebuilt.append(ch)
        return ''.join(rebuilt), hits

    original_sentences = split_into_sentences(text)
    outcomes = [introduce_errors_to_sentence(s) for s in original_sentences]
    modified_sentences = [changed for changed, _ in outcomes]
    total_replace = sum(hits for _, hits in outcomes)
    return ''.join(modified_sentences), total_replace, original_sentences, modified_sentences
# 配置设置
class Config:
    """Application settings with INI-file persistence.

    Defaults are assigned in ``__init__``.  ``load_from_file`` and
    ``save_to_file`` read and write them under the sections
    ``FileHandling``, ``TextProcessing``, ``ImageProcessing`` and
    ``DocumentFormat``.
    """

    def __init__(self):
        # File handling
        self.txt_encoding = "utf-8"
        self.match_pattern = "exact"  # exact, prefix or contains
        self.output_location = "txt_folder"  # txt_folder or custom
        # Most recently used folders
        self.last_txt_folder = ""
        self.last_images_root = ""
        self.last_output_root = ""
        # Text processing
        self.reverse_text_order = False  # switch for the text-order conversion
        # Typo injection
        self.enable_char_errors = False
        self.char_error_intensity = 0.3  # 0.0-1.0
        self.char_error_db_path = "data/error_chars.json"
        # Image handling
        self.image_sort_by = "name"  # name or time
        self.image_resize = "none"  # none or width
        self.image_width = 6  # inches
        self.image_alignment = "center"  # left, center, right
        self.image_strategy = "cycle"  # cycle, truncate, repeat_last
        # Document format
        self.line_spacing = 1.5
        self.title_levels = 6  # deepest heading level that is honoured
        self.replace_punctuation = False
        self.add_disclaimer = False

    def load_from_file(self, file_path):
        """Read settings from ``file_path``.

        Options missing from the file keep their current value.

        Returns:
            ``False`` when ``file_path`` does not exist, ``True`` otherwise.
        """
        if not os.path.exists(file_path):
            return False
        # Interpolation is disabled so that values containing '%' (for
        # instance folder paths) are read back literally.
        config_parser = configparser.ConfigParser(interpolation=None)
        config_parser.read(file_path, encoding='utf-8')

        if 'FileHandling' in config_parser:
            self.txt_encoding = config_parser.get('FileHandling', 'txt_encoding', fallback=self.txt_encoding)
            self.match_pattern = config_parser.get('FileHandling', 'match_pattern', fallback=self.match_pattern)
            self.output_location = config_parser.get('FileHandling', 'output_location',
                                                     fallback=self.output_location)
            self.last_txt_folder = config_parser.get('FileHandling', 'last_txt_folder',
                                                     fallback=self.last_txt_folder)
            self.last_images_root = config_parser.get('FileHandling', 'last_images_root',
                                                      fallback=self.last_images_root)
            self.last_output_root = config_parser.get('FileHandling', 'last_output_root',
                                                      fallback=self.last_output_root)

        if 'TextProcessing' in config_parser:
            self.reverse_text_order = config_parser.getboolean('TextProcessing', 'reverse_text_order',
                                                               fallback=self.reverse_text_order)
            self.replace_punctuation = config_parser.getboolean('TextProcessing', 'replace_punctuation',
                                                                fallback=self.replace_punctuation)
            self.add_disclaimer = config_parser.getboolean('TextProcessing', 'add_disclaimer',
                                                           fallback=self.add_disclaimer)
            # Typo-injection options live in the same section.
            self.enable_char_errors = config_parser.getboolean('TextProcessing', 'enable_char_errors',
                                                               fallback=self.enable_char_errors)
            self.char_error_intensity = config_parser.getfloat('TextProcessing', 'char_error_intensity',
                                                               fallback=self.char_error_intensity)
            self.char_error_db_path = config_parser.get('TextProcessing', 'char_error_db_path',
                                                        fallback=self.char_error_db_path)

        if 'ImageProcessing' in config_parser:
            self.image_sort_by = config_parser.get('ImageProcessing', 'image_sort_by', fallback=self.image_sort_by)
            self.image_resize = config_parser.get('ImageProcessing', 'image_resize', fallback=self.image_resize)
            self.image_width = config_parser.getfloat('ImageProcessing', 'image_width', fallback=self.image_width)
            self.image_alignment = config_parser.get('ImageProcessing', 'image_alignment',
                                                     fallback=self.image_alignment)
            self.image_strategy = config_parser.get('ImageProcessing', 'image_strategy',
                                                    fallback=self.image_strategy)

        if 'DocumentFormat' in config_parser:
            self.line_spacing = config_parser.getfloat('DocumentFormat', 'line_spacing', fallback=self.line_spacing)
            self.title_levels = config_parser.getint('DocumentFormat', 'title_levels', fallback=self.title_levels)
        return True

    def save_to_file(self, file_path):
        """Write all settings to ``file_path`` as UTF-8 INI text.

        Returns:
            ``True`` once the file has been written.
        """
        # With the default interpolation, assigning a value that contains a
        # bare '%' raises ValueError; paths may legitimately contain one.
        config_parser = configparser.ConfigParser(interpolation=None)

        config_parser['FileHandling'] = {
            'txt_encoding': self.txt_encoding,
            'match_pattern': self.match_pattern,
            'output_location': self.output_location,
            'last_txt_folder': self.last_txt_folder,
            'last_images_root': self.last_images_root,
            'last_output_root': self.last_output_root
        }

        config_parser['TextProcessing'] = {
            'reverse_text_order': str(self.reverse_text_order),
            'replace_punctuation': str(self.replace_punctuation),
            'add_disclaimer': str(self.add_disclaimer),
            'enable_char_errors': str(self.enable_char_errors),
            'char_error_intensity': str(self.char_error_intensity),
            'char_error_db_path': self.char_error_db_path
        }

        config_parser['ImageProcessing'] = {
            'image_sort_by': self.image_sort_by,
            'image_resize': self.image_resize,
            'image_width': str(self.image_width),
            'image_alignment': self.image_alignment,
            'image_strategy': self.image_strategy
        }

        config_parser['DocumentFormat'] = {
            'line_spacing': str(self.line_spacing),
            'title_levels': str(self.title_levels)
        }

        with open(file_path, 'w', encoding='utf-8') as f:
            config_parser.write(f)
        return True
# Module-wide configuration instance.  Settings stored at CONFIG_FILE_PATH
# are loaded at import time; when that file does not exist the defaults from
# Config.__init__ stay in effect.
config = Config()
config.load_from_file(CONFIG_FILE_PATH)
# 文字处理工具类 - 增强功能
class TextProcessor:
    """Text transformations applied to content before it is written out."""

    @staticmethod
    def replace_periods(text: str) -> str:
        """Turn Chinese full stops into commas and drop a trailing one.

        Trailing whitespace is stripped first.  A full stop at the very end
        of the text is removed; every remaining full stop becomes a comma.
        Returns an empty string for empty or whitespace-only input.
        """
        text = text.rstrip()
        if not text:
            return ''
        # The comparison and replacement previously used empty strings,
        # which made the whole method a no-op.
        if text.endswith('。'):
            text = text[:-1]
        return text.replace('。', ',')

    @staticmethod
    def reverse_text_order(content):
        """Return ``content`` reversed character by character."""
        if not content:
            return content
        return content[::-1]

    @staticmethod
    def reverse_paragraph_order(content):
        """Return ``content`` with its newline-separated lines in reverse order."""
        if not content:
            return content
        paragraphs = content.split('\n')
        return '\n'.join(reversed(paragraphs))

    @staticmethod
    def apply_char_errors(text: str) -> str:
        """Apply typo injection when it is enabled in the configuration.

        Returns ``text`` unchanged when the feature is disabled, the text is
        empty, or the injection raises.
        """
        if not config.enable_char_errors or not text:
            return text
        try:
            modified_text, replace_count, _, _ = introduce_char_errors(
                text,
                config.char_error_intensity,
                config.char_error_db_path
            )
            print(f"已应用错别字处理,替换了 {replace_count} 个字符。")
            return modified_text
        except Exception as e:
            # Best effort: a failure here must not abort the conversion.
            print(f"错别字处理出错: {e}")
            return text

    @staticmethod
    def process_text_content(text):
        """Run the configured text pipeline on ``text``.

        Order: text-order conversion via ``replace_text``, typo injection,
        then punctuation replacement.  Blank input is returned unchanged.
        """
        if not text or not text.strip():
            return text
        if config.reverse_text_order:
            text = replace_text(text)
        text = TextProcessor.apply_char_errors(text)
        if config.replace_punctuation:
            text = TextProcessor.replace_periods(text)
        return text
# 增强的Markdown解析器
class MarkdownParser:
    """Line-oriented parser for the Markdown subset the converter supports."""

    # Compiled patterns for block-level and inline constructs.
    PATTERNS = {
        'heading': re.compile(r'^(\s*)(#{1,6})\s+(.+)$'),
        'bold_asterisk': re.compile(r'\*\*(.+?)\*\*'),
        'bold_underscore': re.compile(r'__(.+?)__'),
        'italic_asterisk': re.compile(r'(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)'),
        'italic_underscore': re.compile(r'_(.+?)_'),
        'code_inline': re.compile(r'`([^`]+)`'),
        'code_block': re.compile(r'^```(\w+)?\s*\n(.*?)\n```', re.MULTILINE | re.DOTALL),
        'strikethrough': re.compile(r'~~(.+?)~~'),
        'link': re.compile(r'\[([^\]]+)\]\(([^)]+)\)'),
        'image': re.compile(r'!\[([^\]]*)\]\(([^)]+)\)'),
        'unordered_list': re.compile(r'^\s*[-*+]\s+(.+)$'),
        'ordered_list': re.compile(r'^\s*\d+\.\s+(.+)$'),
        'blockquote': re.compile(r'^\s*>\s*(.+)$'),
        'horizontal_rule': re.compile(r'^(\s*[-*_]){3,}\s*$'),
        'table_row': re.compile(r'^\|(.+)\|$'),
        'table_separator': re.compile(r'^\|(\s*:?-+:?\s*\|)+$')
    }

    @staticmethod
    def parse(txt_content):
        """Parse Markdown text into a list of sections.

        Each line is classified as one of: code fence, table row, heading,
        horizontal rule, list item, block quote, empty line or paragraph.
        The resulting elements are grouped with ``group_by_sections``.

        Args:
            txt_content: The Markdown text, lines separated by ``\\n``.

        Returns:
            The section list produced by ``group_by_sections``.
        """
        patterns = MarkdownParser.PATTERNS
        elements = []
        in_code_block = False
        language = ''
        code_block_content = []
        table_mode = False
        table_rows = []

        def flush_table():
            # Emit the table collected so far (if any) and leave table mode.
            nonlocal table_mode, table_rows
            if table_rows:
                elements.append({
                    'type': 'table',
                    'rows': table_rows,
                    'level': 0
                })
            table_mode = False
            table_rows = []

        def flush_code_block():
            elements.append({
                'type': 'code_block',
                'language': language,
                'content': '\n'.join(code_block_content),
                'level': 0
            })

        for raw_line in txt_content.split('\n'):
            line = raw_line.rstrip('\r')
            stripped = line.strip()

            # Fenced code blocks: a ``` line toggles code mode.
            if stripped.startswith('```'):
                if in_code_block:
                    flush_code_block()
                    in_code_block = False
                else:
                    # A table directly followed by a fence must be emitted
                    # before the code block, not after it.
                    flush_table()
                    in_code_block = True
                    language = stripped[3:].strip()
                code_block_content = []
                continue
            if in_code_block:
                code_block_content.append(line)
                continue

            # Tables: separator rows are recognised but not stored.
            table_match = patterns['table_row'].match(line)
            table_sep_match = patterns['table_separator'].match(line)
            if table_match or table_sep_match:
                table_mode = True
                if table_match and not table_sep_match:
                    cells = [cell.strip() for cell in table_match.group(1).split('|')]
                    table_rows.append(cells)
                continue
            if table_mode:
                # First non-table line ends the table; the line itself is
                # still classified below.
                flush_table()

            # Headings.  A heading deeper than config.title_levels is not
            # consumed here and falls through to the checks below.
            heading_match = patterns['heading'].match(line)
            if heading_match:
                level = len(heading_match.group(2))
                if level <= config.title_levels:
                    heading_text = heading_match.group(3).strip()
                    # Bold markers removed, text kept.
                    cleaned_text = re.sub(r'\*\*(.+?)\*\*|__(.+?)__', r'\1\2', heading_text)
                    elements.append({
                        'type': 'heading',
                        'level': level,
                        'content': heading_text,  # raw text, still carries inline markup
                        'cleaned_content': cleaned_text,
                        'paragraphs': []
                    })
                    continue

            if patterns['horizontal_rule'].match(line):
                elements.append({
                    'type': 'horizontal_rule',
                    'level': 0
                })
                continue

            ul_match = patterns['unordered_list'].match(line)
            if ul_match:
                elements.append({
                    'type': 'unordered_list',
                    'content': ul_match.group(1),
                    'level': 0
                })
                continue

            ol_match = patterns['ordered_list'].match(line)
            if ol_match:
                elements.append({
                    'type': 'ordered_list',
                    'content': ol_match.group(1),
                    'level': 0
                })
                continue

            quote_match = patterns['blockquote'].match(line)
            if quote_match:
                elements.append({
                    'type': 'blockquote',
                    'content': quote_match.group(1),
                    'level': 0
                })
                continue

            if stripped == '':
                elements.append({
                    'type': 'empty',
                    'content': '',
                    'level': 0
                })
                continue

            elements.append({
                'type': 'paragraph',
                'content': line,
                'level': 0
            })

        # A fence that is never closed would otherwise lose its content.
        if in_code_block:
            flush_code_block()
        flush_table()
        return MarkdownParser.group_by_sections(elements)

    @staticmethod
    def group_by_sections(elements):
        """Group a flat element list into sections delimited by headings.

        Elements before the first heading go into a level-0 section whose
        content is ``'前置内容'``; that section is omitted when it is empty.
        A heading always produces a section, even when no element follows
        it, so that consecutive headings are not lost.

        Returns:
            A list of dicts with keys ``type``, ``level``, ``content`` and
            ``elements``.
        """
        sections = []
        current_section = {
            'type': 'section',
            'level': 0,
            'content': '前置内容',
            'elements': []
        }

        def keep(section):
            # Only the implicit leading section may be dropped when empty.
            return bool(section['elements']) or section['level'] > 0

        for element in elements:
            if element['type'] == 'heading':
                if keep(current_section):
                    sections.append(current_section)
                current_section = {
                    'type': 'section',
                    'level': element['level'],
                    'content': element['content'],
                    'elements': []
                }
            else:
                current_section['elements'].append(element)
        if keep(current_section):
            sections.append(current_section)
        return sections

    @staticmethod
    def extract_inline_formatting(text):
        """Locate inline markup spans in ``text``.

        Returns:
            A list of dicts sorted by ``start``.  Each has ``type`` (bold,
            italic, code, strikethrough or link), ``start`` and ``end``
            offsets of the whole marked-up span, and either ``content`` or,
            for links, ``text`` and ``url``.  Italic candidates overlapping
            an earlier bold (or italic) span are skipped.
        """
        patterns = MarkdownParser.PATTERNS
        formatting = []

        def overlaps(match, kinds):
            return any(
                f['start'] <= match.start() < f['end'] or f['start'] < match.end() <= f['end']
                for f in formatting if f['type'] in kinds
            )

        def record(kind, match):
            formatting.append({
                'type': kind,
                'start': match.start(),
                'end': match.end(),
                'content': match.group(1)
            })

        for key in ('bold_asterisk', 'bold_underscore'):
            for match in patterns[key].finditer(text):
                record('bold', match)

        for match in patterns['italic_asterisk'].finditer(text):
            if not overlaps(match, ('bold',)):
                record('italic', match)

        for match in patterns['italic_underscore'].finditer(text):
            if not overlaps(match, ('bold', 'italic')):
                record('italic', match)

        for match in patterns['code_inline'].finditer(text):
            record('code', match)

        for match in patterns['strikethrough'].finditer(text):
            record('strikethrough', match)

        for match in patterns['link'].finditer(text):
            formatting.append({
                'type': 'link',
                'start': match.start(),
                'end': match.end(),
                'text': match.group(1),
                'url': match.group(2)
            })

        formatting.sort(key=lambda x: x['start'])
        return formatting
# 文件处理模块
class FileHandler:
    """File-system helpers: discovery, matching, reading and output paths."""

    @staticmethod
    def scan_txt_files(folder_path):
        """Recursively collect the ``.txt`` files below ``folder_path``.

        Returns:
            A list of dicts with keys ``path``, ``name`` (file name without
            extension), ``relative_path`` and ``folder``, sorted by
            ``relative_path``.

        Raises:
            Exception: ``folder_path`` is not a directory, or it contains no
                TXT file.
        """
        if not os.path.isdir(folder_path):
            raise Exception(f"TXT文件夹不存在: {folder_path}")
        txt_files = []
        for root, _dirs, files in os.walk(folder_path):
            for file in files:
                if not file.lower().endswith(".txt"):
                    continue
                txt_path = os.path.join(root, file)
                txt_files.append({
                    "path": txt_path,
                    "name": os.path.splitext(file)[0],
                    "relative_path": os.path.relpath(txt_path, folder_path),
                    "folder": root
                })
        if not txt_files:
            raise Exception(f"{folder_path} 中未找到任何TXT文件")
        return sorted(txt_files, key=lambda x: x["relative_path"])

    @staticmethod
    def find_matching_image_folders(txt_files, images_root):
        """Pair every TXT file with an image folder below ``images_root``.

        Names are compared case-insensitively according to
        ``config.match_pattern`` (exact, prefix or contains).  When several
        folders match, the one with the shortest relative path is chosen.

        Returns:
            One dict per TXT file with keys ``txt``, ``image_folder`` (the
            chosen folder dict or ``None``) and ``all_matches``.

        Raises:
            Exception: ``images_root`` is not a directory.
        """
        if not os.path.isdir(images_root):
            raise Exception(f"图片根文件夹不存在: {images_root}")
        all_image_folders = []
        for root, dirs, _ in os.walk(images_root):
            for dir_name in dirs:
                folder_path = os.path.join(root, dir_name)
                all_image_folders.append({
                    "path": folder_path,
                    "name": dir_name,
                    "relative_path": os.path.relpath(folder_path, images_root)
                })
        matched_pairs = []
        for txt in txt_files:
            matches = []
            txt_name = txt["name"].lower()
            for img_folder in all_image_folders:
                folder_name = img_folder["name"].lower()
                if config.match_pattern == "exact" and txt_name == folder_name:
                    matches.append(img_folder)
                elif config.match_pattern == "prefix" and folder_name.startswith(txt_name):
                    matches.append(img_folder)
                elif config.match_pattern == "contains" and txt_name in folder_name:
                    matches.append(img_folder)
            if matches:
                matches.sort(key=lambda x: len(x["relative_path"]))
                matched_pairs.append({
                    "txt": txt,
                    "image_folder": matches[0],
                    "all_matches": matches
                })
            else:
                matched_pairs.append({
                    "txt": txt,
                    "image_folder": None,
                    "all_matches": []
                })
        return matched_pairs

    @staticmethod
    def get_image_files(folder_path):
        """List the image files directly inside ``folder_path``.

        Extensions are compared case-insensitively.  The directory is listed
        instead of globbed so that folder names containing glob
        metacharacters such as ``[`` still work.  Hidden (dot-prefixed)
        entries are skipped, as a ``*.ext`` glob would skip them.

        Returns:
            Full paths ordered by path when ``config.image_sort_by`` is
            ``"name"`` and by modification time when it is ``"time"``; an
            empty list when ``folder_path`` is empty or not a directory.
        """
        if not folder_path or not os.path.isdir(folder_path):
            return []
        image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.webp', '.tiff')
        image_files = []
        for entry in os.listdir(folder_path):
            if entry.startswith('.') or not entry.lower().endswith(image_extensions):
                continue
            full_path = os.path.join(folder_path, entry)
            if os.path.isfile(full_path):
                image_files.append(full_path)
        if config.image_sort_by == "name":
            image_files.sort()
        elif config.image_sort_by == "time":
            image_files.sort(key=os.path.getmtime)
        return image_files

    @staticmethod
    def read_markdown_txt(file_path):
        """Read a TXT file and normalise its line endings to ``\\n``.

        The configured encoding is tried first, then gbk, utf-16 and
        iso-8859-1; the first one that decodes without error wins.

        Raises:
            Exception: the file does not exist or no encoding could decode it.
        """
        if not os.path.exists(file_path):
            raise Exception(f"TXT文件不存在: {file_path}")
        encodings = [config.txt_encoding, "gbk", "utf-16", "iso-8859-1"]
        for encoding in encodings:
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    content = f.read()
            except UnicodeDecodeError:
                continue
            return content.replace("\r\n", "\n").replace("\r", "\n")
        raise Exception(f"无法解析TXT文件编码问题: {file_path}")

    @staticmethod
    def prepare_output_path(txt_info, images_root, output_root):
        """Return a not-yet-existing ``.docx`` path for ``txt_info``.

        The target folder is the TXT file's own folder when
        ``config.output_location`` is ``"txt_folder"``, otherwise
        ``output_root``; it is created if needed.  ``_1``, ``_2``, ... is
        appended to the file name until the path is free.  ``images_root``
        is accepted but not used.
        """
        if config.output_location == "txt_folder":
            base_folder = txt_info["folder"]
        else:
            base_folder = output_root
        os.makedirs(base_folder, exist_ok=True)
        txt_name = txt_info["name"]
        output_path = os.path.join(base_folder, f"{txt_name}.docx")
        counter = 1
        while os.path.exists(output_path):
            output_path = os.path.join(base_folder, f"{txt_name}_{counter}.docx")
            counter += 1
        return output_path
# 图片处理模块
class ImageProcessor:
    """Image preparation helpers used when inserting pictures."""

    # EXIF orientation value -> counter-clockwise rotation in degrees.
    _ORIENTATION_ROTATION = {3: 180, 6: 270, 8: 90}

    @staticmethod
    def process_image(image_path):
        """Open an image, apply EXIF rotation and optional width resizing.

        When ``config.image_resize`` is ``"width"`` and
        ``config.image_width`` is positive, images wider than
        ``image_width * 96`` pixels are scaled down proportionally.

        Returns:
            ``(image, width_in_inches)``.  The width is
            ``config.image_width`` in resize mode and ``pixel_width / 96``
            otherwise.

        Raises:
            Exception: the image could not be opened or processed.
        """
        try:
            with Image.open(image_path) as img:
                # Honour the EXIF orientation tag (274) when present.
                if hasattr(img, '_getexif'):
                    exif = img._getexif()
                    if exif:
                        orientation_tag = 274
                        if orientation_tag in exif:
                            angle = ImageProcessor._ORIENTATION_ROTATION.get(exif[orientation_tag])
                            if angle is not None:
                                img = img.rotate(angle, expand=True)

                if config.image_resize == "width" and config.image_width > 0:
                    target_width_px = config.image_width * 96
                    width, height = img.size
                    if width > target_width_px:
                        ratio = target_width_px / width
                        new_height = int(height * ratio)
                        img = img.resize((int(target_width_px), new_height), Image.LANCZOS)
                    width_in = config.image_width
                else:
                    width_in = img.width / 96

                # Image.open is lazy.  Read the pixel data while the file is
                # still open so the returned image stays usable (e.g. for
                # save()) after the with-block has closed it.
                img.load()
                return img, width_in
        except Exception as e:
            raise Exception(f"处理图片失败 {image_path}: {str(e)}") from e

    @staticmethod
    def get_image_alignment():
        """Map ``config.image_alignment`` to a paragraph alignment constant.

        Any value other than ``"left"`` or ``"right"`` yields centered.
        """
        if config.image_alignment == "left":
            return WD_ALIGN_PARAGRAPH.LEFT
        if config.image_alignment == "right":
            return WD_ALIGN_PARAGRAPH.RIGHT
        return WD_ALIGN_PARAGRAPH.CENTER
# Disclaimer paragraph appended at the end of a generated document when
# config.add_disclaimer is enabled.  The text is wrapped in backticks.
DISCLAIMER_TEXT = """`[免责声明]文章的时间、过程、图片均来自于网络,文章旨在传播正能量,均无低俗等不良引导,请观众勿对号入座,并上升到人身攻击等方面。观众理性看待本事件,切勿留下主观臆断的恶意评论,互联网不是法外之地。本文如若真实性存在争议、事件版权或图片侵权问题,请及时联系作者,我们将予以删除。`"""
# DOCX生成模块 - 完全重构
class DocxGenerator:
    """Builds the output .docx from parsed sections and image files."""

    @staticmethod
    def generate(sections, image_files, output_path, progress_callback=None):
        """Write ``sections`` and ``image_files`` into a new document.

        For every section the heading is added, then its elements; one image
        is inserted after the first non-empty element of the section.  When
        the images run out, ``config.image_strategy`` decides what happens:
        ``cycle`` restarts from the first image, ``repeat_last`` keeps using
        the last one and ``truncate`` stops inserting images.

        Args:
            sections: Section dicts as returned by ``MarkdownParser.parse``.
            image_files: Image paths, in insertion order.
            output_path: Destination ``.docx`` path.
            progress_callback: Optional ``callback(percent, message)``.

        Returns:
            ``True`` when the document was saved.

        Raises:
            Exception: saving the document failed.
        """
        doc = Document()
        total_sections = len(sections)
        image_index = 0
        image_count = len(image_files)

        for i, section in enumerate(sections):
            if progress_callback:
                progress = int((i / total_sections) * 100)
                progress_callback(progress, f"处理章节: {section['content'][:30]}...")

            if 0 < section['level'] <= config.title_levels:
                para = doc.add_heading(level=section['level'])
                # apply_inline_formatting runs the text pipeline itself, so
                # the raw text is passed to avoid processing it twice.
                DocxGenerator.apply_inline_formatting(para, section['content'])
            elif section['content'] != '前置内容':
                heading_text = TextProcessor.process_text_content(section['content'])
                para = doc.add_paragraph()
                run = para.add_run(heading_text)
                run.font.size = Pt(14)
                run.font.bold = True
                # Spacing is a property of paragraph_format, not of Paragraph.
                para.paragraph_format.space_after = Pt(12)

            elements = section.get('elements', [])
            if not elements:
                continue

            first_content_added = False
            for element in elements:
                DocxGenerator.add_element_to_doc(doc, element)

                # Insert one image after the first non-empty element.
                if first_content_added or element['type'] in ['empty']:
                    continue
                first_content_added = True
                if image_count > 0 and image_index < image_count:
                    try:
                        DocxGenerator.insert_image(doc, image_files[image_index], output_path)
                        image_index += 1
                        if image_index >= image_count:
                            if config.image_strategy == "cycle":
                                image_index = 0
                            elif config.image_strategy == "repeat_last":
                                image_index = image_count - 1
                            else:
                                # truncate: index stays past the end, so no
                                # further image is inserted.
                                image_index = image_count
                    except Exception as e:
                        doc.add_paragraph(f"[图片插入失败: {str(e)}]")

        if config.add_disclaimer:
            doc.add_paragraph("---")
            para = doc.add_paragraph()
            disclaimer_text = TextProcessor.process_text_content(DISCLAIMER_TEXT)
            run = para.add_run(disclaimer_text)
            run.font.size = Pt(10)
            para.paragraph_format.line_spacing = 1.0

        try:
            doc.save(output_path)
        except Exception as e:
            raise Exception(f"保存DOCX失败: {str(e)}") from e
        if progress_callback:
            progress_callback(100, "转换完成!")
        return True

    @staticmethod
    def add_element_to_doc(doc, element):
        """Append one parsed element to ``doc``.

        Paragraph, list and quote text is handed over unprocessed because
        ``apply_inline_formatting`` runs the text pipeline; code blocks are
        written verbatim.
        """
        etype = element['type']
        content = element.get('content', '')
        if etype == 'paragraph':
            DocxGenerator.add_formatted_paragraph(doc, content)
        elif etype == 'unordered_list':
            para = doc.add_paragraph(style='List Bullet')
            DocxGenerator.apply_inline_formatting(para, content)
        elif etype == 'ordered_list':
            para = doc.add_paragraph(style='List Number')
            DocxGenerator.apply_inline_formatting(para, content)
        elif etype == 'blockquote':
            para = doc.add_paragraph(style='Quote')
            DocxGenerator.apply_inline_formatting(para, content)
        elif etype == 'code_block':
            para = doc.add_paragraph(style='No Spacing')
            run = para.add_run(element['content'])
            run.font.name = 'Courier New'
            run.font.size = Pt(10)
        elif etype == 'table':
            DocxGenerator.add_table_to_doc(doc, element['rows'])
        elif etype == 'horizontal_rule':
            DocxGenerator.add_horizontal_rule(doc)
        elif etype == 'empty':
            doc.add_paragraph()

    @staticmethod
    def add_horizontal_rule(doc):
        """Add a centered paragraph of underlined spaces as a rule."""
        para = doc.add_paragraph()
        run = para.add_run()
        run.font.underline = True
        run.text = " " * 100  # underlined blanks stand in for a line
        para.alignment = WD_ALIGN_PARAGRAPH.CENTER

    @staticmethod
    def add_table_to_doc(doc, rows):
        """Add a grid table; each cell text goes through the text pipeline.

        The column count is that of the widest row so no cell is dropped.
        """
        if not rows:
            return
        column_count = max(len(row) for row in rows)
        table = doc.add_table(rows=len(rows), cols=column_count)
        table.style = 'Table Grid'
        for i, row_data in enumerate(rows):
            row_cells = table.rows[i].cells
            for j, cell_data in enumerate(row_data):
                if j < len(row_cells):
                    row_cells[j].text = TextProcessor.process_text_content(cell_data)

    @staticmethod
    def insert_image(doc, image_path, output_path):
        """Insert the picture at ``image_path`` in its own paragraph.

        In width-resize mode the processed image is written to a temporary
        PNG next to ``output_path``, which is removed again afterwards, even
        when the insertion fails.
        """
        img, width = ImageProcessor.process_image(image_path)
        temp_img_path = None
        try:
            if config.image_resize == "width":
                temp_dir = os.path.dirname(output_path)
                if temp_dir:
                    os.makedirs(temp_dir, exist_ok=True)
                temp_img_path = os.path.join(temp_dir, f"temp_img_{hash(image_path)}.png")
                img.save(temp_img_path)
                img_path = temp_img_path
            else:
                img_path = image_path
            para = doc.add_paragraph()
            run = para.runs[0] if para.runs else para.add_run()
            run.add_picture(img_path, width=Inches(width))
            para.alignment = ImageProcessor.get_image_alignment()
        finally:
            if temp_img_path and os.path.exists(temp_img_path):
                try:
                    os.remove(temp_img_path)
                except OSError:
                    pass  # a leftover temp file is not worth failing for

    @staticmethod
    def add_formatted_paragraph(doc, content):
        """Add a paragraph with inline formatting and configured line spacing.

        Blank content produces an empty paragraph.
        """
        if not content or not content.strip():
            doc.add_paragraph()
            return
        para = doc.add_paragraph()
        DocxGenerator.apply_inline_formatting(para, content)
        if config.line_spacing > 0:
            para.paragraph_format.line_spacing = config.line_spacing

    @staticmethod
    def apply_inline_formatting(paragraph, text):
        """Run the text pipeline on ``text`` and add it to ``paragraph``.

        Inline markup is located after processing, because processing may
        change the text.  Marked-up spans become runs with the matching
        character format; for links only the link text is shown.  A span
        that starts inside an already emitted span is skipped so that no
        text is written twice.
        """
        processed_text = TextProcessor.process_text_content(text)
        formatting = MarkdownParser.extract_inline_formatting(processed_text)
        if not formatting:
            paragraph.add_run(processed_text)
            return

        current_pos = 0
        for fmt in formatting:
            if fmt['start'] < current_pos:
                continue  # nested or overlapping span, already covered
            if fmt['start'] > current_pos:
                paragraph.add_run(processed_text[current_pos:fmt['start']])

            kind = fmt['type']
            if kind == 'bold':
                run = paragraph.add_run(fmt['content'])
                run.bold = True
            elif kind == 'italic':
                run = paragraph.add_run(fmt['content'])
                run.italic = True
            elif kind == 'code':
                run = paragraph.add_run(fmt['content'])
                run.font.name = 'Courier New'
                run.font.size = Pt(10)
            elif kind == 'strikethrough':
                run = paragraph.add_run(fmt['content'])
                run.font.strike = True
            elif kind == 'link':
                run = paragraph.add_run(fmt['text'])
                run.font.color.rgb = RGBColor(0, 0, 255)  # blue
                run.underline = True
            current_pos = fmt['end']

        if current_pos < len(processed_text):
            paragraph.add_run(processed_text[current_pos:])
# 批量处理模块
class BatchProcessor:
    """Runs the TXT-to-DOCX conversion over a list of matched pairs."""

    @staticmethod
    def process_batch(matched_pairs, output_root, progress_callback=None):
        """Convert every pair and report the outcome.

        A failure in one pair is recorded and does not stop the batch.

        Args:
            matched_pairs: Dicts with a ``txt`` entry and an ``image_folder``
                entry (a folder dict or ``None``).
            output_root: Output folder handed to
                ``FileHandler.prepare_output_path``.
            progress_callback: Optional ``callback(percent, message)``.

        Returns:
            A dict with ``total``, ``success``, ``failed``, ``failed_items``
            (name and error text per failure) and ``main_output_folder``.
        """
        total = len(matched_pairs)
        success_count = 0
        failed_items = []

        for index, pair in enumerate(matched_pairs):
            try:
                if progress_callback:
                    progress_callback(int((index / total) * 100),
                                      f"处理 {index + 1}/{total}: {pair['txt']['name']}")

                folder_info = pair['image_folder']
                output_path = FileHandler.prepare_output_path(
                    pair['txt'],
                    folder_info['path'] if folder_info else "",
                    output_root
                )

                sections = MarkdownParser.parse(FileHandler.read_markdown_txt(pair['txt']['path']))
                if not sections:
                    raise Exception("未解析到有效内容")

                image_files = FileHandler.get_image_files(pair['image_folder']['path']) \
                    if pair['image_folder'] else []

                # Scale the per-file progress into this file's slice of the
                # overall progress range.
                def update_file_progress(progress, text):
                    if not progress_callback:
                        return
                    scaled = int((index + progress / 100) / total * 100)
                    progress_callback(scaled, f"{pair['txt']['name']}: {text}")

                DocxGenerator.generate(sections, image_files, output_path, update_file_progress)
            except Exception as e:
                failed_items.append({
                    "name": pair['txt']['name'],
                    "error": str(e)
                })
            else:
                success_count += 1

        # The folder of the first pair's output stands for the whole batch.
        main_output_folder = ""
        if matched_pairs and success_count > 0:
            sample_output = FileHandler.prepare_output_path(matched_pairs[0]['txt'], "", output_root)
            main_output_folder = os.path.dirname(sample_output)

        return {
            "total": total,
            "success": success_count,
            "failed": len(failed_items),
            "failed_items": failed_items,
            "main_output_folder": main_output_folder
        }
# 配置窗口 - 优化排版
def show_config_window():
    """Show the modal settings dialog and persist the choices on confirm.

    The dialog has four tabs (file handling, text processing, image
    handling, document format).  Confirming copies the widget values into
    the global ``config`` and saves it to ``CONFIG_FILE_PATH``; cancelling
    or closing the window discards them.  Numeric fields that cannot be
    parsed keep their previous value.
    """
    tab_file_layout = [
        [sg.Text('文件处理设置', font=('bold', 12))],
        [sg.HSeparator()],
        [sg.Text('TXT编码:', size=(12, 1)),
         sg.Combo(['utf-8', 'gbk', 'utf-16'], default_value=config.txt_encoding, key='txt_encoding', size=(15, 1))],
        [sg.Text('匹配模式:', size=(12, 1))],
        [sg.Radio('完全匹配(文件名与文件夹名相同)', 'match', default=config.match_pattern == "exact",
                  key='match_exact')],
        [sg.Radio('前缀匹配', 'match', default=config.match_pattern == "prefix", key='match_prefix')],
        [sg.Radio('包含匹配', 'match', default=config.match_pattern == "contains", key='match_contains')],
        [sg.HSeparator()],
        [sg.Text('输出位置:', size=(12, 1))],
        [sg.Radio('输出到TXT文件所在文件夹', 'output_loc', default=config.output_location == "txt_folder",
                  key='output_txt_folder')],
        [sg.Radio('输出到指定文件夹', 'output_loc', default=config.output_location == "custom", key='output_custom')]
    ]
    tab_text_layout = [
        [sg.Text('文字处理设置', font=('bold', 12))],
        [sg.HSeparator()],
        [sg.Checkbox('转换文字顺序', key='-REVERSE_TEXT-', default=config.reverse_text_order)],
        [sg.Checkbox('替换标点符号(句号转逗号,保留结尾句号)', key='-REPLACE_PUNCTUATION-',
                     default=config.replace_punctuation)],
        [sg.HSeparator()],
        [sg.Text('错别字处理', font=('bold', 11), text_color='darkblue')],
        [sg.Checkbox('启用错别字处理', key='-ENABLE_CHAR_ERRORS-', default=config.enable_char_errors,
                     enable_events=True)],
        [sg.Text('错误强度:', size=(10, 1)),
         sg.Slider(range=(0.0, 1.0), default_value=config.char_error_intensity, resolution=0.1,
                   orientation='h', size=(20, 15), key='char_error_intensity',
                   disabled=not config.enable_char_errors)],
        [sg.Text('错别字库路径:', size=(12, 1)),
         sg.InputText(config.char_error_db_path, key='char_error_db_path', size=(30, 1),
                      disabled=not config.enable_char_errors),
         sg.FileBrowse('浏览', file_types=(("JSON Files", "*.json"),), disabled=not config.enable_char_errors)],
        [sg.HSeparator()],
        [sg.Checkbox('添加免责声明', key='-ADD_DISCLAIMER-', default=config.add_disclaimer)]
    ]
    tab_image_layout = [
        [sg.Text('图片处理设置', font=('bold', 12))],
        [sg.HSeparator()],
        [sg.Text('图片排序方式:', size=(12, 1))],
        [sg.Radio('按名称', 'sort', default=config.image_sort_by == "name", key='sort_name'),
         sg.Radio('按修改时间', 'sort', default=config.image_sort_by == "time", key='sort_time')],
        [sg.HSeparator()],
        [sg.Text('图片尺寸调整:', size=(12, 1))],
        [sg.Radio('不调整', 'resize', default=config.image_resize == "none", key='resize_none')],
        [sg.Radio('按宽度:', 'resize', default=config.image_resize == "width", key='resize_width'),
         sg.InputText(str(config.image_width), size=(8, 1), key='image_width'),
         sg.Text('英寸')],
        [sg.HSeparator()],
        [sg.Text('图片对齐方式:', size=(12, 1))],
        [sg.Radio('左对齐', 'align', default=config.image_alignment == "left", key='align_left'),
         sg.Radio('居中', 'align', default=config.image_alignment == "center", key='align_center'),
         sg.Radio('右对齐', 'align', default=config.image_alignment == "right", key='align_right')],
        [sg.HSeparator()],
        [sg.Text('图片不足时策略:', size=(12, 1))],
        [sg.Radio('循环使用', 'strategy', default=config.image_strategy == "cycle", key='strategy_cycle')],
        [sg.Radio('忽略多余标题', 'strategy', default=config.image_strategy == "truncate", key='strategy_truncate')],
        [sg.Radio('重复最后一张', 'strategy', default=config.image_strategy == "repeat_last", key='strategy_repeat')]
    ]
    tab_format_layout = [
        [sg.Text('文档格式设置', font=('bold', 12))],
        [sg.HSeparator()],
        [sg.Text('行间距:', size=(12, 1)),
         sg.InputText(str(config.line_spacing), size=(8, 1), key='line_spacing')],
        [sg.Text('最大标题层级:', size=(12, 1)),
         sg.Combo([1, 2, 3, 4, 5, 6], default_value=config.title_levels, key='title_levels', size=(8, 1))]
    ]
    layout = [
        [sg.TabGroup([
            [sg.Tab('文件处理', tab_file_layout, key='tab_file')],
            [sg.Tab('文字处理', tab_text_layout, key='tab_text')],
            [sg.Tab('图片处理', tab_image_layout, key='tab_image')],
            [sg.Tab('文档格式', tab_format_layout, key='tab_format')]
        ])],
        [sg.HSeparator()],
        [sg.Button('确定', size=(10, 1)), sg.Button('取消', size=(10, 1)), sg.Button('重置为默认', size=(12, 1))]
    ]
    window = sg.Window('转换设置', layout, modal=True, resizable=True, size=(500, 450))

    while True:
        event, values = window.read()
        if event in (sg.WIN_CLOSED, '取消'):
            break

        # Enable or disable the typo-injection inputs with the checkbox.
        if event == '-ENABLE_CHAR_ERRORS-':
            enabled = values['-ENABLE_CHAR_ERRORS-']
            window['char_error_intensity'].update(disabled=not enabled)
            window['char_error_db_path'].update(disabled=not enabled)

        if event == '重置为默认':
            # Show the defaults; nothing is saved until the user confirms.
            default_config = Config()
            window['txt_encoding'].update(default_config.txt_encoding)
            window['match_exact'].update(True)
            window['output_txt_folder'].update(True)
            window['-REVERSE_TEXT-'].update(default_config.reverse_text_order)
            window['-REPLACE_PUNCTUATION-'].update(default_config.replace_punctuation)
            window['-ENABLE_CHAR_ERRORS-'].update(default_config.enable_char_errors)
            window['char_error_intensity'].update(default_config.char_error_intensity)
            window['char_error_db_path'].update(default_config.char_error_db_path)
            window['-ADD_DISCLAIMER-'].update(default_config.add_disclaimer)
            window['sort_name'].update(True)
            window['resize_none'].update(True)
            window['image_width'].update(str(default_config.image_width))
            window['align_center'].update(True)
            window['strategy_cycle'].update(True)
            window['line_spacing'].update(str(default_config.line_spacing))
            window['title_levels'].update(default_config.title_levels)

        if event == '确定':
            config.txt_encoding = values['txt_encoding']
            if values['match_exact']:
                config.match_pattern = "exact"
            elif values['match_prefix']:
                config.match_pattern = "prefix"
            else:
                config.match_pattern = "contains"
            config.output_location = "txt_folder" if values['output_txt_folder'] else "custom"
            config.image_sort_by = "name" if values['sort_name'] else "time"
            config.image_resize = "none" if values['resize_none'] else "width"
            config.reverse_text_order = values['-REVERSE_TEXT-']
            config.replace_punctuation = values['-REPLACE_PUNCTUATION-']
            config.add_disclaimer = values['-ADD_DISCLAIMER-']
            # Typo-injection settings
            config.enable_char_errors = values['-ENABLE_CHAR_ERRORS-']
            config.char_error_intensity = values['char_error_intensity']
            config.char_error_db_path = values['char_error_db_path']

            # Unparseable numbers keep the previous setting.
            try:
                config.image_width = float(values['image_width'])
            except (TypeError, ValueError):
                pass

            if values['align_left']:
                config.image_alignment = "left"
            elif values['align_right']:
                config.image_alignment = "right"
            else:
                config.image_alignment = "center"

            if values['strategy_cycle']:
                config.image_strategy = "cycle"
            elif values['strategy_truncate']:
                config.image_strategy = "truncate"
            else:
                config.image_strategy = "repeat_last"

            # Parsed independently so that a bad line spacing does not also
            # discard the chosen heading depth.
            try:
                config.line_spacing = float(values['line_spacing'])
            except (TypeError, ValueError):
                pass
            try:
                config.title_levels = int(values['title_levels'])
            except (TypeError, ValueError):
                pass

            config.save_to_file(CONFIG_FILE_PATH)
            break

    window.close()
# 匹配编辑窗口
def show_matching_editor(matched_pairs, images_root):
    """Show a modal editor for manually adjusting TXT-to-image-folder matches.

    Edits are written into ``matched_pairs`` as soon as the user clicks a
    button, so they are kept whether the window is left via '应用所有' or by
    closing it.

    Args:
        matched_pairs: List of pair dicts. Each pair is read through
            ``pair['txt']['name']`` and ``pair['image_folder']`` (``None`` or
            a dict with a ``'relative_path'`` key). Modified in place.
        images_root: Directory whose sub-folders (at any depth) are offered as
            candidates. If it is not a directory the candidate list is empty.

    Returns:
        The same ``matched_pairs`` list that was passed in.
    """
    # Every sub-folder below images_root as (walked path, path relative to images_root).
    all_image_folders = []
    if os.path.isdir(images_root):
        for root, dirs, _ in os.walk(images_root):
            for dir_name in dirs:
                folder_path = os.path.join(root, dir_name)
                rel_path = os.path.relpath(folder_path, images_root)
                all_image_folders.append((folder_path, rel_path))

    # The listbox displays relative paths; this maps a selected entry back to
    # its folder path. setdefault keeps the first occurrence, like the former
    # first-match scan.
    folder_by_rel_path = {}
    for folder_path, rel_path in all_image_folders:
        folder_by_rel_path.setdefault(rel_path, folder_path)

    table_data = [
        [
            i,
            pair['txt']['name'],
            pair['image_folder']['relative_path'] if pair['image_folder'] else "无匹配",
        ]
        for i, pair in enumerate(matched_pairs)
    ]

    layout = [
        [sg.Text('文件匹配编辑', font=('bold', 14))],
        [sg.Text('选择要修改的项目,然后从右侧选择图片文件夹')],
        [
            sg.Table(
                values=table_data,
                headings=['序号', 'TXT文件名', '匹配的图片文件夹'],
                key='-TABLE-',
                select_mode=sg.TABLE_SELECT_MODE_BROWSE,
                enable_events=True,
                justification='left',
                size=(None, 15)
            ),
            sg.VSeparator(),
            sg.Listbox(
                values=[f[1] for f in all_image_folders],
                key='-FOLDERS-',
                size=(40, 15),
                enable_events=True
            )
        ],
        [sg.Button('设置选中项'), sg.Button('清除选中项'), sg.Button('应用所有')]
    ]
    window = sg.Window('匹配编辑', layout, resizable=True)

    # Index of the last table row the user clicked; kept across table
    # refreshes so consecutive edits apply to the same row.
    selected_row = None
    while True:
        event, values = window.read()
        if event in (sg.WIN_CLOSED, '应用所有'):
            break

        if event == '-TABLE-':
            if values['-TABLE-']:
                selected_row = values['-TABLE-'][0]

        if event == '设置选中项' and selected_row is not None and values['-FOLDERS-']:
            folder_rel = values['-FOLDERS-'][0]
            folder_path = folder_by_rel_path.get(folder_rel)
            if folder_path is None:
                # Selection does not correspond to a known folder; ignore it
                # instead of failing with an IndexError.
                continue
            matched_pairs[selected_row]['image_folder'] = {
                "path": folder_path,
                "name": os.path.basename(folder_path),
                "relative_path": folder_rel
            }
            table_data[selected_row][2] = folder_rel
            window['-TABLE-'].update(values=table_data)

        if event == '清除选中项' and selected_row is not None:
            matched_pairs[selected_row]['image_folder'] = None
            table_data[selected_row][2] = "无匹配"
            window['-TABLE-'].update(values=table_data)

    window.close()
    return matched_pairs
# 帮助窗口
def show_help_window():
    """Show the usage guide in a scrollable popup titled '使用帮助'."""
    # User-facing help text. It is a runtime string: every character,
    # including the line breaks and the escaped "\\n" sequences, is displayed
    # as written, so it must not be reformatted or re-indented.
    help_text = """
批量Markdown TXT转DOCX工具使用说明:
1. 选择包含Markdown内容的TXT文件所在文件夹
2. 选择图片文件夹的根目录程序会自动查找子文件夹
3. 选择输出文件的保存根目录当选择"输出到指定文件夹"时有效
4. 点击"扫描文件"按钮程序会自动匹配TXT文件和图片文件夹
5. 查看匹配结果可点击"编辑匹配"调整匹配关系
6. 点击"开始批量转换"生成DOCX文件
支持的Markdown格式:
- 标题# ## ### #### ##### ######
- 粗体**文字** __文字__
- 斜体*文字* _文字_
- 行内代码`代码`
- 代码块```语言\\n代码\\n```
- 删除线~~文字~~
- 链接[链接文字](URL)
- 图片![图片描述](图片路径)
- 无序列表- * +
- 有序列表1. 2. 3.
- 引用> 引用内容
- 表格| 列1 | 列2 |
- 水平分隔线--- *** ___
文字处理功能:
- 转换文字顺序将文字内容进行特定转换处理
- 错别字处理可以按设定强度引入常见的错别字用于测试或特殊用途
- 标点符号替换将句号转换为逗号保留文末句号
输出路径选择:
- 输出到TXT文件所在文件夹: 每个DOCX文件会直接保存在对应TXT文件所在的文件夹中
- 输出到指定文件夹: 所有DOCX文件会直接保存在您指定的文件夹中
匹配规则:
- 完全匹配: TXT文件名不含扩展名与图片文件夹名完全相同
- 前缀匹配: 图片文件夹名以前缀形式包含TXT文件名
- 包含匹配: 图片文件夹名中包含TXT文件名
转换规则:
- 每个小标题的第一段后会插入一张图片
- 先将Markdown格式转换为DOCX格式再处理文字内容
- 支持文字顺序调换错别字处理和标点符号替换功能
错别字处理说明:
- 错误强度控制替换比例0.0表示不替换1.0表示替换所有可能的字
- 错别字库可自定义JSON格式的错别字映射文件
- 常见映射事等
"""
    # First positional argument is '使用帮助', followed by the text body.
    sg.popup_scrolled('使用帮助', help_text, size=(70, 25))
# 结果窗口
def show_results_window(results):
    """Show the outcome of a batch run and offer to open the output folder.

    Args:
        results: Dict describing the batch run. Keys read here:
            ``'total'``, ``'failed'`` and ``'main_output_folder'`` always;
            ``'success'`` and ``'failed_items'`` (an iterable of dicts with
            ``'name'`` and ``'error'``) only when ``'failed'`` is non-zero.
    """
    # Local import so this function does not depend on the file's import block.
    import subprocess

    output_folder = results['main_output_folder']

    if results['failed'] == 0:
        message = f"全部成功!\n共处理 {results['total']} 个文件,全部转换成功。"
        show_popup = sg.popup
        popup_kwargs = {}
    else:
        failed_text = "\n".join(
            f"- {item['name']}: {item['error']}" for item in results['failed_items']
        )
        message = (f"处理完成!\n共处理 {results['total']} 个文件,"
                   f"{results['success']} 个成功,{results['failed']} 个失败。\n\n"
                   f"失败项:\n{failed_text}")
        # The failure list can be long, so use the scrollable popup.
        show_popup = sg.popup_scrolled
        popup_kwargs = {'size': (60, 20)}

    if output_folder:
        message += f"\n主要输出文件夹: {output_folder}"
    show_popup('处理完成', message, **popup_kwargs)

    # Ask whether to open the output folder.
    if not (output_folder and os.path.exists(output_folder)):
        return
    if sg.popup_yes_no('是否打开主要输出文件夹?') != 'Yes':
        return

    try:
        if sys.platform.startswith('win'):
            os.startfile(output_folder)
        else:
            opener = 'open' if sys.platform.startswith('darwin') else 'xdg-open'
            # Argument list instead of a shell string: the path is passed
            # verbatim, so quotes, '$' or backticks in it cannot break or
            # inject into the command.
            subprocess.run([opener, output_folder], check=False)
    except OSError as exc:
        # Failing to launch the file manager is not a conversion failure;
        # report it here rather than letting it propagate to the caller.
        sg.popup_error(f'无法打开输出文件夹: {exc}')
# 主界面
def main_window():
    """Build the main window and run its event loop until the user exits.

    Handled events:
      * '扫描文件'      - scan the TXT folder, match image folders and fill
                          the preview table.
      * '编辑匹配'      - open the manual matching editor.
      * '转换设置'      - open the settings dialog.
      * '帮助'          - show the usage guide.
      * '开始批量转换'  - run the batch conversion and show the results.
      * '退出' / close  - store the three folder fields in ``config`` and quit.

    Reads and updates the module-level ``config`` object and saves it to
    ``CONFIG_FILE_PATH``.
    """
    sg.theme('BlueMono')
    matched_pairs = []
    layout = [
        [sg.Text('批量Markdown TXT转DOCX工具', font=('bold', 16))],
        [sg.Text('按文件名匹配TXT文件和图片文件夹支持完整Markdown格式', text_color='gray')],
        [sg.HSeparator()],
        [sg.Text('TXT文件文件夹:', size=(15, 1)),
         sg.InputText(key='txt_folder', enable_events=True, default_text=config.last_txt_folder),
         sg.FolderBrowse('浏览')],
        [sg.Text('图片根文件夹:', size=(15, 1)),
         sg.InputText(key='images_root', enable_events=True, default_text=config.last_images_root),
         sg.FolderBrowse('浏览')],
        [sg.Text('输出根文件夹:', size=(15, 1)),
         sg.InputText(key='output_root', enable_events=True, default_text=config.last_output_root),
         sg.FolderBrowse('浏览'),
         sg.Text('(当选择"输出到指定文件夹"时有效)', text_color='gray')],
        [sg.Button('扫描文件', size=(12, 1)),
         sg.Button('编辑匹配', size=(12, 1), disabled=True),
         sg.Button('转换设置', size=(12, 1)),
         sg.Button('帮助', size=(8, 1))],
        [sg.HSeparator()],
        [sg.Text('匹配结果预览:', font=('bold', 10))],
        [sg.Table(
            values=[],
            headings=['TXT文件名', '相对路径', '匹配的图片文件夹'],
            key='-PREVIEW_TABLE-',
            auto_size_columns=False,
            col_widths=[20, 30, 30],
            justification='left',
            size=(None, 10)
        )],
        [sg.ProgressBar(100, orientation='h', size=(80, 20), key='progress_bar', visible=False)],
        [sg.Text('状态: 就绪', key='status_text', size=(80, 1))],
        [sg.Button('开始批量转换', size=(15, 1), disabled=True), sg.Button('退出')]
    ]
    window = sg.Window('批量Markdown TXT转DOCX工具', layout, resizable=True)
    progress_bar = window['progress_bar']
    status_text = window['status_text']
    preview_table = window['-PREVIEW_TABLE-']
    output_root_input = window['output_root']

    def update_output_root_state():
        """Enable the output-root field only when output goes to a custom folder."""
        if config.output_location == "custom":
            output_root_input.update(disabled=False)
            output_root_input.Widget.configure(foreground='black')
        else:
            output_root_input.update(disabled=True)
            output_root_input.Widget.configure(foreground='gray')

    def build_preview_rows(pairs):
        """Return preview rows: [TXT name, TXT relative path, image folder or "无匹配"]."""
        return [
            [
                pair['txt']['name'],
                pair['txt']['relative_path'],
                pair['image_folder']['relative_path'] if pair['image_folder'] else "无匹配",
            ]
            for pair in pairs
        ]

    def update_batch_progress(progress, text):
        """Progress callback handed to the batch processor."""
        progress_bar.update(progress)
        status_text.update(f'状态: {text}')
        window.refresh()

    # One short read before touching widgets.
    # NOTE(review): presumably this forces the window to be created so that
    # `.Widget` exists in update_output_root_state — confirm.
    window.read(timeout=1)
    update_output_root_state()

    while True:
        event, values = window.read()

        if event in (sg.WIN_CLOSED, '退出'):
            # Remember the folder fields for the next start, if they are available.
            if values is not None:
                config.last_txt_folder = values.get('txt_folder', '')
                config.last_images_root = values.get('images_root', '')
                config.last_output_root = values.get('output_root', '')
                config.save_to_file(CONFIG_FILE_PATH)
            break

        if event == '转换设置':
            current_output_root = values['output_root']
            show_config_window()
            # The dialog may have changed config.output_location.
            update_output_root_state()
            # Put back the text the field held before the dialog was opened.
            window['output_root'].update(current_output_root)

        if event == '帮助':
            show_help_window()

        if event == '扫描文件':
            txt_folder = values['txt_folder']
            images_root = values['images_root']
            if not txt_folder:
                sg.popup_error('请选择TXT文件所在的文件夹')
                continue
            if not images_root:
                sg.popup_error('请选择图片根文件夹')
                continue

            config.last_txt_folder = txt_folder
            config.last_images_root = images_root
            if values['output_root']:
                config.last_output_root = values['output_root']
            config.save_to_file(CONFIG_FILE_PATH)

            try:
                status_text.update('正在扫描TXT文件...')
                window.refresh()
                txt_files = FileHandler.scan_txt_files(txt_folder)
                status_text.update('正在匹配图片文件夹...')
                window.refresh()
                matched_pairs = FileHandler.find_matching_image_folders(txt_files, images_root)
                preview_table.update(values=build_preview_rows(matched_pairs))
                status_text.update(f'扫描完成: 找到 {len(matched_pairs)} 个TXT文件')
                # Both actions are no-ops without matches, so only enable
                # them when the scan actually produced something.
                window['编辑匹配'].update(disabled=not matched_pairs)
                window['开始批量转换'].update(disabled=not matched_pairs)
            except Exception as e:
                sg.popup_error(f'扫描失败: {str(e)}')
                status_text.update('状态: 扫描失败')

        if event == '编辑匹配' and matched_pairs:
            images_root = values['images_root']
            if not images_root:
                sg.popup_error('请选择图片根文件夹')
                continue
            matched_pairs = show_matching_editor(matched_pairs, images_root)
            preview_table.update(values=build_preview_rows(matched_pairs))

        if event == '开始批量转换' and matched_pairs:
            if config.output_location == "custom" and not values['output_root']:
                sg.popup_error('请选择输出根文件夹(在"转换设置"中选择了"输出到指定文件夹")')
                continue
            try:
                progress_bar.update(0, visible=True)
                status_text.update('开始批量转换...')
                window.refresh()
                results = BatchProcessor.process_batch(matched_pairs, values['output_root'], update_batch_progress)
                show_results_window(results)
                status_text.update('状态: 批量转换完成')
            except Exception as e:
                sg.popup_error(f'批量处理失败: {str(e)}')
                status_text.update('状态: 批量转换失败')
            finally:
                # Always hide and reset the bar, whether the run succeeded or not.
                progress_bar.update(0, visible=False)

        # When a source folder is filled in and no output root is set yet,
        # default the output root to the TXT folder (or the images root).
        if (event == 'txt_folder' or event == 'images_root') and values[event] and not values['output_root']:
            default_output = values['txt_folder'] if values['txt_folder'] else values['images_root']
            window['output_root'].update(default_output)

    window.close()
# 程序入口
if __name__ == "__main__":
    # Start the GUI only when this file is run as a script, not when imported.
    main_window()