"""Batch-convert Markdown-flavoured TXT files into DOCX documents.

Each TXT file is paired with an image folder by name; while the document is
generated, one image is inserted after the first content element of every
section. A PySimpleGUI front end drives scanning, matching and conversion.

NOTE(review): the copy of this file that was reviewed had lost its line
structure and two spans of text (part of ``MarkdownParser.PATTERNS`` and the
region from the middle of ``DocxGenerator.apply_inline_formatting`` to the end
of ``BatchProcessor.process_batch``). Those spans were reconstructed from the
surrounding call sites and are marked with ``NOTE(review)`` below -- please
diff them against the authoritative source.
"""
import configparser
import glob
import os
import re
import subprocess
import sys

import PySimpleGUI as sg
from docx import Document
from docx.enum.style import WD_STYLE_TYPE
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Inches, Pt, RGBColor
from PIL import Image

from replacestr import replace_text

CONFIG_FILE_PATH = os.path.join(os.path.expanduser("~"), ".txt2md2docx.ini")


# Configuration
class Config:
    """User-adjustable settings, persisted as an ini file."""

    def __init__(self):
        # File handling
        self.txt_encoding = "utf-8"
        self.match_pattern = "exact"  # exact | prefix | contains
        self.output_location = "txt_folder"  # txt_folder | custom

        # Most recently used folders
        self.last_txt_folder = ""
        self.last_images_root = ""
        self.last_output_root = ""

        # Text processing
        self.reverse_text_order = False  # run text through replace_text()

        # Image handling
        self.image_sort_by = "name"  # name | time
        self.image_resize = "none"  # none | width
        self.image_width = 6  # inches
        self.image_alignment = "center"  # left | center | right
        self.image_strategy = "cycle"  # cycle | truncate | repeat_last

        # Document formatting
        self.line_spacing = 1.5
        self.title_levels = 6  # deepest heading level that is honoured
        self.replace_punctuation = False  # apply TextProcessor.replace_periods
        self.add_disclaimer = False  # append DISCLAIMER_TEXT to each document

    def load_from_file(self, file_path: str) -> bool:
        """Load settings from ``file_path``.

        Missing sections or options keep their current values.

        Returns:
            True when the file was read, False when it does not exist or
            cannot be parsed (settings loaded before the error are kept).
        """
        if not os.path.exists(file_path):
            return False

        # interpolation=None: values are plain strings (folder paths may
        # legitimately contain '%').
        parser = configparser.ConfigParser(interpolation=None)
        try:
            parser.read(file_path, encoding='utf-8')

            if 'FileHandling' in parser:
                section = 'FileHandling'
                self.txt_encoding = parser.get(
                    section, 'txt_encoding', fallback=self.txt_encoding)
                self.match_pattern = parser.get(
                    section, 'match_pattern', fallback=self.match_pattern)
                self.output_location = parser.get(
                    section, 'output_location', fallback=self.output_location)
                self.last_txt_folder = parser.get(
                    section, 'last_txt_folder', fallback=self.last_txt_folder)
                self.last_images_root = parser.get(
                    section, 'last_images_root', fallback=self.last_images_root)
                self.last_output_root = parser.get(
                    section, 'last_output_root', fallback=self.last_output_root)

            if 'TextProcessing' in parser:
                section = 'TextProcessing'
                self.reverse_text_order = parser.getboolean(
                    section, 'reverse_text_order',
                    fallback=self.reverse_text_order)
                self.replace_punctuation = parser.getboolean(
                    section, 'replace_punctuation',
                    fallback=self.replace_punctuation)
                self.add_disclaimer = parser.getboolean(
                    section, 'add_disclaimer', fallback=self.add_disclaimer)

            if 'ImageProcessing' in parser:
                section = 'ImageProcessing'
                self.image_sort_by = parser.get(
                    section, 'image_sort_by', fallback=self.image_sort_by)
                self.image_resize = parser.get(
                    section, 'image_resize', fallback=self.image_resize)
                self.image_width = parser.getfloat(
                    section, 'image_width', fallback=self.image_width)
                self.image_alignment = parser.get(
                    section, 'image_alignment', fallback=self.image_alignment)
                self.image_strategy = parser.get(
                    section, 'image_strategy', fallback=self.image_strategy)

            if 'DocumentFormat' in parser:
                section = 'DocumentFormat'
                self.line_spacing = parser.getfloat(
                    section, 'line_spacing', fallback=self.line_spacing)
                self.title_levels = parser.getint(
                    section, 'title_levels', fallback=self.title_levels)
        except (configparser.Error, ValueError):
            # A hand-edited or corrupted ini file must not prevent the
            # application from starting (this runs at import time).
            return False

        return True

    def save_to_file(self, file_path: str) -> bool:
        """Write all settings to ``file_path`` (UTF-8 ini).

        Returns:
            True on success.

        Raises:
            OSError: if the file cannot be written.
        """
        # interpolation=None: with the default interpolation a '%' in any
        # stored folder path makes configparser raise ValueError.
        parser = configparser.ConfigParser(interpolation=None)

        parser['FileHandling'] = {
            'txt_encoding': self.txt_encoding,
            'match_pattern': self.match_pattern,
            'output_location': self.output_location,
            'last_txt_folder': self.last_txt_folder,
            'last_images_root': self.last_images_root,
            'last_output_root': self.last_output_root,
        }
        parser['TextProcessing'] = {
            'reverse_text_order': str(self.reverse_text_order),
            'replace_punctuation': str(self.replace_punctuation),
            'add_disclaimer': str(self.add_disclaimer),
        }
        parser['ImageProcessing'] = {
            'image_sort_by': self.image_sort_by,
            'image_resize': self.image_resize,
            'image_width': str(self.image_width),
            'image_alignment': self.image_alignment,
            'image_strategy': self.image_strategy,
        }
        parser['DocumentFormat'] = {
            'line_spacing': str(self.line_spacing),
            'title_levels': str(self.title_levels),
        }

        with open(file_path, 'w', encoding='utf-8') as f:
            parser.write(f)
        return True


# Global configuration instance
config = Config()
config.load_from_file(CONFIG_FILE_PATH)


# Text processing helpers
class TextProcessor:
    """Text transformations applied to document content."""

    @staticmethod
    def replace_periods(text: str) -> str:
        """Drop a trailing '。' and turn every remaining '。' into ','.

        Trailing whitespace is stripped first; a blank result yields ''.
        """
        text = text.rstrip()
        if not text:
            return ''

        # Remove the final full stop, if any.
        if text[-1] == '。':
            text = text[:-1]

        # Replace the remaining full stops with commas.
        return text.replace('。', ',')

    @staticmethod
    def reverse_text_order(content):
        """Return ``content`` reversed character by character."""
        if not content:
            return content
        return content[::-1]

    @staticmethod
    def reverse_paragraph_order(content):
        """Return ``content`` with its lines in reverse order.

        The text inside each line is left untouched.
        """
        if not content:
            return content
        paragraphs = content.split('\n')
        return '\n'.join(reversed(paragraphs))

    @staticmethod
    def process_text_content(text):
        """Apply the configured text transformations to ``text``.

        Order: ``replace_text`` (when ``config.reverse_text_order`` is set),
        then ``replace_periods`` (when ``config.replace_punctuation`` is set).
        Empty or whitespace-only input is returned unchanged.
        """
        if not text or not text.strip():
            return text

        if config.reverse_text_order:
            text = replace_text(text)

        if config.replace_punctuation:
            text = TextProcessor.replace_periods(text)

        return text


# Markdown parser
class MarkdownParser:
    """Line-oriented parser for the Markdown subset this tool supports."""

    # NOTE(review): the entries from 'italic_asterisk' through the start of
    # 'blockquote' were missing from the reviewed copy. They were rebuilt from
    # the keys and capture groups that parse() and
    # extract_inline_formatting() use -- confirm against the original.
    PATTERNS = {
        'heading': re.compile(r'^(\s*)(#{1,6})\s+(.+)$'),
        'bold_asterisk': re.compile(r'\*\*(.+?)\*\*'),
        'bold_underscore': re.compile(r'__(.+?)__'),
        'italic_asterisk': re.compile(r'(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)'),
        'italic_underscore': re.compile(r'(?<!_)_(?!_)(.+?)(?<!_)_(?!_)'),
        'code_inline': re.compile(r'`([^`]+)`'),
        'strikethrough': re.compile(r'~~(.+?)~~'),
        'link': re.compile(r'\[([^\]]+)\]\(([^)]+)\)'),
        'unordered_list': re.compile(r'^\s*[-*+]\s+(.+)$'),
        'ordered_list': re.compile(r'^\s*\d+\.\s+(.+)$'),
        'blockquote': re.compile(r'^>\s*(.+)$'),
        'horizontal_rule': re.compile(r'^(\s*[-*_]){3,}\s*$'),
        'table_row': re.compile(r'^\|(.+)\|$'),
        'table_separator': re.compile(r'^\|(\s*:?-+:?\s*\|)+$'),
    }

    @staticmethod
    def parse(txt_content):
        """Parse Markdown text into a list of sections.

        Lines are classified in this order: code fence, table row, heading,
        horizontal rule, list item, blockquote, blank line, paragraph.

        Returns:
            The result of ``group_by_sections`` over the parsed elements.
        """
        patterns = MarkdownParser.PATTERNS
        elements = []

        in_code_block = False
        code_language = ''
        code_lines = []

        table_mode = False
        table_rows = []

        def flush_table():
            """Emit the table collected so far (if any) and reset state."""
            nonlocal table_mode, table_rows
            if table_mode and table_rows:
                elements.append({
                    'type': 'table',
                    'rows': table_rows,
                    'level': 0
                })
            table_mode = False
            table_rows = []

        def flush_code_block():
            """Emit the code block collected so far and reset state."""
            nonlocal in_code_block, code_lines
            elements.append({
                'type': 'code_block',
                'language': code_language,
                'content': '\n'.join(code_lines),
                'level': 0
            })
            in_code_block = False
            code_lines = []

        for raw_line in txt_content.split('\n'):
            line = raw_line.rstrip('\r')
            stripped = line.strip()

            # Code fences
            if stripped.startswith('```'):
                if in_code_block:
                    flush_code_block()
                else:
                    # A table directly followed by a fence ends here, so the
                    # table keeps its position ahead of the code block.
                    flush_table()
                    in_code_block = True
                    code_language = stripped[3:].strip()
                    code_lines = []
                continue

            if in_code_block:
                code_lines.append(line)
                continue

            # Tables ('table_row' also matches separator lines, hence the
            # explicit exclusion below)
            table_match = patterns['table_row'].match(line)
            table_sep_match = patterns['table_separator'].match(line)
            if table_match or table_sep_match:
                table_mode = True
                if table_match and not table_sep_match:
                    cells = [cell.strip()
                             for cell in table_match.group(1).split('|')]
                    table_rows.append(cells)
                continue
            flush_table()

            # Headings (levels deeper than config.title_levels fall through
            # and end up as ordinary paragraphs)
            heading_match = patterns['heading'].match(line)
            if heading_match:
                level = len(heading_match.group(2))
                if level <= config.title_levels:
                    heading_text = heading_match.group(3).strip()
                    # Plain-text variant with bold markers removed.
                    cleaned_text = re.sub(
                        r'\*\*(.+?)\*\*|__(.+?)__', r'\1\2', heading_text)
                    elements.append({
                        'type': 'heading',
                        'level': level,
                        'content': heading_text,
                        'cleaned_content': cleaned_text,
                        'paragraphs': []
                    })
                    continue

            # Horizontal rule
            if patterns['horizontal_rule'].match(line):
                elements.append({'type': 'horizontal_rule', 'level': 0})
                continue

            # Lists
            ul_match = patterns['unordered_list'].match(line)
            if ul_match:
                elements.append({
                    'type': 'unordered_list',
                    'content': ul_match.group(1),
                    'level': 0
                })
                continue

            ol_match = patterns['ordered_list'].match(line)
            if ol_match:
                elements.append({
                    'type': 'ordered_list',
                    'content': ol_match.group(1),
                    'level': 0
                })
                continue

            # Blockquote
            quote_match = patterns['blockquote'].match(line)
            if quote_match:
                elements.append({
                    'type': 'blockquote',
                    'content': quote_match.group(1),
                    'level': 0
                })
                continue

            # Blank line
            if stripped == '':
                elements.append({'type': 'empty', 'content': '', 'level': 0})
                continue

            # Ordinary paragraph
            elements.append({'type': 'paragraph', 'content': line, 'level': 0})

        # Flush whatever is still open at end of input.
        flush_table()
        if in_code_block:
            # Unterminated fence: keep the collected lines instead of
            # silently discarding them.
            flush_code_block()

        return MarkdownParser.group_by_sections(elements)

    @staticmethod
    def group_by_sections(elements):
        """Group parsed elements into sections delimited by headings.

        Elements before the first heading go into a level-0 section whose
        content is '前置内容'; it is omitted when empty. Every heading starts
        a section and is kept even when nothing follows it.
        """
        sections = []
        current_section = {
            'type': 'section',
            'level': 0,
            'content': '前置内容',
            'elements': []
        }

        def close_current():
            # Keep real headings even without body elements; only the
            # synthetic preface section is dropped when empty.
            if current_section['elements'] or current_section['level'] > 0:
                sections.append(current_section)

        for element in elements:
            if element['type'] == 'heading':
                close_current()
                current_section = {
                    'type': 'section',
                    'level': element['level'],
                    'content': element['content'],
                    'elements': []
                }
            else:
                current_section['elements'].append(element)

        close_current()
        return sections

    @staticmethod
    def extract_inline_formatting(text):
        """Locate inline Markdown spans in ``text``.

        Returns:
            A list of dicts sorted by ``start``. Each has ``type`` (bold,
            italic, code, strikethrough or link), ``start`` and ``end``
            offsets of the whole marked-up span, and ``content`` (links carry
            ``text`` and ``url`` instead).
        """
        patterns = MarkdownParser.PATTERNS
        formatting = []

        def overlaps(match, kinds):
            """True when ``match`` starts or ends inside a span of ``kinds``."""
            return any(
                f['start'] <= match.start() < f['end']
                or f['start'] < match.end() <= f['end']
                for f in formatting if f['type'] in kinds
            )

        def add_span(kind, match):
            formatting.append({
                'type': kind,
                'start': match.start(),
                'end': match.end(),
                'content': match.group(1)
            })

        # Bold (** and __)
        for match in patterns['bold_asterisk'].finditer(text):
            add_span('bold', match)
        for match in patterns['bold_underscore'].finditer(text):
            add_span('bold', match)

        # Italic (*), ignoring hits that collide with a bold span
        for match in patterns['italic_asterisk'].finditer(text):
            if not overlaps(match, ('bold',)):
                add_span('italic', match)

        # Italic (_), ignoring hits that collide with bold or italic spans
        for match in patterns['italic_underscore'].finditer(text):
            if not overlaps(match, ('bold', 'italic')):
                add_span('italic', match)

        # Inline code
        for match in patterns['code_inline'].finditer(text):
            add_span('code', match)

        # Strikethrough
        for match in patterns['strikethrough'].finditer(text):
            add_span('strikethrough', match)

        # Links
        for match in patterns['link'].finditer(text):
            formatting.append({
                'type': 'link',
                'start': match.start(),
                'end': match.end(),
                'text': match.group(1),
                'url': match.group(2)
            })

        formatting.sort(key=lambda x: x['start'])
        return formatting


# File handling
class FileHandler:
    """Locating, pairing and reading input files; choosing output paths."""

    IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.webp', '.tiff')

    @staticmethod
    def scan_txt_files(folder_path):
        """Recursively collect every ``.txt`` file under ``folder_path``.

        Returns:
            Dicts with ``path``, ``name`` (file name without extension),
            ``relative_path`` and ``folder``, sorted by ``relative_path``.

        Raises:
            Exception: if the folder does not exist or holds no TXT file.
        """
        if not os.path.isdir(folder_path):
            raise Exception(f"TXT文件夹不存在: {folder_path}")

        txt_files = []
        for root, _dirs, files in os.walk(folder_path):
            for file in files:
                if file.lower().endswith(".txt"):
                    txt_path = os.path.join(root, file)
                    txt_files.append({
                        "path": txt_path,
                        "name": os.path.splitext(file)[0],
                        "relative_path": os.path.relpath(txt_path, folder_path),
                        "folder": root
                    })

        if not txt_files:
            raise Exception(f"在 {folder_path} 中未找到任何TXT文件")

        return sorted(txt_files, key=lambda x: x["relative_path"])

    @staticmethod
    def find_matching_image_folders(txt_files, images_root):
        """Pair each TXT file with image folders found under ``images_root``.

        Names are compared case-insensitively according to
        ``config.match_pattern``. When several folders match, the one with
        the shortest relative path is chosen as ``image_folder``.

        Returns:
            One dict per TXT file with ``txt``, ``image_folder`` (or None)
            and ``all_matches``.

        Raises:
            Exception: if ``images_root`` is not a directory.
        """
        if not os.path.isdir(images_root):
            raise Exception(f"图片根文件夹不存在: {images_root}")

        all_image_folders = []
        for root, dirs, _ in os.walk(images_root):
            for dir_name in dirs:
                folder_path = os.path.join(root, dir_name)
                all_image_folders.append({
                    "path": folder_path,
                    "name": dir_name,
                    "relative_path": os.path.relpath(folder_path, images_root)
                })

        def is_match(txt_name, folder_name):
            if config.match_pattern == "exact":
                return txt_name == folder_name
            if config.match_pattern == "prefix":
                return folder_name.startswith(txt_name)
            if config.match_pattern == "contains":
                return txt_name in folder_name
            return False

        matched_pairs = []
        for txt in txt_files:
            txt_name = txt["name"].lower()
            matches = [folder for folder in all_image_folders
                       if is_match(txt_name, folder["name"].lower())]
            matches.sort(key=lambda x: len(x["relative_path"]))
            matched_pairs.append({
                "txt": txt,
                "image_folder": matches[0] if matches else None,
                "all_matches": matches
            })

        return matched_pairs

    @staticmethod
    def get_image_files(folder_path):
        """List the image files directly inside ``folder_path``.

        Extensions are compared case-insensitively. The result is sorted by
        path or by modification time according to ``config.image_sort_by``.
        Returns an empty list for a missing or empty ``folder_path``.
        """
        if not folder_path or not os.path.isdir(folder_path):
            return []

        # glob.escape: folder names may contain glob metacharacters such as
        # '[' or ']', which would otherwise be read as a pattern.
        candidates = glob.glob(os.path.join(glob.escape(folder_path), '*'))
        image_files = [
            path for path in candidates
            if path.lower().endswith(FileHandler.IMAGE_SUFFIXES)
            and os.path.isfile(path)
        ]

        if config.image_sort_by == "name":
            image_files.sort()
        elif config.image_sort_by == "time":
            image_files.sort(key=os.path.getmtime)

        return image_files

    @staticmethod
    def read_markdown_txt(file_path):
        """Read a TXT file, trying several encodings in turn.

        The configured encoding is tried first, then gbk, utf-16 and
        iso-8859-1. Line endings are normalised to ``\\n``.

        Raises:
            Exception: if the file is missing or no encoding can decode it.
        """
        if not os.path.exists(file_path):
            raise Exception(f"TXT文件不存在: {file_path}")

        encodings = [config.txt_encoding, "gbk", "utf-16", "iso-8859-1"]
        for encoding in encodings:
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    content = f.read()
            except (UnicodeError, LookupError):
                # LookupError: an unknown encoding name in the ini file
                # should fall through to the next candidate as well.
                continue
            return content.replace("\r\n", "\n").replace("\r", "\n")

        raise Exception(f"无法解析TXT文件(编码问题): {file_path}")

    @staticmethod
    def prepare_output_path(txt_info, images_root, output_root):
        """Choose a DOCX path that does not exist yet for ``txt_info``.

        The target folder is the TXT file's folder or ``output_root``,
        depending on ``config.output_location``; it is created if needed.
        ``_1``, ``_2``, ... is appended to the name until it is unused.
        ``images_root`` is accepted but not used.
        """
        if config.output_location == "txt_folder":
            base_folder = txt_info["folder"]
        else:
            base_folder = output_root

        os.makedirs(base_folder, exist_ok=True)

        txt_name = txt_info["name"]
        output_path = os.path.join(base_folder, f"{txt_name}.docx")

        counter = 1
        while os.path.exists(output_path):
            output_path = os.path.join(base_folder, f"{txt_name}_{counter}.docx")
            counter += 1

        return output_path


# Image handling
class ImageProcessor:
    """Image preparation ahead of insertion into the document."""

    @staticmethod
    def process_image(image_path):
        """Load an image, honour its EXIF orientation and optionally shrink it.

        Returns:
            ``(image, width_in_inches)``. In ``width`` resize mode the width
            is ``config.image_width``; otherwise it is the pixel width / 96.
            The returned image is independent of the source file handle.

        Raises:
            Exception: wrapping any error raised while processing.
        """
        try:
            with Image.open(image_path) as opened:
                img = opened

                # EXIF orientation (tag 274): 3 = 180 deg, 6 and 8 = quarter
                # turns.
                if hasattr(opened, '_getexif'):
                    exif = opened._getexif()
                    if exif:
                        orientation = exif.get(274)
                        if orientation == 3:
                            img = img.rotate(180, expand=True)
                        elif orientation == 6:
                            img = img.rotate(270, expand=True)
                        elif orientation == 8:
                            img = img.rotate(90, expand=True)

                resize = config.image_resize == "width" and config.image_width > 0
                if resize:
                    target_width_px = config.image_width * 96
                    width, height = img.size
                    if width > target_width_px:
                        ratio = target_width_px / width
                        new_height = int(height * ratio)
                        img = img.resize(
                            (int(target_width_px), new_height), Image.LANCZOS)
                    width_in = config.image_width
                else:
                    width_in = img.width / 96

                # Leaving the ``with`` block closes ``opened`` and makes its
                # pixel data unusable, so hand back a detached copy when no
                # transformation produced a new image.
                if img is opened:
                    img = opened.copy()

            return img, width_in
        except Exception as e:
            raise Exception(f"处理图片失败 {image_path}: {str(e)}")

    @staticmethod
    def get_image_alignment():
        """Map ``config.image_alignment`` to a paragraph alignment (default centre)."""
        if config.image_alignment == "left":
            return WD_ALIGN_PARAGRAPH.LEFT
        if config.image_alignment == "right":
            return WD_ALIGN_PARAGRAPH.RIGHT
        return WD_ALIGN_PARAGRAPH.CENTER


DISCLAIMER_TEXT = """`[免责声明]文章的时间、过程、图片均来自于网络,文章旨在传播正能量,均无低俗等不良引导,请观众勿对号入座,并上升到人身攻击等方面。观众理性看待本事件,切勿留下主观臆断的恶意评论,互联网不是法外之地。本文如若真实性存在争议、事件版权或图片侵权问题,请及时联系作者,我们将予以删除。`"""


# DOCX generation
class DocxGenerator:
    """Builds the DOCX document from parsed sections and images."""

    # Image modes that can be written to PNG directly; anything else (for
    # example CMYK) is converted to RGB before the temporary file is saved.
    _PNG_SAFE_MODES = ("1", "L", "LA", "P", "RGB", "RGBA", "I", "I;16")

    @staticmethod
    def generate(sections, image_files, output_path, progress_callback=None):
        """Write ``sections`` to a DOCX file at ``output_path``.

        One image from ``image_files`` is inserted after the first non-empty
        element of each section. When the images run out,
        ``config.image_strategy`` decides: ``cycle`` restarts from the first,
        ``repeat_last`` keeps using the last one, anything else stops
        inserting.

        Args:
            sections: output of ``MarkdownParser.parse``.
            image_files: image paths, in insertion order.
            output_path: destination file.
            progress_callback: optional ``callback(percent, message)``.

        Returns:
            True on success.

        Raises:
            Exception: if the document cannot be saved.
        """
        doc = Document()

        total_sections = len(sections)
        image_index = 0
        image_count = len(image_files)

        for i, section in enumerate(sections):
            if progress_callback:
                progress = int((i / total_sections) * 100)
                progress_callback(progress, f"处理章节: {section['content'][:30]}...")

            # Heading. apply_inline_formatting() runs the text processing
            # itself, so the raw content is passed to avoid applying
            # replace_text() twice.
            if 0 < section['level'] <= config.title_levels:
                para = doc.add_heading(level=section['level'])
                DocxGenerator.apply_inline_formatting(para, section['content'])
            elif section['content'] != '前置内容':
                heading_text = TextProcessor.process_text_content(section['content'])
                para = doc.add_paragraph()
                run = para.add_run(heading_text)
                run.font.size = Pt(14)
                run.font.bold = True
                # Spacing lives on paragraph_format; assigning to
                # ``para.space_after`` only creates an unused attribute.
                para.paragraph_format.space_after = Pt(12)

            elements = section.get('elements', [])
            if not elements:
                continue

            first_content_added = False
            for element in elements:
                DocxGenerator.add_element_to_doc(doc, element)

                # Insert one image right after the first real element.
                if first_content_added or element['type'] == 'empty':
                    continue
                first_content_added = True

                if image_index < image_count:
                    try:
                        DocxGenerator.insert_image(
                            doc, image_files[image_index], output_path)
                    except Exception as e:
                        doc.add_paragraph(f"[图片插入失败: {str(e)}]")
                    # Advance even after a failure so one broken image does
                    # not get retried for every following section.
                    image_index = DocxGenerator._next_image_index(
                        image_index, image_count)

        # Disclaimer
        if config.add_disclaimer:
            doc.add_paragraph("---")
            para = doc.add_paragraph()
            disclaimer_text = TextProcessor.process_text_content(DISCLAIMER_TEXT)
            run = para.add_run(disclaimer_text)
            run.font.size = Pt(10)
            para.paragraph_format.line_spacing = 1.0

        try:
            doc.save(output_path)
        except Exception as e:
            raise Exception(f"保存DOCX失败: {str(e)}")

        if progress_callback:
            progress_callback(100, "转换完成!")
        return True

    @staticmethod
    def _next_image_index(index, count):
        """Return the index of the image to use after ``index``.

        A result equal to ``count`` means no further image is inserted.
        """
        following = index + 1
        if following < count:
            return following
        if config.image_strategy == "cycle":
            return 0
        if config.image_strategy == "repeat_last":
            return count - 1
        return count  # truncate (also the fallback for unknown values)

    @staticmethod
    def add_element_to_doc(doc, element):
        """Append one parsed element to ``doc`` according to its type."""
        etype = element['type']
        # Raw content: the formatting helpers below apply the configured text
        # processing themselves.
        content = element.get('content', '')

        if etype == 'paragraph':
            DocxGenerator.add_formatted_paragraph(doc, content)
        elif etype == 'unordered_list':
            para = doc.add_paragraph(style='List Bullet')
            DocxGenerator.apply_inline_formatting(para, content)
        elif etype == 'ordered_list':
            para = doc.add_paragraph(style='List Number')
            DocxGenerator.apply_inline_formatting(para, content)
        elif etype == 'blockquote':
            para = doc.add_paragraph(style='Quote')
            DocxGenerator.apply_inline_formatting(para, content)
        elif etype == 'code_block':
            # Code is inserted verbatim, without text processing.
            para = doc.add_paragraph(style='No Spacing')
            run = para.add_run(element['content'])
            run.font.name = 'Courier New'
            run.font.size = Pt(10)
        elif etype == 'table':
            DocxGenerator.add_table_to_doc(doc, element['rows'])
        elif etype == 'horizontal_rule':
            DocxGenerator.add_horizontal_rule(doc)
        elif etype == 'empty':
            doc.add_paragraph()

    @staticmethod
    def add_horizontal_rule(doc):
        """Add a centred run of 100 underlined spaces as a visual rule."""
        para = doc.add_paragraph()
        run = para.add_run()
        run.font.underline = True
        run.text = " " * 100
        para.alignment = WD_ALIGN_PARAGRAPH.CENTER

    @staticmethod
    def add_table_to_doc(doc, rows):
        """Add a 'Table Grid' table holding ``rows`` (a list of cell lists).

        The table is as wide as the longest row; shorter rows leave their
        trailing cells empty. Cell text goes through the text processing.
        """
        if not rows:
            return

        column_count = max(len(row) for row in rows)
        table = doc.add_table(rows=len(rows), cols=column_count)
        table.style = 'Table Grid'

        for i, row_data in enumerate(rows):
            row_cells = table.rows[i].cells
            for j, cell_data in enumerate(row_data):
                row_cells[j].text = TextProcessor.process_text_content(cell_data)

    @staticmethod
    def insert_image(doc, image_path, output_path):
        """Insert ``image_path`` into ``doc`` as its own aligned paragraph.

        In ``width`` resize mode the processed image is written to a
        temporary PNG next to ``output_path`` and inserted from there; the
        temporary file is removed afterwards. Otherwise the original file is
        inserted as it is.
        """
        img, width = ImageProcessor.process_image(image_path)
        temp_img_path = None

        try:
            if config.image_resize == "width":
                temp_dir = os.path.dirname(output_path) or '.'
                os.makedirs(temp_dir, exist_ok=True)
                temp_img_path = os.path.join(
                    temp_dir, f"temp_img_{hash(image_path)}.png")
                if img.mode not in DocxGenerator._PNG_SAFE_MODES:
                    img = img.convert('RGB')
                img.save(temp_img_path)
                img_path = temp_img_path
            else:
                img_path = image_path

            para = doc.add_paragraph()
            run = para.add_run()
            run.add_picture(img_path, width=Inches(width))
            para.alignment = ImageProcessor.get_image_alignment()
        finally:
            if temp_img_path and os.path.exists(temp_img_path):
                try:
                    os.remove(temp_img_path)
                except OSError:
                    pass  # best effort: a leftover temp file is harmless

    @staticmethod
    def add_formatted_paragraph(doc, content):
        """Add ``content`` as a paragraph with inline formatting applied.

        Blank content yields an empty paragraph. ``config.line_spacing`` is
        applied when it is positive.
        """
        if not content or not content.strip():
            doc.add_paragraph()
            return

        para = doc.add_paragraph()
        DocxGenerator.apply_inline_formatting(para, content)

        if config.line_spacing > 0:
            para.paragraph_format.line_spacing = config.line_spacing

    @staticmethod
    def apply_inline_formatting(paragraph, text):
        """Append ``text`` to ``paragraph`` as runs carrying inline formatting.

        The configured text processing is applied first, then Markdown spans
        are located in the processed text and emitted as formatted runs with
        the markers removed.

        NOTE(review): the italic/code/strikethrough/link branches and the
        trailing-text handling were missing from the reviewed copy and have
        been reconstructed; the link styling in particular is an assumption.
        """
        processed_text = TextProcessor.process_text_content(text)

        # Spans are extracted after processing because offsets may move.
        formatting = MarkdownParser.extract_inline_formatting(processed_text)

        if not formatting:
            paragraph.add_run(processed_text)
            return

        current_pos = 0
        for fmt in formatting:
            # Spans are sorted by start; one that begins inside text already
            # emitted would duplicate characters, so it is skipped.
            if fmt['start'] < current_pos:
                continue

            # Plain text preceding the span
            if fmt['start'] > current_pos:
                paragraph.add_run(processed_text[current_pos:fmt['start']])

            kind = fmt['type']
            if kind == 'bold':
                run = paragraph.add_run(fmt['content'])
                run.bold = True
            elif kind == 'italic':
                run = paragraph.add_run(fmt['content'])
                run.italic = True
            elif kind == 'code':
                run = paragraph.add_run(fmt['content'])
                run.font.name = 'Courier New'
            elif kind == 'strikethrough':
                run = paragraph.add_run(fmt['content'])
                run.font.strike = True
            elif kind == 'link':
                # Rendered as styled text only; no hyperlink relationship is
                # created.
                run = paragraph.add_run(fmt['text'])
                run.font.underline = True
                run.font.color.rgb = RGBColor(0, 0, 255)

            current_pos = fmt['end']

        # Plain text after the last span
        if current_pos < len(processed_text):
            paragraph.add_run(processed_text[current_pos:])


# Batch processing
class BatchProcessor:
    """Runs the conversion for every matched TXT file.

    NOTE(review): only the tail of ``process_batch`` (from the
    ``main_output_folder`` computation onwards) survived in the reviewed
    copy. The loop and both progress messages are a reconstruction based on
    how ``main_window`` and ``show_results_window`` use this class.
    """

    @staticmethod
    def process_batch(matched_pairs, output_root, progress_callback=None):
        """Convert every pair in ``matched_pairs``.

        A failure in one file is recorded and does not stop the batch.

        Args:
            matched_pairs: output of ``FileHandler.find_matching_image_folders``.
            output_root: target folder for the ``custom`` output location.
            progress_callback: optional ``callback(percent, message)``.

        Returns:
            Dict with ``total``, ``success``, ``failed``, ``failed_items``
            (dicts with ``name`` and ``error``) and ``main_output_folder``.
        """
        total = len(matched_pairs)
        success_count = 0
        failed_items = []

        for index, pair in enumerate(matched_pairs):
            txt = pair['txt']
            if progress_callback:
                progress_callback(
                    int((index / total) * 100),
                    f"正在处理 ({index + 1}/{total}): {txt['name']}")

            try:
                content = FileHandler.read_markdown_txt(txt['path'])
                sections = MarkdownParser.parse(content)
                image_folder = pair['image_folder']
                image_files = (FileHandler.get_image_files(image_folder['path'])
                               if image_folder else [])
                output_path = FileHandler.prepare_output_path(txt, "", output_root)
                DocxGenerator.generate(sections, image_files, output_path)
                success_count += 1
            except Exception as e:
                failed_items.append({"name": txt['name'], "error": str(e)})

        if progress_callback:
            progress_callback(100, "批量转换完成")

        if total > 0:
            sample_output = FileHandler.prepare_output_path(
                matched_pairs[0]['txt'], "", output_root)
            main_output_folder = os.path.dirname(sample_output)
        else:
            main_output_folder = ""

        return {
            "total": total,
            "success": success_count,
            "failed": len(failed_items),
            "failed_items": failed_items,
            "main_output_folder": main_output_folder
        }


# Settings window
def show_config_window():
    """Show the modal settings dialog; on confirm, update and save ``config``."""
    layout = [
        [sg.Text('文件匹配设置', font=('bold', 12))],
        [sg.Text('TXT编码:'),
         sg.Combo(['utf-8', 'gbk', 'utf-16'],
                  default_value=config.txt_encoding, key='txt_encoding')],
        [sg.Text('匹配模式:'),
         sg.Radio('完全匹配(文件名与文件夹名相同)', 'match',
                  default=config.match_pattern == "exact", key='match_exact'),
         sg.Radio('前缀匹配', 'match',
                  default=config.match_pattern == "prefix", key='match_prefix'),
         sg.Radio('包含匹配', 'match',
                  default=config.match_pattern == "contains", key='match_contains')],
        [sg.HSeparator()],
        [sg.Checkbox('转换文字顺序', key='-REVERSE_TEXT-',
                     default=config.reverse_text_order)],
        [sg.HSeparator()],
        [sg.Checkbox('替换标点符号(句号转逗号,保留结尾句号)',
                     key='-REPLACE_PUNCTUATION-',
                     default=config.replace_punctuation)],
        [sg.HSeparator()],
        [sg.Checkbox('添加免责声明', key='-ADD_DISCLAIMER-',
                     default=config.add_disclaimer)],
        [sg.HSeparator()],
        [sg.Radio('输出到TXT文件所在文件夹', 'output_loc',
                  default=config.output_location == "txt_folder",
                  key='output_txt_folder'),
         sg.Radio('输出到指定文件夹', 'output_loc',
                  default=config.output_location == "custom",
                  key='output_custom')],
        [sg.HSeparator()],
        [sg.Text('图片处理设置', font=('bold', 12))],
        [sg.Text('图片排序方式:'),
         sg.Radio('按名称', 'sort',
                  default=config.image_sort_by == "name", key='sort_name'),
         sg.Radio('按修改时间', 'sort',
                  default=config.image_sort_by == "time", key='sort_time')],
        [sg.Text('图片尺寸调整:'),
         sg.Radio('不调整', 'resize',
                  default=config.image_resize == "none", key='resize_none'),
         sg.Radio('按宽度:', 'resize',
                  default=config.image_resize == "width", key='resize_width'),
         sg.InputText(str(config.image_width), size=(5, 1), key='image_width'),
         sg.Text('英寸')],
        [sg.Text('图片对齐方式:'),
         sg.Radio('左对齐', 'align',
                  default=config.image_alignment == "left", key='align_left'),
         sg.Radio('居中', 'align',
                  default=config.image_alignment == "center", key='align_center'),
         sg.Radio('右对齐', 'align',
                  default=config.image_alignment == "right", key='align_right')],
        [sg.HSeparator()],
        [sg.Text('图片不足时策略', font=('bold', 12))],
        [sg.Radio('循环使用', 'strategy',
                  default=config.image_strategy == "cycle", key='strategy_cycle'),
         sg.Radio('忽略多余标题', 'strategy',
                  default=config.image_strategy == "truncate",
                  key='strategy_truncate'),
         sg.Radio('重复最后一张', 'strategy',
                  default=config.image_strategy == "repeat_last",
                  key='strategy_repeat')],
        [sg.HSeparator()],
        [sg.Button('确定'), sg.Button('取消')]
    ]

    window = sg.Window('转换设置', layout, modal=True, resizable=True)

    while True:
        event, values = window.read()

        if event in (sg.WIN_CLOSED, '取消'):
            break

        if event == '确定':
            config.txt_encoding = values['txt_encoding']

            if values['match_exact']:
                config.match_pattern = "exact"
            elif values['match_prefix']:
                config.match_pattern = "prefix"
            else:
                config.match_pattern = "contains"

            config.output_location = (
                "txt_folder" if values['output_txt_folder'] else "custom")
            config.image_sort_by = "name" if values['sort_name'] else "time"
            config.image_resize = "none" if values['resize_none'] else "width"
            config.reverse_text_order = values['-REVERSE_TEXT-']
            config.replace_punctuation = values['-REPLACE_PUNCTUATION-']
            config.add_disclaimer = values['-ADD_DISCLAIMER-']

            try:
                config.image_width = float(values['image_width'])
            except ValueError:
                pass  # keep the previous width when the entry is not a number

            if values['align_left']:
                config.image_alignment = "left"
            elif values['align_right']:
                config.image_alignment = "right"
            else:
                config.image_alignment = "center"

            if values['strategy_cycle']:
                config.image_strategy = "cycle"
            elif values['strategy_truncate']:
                config.image_strategy = "truncate"
            else:
                config.image_strategy = "repeat_last"

            config.save_to_file(CONFIG_FILE_PATH)
            break

    window.close()


# Matching editor window
def show_matching_editor(matched_pairs, images_root):
    """Let the user reassign or clear the image folder of each TXT file.

    ``matched_pairs`` is modified in place and also returned.
    """
    all_image_folders = []
    if os.path.isdir(images_root):
        for root, dirs, _ in os.walk(images_root):
            for dir_name in dirs:
                folder_path = os.path.join(root, dir_name)
                rel_path = os.path.relpath(folder_path, images_root)
                all_image_folders.append((folder_path, rel_path))

    table_data = []
    for i, pair in enumerate(matched_pairs):
        txt_name = pair['txt']['name']
        img_folder = (pair['image_folder']['relative_path']
                      if pair['image_folder'] else "无匹配")
        table_data.append([i, txt_name, img_folder])

    layout = [
        [sg.Text('文件匹配编辑', font=('bold', 14))],
        [sg.Text('选择要修改的项目,然后从右侧选择图片文件夹')],
        [
            sg.Table(
                values=table_data,
                headings=['序号', 'TXT文件名', '匹配的图片文件夹'],
                key='-TABLE-',
                select_mode=sg.TABLE_SELECT_MODE_BROWSE,
                enable_events=True,
                justification='left',
                size=(None, 15)
            ),
            sg.VSeparator(),
            sg.Listbox(
                values=[f[1] for f in all_image_folders],
                key='-FOLDERS-',
                size=(40, 15),
                enable_events=True
            )
        ],
        [sg.Button('设置选中项'), sg.Button('清除选中项'), sg.Button('应用所有')]
    ]

    window = sg.Window('匹配编辑', layout, resizable=True)
    selected_row = None

    while True:
        event, values = window.read()

        if event in (sg.WIN_CLOSED, '应用所有'):
            break

        if event == '-TABLE-':
            if values['-TABLE-']:
                selected_row = values['-TABLE-'][0]

        if event == '设置选中项' and selected_row is not None and values['-FOLDERS-']:
            chosen_rel = values['-FOLDERS-'][0]
            folder_idx = [i for i, f in enumerate(all_image_folders)
                          if f[1] == chosen_rel][0]
            folder_path, folder_rel = all_image_folders[folder_idx]

            matched_pairs[selected_row]['image_folder'] = {
                "path": folder_path,
                "name": os.path.basename(folder_path),
                "relative_path": folder_rel
            }

            table_data[selected_row][2] = folder_rel
            window['-TABLE-'].update(values=table_data)

        if event == '清除选中项' and selected_row is not None:
            matched_pairs[selected_row]['image_folder'] = None
            table_data[selected_row][2] = "无匹配"
            window['-TABLE-'].update(values=table_data)

    window.close()
    return matched_pairs


# Help window
def show_help_window():
    """Show the usage instructions in a scrollable popup."""
    help_text = """
    批量Markdown TXT转DOCX工具使用说明:

    1. 选择包含Markdown内容的TXT文件所在文件夹
    2. 选择图片文件夹的根目录(程序会自动查找子文件夹)
    3. 选择输出文件的保存根目录(当选择"输出到指定文件夹"时有效)
    4. 点击"扫描文件"按钮,程序会自动匹配TXT文件和图片文件夹
    5. 查看匹配结果,可点击"编辑匹配"调整匹配关系
    6. 点击"开始批量转换"生成DOCX文件

    支持的Markdown格式:
    - 标题:# ## ### #### ##### ######
    - 粗体:**文字** 或 __文字__
    - 斜体:*文字* 或 _文字_
    - 行内代码:`代码`
    - 代码块:```语言\\n代码\\n```
    - 删除线:~~文字~~
    - 链接:[链接文字](URL)
    - 图片:![图片描述](图片路径)
    - 无序列表:- 或 * 或 +
    - 有序列表:1. 2. 3.
    - 引用:> 引用内容
    - 表格:| 列1 | 列2 |
    - 水平分隔线:--- 或 *** 或 ___

    输出路径选择:
    - 输出到TXT文件所在文件夹: 每个DOCX文件会直接保存在对应TXT文件所在的文件夹中
    - 输出到指定文件夹: 所有DOCX文件会直接保存在您指定的文件夹中

    匹配规则:
    - 完全匹配: TXT文件名(不含扩展名)与图片文件夹名完全相同
    - 前缀匹配: 图片文件夹名以前缀形式包含TXT文件名
    - 包含匹配: 图片文件夹名中包含TXT文件名

    转换规则:
    - 每个小标题的第一段后会插入一张图片
    - 先将Markdown格式转换为DOCX格式,再处理文字内容
    - 支持文字顺序调换和标点符号替换功能
    """
    sg.popup_scrolled('使用帮助', help_text, size=(70, 25))


# Results window
def show_results_window(results):
    """Summarise a batch run and offer to open the main output folder.

    Args:
        results: the dict returned by ``BatchProcessor.process_batch``.
    """
    output_folder = results['main_output_folder']

    if results['failed'] == 0:
        message = f"全部成功!\n共处理 {results['total']} 个文件,全部转换成功。"
        if output_folder:
            message += f"\n主要输出文件夹: {output_folder}"
        sg.popup('处理完成', message)
    else:
        failed_text = "\n".join(
            f"- {item['name']}: {item['error']}"
            for item in results['failed_items'])
        message = (f"处理完成!\n共处理 {results['total']} 个文件,"
                   f"{results['success']} 个成功,{results['failed']} 个失败。\n\n"
                   f"失败项:\n{failed_text}")
        if output_folder:
            message += f"\n主要输出文件夹: {output_folder}"
        sg.popup_scrolled('处理完成', message, size=(60, 20))

    # Offer to open the output folder
    if output_folder and os.path.exists(output_folder):
        if sg.popup_yes_no('是否打开主要输出文件夹?') == 'Yes':
            try:
                if sys.platform.startswith('win'):
                    os.startfile(output_folder)
                elif sys.platform.startswith('darwin'):
                    # Argument list instead of a shell string: the folder
                    # name is never interpreted by a shell.
                    subprocess.run(['open', output_folder], check=False)
                else:
                    subprocess.run(['xdg-open', output_folder], check=False)
            except OSError:
                pass  # opening the folder is a convenience only


def _build_preview_rows(matched_pairs):
    """Build the rows shown in the main window's preview table."""
    rows = []
    for pair in matched_pairs:
        img_folder = (pair['image_folder']['relative_path']
                      if pair['image_folder'] else "无匹配")
        rows.append([
            pair['txt']['name'],
            pair['txt']['relative_path'],
            img_folder
        ])
    return rows


# Main window
def main_window():
    """Run the main window and its event loop until the user exits."""
    sg.theme('BlueMono')

    matched_pairs = []

    layout = [
        [sg.Text('批量Markdown TXT转DOCX工具', font=('bold', 16))],
        [sg.Text('(按文件名匹配TXT文件和图片文件夹,支持完整Markdown格式)',
                 text_color='gray')],
        [sg.HSeparator()],
        [sg.Text('TXT文件文件夹:', size=(15, 1)),
         sg.InputText(key='txt_folder', enable_events=True,
                      default_text=config.last_txt_folder),
         sg.FolderBrowse('浏览')],
        [sg.Text('图片根文件夹:', size=(15, 1)),
         sg.InputText(key='images_root', enable_events=True,
                      default_text=config.last_images_root),
         sg.FolderBrowse('浏览')],
        [sg.Text('输出根文件夹:', size=(15, 1)),
         sg.InputText(key='output_root', enable_events=True,
                      default_text=config.last_output_root),
         sg.FolderBrowse('浏览'),
         sg.Text('(当选择"输出到指定文件夹"时有效)', text_color='gray')],
        [sg.Button('扫描文件', size=(12, 1)),
         sg.Button('编辑匹配', size=(12, 1), disabled=True),
         sg.Button('转换设置', size=(12, 1)),
         sg.Button('帮助', size=(8, 1))],
        [sg.HSeparator()],
        [sg.Text('匹配结果预览:', font=('bold', 10))],
        [sg.Table(
            values=[],
            headings=['TXT文件名', '相对路径', '匹配的图片文件夹'],
            key='-PREVIEW_TABLE-',
            auto_size_columns=False,
            col_widths=[20, 30, 30],
            justification='left',
            size=(None, 10)
        )],
        [sg.ProgressBar(100, orientation='h', size=(80, 20),
                        key='progress_bar', visible=False)],
        [sg.Text('状态: 就绪', key='status_text', size=(80, 1))],
        [sg.Button('开始批量转换', size=(15, 1), disabled=True), sg.Button('退出')]
    ]

    window = sg.Window('批量Markdown TXT转DOCX工具', layout, resizable=True)
    progress_bar = window['progress_bar']
    status_text = window['status_text']
    preview_table = window['-PREVIEW_TABLE-']
    output_root_input = window['output_root']

    def update_output_root_state():
        """Enable the output-root field only for the 'custom' location."""
        if config.output_location == "custom":
            output_root_input.update(disabled=False)
            output_root_input.Widget.configure(foreground='black')
        else:
            output_root_input.update(disabled=True)
            output_root_input.Widget.configure(foreground='gray')

    # One short read so the widgets exist before they are configured.
    window.read(timeout=1)
    update_output_root_state()

    while True:
        event, values = window.read()

        if event in (sg.WIN_CLOSED, '退出'):
            if values is not None:
                # ``or ''``: a value may be None once the window is closed,
                # and configparser only accepts strings.
                config.last_txt_folder = values.get('txt_folder') or ''
                config.last_images_root = values.get('images_root') or ''
                config.last_output_root = values.get('output_root') or ''
                config.save_to_file(CONFIG_FILE_PATH)
            break

        if event == '转换设置':
            current_output_root = values['output_root']
            show_config_window()
            update_output_root_state()
            window['output_root'].update(current_output_root)

        if event == '帮助':
            show_help_window()

        if event == '扫描文件':
            txt_folder = values['txt_folder']
            images_root = values['images_root']

            if not txt_folder:
                sg.popup_error('请选择TXT文件所在的文件夹')
                continue
            if not images_root:
                sg.popup_error('请选择图片根文件夹')
                continue

            config.last_txt_folder = txt_folder
            config.last_images_root = images_root
            if values['output_root']:
                config.last_output_root = values['output_root']
            config.save_to_file(CONFIG_FILE_PATH)

            try:
                status_text.update('正在扫描TXT文件...')
                window.refresh()
                txt_files = FileHandler.scan_txt_files(txt_folder)

                status_text.update('正在匹配图片文件夹...')
                window.refresh()
                matched_pairs = FileHandler.find_matching_image_folders(
                    txt_files, images_root)

                preview_table.update(values=_build_preview_rows(matched_pairs))
                status_text.update(f'扫描完成: 找到 {len(matched_pairs)} 个TXT文件')

                window['编辑匹配'].update(disabled=False)
                window['开始批量转换'].update(disabled=False)

            except Exception as e:
                sg.popup_error(f'扫描失败: {str(e)}')
                status_text.update('状态: 扫描失败')

        if event == '编辑匹配' and matched_pairs:
            images_root = values['images_root']
            if not images_root:
                sg.popup_error('请选择图片根文件夹')
                continue

            matched_pairs = show_matching_editor(matched_pairs, images_root)
            preview_table.update(values=_build_preview_rows(matched_pairs))

        if event == '开始批量转换' and matched_pairs:
            if config.output_location == "custom" and not values['output_root']:
                sg.popup_error('请选择输出根文件夹(在"转换设置"中选择了"输出到指定文件夹")')
                continue

            try:
                progress_bar.update(0, visible=True)
                status_text.update('开始批量转换...')
                window.refresh()

                def update_batch_progress(progress, text):
                    progress_bar.update(progress)
                    status_text.update(f'状态: {text}')
                    window.refresh()

                results = BatchProcessor.process_batch(
                    matched_pairs,
                    values['output_root'],
                    update_batch_progress
                )

                show_results_window(results)
                status_text.update('状态: 批量转换完成')

            except Exception as e:
                sg.popup_error(f'批量处理失败: {str(e)}')
                status_text.update('状态: 批量转换失败')

        # Pre-fill the output root from the first folder the user picks.
        if (event in ('txt_folder', 'images_root')
                and values[event] and not values['output_root']):
            default_output = (values['txt_folder'] if values['txt_folder']
                              else values['images_root'])
            window['output_root'].update(default_output)

    window.close()


# Entry point
if __name__ == '__main__':
    main_window()