From 0c77c42addf7f4a17d1150d44be084e1b3482cf5 Mon Sep 17 00:00:00 2001
From: taiyi <taiyi1224@qq.com>
Date: Wed, 10 Sep 2025 10:35:03 +0800
Subject: [PATCH] =?UTF-8?q?=E7=AC=AC=E4=B8=80=E6=AC=A1=E6=8F=90=E4=BA=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .idea/TxT2DOCX.iml    |    8 +
 Txt2docx2.py          | 1635 +++++++++++++++++++++++++++++++++++++++++
 data/error_chars.json |  116 +++
 replacestr.py         |  473 ++++++++++++
 4 files changed, 2232 insertions(+)
 create mode 100644 .idea/TxT2DOCX.iml
 create mode 100644 Txt2docx2.py
 create mode 100644 data/error_chars.json
 create mode 100644 replacestr.py
diff --git a/.idea/TxT2DOCX.iml b/.idea/TxT2DOCX.iml
new file mode 100644
index 0000000..8437fe6
--- /dev/null
+++ b/.idea/TxT2DOCX.iml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="Python 3.10" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/Txt2docx2.py b/Txt2docx2.py
new file mode 100644
index 0000000..9a4de76
--- /dev/null
+++ b/Txt2docx2.py
@@ -0,0 +1,1635 @@
+import os
+import sys
+import glob
+import re
+import random
+import json
+from typing import Tuple, List
+from PIL import Image
+from docx import Document
+from docx.shared import Inches, Pt, RGBColor
+from docx.enum.text import WD_ALIGN_PARAGRAPH
+from docx.enum.style import WD_STYLE_TYPE
+import PySimpleGUI as sg
+from replacestr import replace_text
+import configparser
+
+CONFIG_FILE_PATH = os.path.join(os.path.expanduser("~"), ".txt2md2docx.ini")  # per-user INI settings file
+
+
+# Typo-injection support
+def load_error_chars(db_path: str = "data/error_chars.json") -> dict:
+    """Load the character-substitution table used for typo injection.
+
+    Returns the dict parsed from the JSON file at ``db_path``. If that file does
+    not exist yet, it is created with the built-in defaults, which are returned.
+    """
+    # dirname is '' for a bare file name and os.makedirs('') raises, hence the guard.
+    dir_name = os.path.dirname(db_path)
+    if dir_name:
+        os.makedirs(dir_name, exist_ok=True)
+
+    if not os.path.exists(db_path):
+        default_chars = {
+            "的": "地",
+            "地": "得",
+            "得": "的",
+            "在": "再",
+            "再": "在",
+            "是": "事",
+            "事": "是",
+            "他": "她",
+            "她": "他",
+            "你": "您",
+            "您": "你",
+            "们": "门",
+            "门": "们",
+            "有": "又",
+            "又": "有",
+            "和": "合",
+            "合": "和",
+            "到": "倒",
+            "倒": "到",
+            "就": "才",
+            "才": "就",
+            "要": "耍",
+            "耍": "要",
+            "会": "汇",
+            "汇": "会",
+            "看": "着",
+            "着": "看",
+            "说": "讲",
+            "讲": "说",
+            "做": "作",
+            "作": "做",
+            "已": "以",  # listed once: a repeated key in a dict literal silently keeps only its last value
+            "己": "已",
+            "以": "已",
+            "进": "近",
+            "近": "进",
+            "象": "像",
+            "像": "象",
+            "对": "队",
+            "队": "对",
+            "分": "份",
+            "份": "分",
+        }
+        with open(db_path, 'w', encoding='utf-8') as f:
+            json.dump(default_chars, f, ensure_ascii=False, indent=2)
+        return default_chars
+
+    with open(db_path, 'r', encoding='utf-8') as f:
+        return json.load(f)
+
+
+def introduce_char_errors(text: str, intensity: float = 1.0,
+                          db_path: str = "data/error_chars.json") -> Tuple[str, int, List[str], List[str]]:
+    """
+    Replace characters of ``text`` with their mapped substitutes from the substitution table.
+
+    Args:
+        text: the text to process
+        intensity: per-character replacement probability, 0.0 (replace nothing) to 1.0 (replace every candidate)
+        db_path: path of the JSON substitution table, loaded through load_error_chars
+
+    Returns:
+        (modified text, total number of replacements, original sentences, modified sentences)
+    """
+    # Load the substitution table
+    error_chars = load_error_chars(db_path)
+
+    # Split the text into sentences, keeping each separator attached to its sentence
+    def split_into_sentences(txt: str) -> List[str]:
+        separators = re.compile(r'([。！？；,.!?;])')
+        parts = separators.split(txt)
+        sentences = []
+        for i in range(0, len(parts) - 1, 2):
+            if parts[i] or parts[i + 1]:
+                sentences.append(parts[i] + parts[i + 1])
+        if len(parts) % 2 == 1 and parts[-1]:
+            sentences.append(parts[-1])
+        return sentences
+
+    # Inject errors into a single sentence
+    def introduce_errors_to_sentence(sentence: str) -> Tuple[str, int]:
+        modified = list(sentence)
+        replace_count = 0
+        for i, char in enumerate(modified):
+            # random.random() lies in [0.0, 1.0): "<" makes 0.0 mean never and 1.0 mean always
+            if char in error_chars and random.random() < intensity:
+                modified[i] = error_chars[char]
+                replace_count += 1
+        return ''.join(modified), replace_count
+
+    # Process the whole text sentence by sentence
+    original_sentences = split_into_sentences(text)
+    modified_sentences = []
+    total_replace = 0
+
+    for sentence in original_sentences:
+        modified, count = introduce_errors_to_sentence(sentence)
+        modified_sentences.append(modified)
+        total_replace += count
+
+    modified_text = ''.join(modified_sentences)
+    return modified_text, total_replace, original_sentences, modified_sentences
+
+
+# Application settings
+class Config:
+    """Holds all user-configurable settings and persists them to an INI file."""
+
+    def __init__(self):
+        # File handling
+        self.txt_encoding = "utf-8"
+        self.match_pattern = "exact"  # exact: full match, prefix: prefix match, contains: substring match
+        self.output_location = "txt_folder"  # txt_folder or custom
+        # Most recently used folders
+        self.last_txt_folder = ""
+        self.last_images_root = ""
+        self.last_output_root = ""
+        # Text processing
+        self.reverse_text_order = False  # toggle for the text-order conversion
+        # Typo injection
+        self.enable_char_errors = False  # whether typo injection is enabled
+        self.char_error_intensity = 0.3  # typo intensity, 0.0-1.0
+        self.char_error_db_path = "data/error_chars.json"  # path of the substitution table
+        # Image handling
+        self.image_sort_by = "name"  # name or time
+        self.image_resize = "none"  # none or width
+        self.image_width = 6  # inches
+        self.image_alignment = "center"  # left, center, right
+        self.image_strategy = "cycle"  # cycle, truncate, repeat_last
+        # Document formatting
+        self.line_spacing = 1.5
+        self.title_levels = 6  # deepest heading level supported
+        self.replace_punctuation = False  # whether to replace punctuation
+        self.add_disclaimer = False  # whether to append the disclaimer
+
+    def load_from_file(self, file_path):
+        """Load settings from the INI file at ``file_path``; missing options keep their current values.
+
+        Returns False if the file is absent or malformed (values read before a bad entry stay applied).
+        """
+        if not os.path.exists(file_path):
+            return False
+
+        # interpolation=None: values such as folder paths may legitimately contain '%'.
+        config_parser = configparser.ConfigParser(interpolation=None)
+        try:
+            config_parser.read(file_path, encoding='utf-8')
+
+            if 'FileHandling' in config_parser:
+                sec = config_parser['FileHandling']
+                self.txt_encoding = sec.get('txt_encoding', fallback=self.txt_encoding)
+                self.match_pattern = sec.get('match_pattern', fallback=self.match_pattern)
+                self.output_location = sec.get('output_location', fallback=self.output_location)
+                self.last_txt_folder = sec.get('last_txt_folder', fallback=self.last_txt_folder)
+                self.last_images_root = sec.get('last_images_root', fallback=self.last_images_root)
+                self.last_output_root = sec.get('last_output_root', fallback=self.last_output_root)
+
+            if 'TextProcessing' in config_parser:
+                sec = config_parser['TextProcessing']
+                self.reverse_text_order = sec.getboolean('reverse_text_order', fallback=self.reverse_text_order)
+                self.replace_punctuation = sec.getboolean('replace_punctuation', fallback=self.replace_punctuation)
+                self.add_disclaimer = sec.getboolean('add_disclaimer', fallback=self.add_disclaimer)
+                self.enable_char_errors = sec.getboolean('enable_char_errors', fallback=self.enable_char_errors)
+                self.char_error_intensity = sec.getfloat('char_error_intensity', fallback=self.char_error_intensity)
+                self.char_error_db_path = sec.get('char_error_db_path', fallback=self.char_error_db_path)
+
+            if 'ImageProcessing' in config_parser:
+                sec = config_parser['ImageProcessing']
+                self.image_sort_by = sec.get('image_sort_by', fallback=self.image_sort_by)
+                self.image_resize = sec.get('image_resize', fallback=self.image_resize)
+                self.image_width = sec.getfloat('image_width', fallback=self.image_width)
+                self.image_alignment = sec.get('image_alignment', fallback=self.image_alignment)
+                self.image_strategy = sec.get('image_strategy', fallback=self.image_strategy)
+
+            if 'DocumentFormat' in config_parser:
+                sec = config_parser['DocumentFormat']
+                self.line_spacing = sec.getfloat('line_spacing', fallback=self.line_spacing)
+                self.title_levels = sec.getint('title_levels', fallback=self.title_levels)
+        except (configparser.Error, ValueError):
+            # Malformed file or a non-numeric / non-boolean value: report failure instead of crashing.
+            return False
+
+        return True
+
+    def save_to_file(self, file_path):
+        """Write all settings to the INI file at ``file_path`` (UTF-8) and return True."""
+        # interpolation=None so that values containing '%' are accepted and written verbatim.
+        config_parser = configparser.ConfigParser(interpolation=None)
+
+        # File handling and recently used folders
+        config_parser['FileHandling'] = {
+            'txt_encoding': self.txt_encoding,
+            'match_pattern': self.match_pattern,
+            'output_location': self.output_location,
+            'last_txt_folder': self.last_txt_folder,
+            'last_images_root': self.last_images_root,
+            'last_output_root': self.last_output_root
+        }
+
+        # Text processing, including typo injection
+        config_parser['TextProcessing'] = {
+            'reverse_text_order': str(self.reverse_text_order),
+            'replace_punctuation': str(self.replace_punctuation),
+            'add_disclaimer': str(self.add_disclaimer),
+            'enable_char_errors': str(self.enable_char_errors),
+            'char_error_intensity': str(self.char_error_intensity),
+            'char_error_db_path': self.char_error_db_path
+        }
+
+        # Image handling
+        config_parser['ImageProcessing'] = {
+            'image_sort_by': self.image_sort_by,
+            'image_resize': self.image_resize,
+            'image_width': str(self.image_width),
+            'image_alignment': self.image_alignment,
+            'image_strategy': self.image_strategy
+        }
+
+        # Document formatting
+        config_parser['DocumentFormat'] = {
+            'line_spacing': str(self.line_spacing),
+            'title_levels': str(self.title_levels)
+        }
+
+        with open(file_path, 'w', encoding='utf-8') as f:
+            config_parser.write(f)
+
+        return True
+
+
+# Global configuration instance, populated from the per-user settings file when the module is imported
+config = Config()
+config.load_from_file(CONFIG_FILE_PATH)
+
+
+# Text-processing helpers (extended)
+class TextProcessor:
+    """Stateless text transformations applied to content before it is written to the document."""
+
+    @staticmethod
+    def replace_periods(text: str) -> str:
+        """
+        Turn every Chinese full stop ('。') inside the text into a Chinese comma ('，').
+
+        Trailing whitespace is stripped first, and a full stop that ends the text is
+        dropped rather than replaced.
+        """
+        trimmed = text.rstrip()
+        # endswith() is False for an empty string, so no separate emptiness check is needed.
+        if trimmed.endswith('。'):
+            trimmed = trimmed[:-1]
+        return trimmed.replace('。', '，')
+
+    @staticmethod
+    def reverse_text_order(content):
+        """Return the content reversed character by character (falsy input is returned as is)."""
+        return content[::-1] if content else content
+
+    @staticmethod
+    def reverse_paragraph_order(content):
+        """Return the content with its newline-separated lines in reverse order.
+
+        The text inside each line is left untouched; falsy input is returned as is.
+        """
+        if content:
+            return '\n'.join(content.split('\n')[::-1])
+        return content
+
+    @staticmethod
+    def apply_char_errors(text: str) -> str:
+        """Inject typos into the text when the feature is enabled in the global config.
+
+        The text is returned unchanged when the feature is off, when the text is
+        empty, or when the injection raises.
+        """
+        if not (config.enable_char_errors and text):
+            return text
+
+        try:
+            modified_text, replace_count, _, _ = introduce_char_errors(
+                text, config.char_error_intensity, config.char_error_db_path)
+            print(f"已应用错别字处理，替换了 {replace_count} 个字符。")
+            return modified_text
+        except Exception as e:
+            # Best effort: report the failure and fall back to the unmodified text.
+            print(f"错别字处理出错: {e}")
+            return text
+
+    @staticmethod
+    def process_text_content(text):
+        """Run the configured text pipeline over one piece of content.
+
+        Order: text-order conversion (replace_text), typo injection, then full-stop
+        replacement. Empty or whitespace-only input is returned unchanged.
+        """
+        if not text or not text.strip():
+            return text
+
+        # Step 1: text-order conversion, delegated to replacestr.replace_text
+        processed = replace_text(text) if config.reverse_text_order else text
+        # Step 2: typo injection (returns the text unchanged unless enabled)
+        processed = TextProcessor.apply_char_errors(processed)
+        # Step 3: punctuation replacement
+        if config.replace_punctuation:
+            processed = TextProcessor.replace_periods(processed)
+        return processed
+
+
+# Extended Markdown parser
+class MarkdownParser:
+    # Patterns for the supported Markdown constructs
+    PATTERNS = {
+        'heading': re.compile(r'^(\s*)(#{1,6})\s+(.+)$'),
+        'bold_asterisk': re.compile(r'\*\*(.+?)\*\*'),
+        'bold_underscore': re.compile(r'__(.+?)__'),
+        'italic_asterisk': re.compile(r'(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)'),
+        'italic_underscore': re.compile(r'_(.+?)_'),
+        'code_inline': re.compile(r'`([^`]+)`'),
+        'code_block': re.compile(r'^```(\w+)?\s*\n(.*?)\n```', re.MULTILINE | re.DOTALL),
+        'strikethrough': re.compile(r'~~(.+?)~~'),
+        'link': re.compile(r'\[([^\]]+)\]\(([^)]+)\)'),
+        'image': re.compile(r'!\[([^\]]*)\]\(([^)]+)\)'),
+        'unordered_list': re.compile(r'^\s*[-*+]\s+(.+)$'),
+        'ordered_list': re.compile(r'^\s*\d+\.\s+(.+)$'),
+        'blockquote': re.compile(r'^\s*>\s*(.+)$'),
+        'horizontal_rule': re.compile(r'^(\s*[-*_]){3,}\s*$'),
+        'table_row': re.compile(r'^\|(.+)\|$'),
+        'table_separator': re.compile(r'^\|(\s*:?-+:?\s*\|)+$')
+    }
+
+    @staticmethod
+    def parse(txt_content):
+        """Parse Markdown text into the section list built by group_by_sections.
+
+        The text is scanned line by line: fenced code blocks and pipe tables are
+        collected across lines, every other line becomes one element (heading,
+        horizontal rule, list item, blockquote, empty line or paragraph).
+        """
+        elements = []
+        lines = txt_content.split('\n')
+        i = 0
+        in_code_block = False
+        language = ''  # info string of the code fence that is currently open
+        code_block_content = []
+        table_mode = False
+        table_rows = []
+
+        while i < len(lines):
+            line = lines[i].rstrip('\r')
+
+            # Fenced code block: a ``` line either opens or closes it
+            if line.strip().startswith('```'):
+                if not in_code_block:
+                    in_code_block = True
+                    language = line.strip()[3:].strip()
+                    code_block_content = []
+                else:
+                    in_code_block = False
+                    elements.append({
+                        'type': 'code_block',
+                        'language': language,
+                        'content': '\n'.join(code_block_content),
+                        'level': 0
+                    })
+                    code_block_content = []
+                i += 1
+                continue
+
+            if in_code_block:
+                code_block_content.append(line)
+                i += 1
+                continue
+
+            # Tables
+            table_match = MarkdownParser.PATTERNS['table_row'].match(line)
+            table_sep_match = MarkdownParser.PATTERNS['table_separator'].match(line)
+
+            if table_match or table_sep_match:
+                # table_rows is always empty outside table mode, so no reset is needed here
+                table_mode = True
+
+                if table_match and not table_sep_match:
+                    cells = [cell.strip() for cell in table_match.group(1).split('|')]
+                    table_rows.append(cells)
+
+                i += 1
+                continue
+            elif table_mode:
+                # First non-table line: the table has ended
+                if table_rows:
+                    elements.append({
+                        'type': 'table',
+                        'rows': table_rows,
+                        'level': 0
+                    })
+                table_mode = False
+                table_rows = []
+
+            # Headings
+            heading_match = MarkdownParser.PATTERNS['heading'].match(line)
+            if heading_match:
+                # Every level is emitted so that no heading text is lost; how a level deeper
+                # than config.title_levels is rendered is decided by DocxGenerator.generate.
+                heading_text = heading_match.group(3).strip()
+                # Plain-text variant with the bold markers (** and __) removed
+                cleaned_text = re.sub(r'\*\*(.+?)\*\*|__(.+?)__', r'\1\2', heading_text)
+                elements.append({
+                    'type': 'heading',
+                    'level': len(heading_match.group(2)),
+                    'content': heading_text,  # original text, kept for inline formatting
+                    'cleaned_content': cleaned_text  # plain text for display
+                })
+                i += 1
+                continue
+
+            # Horizontal rule
+            if MarkdownParser.PATTERNS['horizontal_rule'].match(line):
+                elements.append({
+                    'type': 'horizontal_rule',
+                    'level': 0
+                })
+                i += 1
+                continue
+
+            # List items
+            ul_match = MarkdownParser.PATTERNS['unordered_list'].match(line)
+            ol_match = MarkdownParser.PATTERNS['ordered_list'].match(line)
+
+            if ul_match:
+                elements.append({
+                    'type': 'unordered_list',
+                    'content': ul_match.group(1),
+                    'level': 0
+                })
+                i += 1
+                continue
+
+            if ol_match:
+                elements.append({
+                    'type': 'ordered_list',
+                    'content': ol_match.group(1),
+                    'level': 0
+                })
+                i += 1
+                continue
+
+            # Blockquote
+            quote_match = MarkdownParser.PATTERNS['blockquote'].match(line)
+            if quote_match:
+                elements.append({
+                    'type': 'blockquote',
+                    'content': quote_match.group(1),
+                    'level': 0
+                })
+                i += 1
+                continue
+
+            # Empty line
+            if line.strip() == '':
+                elements.append({
+                    'type': 'empty',
+                    'content': '',
+                    'level': 0
+                })
+                i += 1
+                continue
+
+            # Plain paragraph
+            elements.append({
+                'type': 'paragraph',
+                'content': line,
+                'level': 0
+            })
+
+            i += 1
+
+        # Flush a table that runs to the end of the text
+        if table_mode and table_rows:
+            elements.append({
+                'type': 'table',
+                'rows': table_rows,
+                'level': 0
+            })
+
+        # Flush a code block whose closing fence is missing so that its lines are not lost
+        if in_code_block:
+            elements.append({'type': 'code_block', 'language': language,
+                             'content': '\n'.join(code_block_content), 'level': 0})
+
+        return MarkdownParser.group_by_sections(elements)
+
+    @staticmethod
+    def group_by_sections(elements):
+        """Group parsed elements into sections, one per heading, plus a level-0 section for leading content."""
+        sections = []
+        current_section = {
+            'type': 'section',
+            'level': 0,
+            'content': '前置内容',
+            'elements': []
+        }
+
+        for element in elements:
+            if element['type'] == 'heading':
+                # Store the finished section; a heading is kept even when nothing follows it
+                if current_section['elements'] or current_section['level'] > 0:
+                    sections.append(current_section)
+
+                # Start a new section for this heading
+                current_section = {
+                    'type': 'section',
+                    'level': element['level'],
+                    'content': element['content'],
+                    'elements': []
+                }
+            else:
+                current_section['elements'].append(element)
+
+        # Store the last section (an empty level-0 section is skipped)
+        if current_section['elements'] or current_section['level'] > 0:
+            sections.append(current_section)
+
+        return sections
+
+    @staticmethod
+    def extract_inline_formatting(text):
+        """Collect the inline formatting spans found in ``text``.
+
+        Returns a list of dicts sorted by 'start'; 'start' and 'end' are offsets into
+        ``text`` that include the Markdown markers. Links carry 'text' and 'url',
+        every other span carries 'content'.
+        """
+        formatting = []
+
+        # Bold (** first, then __)
+        for key in ('bold_asterisk', 'bold_underscore'):
+            for match in MarkdownParser.PATTERNS[key].finditer(text):
+                formatting.append({
+                    'type': 'bold',
+                    'start': match.start(),
+                    'end': match.end(),
+                    'content': match.group(1)
+                })
+
+        # Italic (*)
+        for match in MarkdownParser.PATTERNS['italic_asterisk'].finditer(text):
+            # Skip matches that overlap a bold span
+            overlaps = any(f['start'] <= match.start() < f['end'] or f['start'] < match.end() <= f['end']
+                           for f in formatting if f['type'] == 'bold')
+            if not overlaps:
+                formatting.append({
+                    'type': 'italic',
+                    'start': match.start(),
+                    'end': match.end(),
+                    'content': match.group(1)
+                })
+
+        # Italic (_), skipping matches that overlap a bold or italic span
+        for match in MarkdownParser.PATTERNS['italic_underscore'].finditer(text):
+            overlaps = any(f['start'] <= match.start() < f['end'] or f['start'] < match.end() <= f['end']
+                           for f in formatting if f['type'] in ['bold', 'italic'])
+            if not overlaps:
+                formatting.append({
+                    'type': 'italic',
+                    'start': match.start(),
+                    'end': match.end(),
+                    'content': match.group(1)
+                })
+
+        # Inline code
+        for match in MarkdownParser.PATTERNS['code_inline'].finditer(text):
+            formatting.append({
+                'type': 'code',
+                'start': match.start(),
+                'end': match.end(),
+                'content': match.group(1)
+            })
+
+        # Strikethrough
+        for match in MarkdownParser.PATTERNS['strikethrough'].finditer(text):
+            formatting.append({
+                'type': 'strikethrough',
+                'start': match.start(),
+                'end': match.end(),
+                'content': match.group(1)
+            })
+
+        # Links
+        for match in MarkdownParser.PATTERNS['link'].finditer(text):
+            formatting.append({
+                'type': 'link',
+                'start': match.start(),
+                'end': match.end(),
+                'text': match.group(1),
+                'url': match.group(2)
+            })
+
+        # Sort by position
+        formatting.sort(key=lambda x: x['start'])
+        return formatting
+
+
+# File-handling helpers
+class FileHandler:
+    @staticmethod
+    def scan_txt_files(folder_path):
+        """Recursively collect the .txt files below ``folder_path``; raises if the folder or any TXT is missing."""
+        if not os.path.isdir(folder_path):
+            raise Exception(f"TXT文件夹不存在: {folder_path}")
+
+        txt_files = []
+        for root, dirs, files in os.walk(folder_path):
+            for file in files:
+                if file.lower().endswith(".txt"):
+                    txt_path = os.path.join(root, file)
+                    file_name = os.path.splitext(file)[0]
+                    txt_files.append({
+                        "path": txt_path,
+                        "name": file_name,
+                        "relative_path": os.path.relpath(txt_path, folder_path),
+                        "folder": root
+                    })
+
+        if not txt_files:
+            raise Exception(f"在 {folder_path} 中未找到任何TXT文件")
+
+        return sorted(txt_files, key=lambda x: x["relative_path"])
+
+    @staticmethod
+    def find_matching_image_folders(txt_files, images_root):
+        """Pair each TXT file with an image folder found below ``images_root``.
+
+        Names are compared lower-cased according to config.match_pattern; among several
+        matches the folder with the shortest relative path is chosen (None if no match).
+        """
+        if not os.path.isdir(images_root):
+            raise Exception(f"图片根文件夹不存在: {images_root}")
+
+        all_image_folders = []
+        for root, dirs, _ in os.walk(images_root):
+            for dir_name in dirs:  # not "dir", which would shadow the builtin
+                folder_path = os.path.join(root, dir_name)
+                all_image_folders.append({
+                    "path": folder_path,
+                    "name": dir_name,
+                    "relative_path": os.path.relpath(folder_path, images_root)
+                })
+
+        matched_pairs = []
+        for txt in txt_files:
+            matches = []
+            txt_name = txt["name"].lower()
+
+            for img_folder in all_image_folders:
+                folder_name = img_folder["name"].lower()
+
+                if config.match_pattern == "exact" and txt_name == folder_name:
+                    matches.append(img_folder)
+                elif config.match_pattern == "prefix" and folder_name.startswith(txt_name):
+                    matches.append(img_folder)
+                elif config.match_pattern == "contains" and txt_name in folder_name:
+                    matches.append(img_folder)
+
+            matches.sort(key=lambda x: len(x["relative_path"]))
+            matched_pairs.append({
+                "txt": txt,
+                "image_folder": matches[0] if matches else None,
+                "all_matches": matches
+            })
+
+        return matched_pairs
+
+    @staticmethod
+    def get_image_files(folder_path):
+        """Return the image paths directly inside ``folder_path``, ordered per config.image_sort_by."""
+        if not folder_path or not os.path.isdir(folder_path):
+            return []
+
+        # Suffixes are compared lower-cased so that e.g. "IMG.JPG" is also found on
+        # case-sensitive file systems; names starting with a dot are skipped.
+        image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif', '.webp', '.tiff'}
+        image_files = [
+            os.path.join(folder_path, name)
+            for name in os.listdir(folder_path)
+            if not name.startswith('.') and os.path.splitext(name)[1].lower() in image_extensions
+        ]
+
+        if config.image_sort_by == "name":
+            image_files.sort()
+        elif config.image_sort_by == "time":
+            image_files.sort(key=lambda x: os.path.getmtime(x))
+
+        return image_files
+
+    @staticmethod
+    def read_markdown_txt(file_path):
+        """Read a TXT file that holds Markdown and return its text with LF line endings."""
+        if not os.path.exists(file_path):
+            raise Exception(f"TXT文件不存在: {file_path}")
+
+        encodings = [config.txt_encoding, "gbk", "utf-16", "iso-8859-1"]
+        for encoding in encodings:
+            try:
+                with open(file_path, 'r', encoding=encoding) as f:
+                    content = f.read()
+            except (UnicodeError, LookupError):
+                # UnicodeError also covers decoder failures that are not UnicodeDecodeError;
+                # LookupError means the configured encoding name is unknown to Python.
+                continue
+            # A byte-order mark left in the text would stop a first-line heading from matching.
+            content = content.lstrip('\ufeff')
+            return content.replace("\r\n", "\n").replace("\r", "\n")
+
+        raise Exception(f"无法解析TXT文件（编码问题）: {file_path}")
+
+    @staticmethod
+    def prepare_output_path(txt_info, images_root, output_root):
+        """Return a not-yet-existing .docx path for the TXT file, creating the target folder if needed."""
+        base_folder = txt_info["folder"] if config.output_location == "txt_folder" else output_root
+
+        os.makedirs(base_folder, exist_ok=True)
+
+        txt_name = txt_info["name"]
+        output_path = os.path.join(base_folder, f"{txt_name}.docx")
+
+        counter = 1
+        while os.path.exists(output_path):
+            output_path = os.path.join(base_folder, f"{txt_name}_{counter}.docx")
+            counter += 1
+
+        return output_path
+
+
+# Image-processing helpers
+class ImageProcessor:
+    @staticmethod
+    def process_image(image_path):
+        """Open an image, apply its EXIF orientation and the configured resize.
+
+        Returns (image, width_in_inches). The width is config.image_width when
+        width-resizing is enabled, otherwise the pixel width divided by 96.
+        Raises Exception (chained to the original error) when processing fails.
+        """
+        try:
+            with Image.open(image_path) as img:
+                # Image.open is lazy; read the pixel data now, because the file is closed
+                # when this block exits and the image object is handed back to the caller.
+                img.load()
+
+                # Rotate according to the EXIF orientation tag (274); values 3, 6 and 8 are handled
+                orientation = img.getexif().get(274)
+                rotation = {3: 180, 6: 270, 8: 90}.get(orientation)
+                if rotation:
+                    img = img.rotate(rotation, expand=True)
+
+                # Resize
+                if config.image_resize == "width" and config.image_width > 0:
+                    target_width_px = config.image_width * 96
+                    width, height = img.size
+                    if width > target_width_px:  # pixels are only ever scaled down
+                        ratio = target_width_px / width
+                        new_height = int(height * ratio)
+                        img = img.resize((int(target_width_px), new_height), Image.LANCZOS)
+
+                    return img, config.image_width
+                else:
+                    width_in = img.width / 96
+                    return img, width_in
+        except Exception as e:
+            raise Exception(f"处理图片失败 {image_path}: {str(e)}") from e
+
+    @staticmethod
+    def get_image_alignment():
+        """Map config.image_alignment to a WD_ALIGN_PARAGRAPH value; anything but left/right means center."""
+        if config.image_alignment == "left":
+            return WD_ALIGN_PARAGRAPH.LEFT
+        elif config.image_alignment == "right":
+            return WD_ALIGN_PARAGRAPH.RIGHT
+        else:
+            return WD_ALIGN_PARAGRAPH.CENTER
+
+
+DISCLAIMER_TEXT = """`[免责声明]文章的时间、过程、图片均来自于网络，文章旨在传播正能量，均无低俗等不良引导，请观众勿对号入座，并上升到人身攻击等方面。观众理性看待本事件，切勿留下主观臆断的恶意评论，互联网不是法外之地。本文如若真实性存在争议、事件版权或图片侵权问题，请及时联系作者，我们将予以删除。`"""  # appended to the document when config.add_disclaimer is set
+
+
+# DOCX生成模块 - 完全重构
+class DocxGenerator:
+    @staticmethod
+    def generate(sections, image_files, output_path, progress_callback=None):
+        """Build the DOCX document from parsed sections and save it to ``output_path``.
+
+        One image is inserted after the first non-empty element of each section.
+        ``progress_callback``, if given, is called with (percent, message).
+        Returns True; raises Exception (chained) when the document cannot be saved.
+        """
+        doc = Document()
+        total_sections = len(sections)
+        image_index = 0
+        image_count = len(image_files)
+
+        for i, section in enumerate(sections):
+            if progress_callback:
+                progress = int((i / total_sections) * 100)
+                progress_callback(progress, f"处理章节: {section['content'][:30]}...")
+
+            # Section title: a real heading with inline formatting, or a bold paragraph otherwise
+            if section['level'] > 0 and section['level'] <= config.title_levels:
+                heading_text = TextProcessor.process_text_content(section['content'])
+                para = doc.add_heading(level=section['level'])
+                DocxGenerator.apply_inline_formatting(para, heading_text)
+            elif section['content'] != '前置内容':
+                heading_text = TextProcessor.process_text_content(section['content'])
+                para = doc.add_paragraph()
+                run = para.add_run(heading_text)
+                run.font.size = Pt(14)
+                run.font.bold = True
+                # space_after is a property of paragraph_format, not of the paragraph itself
+                para.paragraph_format.space_after = Pt(12)
+
+            # Elements of the section
+            elements = section.get('elements', [])
+            if not elements:
+                continue
+
+            # The image goes right after the first non-empty element
+            first_content_added = False
+
+            for element in elements:
+                DocxGenerator.add_element_to_doc(doc, element)
+
+                if not first_content_added and element['type'] not in ['empty']:
+                    first_content_added = True
+
+                    if image_count > 0 and image_index < image_count:
+                        try:
+                            DocxGenerator.insert_image(doc, image_files[image_index], output_path)
+                            image_index += 1
+                            if image_index >= image_count:
+                                if config.image_strategy == "cycle":
+                                    image_index = 0
+                                elif config.image_strategy == "repeat_last":
+                                    image_index = image_count - 1
+                                # "truncate": the index stays at image_count, so no further image is inserted
+
+                        except Exception as e:
+                            doc.add_paragraph(f"[图片插入失败: {str(e)}]")
+
+        # Disclaimer
+        if config.add_disclaimer:
+            doc.add_paragraph("---")
+            para = doc.add_paragraph()
+            disclaimer_text = TextProcessor.process_text_content(DISCLAIMER_TEXT)
+            run = para.add_run(disclaimer_text)
+            run.font.size = Pt(10)
+            para.paragraph_format.line_spacing = 1.0
+
+        try:
+            doc.save(output_path)
+            if progress_callback:
+                progress_callback(100, "转换完成!")
+            return True
+        except Exception as e:
+            raise Exception(f"保存DOCX失败: {str(e)}") from e
+
+    @staticmethod
+    def add_element_to_doc(doc, element):
+        """将解析的元素添加到文档中"""
+        etype = element['type']
+        content = TextProcessor.process_text_content(element.get('content', ''))
+
+        if etype == 'paragraph':
+            DocxGenerator.add_formatted_paragraph(doc, content)
+
+        elif etype == 'unordered_list':
+            para = doc.add_paragraph(style='List Bullet')
+            DocxGenerator.apply_inline_formatting(para, content)
+
+        elif etype == 'ordered_list':
+            para = doc.add_paragraph(style='List Number')
+            DocxGenerator.apply_inline_formatting(para, content)
+
+        elif etype == 'blockquote':
+            para = doc.add_paragraph(style='Quote')
+            DocxGenerator.apply_inline_formatting(para, content)
+
+        elif etype == 'code_block':
+            para = doc.add_paragraph(style='No Spacing')
+            run = para.add_run(element['content'])
+            run.font.name = 'Courier New'
+            run.font.size = Pt(10)
+
+        elif etype == 'table':
+            DocxGenerator.add_table_to_doc(doc, element['rows'])
+
+        elif etype == 'horizontal_rule':
+            DocxGenerator.add_horizontal_rule(doc)
+
+        elif etype == 'empty':
+            doc.add_paragraph()
+
+    @staticmethod
+    def add_horizontal_rule(doc):
+        """在文档中添加横线"""
+        para = doc.add_paragraph()
+        run = para.add_run()
+        # 添加水平线条（使用下划线作为横线）
+        run.font.underline = True
+        run.text = " " * 100  # 足够长的下划线作为横线
+        para.alignment = WD_ALIGN_PARAGRAPH.CENTER
+
+    @staticmethod
+    def add_table_to_doc(doc, rows):
+        """添加表格到文档"""
+        if not rows:
+            return
+
+        table = doc.add_table(rows=len(rows), cols=len(rows[0]))
+        table.style = 'Table Grid'
+
+        for i, row_data in enumerate(rows):
+            row_cells = table.rows[i].cells
+            for j, cell_data in enumerate(row_data):
+                if j < len(row_cells):
+                    # 处理单元格内容的格式和文字处理
+                    processed_text = TextProcessor.process_text_content(cell_data)
+                    row_cells[j].text = processed_text
+
+    @staticmethod
+    def insert_image(doc, image_path, output_path):
+        """插入图片到文档"""
+        img, width = ImageProcessor.process_image(image_path)
+
+        temp_img_path = None
+        if config.image_resize == "width":
+            temp_dir = os.path.dirname(output_path)
+            os.makedirs(temp_dir, exist_ok=True)
+            temp_img_path = os.path.join(temp_dir, f"temp_img_{hash(image_path)}.png")
+            img.save(temp_img_path)
+            img_path = temp_img_path
+        else:
+            img_path = image_path
+
+        para = doc.add_paragraph()
+        run = para.runs[0] if para.runs else para.add_run()
+        run.add_picture(img_path, width=Inches(width))
+        para.alignment = ImageProcessor.get_image_alignment()
+
+        if temp_img_path and os.path.exists(temp_img_path):
+            try:
+                os.remove(temp_img_path)
+            except:
+                pass  # 忽略删除临时文件的错误
+
+    @staticmethod
+    def add_formatted_paragraph(doc, content):
+        """添加带格式的段落"""
+        if not content or not content.strip():
+            doc.add_paragraph()
+            return
+
+        para = doc.add_paragraph()
+        DocxGenerator.apply_inline_formatting(para, content)
+
+        if config.line_spacing > 0:
+            para.paragraph_format.line_spacing = config.line_spacing
+
+    @staticmethod
+    def apply_inline_formatting(paragraph, text):
+        """应用行内格式到段落"""
+        # 首先处理文字内容（顺序调换、错别字和标点符号替换）
+        processed_text = TextProcessor.process_text_content(text)
+
+        # 重新提取格式信息（因为文字可能已经改变）
+        formatting = MarkdownParser.extract_inline_formatting(processed_text)
+
+        # 如果没有格式，直接添加文本
+        if not formatting:
+            paragraph.add_run(processed_text)
+            return
+
+        current_pos = 0
+
+        for fmt in formatting:
+            # 添加格式前的普通文本
+            if fmt['start'] > current_pos:
+                paragraph.add_run(processed_text[current_pos:fmt['start']])
+
+            # 创建格式化的run
+            if fmt['type'] == 'bold':
+                # 移除markdown标记并应用格式
+                clean_text = re.sub(r'\*\*(.+?)\*\*|__(.+?)__', r'\1\2', processed_text[fmt['start']:fmt['end']])
+                run = paragraph.add_run(clean_text)
+                run.bold = True
+
+            elif fmt['type'] == 'italic':
+                clean_text = re.sub(r'(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)|_(.+?)_', r'\1\2',
+                                    processed_text[fmt['start']:fmt['end']])
+                run = paragraph.add_run(clean_text)
+                run.italic = True
+
+            elif fmt['type'] == 'code':
+                clean_text = re.sub(r'`([^`]+)`', r'\1', processed_text[fmt['start']:fmt['end']])
+                run = paragraph.add_run(clean_text)
+                run.font.name = 'Courier New'
+                run.font.size = Pt(10)
+
+            elif fmt['type'] == 'strikethrough':
+                clean_text = re.sub(r'~~(.+?)~~', r'\1', processed_text[fmt['start']:fmt['end']])
+                run = paragraph.add_run(clean_text)
+                run.font.strike = True
+
+            elif fmt['type'] == 'link':
+                # 对于链接，只显示链接文本
+                run = paragraph.add_run(fmt['text'])
+                run.font.color.rgb = RGBColor(0, 0, 255)  # 蓝色
+                run.underline = True
+
+            current_pos = fmt['end']
+
+        # 添加剩余的普通文本
+        if current_pos < len(processed_text):
+            paragraph.add_run(processed_text[current_pos:])
+
+
+# 批量处理模块
+class BatchProcessor:
+    @staticmethod
+    def process_batch(matched_pairs, output_root, progress_callback=None):
+        """Convert every matched pair to DOCX and return a summary dict.
+
+        Args:
+            matched_pairs: dicts with a 'txt' entry and an 'image_folder' entry (falsy when unmatched).
+            output_root: passed through to FileHandler.prepare_output_path.
+            progress_callback: optional callable(percent, message).
+
+        Returns:
+            dict with 'total', 'success', 'failed', 'failed_items' and
+            'main_output_folder' ("" when nothing was converted).
+        """
+        total = len(matched_pairs)
+        failures = []
+        converted = 0
+
+        for index, pair in enumerate(matched_pairs):
+            try:
+                if progress_callback:
+                    progress_callback(int((index / total) * 100),
+                                      f"处理 {index + 1}/{total}: {pair['txt']['name']}")
+
+                txt_info = pair['txt']
+                folder = pair['image_folder']
+                output_path = FileHandler.prepare_output_path(
+                    txt_info, folder['path'] if folder else "", output_root)
+
+                # Read and parse the Markdown text; an empty parse counts as a failure.
+                sections = MarkdownParser.parse(FileHandler.read_markdown_txt(txt_info['path']))
+                if not sections:
+                    raise Exception("未解析到有效内容")
+
+                image_files = FileHandler.get_image_files(folder['path']) if folder else []
+
+                # Map this file's 0-100 progress into its slice of the whole batch.
+                def relay_progress(progress, text):
+                    if progress_callback:
+                        scaled = int((index + progress / 100) / total * 100)
+                        progress_callback(scaled, f"{txt_info['name']}: {text}")
+
+                DocxGenerator.generate(sections, image_files, output_path, relay_progress)
+                converted += 1
+
+            except Exception as e:
+                # One bad file must not abort the batch; record it and move on.
+                failures.append({
+                    "name": pair['txt']['name'],
+                    "error": str(e)
+                })
+
+        # The folder of the first pair's output is reported as the "main" folder.
+        main_output_folder = ""
+        if matched_pairs and converted > 0:
+            sample_output = FileHandler.prepare_output_path(matched_pairs[0]['txt'], "", output_root)
+            main_output_folder = os.path.dirname(sample_output)
+
+        return {
+            "total": total,
+            "success": converted,
+            "failed": len(failures),
+            "failed_items": failures,
+            "main_output_folder": main_output_folder
+        }
+
+
+# 配置窗口 - 优化排版
+def show_config_window():
+    """Show the modal settings dialog; on confirm, copy the widget values into the global config and save it."""
+    # Build one layout per settings tab.
+    tab_file_layout = [
+        [sg.Text('文件处理设置', font=('bold', 12))],
+        [sg.HSeparator()],
+        [sg.Text('TXT编码:', size=(12, 1)),
+         sg.Combo(['utf-8', 'gbk', 'utf-16'], default_value=config.txt_encoding, key='txt_encoding', size=(15, 1))],
+        [sg.Text('匹配模式:', size=(12, 1))],
+        [sg.Radio('完全匹配（文件名与文件夹名相同）', 'match', default=config.match_pattern == "exact",
+                  key='match_exact')],
+        [sg.Radio('前缀匹配', 'match', default=config.match_pattern == "prefix", key='match_prefix')],
+        [sg.Radio('包含匹配', 'match', default=config.match_pattern == "contains", key='match_contains')],
+        [sg.HSeparator()],
+        [sg.Text('输出位置:', size=(12, 1))],
+        [sg.Radio('输出到TXT文件所在文件夹', 'output_loc', default=config.output_location == "txt_folder",
+                  key='output_txt_folder')],
+        [sg.Radio('输出到指定文件夹', 'output_loc', default=config.output_location == "custom", key='output_custom')]
+    ]
+
+    tab_text_layout = [
+        [sg.Text('文字处理设置', font=('bold', 12))],
+        [sg.HSeparator()],
+        [sg.Checkbox('转换文字顺序', key='-REVERSE_TEXT-', default=config.reverse_text_order)],
+        [sg.Checkbox('替换标点符号（句号转逗号，保留结尾句号）', key='-REPLACE_PUNCTUATION-',
+                     default=config.replace_punctuation)],
+        [sg.HSeparator()],
+        [sg.Text('错别字处理', font=('bold', 11), text_color='darkblue')],
+        [sg.Checkbox('启用错别字处理', key='-ENABLE_CHAR_ERRORS-', default=config.enable_char_errors,
+                     enable_events=True)],
+        [sg.Text('错误强度:', size=(10, 1)),
+         sg.Slider(range=(0.0, 1.0), default_value=config.char_error_intensity, resolution=0.1,
+                   orientation='h', size=(20, 15), key='char_error_intensity', disabled=not config.enable_char_errors)],
+        [sg.Text('错别字库路径:', size=(12, 1)),
+         sg.InputText(config.char_error_db_path, key='char_error_db_path', size=(30, 1),
+                      disabled=not config.enable_char_errors),
+         sg.FileBrowse('浏览', file_types=(("JSON Files", "*.json"),), disabled=not config.enable_char_errors)],
+        [sg.HSeparator()],
+        [sg.Checkbox('添加免责声明', key='-ADD_DISCLAIMER-', default=config.add_disclaimer)]
+    ]
+
+    tab_image_layout = [
+        [sg.Text('图片处理设置', font=('bold', 12))],
+        [sg.HSeparator()],
+        [sg.Text('图片排序方式:', size=(12, 1))],
+        [sg.Radio('按名称', 'sort', default=config.image_sort_by == "name", key='sort_name'),
+         sg.Radio('按修改时间', 'sort', default=config.image_sort_by == "time", key='sort_time')],
+        [sg.HSeparator()],
+        [sg.Text('图片尺寸调整:', size=(12, 1))],
+        [sg.Radio('不调整', 'resize', default=config.image_resize == "none", key='resize_none')],
+        [sg.Radio('按宽度:', 'resize', default=config.image_resize == "width", key='resize_width'),
+         sg.InputText(str(config.image_width), size=(8, 1), key='image_width'),
+         sg.Text('英寸')],
+        [sg.HSeparator()],
+        [sg.Text('图片对齐方式:', size=(12, 1))],
+        [sg.Radio('左对齐', 'align', default=config.image_alignment == "left", key='align_left'),
+         sg.Radio('居中', 'align', default=config.image_alignment == "center", key='align_center'),
+         sg.Radio('右对齐', 'align', default=config.image_alignment == "right", key='align_right')],
+        [sg.HSeparator()],
+        [sg.Text('图片不足时策略:', size=(12, 1))],
+        [sg.Radio('循环使用', 'strategy', default=config.image_strategy == "cycle", key='strategy_cycle')],
+        [sg.Radio('忽略多余标题', 'strategy', default=config.image_strategy == "truncate", key='strategy_truncate')],
+        [sg.Radio('重复最后一张', 'strategy', default=config.image_strategy == "repeat_last", key='strategy_repeat')]
+    ]
+
+    tab_format_layout = [
+        [sg.Text('文档格式设置', font=('bold', 12))],
+        [sg.HSeparator()],
+        [sg.Text('行间距:', size=(12, 1)),
+         sg.InputText(str(config.line_spacing), size=(8, 1), key='line_spacing')],
+        [sg.Text('最大标题层级:', size=(12, 1)),
+         sg.Combo([1, 2, 3, 4, 5, 6], default_value=config.title_levels, key='title_levels', size=(8, 1))]
+    ]
+
+    layout = [
+        [sg.TabGroup([
+            [sg.Tab('文件处理', tab_file_layout, key='tab_file')],
+            [sg.Tab('文字处理', tab_text_layout, key='tab_text')],
+            [sg.Tab('图片处理', tab_image_layout, key='tab_image')],
+            [sg.Tab('文档格式', tab_format_layout, key='tab_format')]
+        ])],
+        [sg.HSeparator()],
+        [sg.Button('确定', size=(10, 1)), sg.Button('取消', size=(10, 1)), sg.Button('重置为默认', size=(12, 1))]
+    ]
+
+    window = sg.Window('转换设置', layout, modal=True, resizable=True, size=(500, 450))
+
+    while True:
+        event, values = window.read()
+
+        if event in (sg.WIN_CLOSED, '取消'):
+            break
+
+        # Keep the typo-injection widgets enabled only while the feature checkbox is ticked.
+        if event == '-ENABLE_CHAR_ERRORS-':
+            enabled = values['-ENABLE_CHAR_ERRORS-']
+            window['char_error_intensity'].update(disabled=not enabled)
+            window['char_error_db_path'].update(disabled=not enabled)
+
+        if event == '重置为默认':  # load a fresh Config() into the widgets; nothing is saved until confirm
+            default_config = Config()
+            window['txt_encoding'].update(default_config.txt_encoding)
+            window['match_exact'].update(True)
+            window['output_txt_folder'].update(True)
+            window['-REVERSE_TEXT-'].update(default_config.reverse_text_order)
+            window['-REPLACE_PUNCTUATION-'].update(default_config.replace_punctuation)
+            window['-ENABLE_CHAR_ERRORS-'].update(default_config.enable_char_errors)
+            window['char_error_intensity'].update(default_config.char_error_intensity)
+            window['char_error_db_path'].update(default_config.char_error_db_path)
+            window['-ADD_DISCLAIMER-'].update(default_config.add_disclaimer)
+            window['sort_name'].update(True)
+            window['resize_none'].update(True)
+            window['image_width'].update(str(default_config.image_width))
+            window['align_center'].update(True)
+            window['strategy_cycle'].update(True)
+            window['line_spacing'].update(str(default_config.line_spacing))
+            window['title_levels'].update(default_config.title_levels)
+
+        if event == '确定':  # copy the dialog state into the global config and persist it
+            config.txt_encoding = values['txt_encoding']
+
+            if values['match_exact']:
+                config.match_pattern = "exact"
+            elif values['match_prefix']:
+                config.match_pattern = "prefix"
+            else:
+                config.match_pattern = "contains"
+
+            config.output_location = "txt_folder" if values['output_txt_folder'] else "custom"
+            config.image_sort_by = "name" if values['sort_name'] else "time"
+            config.image_resize = "none" if values['resize_none'] else "width"
+            config.reverse_text_order = values['-REVERSE_TEXT-']
+            config.replace_punctuation = values['-REPLACE_PUNCTUATION-']
+            config.add_disclaimer = values['-ADD_DISCLAIMER-']
+
+            config.enable_char_errors = values['-ENABLE_CHAR_ERRORS-']  # typo-injection settings
+            config.char_error_intensity = values['char_error_intensity']
+            config.char_error_db_path = values['char_error_db_path']
+
+            try:
+                config.image_width = float(values['image_width'])
+            except (TypeError, ValueError):
+                pass  # unparsable input: keep the previous width
+
+            if values['align_left']:
+                config.image_alignment = "left"
+            elif values['align_right']:
+                config.image_alignment = "right"
+            else:
+                config.image_alignment = "center"
+
+            if values['strategy_cycle']:
+                config.image_strategy = "cycle"
+            elif values['strategy_truncate']:
+                config.image_strategy = "truncate"
+            else:
+                config.image_strategy = "repeat_last"
+
+            try:
+                config.line_spacing = float(values['line_spacing'])
+            except (TypeError, ValueError):
+                pass  # unparsable input: keep the previous spacing
+            try:  # parsed separately so a bad line spacing no longer discards the title-level choice
+                config.title_levels = int(values['title_levels'])
+            except (TypeError, ValueError):
+                pass
+
+            config.save_to_file(CONFIG_FILE_PATH)
+            break
+
+    window.close()
+
+
+# 匹配编辑窗口
+def show_matching_editor(matched_pairs, images_root):
+    """Let the user reassign or clear the image folder of each pair; edits matched_pairs in place and returns it."""
+    all_image_folders = []
+    if os.path.isdir(images_root):
+        for root, dirs, _ in os.walk(images_root):
+            for dir_name in dirs:  # renamed from `dir`, which shadowed the builtin
+                folder_path = os.path.join(root, dir_name)
+                rel_path = os.path.relpath(folder_path, images_root)
+                all_image_folders.append((folder_path, rel_path))
+
+    table_data = []
+    for i, pair in enumerate(matched_pairs):
+        txt_name = pair['txt']['name']
+        img_folder = pair['image_folder']['relative_path'] if pair['image_folder'] else "无匹配"
+        table_data.append([i, txt_name, img_folder])
+
+    layout = [
+        [sg.Text('文件匹配编辑', font=('bold', 14))],
+        [sg.Text('选择要修改的项目，然后从右侧选择图片文件夹')],
+        [
+            sg.Table(
+                values=table_data,
+                headings=['序号', 'TXT文件名', '匹配的图片文件夹'],
+                key='-TABLE-',
+                select_mode=sg.TABLE_SELECT_MODE_BROWSE,
+                enable_events=True,
+                justification='left',
+                size=(None, 15)
+            ),
+            sg.VSeparator(),
+            sg.Listbox(
+                values=[f[1] for f in all_image_folders],
+                key='-FOLDERS-',
+                size=(40, 15),
+                enable_events=True
+            )
+        ],
+        [sg.Button('设置选中项'), sg.Button('清除选中项'), sg.Button('应用所有')]
+    ]
+
+    window = sg.Window('匹配编辑', layout, resizable=True)
+    selected_row = None  # last table row the user clicked; survives table refreshes
+
+    while True:
+        event, values = window.read()
+
+        if event in (sg.WIN_CLOSED, '应用所有'):
+            break
+
+        if event == '-TABLE-':
+            if values['-TABLE-']:
+                selected_row = values['-TABLE-'][0]
+
+        if event == '设置选中项' and selected_row is not None and values['-FOLDERS-']:
+            chosen_rel = values['-FOLDERS-'][0]  # the listbox shows relative paths
+            folder_path, folder_rel = next(f for f in all_image_folders if f[1] == chosen_rel)
+
+            matched_pairs[selected_row]['image_folder'] = {
+                "path": folder_path,
+                "name": os.path.basename(folder_path),
+                "relative_path": folder_rel
+            }
+
+            table_data[selected_row][2] = folder_rel
+            window['-TABLE-'].update(values=table_data)
+
+        if event == '清除选中项' and selected_row is not None:
+            matched_pairs[selected_row]['image_folder'] = None
+            table_data[selected_row][2] = "无匹配"
+            window['-TABLE-'].update(values=table_data)
+
+    window.close()
+    return matched_pairs
+
+
+# 帮助窗口
+def show_help_window():
+    """Show the usage guide (the help_text literal below) in a scrollable popup."""
+    help_text = """
+批量Markdown TXT转DOCX工具使用说明:
+
+1. 选择包含Markdown内容的TXT文件所在文件夹
+2. 选择图片文件夹的根目录（程序会自动查找子文件夹）
+3. 选择输出文件的保存根目录（当选择"输出到指定文件夹"时有效）
+4. 点击"扫描文件"按钮，程序会自动匹配TXT文件和图片文件夹
+5. 查看匹配结果，可点击"编辑匹配"调整匹配关系
+6. 点击"开始批量转换"生成DOCX文件
+
+支持的Markdown格式:
+- 标题：# ## ### #### ##### ######
+- 粗体：**文字** 或 __文字__
+- 斜体：*文字* 或 _文字_
+- 行内代码：`代码`
+- 代码块：```语言\\n代码\\n```
+- 删除线：~~文字~~
+- 链接：[链接文字](URL)
+- 图片：![图片描述](图片路径)
+- 无序列表：- 或 * 或 +
+- 有序列表：1. 2. 3.
+- 引用：> 引用内容
+- 表格：| 列1 | 列2 |
+- 水平分隔线：--- 或 *** 或 ___
+
+文字处理功能:
+- 转换文字顺序：将文字内容进行特定转换处理
+- 错别字处理：可以按设定强度引入常见的错别字，用于测试或特殊用途
+- 标点符号替换：将句号转换为逗号，保留文末句号
+
+输出路径选择:
+- 输出到TXT文件所在文件夹: 每个DOCX文件会直接保存在对应TXT文件所在的文件夹中
+- 输出到指定文件夹: 所有DOCX文件会直接保存在您指定的文件夹中
+
+匹配规则:
+- 完全匹配: TXT文件名（不含扩展名）与图片文件夹名完全相同
+- 前缀匹配: 图片文件夹名以前缀形式包含TXT文件名
+- 包含匹配: 图片文件夹名中包含TXT文件名
+
+转换规则:
+- 每个小标题的第一段后会插入一张图片
+- 先将Markdown格式转换为DOCX格式，再处理文字内容
+- 支持文字顺序调换、错别字处理和标点符号替换功能
+
+错别字处理说明:
+- 错误强度：控制替换比例，0.0表示不替换，1.0表示替换所有可能的字
+- 错别字库：可自定义JSON格式的错别字映射文件
+- 常见映射：的↔地↔得、在↔再、是↔事等
+"""
+    sg.popup_scrolled('使用帮助', help_text, size=(70, 25))
+
+
+# 结果窗口
+def show_results_window(results):
+    """Show the batch summary from process_batch's result dict and offer to open the main output folder."""
+    folder = results['main_output_folder']
+    folder_note = f"\n主要输出文件夹: {folder}" if folder else ""
+    if results['failed'] == 0:
+        message = f"全部成功！\n共处理 {results['total']} 个文件，全部转换成功。"
+        sg.popup('处理完成', message + folder_note)
+    else:
+        failed_text = "\n".join([f"- {item['name']}: {item['error']}" for item in results['failed_items']])
+        message = (f"处理完成！\n共处理 {results['total']} 个文件，"
+                   f"{results['success']} 个成功，{results['failed']} 个失败。\n\n"
+                   f"失败项:\n{failed_text}")
+        sg.popup_scrolled('处理完成', message + folder_note, size=(60, 20))
+
+    # Offer to open the output folder in the platform's file manager.
+    if folder and os.path.exists(folder) and sg.popup_yes_no('是否打开主要输出文件夹?') == 'Yes':
+        if sys.platform.startswith('win'):
+            os.startfile(folder)
+        else:
+            import subprocess  # local import: only this branch needs it
+            opener = 'open' if sys.platform.startswith('darwin') else 'xdg-open'
+            try:
+                subprocess.run([opener, folder], check=False)  # argv list, no shell: quotes or $ in the path are inert
+            except OSError:
+                pass  # opener not available; the folder path was already shown in the summary
+
+
+# 主界面
+def main_window():
+    """Build the main window and run its event loop until the user exits."""
+    sg.theme('BlueMono')
+    matched_pairs = []  # result of the last scan; empty until '扫描文件' succeeds
+
+    layout = [
+        [sg.Text('批量Markdown TXT转DOCX工具', font=('bold', 16))],
+        [sg.Text('（按文件名匹配TXT文件和图片文件夹，支持完整Markdown格式）', text_color='gray')],
+        [sg.HSeparator()],
+        [sg.Text('TXT文件文件夹:', size=(15, 1)),
+         sg.InputText(key='txt_folder', enable_events=True, default_text=config.last_txt_folder),
+         sg.FolderBrowse('浏览')],
+        [sg.Text('图片根文件夹:', size=(15, 1)),
+         sg.InputText(key='images_root', enable_events=True, default_text=config.last_images_root),
+         sg.FolderBrowse('浏览')],
+        [sg.Text('输出根文件夹:', size=(15, 1)),
+         sg.InputText(key='output_root', enable_events=True, default_text=config.last_output_root),
+         sg.FolderBrowse('浏览'),
+         sg.Text('(当选择"输出到指定文件夹"时有效)', text_color='gray')],
+        [sg.Button('扫描文件', size=(12, 1)),
+         sg.Button('编辑匹配', size=(12, 1), disabled=True),
+         sg.Button('转换设置', size=(12, 1)),
+         sg.Button('帮助', size=(8, 1))],
+        [sg.HSeparator()],
+        [sg.Text('匹配结果预览:', font=('bold', 10))],
+        [sg.Table(
+            values=[],
+            headings=['TXT文件名', '相对路径', '匹配的图片文件夹'],
+            key='-PREVIEW_TABLE-',
+            auto_size_columns=False,
+            col_widths=[20, 30, 30],
+            justification='left',
+            size=(None, 10)
+        )],
+        [sg.ProgressBar(100, orientation='h', size=(80, 20), key='progress_bar', visible=False)],
+        [sg.Text('状态: 就绪', key='status_text', size=(80, 1))],
+        [sg.Button('开始批量转换', size=(15, 1), disabled=True), sg.Button('退出')]
+    ]
+
+    window = sg.Window('批量Markdown TXT转DOCX工具', layout, resizable=True)
+    progress_bar = window['progress_bar']
+    status_text = window['status_text']
+    preview_table = window['-PREVIEW_TABLE-']
+    output_root_input = window['output_root']
+
+    def update_output_root_state():
+        """Enable the output-root field only when config.output_location is "custom"; grey its text otherwise."""
+        if config.output_location == "custom":
+            output_root_input.update(disabled=False)
+            output_root_input.Widget.configure(foreground='black')
+        else:
+            output_root_input.update(disabled=True)
+            output_root_input.Widget.configure(foreground='gray')
+
+    window.read(timeout=1)  # NOTE(review): presumably here so the widgets exist before they are configured - confirm
+    update_output_root_state()
+
+    while True:
+        event, values = window.read()
+
+        if event in (sg.WIN_CLOSED, '退出'):
+            if values is not None:  # values may be None once the window is closed; only persist when readable
+                config.last_txt_folder = values.get('txt_folder', '')
+                config.last_images_root = values.get('images_root', '')
+                config.last_output_root = values.get('output_root', '')
+                config.save_to_file(CONFIG_FILE_PATH)
+            break
+
+        if event == '转换设置':
+            current_output_root = values['output_root']
+            show_config_window()
+            update_output_root_state()
+            window['output_root'].update(current_output_root)  # rewrite the field text after its state was changed
+
+        if event == '帮助':
+            show_help_window()
+
+        if event == '扫描文件':
+            txt_folder = values['txt_folder']
+            images_root = values['images_root']
+
+            if not txt_folder:
+                sg.popup_error('请选择TXT文件所在的文件夹')
+                continue
+
+            if not images_root:
+                sg.popup_error('请选择图片根文件夹')
+                continue
+
+            config.last_txt_folder = txt_folder  # remember the chosen folders in the saved config
+            config.last_images_root = images_root
+            if values['output_root']:
+                config.last_output_root = values['output_root']
+            config.save_to_file(CONFIG_FILE_PATH)
+
+            try:
+                status_text.update('正在扫描TXT文件...')
+                window.refresh()
+                txt_files = FileHandler.scan_txt_files(txt_folder)
+
+                status_text.update('正在匹配图片文件夹...')
+                window.refresh()
+                matched_pairs = FileHandler.find_matching_image_folders(txt_files, images_root)
+
+                table_data = []
+                for pair in matched_pairs:
+                    img_folder = pair['image_folder']['relative_path'] if pair['image_folder'] else "无匹配"
+                    table_data.append([
+                        pair['txt']['name'],
+                        pair['txt']['relative_path'],
+                        img_folder
+                    ])
+
+                preview_table.update(values=table_data)
+                status_text.update(f'扫描完成: 找到 {len(matched_pairs)} 个TXT文件')
+
+                window['编辑匹配'].update(disabled=False)  # a scan result exists, so editing and converting are unlocked
+                window['开始批量转换'].update(disabled=False)
+
+            except Exception as e:
+                sg.popup_error(f'扫描失败: {str(e)}')
+                status_text.update('状态: 扫描失败')
+
+        if event == '编辑匹配' and matched_pairs:
+            images_root = values['images_root']
+            if not images_root:
+                sg.popup_error('请选择图片根文件夹')
+                continue
+
+            matched_pairs = show_matching_editor(matched_pairs, images_root)
+
+            table_data = []  # rebuild the preview rows from the edited pairs
+            for pair in matched_pairs:
+                img_folder = pair['image_folder']['relative_path'] if pair['image_folder'] else "无匹配"
+                table_data.append([
+                    pair['txt']['name'],
+                    pair['txt']['relative_path'],
+                    img_folder
+                ])
+
+            preview_table.update(values=table_data)
+
+        if event == '开始批量转换' and matched_pairs:
+            if config.output_location == "custom" and not values['output_root']:
+                sg.popup_error('请选择输出根文件夹（在"转换设置"中选择了"输出到指定文件夹"）')
+                continue
+
+            try:
+                progress_bar.update(0, visible=True)
+                status_text.update('开始批量转换...')
+                window.refresh()
+
+                def update_batch_progress(progress, text):
+                    progress_bar.update(progress)
+                    status_text.update(f'状态: {text}')
+                    window.refresh()  # conversion runs inside this handler, so repaint explicitly
+
+                results = BatchProcessor.process_batch(matched_pairs, values['output_root'], update_batch_progress)
+                show_results_window(results)
+                status_text.update('状态: 批量转换完成')
+
+            except Exception as e:
+                sg.popup_error(f'批量处理失败: {str(e)}')
+                status_text.update('状态: 批量转换失败')
+            finally:
+                progress_bar.update(0, visible=False)
+
+        if (event == 'txt_folder' or event == 'images_root') and values[event] and not values['output_root']:
+            default_output = values['txt_folder'] if values['txt_folder'] else values['images_root']  # prefer the TXT folder
+            window['output_root'].update(default_output)
+
+    window.close()
+
+
+# 程序入口
+if __name__ == '__main__':  # start the GUI only when run as a script, not on import
+    main_window()
\ No newline at end of file
diff --git a/data/error_chars.json b/data/error_chars.json
new file mode 100644
index 0000000..9df266a
--- /dev/null
+++ b/data/error_chars.json
@@ -0,0 +1,116 @@
+{
+  "日": "曰",
+  "木": "本",
+  "度": "渡",
+  "暴": "爆",
+  "籍": "藉",
+  "销": "消",
+  "璧": "壁",
+  "讴": "呕",
+  "勠": "戮",
+  "篡": "纂",
+  "需": "须",
+  "迄": "讫",
+  "磬": "罄",
+  "驰": "弛",
+  "拨": "拔",
+  "朴": "扑",
+  "沾": "粘",
+  "戊": "戌",
+  "崇": "祟",
+  "菅": "管",
+  "荼": "茶",
+  "灸": "炙",
+  "钓": "钧",
+  "丐": "丏",
+  "亨": "享",
+  "赢": "羸",
+  "肓": "盲",
+  "赝": "膺",
+  "掣": "擎",
+  "峰": "锋",
+  "读": "续",
+  "眯": "咪",
+  "胶": "狡",
+  "旯": "旮",
+  "奄": "掩",
+  "恃": "持",
+  "径": "胫",
+  "坝": "狈",
+  "幅": "副",
+  "颗": "棵",
+  "即": "既",
+  "俩": "两",
+  "辨": "辩",
+  "树立": "竖立",
+  "其他": "其它",
+  "截止": "截至",
+  "考查": "考察",
+  "治服": "制服",
+  "权利": "权力",
+  "申明": "声明",
+  "交代": "交待",
+  "含义": "涵义",
+  "安": "按",
+  "曝": "暴",
+  "博": "搏",
+  "灿": "粲",
+  "毫": "豪",
+  "检": "捡",
+  "骄": "娇",
+  "梁": "粱",
+  "蓬": "篷",
+  "辟": "僻",
+  "欺": "期",
+  "洽": "恰",
+
+  "皱": "邹",
+  "诸": "著",
+  "煮": "著",
+  "壮": "状",
+  "追": "摧",
+  "卓": "桌",
+  "咨": "资",
+  "滋": "磁",
+  "阻": "组",
+  "遵": "尊",
+  "的": "得",
+  "她": "他",
+  "到": "倒",
+  "倒": "到",
+  "要": "耍",
+  "说": "讲",
+  "讲": "说",
+  "做": "作",
+  "作": "做",
+  "已": "己",
+  "己": "已",
+  "以": "已",
+  "进": "近",
+  "近": "进",
+  "象": "像",
+  "像": "象",
+  "茶": "荼",
+  "孑": "孓",
+  "子": "孑",
+  "雎": "睢",
+  "汆": "氽",
+  "戍": "戌",
+  "妹": "妺",
+  "口": "囗",
+  "姬": "姫",
+  "祎": "袆",
+  "亳": "毫",
+  "汩": "汨",
+  "市": "巿",
+  "壸": "壶",
+  "祒": "袑",
+  "洗": "冼",
+  "夂": "夊",
+  "祖": "袓",
+  "芙": "褔",
+  "萬": "萭"
+}
+
+
+
diff --git a/replacestr.py b/replacestr.py
new file mode 100644
index 0000000..e35e174
--- /dev/null
+++ b/replacestr.py
@@ -0,0 +1,473 @@
+import re
+import random
+import argparse
+import sys
+import os
+from typing import List, Tuple, Optional, Dict, Any
+from pathlib import Path
+import logging
+
+
+class TextProcessor:
+    """Split text into sentences and swap one adjacent character pair in long ones.
+
+    A "sentence" is the text between two punctuation marks matched by
+    ``sentence_endings``. A sentence longer than ``min_length`` gets exactly one
+    pair of neighbouring characters exchanged; shorter ones are left untouched.
+    Counters describing the most recent ``process_text`` call live in
+    ``statistics``.
+    """
+
+    # Punctuation that delimits sentences when the caller supplies none.
+    DEFAULT_SENTENCE_ENDINGS = r'[，。！？；?!;]'
+
+    def __init__(self, min_length: int = 30, custom_punctuation: Optional[str] = None):
+        """
+        Create a processor.
+
+        Args:
+            min_length: Sentences with at most this many characters are never modified.
+            custom_punctuation: Either a plain string of delimiter characters
+                (e.g. ``"。！？"``) or a ready-made regex character class
+                (``"[...]"``). ``None`` or ``""`` selects the default set.
+        """
+        self.min_length = min_length
+        self.sentence_endings = self._build_sentence_pattern(custom_punctuation)
+        self.statistics = {
+            'total_sentences': 0,
+            'processed_sentences': 0,
+            'total_chars': 0,
+            'swapped_chars': 0
+        }
+
+        # basicConfig does nothing once the root logger already has handlers,
+        # so calling it for every instance is harmless.
+        logging.basicConfig(level=logging.INFO,
+                            format='%(asctime)s - %(levelname)s - %(message)s')
+        self.logger = logging.getLogger(__name__)
+
+    @staticmethod
+    def _build_sentence_pattern(custom_punctuation: Optional[str]) -> str:
+        """Turn the user-supplied punctuation into a regex matching ONE delimiter."""
+        if not custom_punctuation:
+            return TextProcessor.DEFAULT_SENTENCE_ENDINGS
+        if custom_punctuation.startswith('[') and custom_punctuation.endswith(']'):
+            # Already written as a character class: use it verbatim.
+            return custom_punctuation
+        # A bare string such as "。！？" lists individual marks. Used directly as
+        # a regex it would only match that exact three-character sequence, so
+        # wrap the (escaped) characters in a character class instead.
+        return '[' + re.escape(custom_punctuation) + ']'
+
+    def split_sentences(self, text: str) -> List[Tuple[str, str]]:
+        """
+        Split text at punctuation marks, keeping each mark with its sentence.
+
+        Every returned sentence also increments ``statistics['total_sentences']``.
+
+        Args:
+            text: Input text.
+
+        Returns:
+            List[Tuple[str, str]]: ``(content, punctuation)`` pairs. ``content`` is
+            stripped of surrounding whitespace; ``punctuation`` is ``''`` when the
+            piece is not followed by a delimiter.
+        """
+        if not text.strip():
+            return []
+
+        # The capturing group makes re.split keep the delimiters in its result.
+        parts = re.split(f'({self.sentence_endings})', text)
+
+        sentences = []
+        i = 0
+        while i < len(parts):
+            content = parts[i].strip()
+            if not content:
+                i += 1
+                continue
+
+            # Pair the content with the delimiter that follows it, if any.
+            if i + 1 < len(parts) and re.match(self.sentence_endings, parts[i + 1]):
+                punctuation = parts[i + 1]
+                i += 2
+            else:
+                punctuation = ''
+                i += 1
+            sentences.append((content, punctuation))
+            self.statistics['total_sentences'] += 1
+
+        return sentences
+
+    def swap_random_chars(self, sentence: str) -> str:
+        """
+        Exchange one randomly chosen pair of adjacent characters in a long sentence.
+
+        The first and the last character are never moved. Sentences that are
+        empty, at most ``min_length`` characters long, or at most 3 characters
+        long are returned unchanged.
+
+        Args:
+            sentence: Input sentence.
+
+        Returns:
+            str: The sentence with one adjacent pair swapped, or the input itself.
+        """
+        if not sentence or len(sentence) <= self.min_length or len(sentence) <= 3:
+            return sentence
+
+        chars = list(sentence)
+
+        # Candidate pairs are (1,2), (2,3), ..., (n-3,n-2); the length check
+        # above guarantees n >= 4, so this range is never empty.
+        swap_start = random.randint(1, len(chars) - 3)
+        swap_end = swap_start + 1
+        chars[swap_start], chars[swap_end] = chars[swap_end], chars[swap_start]
+
+        self.statistics['processed_sentences'] += 1
+        self.statistics['swapped_chars'] += 2
+        self.logger.debug(f"交换相邻位置 {swap_start} 和 {swap_end}，句子长度：{len(chars)}")
+
+        return ''.join(chars)
+
+    def _swap_in_segment(self, segment: str) -> str:
+        """Process the text between two delimiters, keeping its outer whitespace."""
+        core = segment.strip()
+        if not core:
+            return segment
+        self.statistics['total_sentences'] += 1
+        lead_len = len(segment) - len(segment.lstrip())
+        leading = segment[:lead_len]
+        trailing = segment[lead_len + len(core):]
+        return leading + self.swap_random_chars(core) + trailing
+
+    def _process_paragraph(self, paragraph: str) -> str:
+        """Process one line of text sentence by sentence without losing any character."""
+        pieces = []
+        cursor = 0
+        for match in re.finditer(self.sentence_endings, paragraph):
+            if match.start() == match.end():
+                # A custom pattern that can match the empty string must not split.
+                continue
+            pieces.append(self._swap_in_segment(paragraph[cursor:match.start()]))
+            pieces.append(match.group(0))
+            cursor = match.end()
+        pieces.append(self._swap_in_segment(paragraph[cursor:]))
+        return ''.join(pieces)
+
+    def process_text(self, text: str) -> str:
+        """
+        Swap one adjacent character pair in every over-long sentence of the text.
+
+        Lines are handled independently (split on ``'\\n'``). Apart from the
+        swapped pairs the output equals the input: punctuation, indentation and
+        spacing around sentences are preserved. ``statistics`` is reset at the
+        start of every call.
+
+        Args:
+            text: Input text.
+
+        Returns:
+            str: The processed text; falsy input is returned as is.
+        """
+        if not text:
+            return text
+
+        self.statistics = {
+            'total_sentences': 0,
+            'processed_sentences': 0,
+            'total_chars': len(text),
+            'swapped_chars': 0
+        }
+
+        processed_paragraphs = []
+        for paragraph in text.split('\n'):
+            if paragraph.strip():
+                processed_paragraphs.append(self._process_paragraph(paragraph))
+            else:
+                # Blank or whitespace-only lines pass through untouched.
+                processed_paragraphs.append(paragraph)
+
+        return '\n'.join(processed_paragraphs)
+
+    def get_statistics(self) -> Dict[str, Any]:
+        """Return a copy of the counters so callers cannot mutate internal state."""
+        return self.statistics.copy()
+
+    def print_statistics(self):
+        """Print the counters of the most recent run to stdout."""
+        stats = self.get_statistics()
+        print("\n" + "=" * 50)
+        print("处理统计信息：")
+        print(f"总字符数：{stats['total_chars']}")
+        print(f"总句子数：{stats['total_sentences']}")
+        print(f"处理句子数：{stats['processed_sentences']}")
+        print(f"交换字符数：{stats['swapped_chars']}")
+        if stats['total_sentences'] > 0:
+            print(f"处理率：{stats['processed_sentences'] / stats['total_sentences'] * 100:.1f}%")
+        print("=" * 50)
+
+
+class FileHandler:
+    """Static helpers for reading and writing text files."""
+
+    @staticmethod
+    def read_file(filename: str) -> str:
+        """
+        Read a text file, trying several encodings in turn.
+
+        Args:
+            filename: Path of the file to read.
+
+        Returns:
+            str: The decoded content, without a leading UTF-8 byte-order mark.
+
+        Raises:
+            FileNotFoundError: The path does not exist.
+            PermissionError: The file is not readable.
+            UnicodeDecodeError: None of the candidate encodings could decode the file.
+        """
+        if not os.path.exists(filename):
+            raise FileNotFoundError(f"文件 '{filename}' 不存在")
+
+        if not os.access(filename, os.R_OK):
+            raise PermissionError(f"没有读取文件 '{filename}' 的权限")
+
+        # 'utf-8-sig' reads plain UTF-8 as well and additionally drops a BOM,
+        # which would otherwise end up as '\ufeff' at the start of the text.
+        # 'latin-1' can decode any byte sequence, so it is the last resort.
+        encodings = ['utf-8-sig', 'gbk', 'gb2312', 'latin-1']
+
+        last_error = None
+        for encoding in encodings:
+            try:
+                with open(filename, 'r', encoding=encoding) as f:
+                    content = f.read()
+            except UnicodeDecodeError as err:
+                last_error = err
+                continue
+            logging.info(f"使用 {encoding} 编码成功读取文件：{filename}")
+            return content
+
+        # UnicodeDecodeError takes (encoding, object, start, end, reason); reuse
+        # the position data of the last failure and attach our own reason.
+        raise UnicodeDecodeError(
+            last_error.encoding, last_error.object, last_error.start, last_error.end,
+            f"无法解码文件 '{filename}'，尝试的编码格式：{encodings}")
+
+    @staticmethod
+    def write_file(filename: str, content: str, encoding: str = 'utf-8') -> None:
+        """
+        Write text to a file, creating the parent directory when needed.
+
+        Args:
+            filename: Output file path.
+            content: Text to write.
+            encoding: Encoding of the output file.
+
+        Raises:
+            PermissionError: The file cannot be written due to permissions.
+            OSError: Any other operating-system level failure while writing.
+        """
+        output_dir = os.path.dirname(filename)
+        if output_dir:
+            os.makedirs(output_dir, exist_ok=True)
+
+        try:
+            with open(filename, 'w', encoding=encoding) as f:
+                f.write(content)
+            logging.info(f"成功写入文件：{filename}")
+        except PermissionError as err:
+            # Chain the original exception so errno and traceback are not lost.
+            raise PermissionError(f"没有写入文件 '{filename}' 的权限") from err
+        except OSError as err:
+            raise OSError(f"写入文件 '{filename}' 时发生错误：{err}") from err
+
+
+def setup_argument_parser() -> argparse.ArgumentParser:
+    """Build the command-line parser.
+
+    Exactly one input source (``--file``, ``--text`` or ``--stdin``) is
+    required; all other options are optional.
+
+    Returns:
+        argparse.ArgumentParser: The configured parser.
+    """
+    parser = argparse.ArgumentParser(
+        description='文本句子字符交换处理器',
+        # Raw formatter keeps the hand-aligned example block in the epilog.
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+使用示例：
+  %(prog)s -f input.txt                    # 处理文件
+  %(prog)s -t "你的文本内容"               # 直接处理文本
+  %(prog)s -f input.txt -l 20              # 设置长度阈值为20
+  %(prog)s -f input.txt -o output.txt      # 输出到文件
+  %(prog)s -f input.txt -p "。！？" -s     # 自定义标点符号并显示统计
+        """
+    )
+
+    # Input source: the three options are mutually exclusive.
+    input_group = parser.add_mutually_exclusive_group(required=True)
+    input_group.add_argument('-f', '--file', help='输入文件路径')
+    input_group.add_argument('-t', '--text', help='直接输入文本')
+    input_group.add_argument('--stdin', action='store_true',
+                             help='从标准输入读取文本')
+
+    # Processing options.
+    parser.add_argument('-l', '--length', type=int, default=30,
+                        help='句子长度阈值（默认30）')
+    # The help text lists the full default set, including the full-width
+    # comma that the default pattern also splits on.
+    parser.add_argument('-p', '--punctuation',
+                        help='自定义标点符号（默认：，。！？；?!;）')
+    parser.add_argument('-o', '--output', help='输出文件路径')
+    parser.add_argument('-e', '--encoding', default='utf-8',
+                        help='输出文件编码（默认utf-8）')
+
+    # Miscellaneous options.
+    parser.add_argument('-s', '--statistics', action='store_true',
+                        help='显示处理统计信息')
+    parser.add_argument('-v', '--verbose', action='store_true',
+                        help='显示详细日志')
+    parser.add_argument('--seed', type=int, help='随机数种子（用于测试）')
+
+    return parser
+
+
+def main():
+    """Command-line entry point: parse arguments, process the input, emit the result.
+
+    The input comes from exactly one of ``--file``, ``--text`` or ``--stdin``.
+    The processed text is written to ``--output`` when given, otherwise it is
+    printed. Exits with status 1 on read or processing errors and with status 0
+    when the input is blank.
+    """
+    parser = setup_argument_parser()
+    args = parser.parse_args()
+
+    if args.verbose:
+        logging.getLogger().setLevel(logging.DEBUG)
+
+    # Compare against None so that an explicit ``--seed 0`` is honoured.
+    if args.seed is not None:
+        random.seed(args.seed)
+
+    # Obtain the input text.
+    try:
+        # ``is not None`` instead of truthiness: ``-t ""`` is a chosen (empty)
+        # input and must reach the "empty input" warning below rather than
+        # being reported as a missing input source.
+        if args.file is not None:
+            text = FileHandler.read_file(args.file)
+        elif args.text is not None:
+            text = args.text
+        elif args.stdin:
+            text = sys.stdin.read()
+        else:
+            print("错误：请指定输入源")
+            sys.exit(1)
+
+        if not text.strip():
+            print("警告：输入文本为空")
+            sys.exit(0)
+
+    except (OSError, UnicodeDecodeError) as e:
+        # OSError covers FileNotFoundError and PermissionError as well as other
+        # read failures such as the path being a directory.
+        print(f"错误：{e}")
+        sys.exit(1)
+
+    # Process the text and emit the result.
+    try:
+        processor = TextProcessor(
+            min_length=args.length,
+            custom_punctuation=args.punctuation
+        )
+
+        processed_text = processor.process_text(text)
+
+        if args.output:
+            FileHandler.write_file(args.output, processed_text, args.encoding)
+            print(f"处理完成，结果已保存到 '{args.output}'")
+        else:
+            print("处理结果：")
+            print("-" * 50)
+            print(processed_text)
+
+        if args.statistics:
+            processor.print_statistics()
+
+    except Exception as e:
+        # Top-level boundary of the CLI: report any failure and exit non-zero;
+        # the traceback is only shown in verbose mode.
+        print(f"处理过程中发生错误：{e}")
+        if args.verbose:
+            import traceback
+            traceback.print_exc()
+        sys.exit(1)
+
+
+# Built-in smoke tests
+def run_tests():
+    """Exercise sentence splitting and character swapping with plain asserts.
+
+    Prints a start and a success message; an ``AssertionError`` is raised by
+    the first failing check.
+    """
+    print("运行单元测试...")
+
+    checker = TextProcessor(min_length=6)
+
+    # Case 1: three sentences, each terminated by a different mark.
+    parsed = checker.split_sentences("这是第一句。这是第二句！第三句？")
+    assert len(parsed) == 3, f"期望3个句子，实际{len(parsed)}个"
+    assert parsed[0] == ("这是第一句", "。"), f"第一句解析错误：{parsed[0]}"
+
+    # Case 2: a long sentence gets exactly one adjacent pair swapped.
+    original = "这是一个很长的句子用来测试字符交换功能"
+    random.seed(42)  # fixed seed keeps the chosen position reproducible
+    swapped = checker.swap_random_chars(original)
+    assert swapped != original, "长句子应该被修改"
+    assert len(swapped) == len(original), "交换后长度应该不变"
+
+    # Only the two swapped positions may differ from the original.
+    changed_positions = [
+        before for before, after in zip(original, swapped) if before != after
+    ]
+    assert len(changed_positions) == 2, f"应该只有2个字符位置发生变化，实际{len(changed_positions)}个"
+
+    # Case 3: a short sentence is returned untouched.
+    brief = "短句"
+    assert checker.swap_random_chars(brief) == brief, "短句子不应该被修改"
+
+    # Case 4: the empty string is returned untouched.
+    assert checker.swap_random_chars("") == "", "空字符串应该保持不变"
+
+    print("✓ 所有测试通过！")
+
+
+# Library entry point used by other modules
+def replace_text(text, min_length=12):
+    """Return *text* with one adjacent character pair swapped in each long sentence.
+
+    This is a pure helper: it does not look at ``sys.argv``, never exits the
+    process and prints nothing, so it is safe to call from another program.
+
+    Args:
+        text: The text to process.
+        min_length: Sentences with at most this many characters stay unchanged.
+
+    Returns:
+        The processed text (falsy input is returned as is).
+    """
+    processor = TextProcessor(min_length)
+    return processor.process_text(text)
+
+
+# Sample article used by the demo mode below.
+SAMPLE_TEXT = """阅读此文之前，麻烦您点击一下“关注”，既方便您进行讨论和分享，又能给您带来不一样的参与感，创作不易，感谢您的支持。
+
+曾经“半路出家”，如今黯然无声，他的故事值得一品
+说起央视的主持人，大家第一反应肯定是一个个字正腔圆、形象出彩的脸孔。可是在这其中，有一位却用浓厚的潮汕口音，还有点“油滑”的幽默，自成一派。他就是阿丘。
+
+这个名字，可能在现在已经鲜有人提起，但在过去，他可是实打实的“名嘴”。不过，咱来说点耐人寻味的，他是怎么走到央视巅峰，又怎么“高台跳水”的？这故事，够扎心，更够意味深长。
+
+看似格格不入，却杀出重围
+熟悉阿丘的人，一听他那口音，就知道这是“岭南口音模块”的标配。他是个土生土长的广东人，也因为家里是军人家庭，小时候经常搬家，成了个活脱脱的语言天才，学会了好几个地方的方言。什么潮汕话、粤语、客家话，信手拈来。不得不说，小时候到处跑打下的基础，倒给他多了一点和别人不一样的“人味儿”。
+
+他大学学的专业，可和主持半毛钱关系没有，是经济学。毕业后，他分配到了南宁的棉纺印染厂，待遇不错，是个政工干部。这时候的阿丘，怎么看都是个稳稳的职场小白，可谁能想到，后来的他能走上舞台呢？
+
+90年代，相声、小品各类幽默比赛风靡全国。阿丘平时最爱的就是琢磨这些妙语段子，一心觉得自己是个“未被发现的宝藏男孩”。机会来了，1992年，他参加广西举办的笑星大赛，居然拿了个一等奖。这下可出名了，厂里人都认识他，他本人也成了“地方笑星”。
+
+再后来，他调到了广西电视台，开始主持节目。头几年波澜不惊，直到他参加《南北笑星火辣辣》，凭借风趣和机灵吸引了更多目光。2003年，这个来自地方台的主持人，直接杀进了央视主持圈。靠什么？靠他的个性和风格。
+
+从风光无限到画风突变
+阿丘进入央视后，主持了好几档节目。他的幽默和接地气，与当时一板一眼的正规主持人大不相同。因此，他迅速被贴上“个性主持”的标签。尤其是在《社会记录》里，那带点潮汕腔调的问句，竟成了一种标志。
+
+可惜，说话爽快的他，也因为“不当言论”栽了跟头。事情发生在2020年，正是全国上下齐心合力抗击疫情的时候。阿丘不知道怎么回事，在自己的博客里发了一些让人难以接受的言论。里头什么“东亚病夫”“道歉”显得格外刺眼。
+
+不得不说，这一锅凉水泼得够彻底。网友立刻开始深挖，一挖还真揭出不少黑历史。有人爆料，他婚内包养女大学生，还试图给实习机会。虽然阿丘本人否认得七七八八，但这些传闻和再度破裂的婚姻，难免让人联想。
+
+面对铺天盖地的指责，阿丘的态度是硬得离谱，一句道歉都没有。“嘴皮子”在这时候完全失灵了。要说在镜头前笑侃万事的大叔，这一次是真没能站住脚。
+
+离开央视后的低调生活
+最后，阿丘与央视长达12年的缘分彻底告一段落。此后的日子，他也算是从公众视线中消失了。最让人记得的，是他两年后现身老搭档张泉灵的节目，只是，这一次，他的亮相显得缥缈又散淡。
+
+如今阿丘的身份，更多转向了自媒体。开了个叫“阿丘观山”的账号，做起旅游文化博主。视频里，他介绍名山大川，什么五台山、武当山，天天讲人生感悟。这画风，和过去主持访谈节目的他，可真是差太远了。
+
+不少老观众打开他的账号，可能都得感叹一声“物是人非”。更有网友直言，他的语气里听到了些许“悔意”，又觉得是假装云淡风轻，实际还是难以摆脱舆论的阴影。
+
+留下的启示和争议
+阿丘的故事，是难得一见的。从地方电视台到央视舞台，他用12年时间登上顶峰，却因为12个字毁了前程。这起伏，真像一出大戏。
+
+咱们反思一下，也许有些人，天赋、机遇都抓得很精准，但言行失当，永远是会砸场子的导火索。阿丘的人生轨迹，正说明了这一点。
+
+现在问题来了，大家怎么看阿丘这个人？你是觉得他个性可惜，还是自毁前程？
+
+欢迎留言讨论，你们的每一次互动，都是创作的动力。"""
+
+
+def _run_demo():
+    """Print the sample text before and after processing, the statistics and CLI usage."""
+    print("示例演示：")
+    print("原文：")
+    print(SAMPLE_TEXT)
+    print("\n" + "=" * 50 + "\n")
+    processor = TextProcessor(12)
+    processed = processor.process_text(SAMPLE_TEXT)
+    print("处理后：")
+    print(processed)
+
+    processor.print_statistics()
+
+    print("\n使用说明：")
+    print("命令行用法：")
+    print("  python script.py -f input.txt              # 处理文件")
+    print("  python script.py -t '你的文本内容'          # 直接处理文本")
+    print("  python script.py -f input.txt -l 20        # 设置长度阈值为20")
+    print("  python script.py -f input.txt -o output.txt # 输出到文件")
+    print("  python script.py -f input.txt -p '。！？' -s # 自定义标点符号并显示统计")
+    print("  python script.py test                       # 运行单元测试")
+
+    print(processed)
+
+
+# Run only when executed as a script, so importing this module has no side
+# effects. Dispatch: "test" runs the smoke tests, any other argument goes to
+# the command-line interface, no argument shows the demo.
+if __name__ == '__main__':
+    if len(sys.argv) > 1 and sys.argv[1] == 'test':
+        run_tests()
+    elif len(sys.argv) > 1:
+        main()
+    else:
+        _run_demo()
\ No newline at end of file