Refactor logic and optimize code
This commit is contained in:
parent
86417f4e69
commit
7ebf20ed6c
@@ -129,11 +129,18 @@ class BatchProcessor:
            # Generate the DOCX
            def update_file_progress(progress: int, text: str):
                if progress_callback:
+                    # Refine the progress calculation to avoid floating-point precision issues
+                    # Keep the current file's progress within the 0-100 range
+                    file_progress = max(0, min(100, progress))
+
-                    # Overall progress: this file's share of the total
-                    file_weight = 1.0 / total_count
-                    current_file_progress = current_index + (progress / 100.0)
-                    overall_progress = int((current_file_progress / total_count) * 100)
-                    progress_callback(overall_progress, f"{pair['txt']['name']}: {text}")
+                    if total_count > 0:
+                        # Use integer arithmetic to avoid floating-point precision issues
+                        overall_progress = (current_index * 100 + file_progress) // total_count
+                        overall_progress = max(0, min(100, overall_progress))
+                        progress_callback(int(overall_progress), f"{pair['txt']['name']}: {text}")
+                    else:
+                        progress_callback(file_progress, f"{pair['txt']['name']}: {text}")

            success = self.docx_generator.generate(sections, image_files, output_path, update_file_progress)

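Note on the hunk above: the integer formula stays exact and clamped where the old float version could drift at the boundaries. A minimal sketch to sanity-check it (hypothetical helper, same variable names as the hunk):

def overall(current_index: int, file_progress: int, total_count: int) -> int:
    # Integer arithmetic only: no rounding drift, result always lands in [0, 100]
    return max(0, min(100, (current_index * 100 + file_progress) // total_count))

assert overall(0, 0, 3) == 0      # nothing processed yet
assert overall(1, 50, 3) == 50    # halfway through file 2 of 3
assert overall(2, 100, 3) == 100  # last file finished -> exactly 100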
@@ -148,7 +155,7 @@ class BatchProcessor:
        return result

    def validate_batch_input(self, txt_folder: str, images_root: str,
-                             output_root: str = None) -> Dict[str, Any]:
+                             output_root: Optional[str] = None) -> Dict[str, Any]:
        """
        Validate the input parameters for batch processing

15 config.json
@@ -1,15 +0,0 @@
{
    "text_order_conversion": false,
    "typo_handling": false,
    "punctuation_replacement": false,
    "paragraph_formatting": false,
    "paragraph_min_length": 100,
    "paragraph_max_length": 300,
    "typo_intensity": 0.5,
    "custom_punctuation": ",。!?;?!;",
    "output_path": "",
    "use_same_folder": true,
    "last_txt_folder": "",
    "last_images_root": "",
    "last_output_root": ""
}
@@ -226,49 +226,19 @@ class DocxGenerator:
                para = doc.add_paragraph(style='List Bullet')
                self._apply_inline_formatting(para, content)
-                # Apply list styling
-                if hasattr(self, 'current_document_style') and self.current_document_style and self.current_document_style.unordered_list:
-                    list_style = self.current_document_style.unordered_list
-                    if list_style.paragraph:
-                        if list_style.paragraph.space_before > 0:
-                            para.paragraph_format.space_before = Pt(list_style.paragraph.space_before)
-                        if list_style.paragraph.space_after > 0:
-                            para.paragraph_format.space_after = Pt(list_style.paragraph.space_after)
+                self._apply_list_style(para, 'unordered')

            elif element_type == 'ordered_list':
                para = doc.add_paragraph(style='List Number')
                self._apply_inline_formatting(para, content)
-                # Apply list styling
-                if hasattr(self, 'current_document_style') and self.current_document_style and self.current_document_style.ordered_list:
-                    list_style = self.current_document_style.ordered_list
-                    if list_style.paragraph:
-                        if list_style.paragraph.space_before > 0:
-                            para.paragraph_format.space_before = Pt(list_style.paragraph.space_before)
-                        if list_style.paragraph.space_after > 0:
-                            para.paragraph_format.space_after = Pt(list_style.paragraph.space_after)
+                self._apply_list_style(para, 'ordered')

            elif element_type == 'blockquote':
                para = doc.add_paragraph(style='Quote')
                self._apply_inline_formatting(para, content)
-                # Apply quote styling
-                if hasattr(self, 'current_document_style') and self.current_document_style and self.current_document_style.quote_block:
-                    quote_style = self.current_document_style.quote_block
-                    if quote_style.paragraph:
-                        if quote_style.paragraph.line_spacing > 0:
-                            para.paragraph_format.line_spacing = quote_style.paragraph.line_spacing
-                        if quote_style.paragraph.space_before > 0:
-                            para.paragraph_format.space_before = Pt(quote_style.paragraph.space_before)
-                        if quote_style.paragraph.space_after > 0:
-                            para.paragraph_format.space_after = Pt(quote_style.paragraph.space_after)
-                        if quote_style.paragraph.first_line_indent > 0:
-                            para.paragraph_format.first_line_indent = Pt(quote_style.paragraph.first_line_indent * 12)
-
-                        # Set the alignment
-                        if quote_style.paragraph.alignment == "center":
-                            para.alignment = WD_ALIGN_PARAGRAPH.CENTER
-                        elif quote_style.paragraph.alignment == "right":
-                            para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
-                        elif quote_style.paragraph.alignment == "justify":
-                            para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
+                self._apply_quote_style(para)

            elif element_type == 'code_block':
                self._add_code_block(doc, element.get('content', ''), element.get('language', ''))
@@ -282,6 +252,58 @@ class DocxGenerator:
            elif element_type == 'empty':
                doc.add_paragraph()

+    def _apply_list_style(self, paragraph, list_type: str) -> None:
+        """
+        Apply list styling to a paragraph
+
+        Args:
+            paragraph: the DOCX paragraph object
+            list_type: the list type ('unordered' or 'ordered')
+        """
+        if not (hasattr(self, 'current_document_style') and self.current_document_style):
+            return
+
+        list_style = None
+        if list_type == 'unordered' and self.current_document_style.unordered_list:
+            list_style = self.current_document_style.unordered_list
+        elif list_type == 'ordered' and self.current_document_style.ordered_list:
+            list_style = self.current_document_style.ordered_list
+
+        if list_style and list_style.paragraph:
+            if list_style.paragraph.space_before > 0:
+                paragraph.paragraph_format.space_before = Pt(list_style.paragraph.space_before)
+            if list_style.paragraph.space_after > 0:
+                paragraph.paragraph_format.space_after = Pt(list_style.paragraph.space_after)
+
+    def _apply_quote_style(self, paragraph) -> None:
+        """
+        Apply blockquote styling to a paragraph
+
+        Args:
+            paragraph: the DOCX paragraph object
+        """
+        if not (hasattr(self, 'current_document_style') and self.current_document_style and self.current_document_style.quote_block):
+            return
+
+        quote_style = self.current_document_style.quote_block
+        if quote_style.paragraph:
+            if quote_style.paragraph.line_spacing > 0:
+                paragraph.paragraph_format.line_spacing = quote_style.paragraph.line_spacing
+            if quote_style.paragraph.space_before > 0:
+                paragraph.paragraph_format.space_before = Pt(quote_style.paragraph.space_before)
+            if quote_style.paragraph.space_after > 0:
+                paragraph.paragraph_format.space_after = Pt(quote_style.paragraph.space_after)
+            if quote_style.paragraph.first_line_indent > 0:
+                paragraph.paragraph_format.first_line_indent = Pt(quote_style.paragraph.first_line_indent * 12)
+
+            # Set the alignment
+            if quote_style.paragraph.alignment == "center":
+                paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
+            elif quote_style.paragraph.alignment == "right":
+                paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
+            elif quote_style.paragraph.alignment == "justify":
+                paragraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY

    def _add_formatted_paragraph(self, doc: DocxDocument, content: str) -> None:
        """
        Add a formatted paragraph
@@ -551,20 +573,23 @@ class DocxGenerator:
            output_path: the output file path (used for temporary files)
        """
        try:
-            # Process the image
-            img, width = ImageProcessor.process_image(image_path)
+            # Run the image through the optimization helper first
+            temp_dir = os.path.dirname(output_path)
+            optimized_image_path = ImageProcessor.optimize_image_for_docx(image_path, temp_dir)
+
+            # Process the image (orientation fix and resizing)
+            img, width = ImageProcessor.process_image(optimized_image_path)

            temp_img_path = None
            if config.image_resize == "width":
                # The resized image must be saved as a temporary file
-                temp_dir = os.path.dirname(output_path)
                os.makedirs(temp_dir, exist_ok=True)
                temp_img_path = os.path.join(temp_dir, f"temp_img_{hash(image_path)}.png")
                img.save(temp_img_path)
                self.temp_files.append(temp_img_path)
                img_path = temp_img_path
            else:
-                img_path = image_path
+                img_path = optimized_image_path if optimized_image_path != image_path else image_path

            # Create a paragraph and insert the image
            para = doc.add_paragraph()

@@ -6,7 +6,7 @@

import os
import glob
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Optional
from config import config

@@ -67,21 +67,37 @@ class FileHandler:
        if not os.path.isdir(images_root):
            raise Exception(f"图片根文件夹不存在: {images_root}")

-        # Collect all image folders
-        all_image_folders = []
+        matched_pairs = []
+
+        # Optimization: match during the walk itself instead of collecting every folder first
+        for txt in txt_files:
+            matches = []
+            txt_name = txt["name"].lower()
+
+            # Walk every subdirectory under the images root and try to match
            for root, dirs, _ in os.walk(images_root):
                for dir_name in dirs:
                    folder_path = os.path.join(root, dir_name)
-                    all_image_folders.append({
+                    folder_name = dir_name.lower()
+
+                    if config.match_pattern == "exact" and txt_name == folder_name:
+                        matches.append({
                            "path": folder_path,
                            "name": dir_name,
                            "relative_path": os.path.relpath(folder_path, images_root)
                        })
+                    elif config.match_pattern == "prefix" and folder_name.startswith(txt_name):
+                        matches.append({
+                            "path": folder_path,
+                            "name": dir_name,
+                            "relative_path": os.path.relpath(folder_path, images_root)
+                        })
+                    elif config.match_pattern == "contains" and txt_name in folder_name:
+                        matches.append({
+                            "path": folder_path,
+                            "name": dir_name,
+                            "relative_path": os.path.relpath(folder_path, images_root)
+                        })

-        matched_pairs = []
-
-        for txt in txt_files:
-            matches = FileHandler._find_matches_for_txt(txt, all_image_folders)

            if matches:
                # Pick the match with the shortest path
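The three matching modes in the rewritten loop reduce to plain string predicates; a minimal sketch of just that decision (hypothetical helper, names assumed):

def folder_matches(txt_name: str, folder_name: str, pattern: str) -> bool:
    # Both names are expected to be lower-cased already, as in the loop above
    if pattern == "exact":
        return txt_name == folder_name
    if pattern == "prefix":
        return folder_name.startswith(txt_name)
    if pattern == "contains":
        return txt_name in folder_name
    return False

assert folder_matches("chapter1", "chapter1", "exact")
assert folder_matches("chapter1", "chapter1_images", "prefix")
assert folder_matches("chapter1", "my_chapter1_pics", "contains")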
@@ -100,33 +116,6 @@ class FileHandler:

        return matched_pairs

-    @staticmethod
-    def _find_matches_for_txt(txt_info: Dict[str, str], image_folders: List[Dict[str, str]]) -> List[Dict[str, str]]:
-        """
-        Find the matching image folders for a single TXT file
-
-        Args:
-            txt_info: the TXT file record
-            image_folders: the list of all image folder records
-
-        Returns:
-            List[Dict[str, str]]: the matching image folders
-        """
-        matches = []
-        txt_name = txt_info["name"].lower()
-
-        for img_folder in image_folders:
-            folder_name = img_folder["name"].lower()
-
-            if config.match_pattern == "exact" and txt_name == folder_name:
-                matches.append(img_folder)
-            elif config.match_pattern == "prefix" and folder_name.startswith(txt_name):
-                matches.append(img_folder)
-            elif config.match_pattern == "contains" and txt_name in folder_name:
-                matches.append(img_folder)
-
-        return matches

    @staticmethod
    def get_image_files(folder_path: str) -> List[str]:
        """
@@ -142,17 +131,16 @@ class FileHandler:
            return []

        image_extensions = ['*.jpg', '*.jpeg', '*.png', '*.bmp', '*.gif', '*.webp', '*.tiff']
-        image_files = []
+        image_files = set()  # use a set to deduplicate

        for ext in image_extensions:
+            # Optimization: one glob pass per pattern, covering both letter cases
            pattern = os.path.join(folder_path, ext)
-            image_files.extend(glob.glob(pattern))
-            # Also check uppercase extensions
+            image_files.update(glob.glob(pattern))
            pattern_upper = os.path.join(folder_path, ext.upper())
-            image_files.extend(glob.glob(pattern_upper))
+            image_files.update(glob.glob(pattern_upper))

-        # Deduplicate (case-variant extensions can produce repeats)
-        image_files = list(set(image_files))
+        image_files = list(image_files)  # convert back to a list

        # Sort according to the config
        if config.image_sort_by == "name":
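Why the set matters here: on case-insensitive filesystems (Windows, default macOS), "*.jpg" and "*.JPG" can return the same file twice, and a set absorbs the duplicates as they arrive instead of deduplicating afterwards. A minimal standalone sketch of the same idea:

import glob
import os

def collect_images(folder: str, extensions: list) -> list:
    found = set()
    for ext in extensions:
        found.update(glob.glob(os.path.join(folder, ext)))
        found.update(glob.glob(os.path.join(folder, ext.upper())))
    return sorted(found)  # deterministic order before any config-based sorting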
@@ -234,7 +222,7 @@ class FileHandler:
        return output_path

    @staticmethod
-    def validate_paths(txt_folder: str, images_root: str, output_root: str = None) -> Dict[str, bool]:
+    def validate_paths(txt_folder: str, images_root: str, output_root: Optional[str] = None) -> Dict[str, bool]:
        """
        Validate that the given paths are usable

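The recurring signature change in this commit (str = None becoming Optional[str] = None) does not alter runtime behavior; it makes the implicit None default explicit so static type checkers accept it. A minimal illustration:

from typing import Optional

def validate(path: Optional[str] = None) -> bool:
    # A plain `path: str = None` default is rejected by strict checkers such as mypy
    return path is not None and path != ""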
@@ -9,12 +9,9 @@ from tkinter import ttk, filedialog, messagebox, simpledialog
from typing import Optional

from style_manager import style_manager, DocumentStyle
-from config import Config
+from config import config as config_manager
from advanced_style_editor import open_advanced_editor

-# Create a config-manager instance
-config_manager = Config()
-

def create_style_tab(parent):
    """Create the style-management tab"""
@@ -41,7 +38,8 @@ def create_style_tab(parent):

    def on_style_change(*args):
        config_manager.current_style = style_var.get()
-        config_manager.save_to_file('config.json')
+        from config import CONFIG_FILE_PATH
+        config_manager.save_to_file(CONFIG_FILE_PATH)
        _update_style_info()

    style_var.trace('w', on_style_change)

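Centralizing the save path in CONFIG_FILE_PATH avoids writing 'config.json' relative to whatever the current working directory happens to be. A plausible definition on the config side (an assumption; the actual constant lives in config.py):

import os

# Resolve the config file next to the module, independent of the CWD
CONFIG_FILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.json')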
@@ -394,6 +394,10 @@ def replace_text(text):
        run_tests()
        sys.exit(0)

+    # The actual text-processing logic
+    processor = TextProcessor(min_length=30)
+    return processor.process_text(text)
+
if __name__ == "__main__":
    # Command-line mode
    if len(sys.argv) > 1:
@@ -174,6 +174,33 @@ class StyleManager:
        styles = {}

+        # 1. Viral-article style - modeled on platforms like Zhihu and Toutiao
+        styles["爆款文章风格"] = self._create_viral_style()
+
+        # 2. WeChat Official Account style - professional new-media layout
+        styles["微信公众号风格"] = self._create_wechat_style()
+
+        # 3. Zhihu top-answer style - clear logic, well-structured
+        styles["知乎高赞回答风格"] = self._create_zhihu_style()
+
+        # 4. Xiaohongshu note style - fresh, artsy, girly
+        styles["小红书笔记风格"] = self._create_xiaohongshu_style()
+
+        # 5. Toutiao news style - information-dense, tight pacing
+        styles["今日头条新闻风格"] = self._create_toutiao_style()
+
+        # 6. Bilibili creator video-script style - relaxed, lively, youth-oriented
+        styles["B站UP主视频脚本风格"] = self._create_bilibili_style()
+
+        # 7. Enterprise WeChat group-notice style - formal and serious
+        styles["企业微信群通知风格"] = self._create_enterprise_style()
+
+        # 8. Emotional chicken-soup style - warm and healing
+        styles["情感鸡汤文风格"] = self._create_emotional_style()
+
+        return styles
+
+    def _create_viral_style(self) -> DocumentStyle:
+        """Create the viral-article style"""
        viral_style = DocumentStyle(
            name="爆款文章风格",
            description="高阅读量爆款文章风格,层次分明,吸引眼球",
@@ -223,9 +250,10 @@ class StyleManager:
            background_color="#F8F9FA"
        )

-        styles["爆款文章风格"] = viral_style
+        return viral_style

-        # 2. WeChat Official Account style - professional new-media layout
+    def _create_wechat_style(self) -> DocumentStyle:
+        """Create the WeChat Official Account style"""
        wechat_style = DocumentStyle(
            name="微信公众号风格",
            description="专业的微信公众号排版,阅读体验佳",
@@ -267,9 +295,10 @@ class StyleManager:
            border=True
        )

-        styles["微信公众号风格"] = wechat_style
+        return wechat_style

-        # 3. Zhihu top-answer style - clear logic, well-structured
+    def _create_zhihu_style(self) -> DocumentStyle:
+        """Create the Zhihu top-answer style"""
        zhihu_style = DocumentStyle(
            name="知乎高赞回答风格",
            description="逻辑清晰,层次分明,专业权威",
@@ -300,9 +329,10 @@ class StyleManager:
            line_spacing=1.3, space_before=10, space_after=8
        )

-        styles["知乎高赞回答风格"] = zhihu_style
+        return zhihu_style

-        # 4. Xiaohongshu note style - fresh, artsy, girly
+    def _create_xiaohongshu_style(self) -> DocumentStyle:
+        """Create the Xiaohongshu note style"""
        xiaohongshu_style = DocumentStyle(
            name="小红书笔记风格",
            description="清新文艺,适合生活方式类内容",
@@ -332,9 +362,10 @@ class StyleManager:
            line_spacing=1.3, space_before=8, space_after=6
        )

-        styles["小红书笔记风格"] = xiaohongshu_style
+        return xiaohongshu_style

-        # 5. Toutiao news style - information-dense, tight pacing
+    def _create_toutiao_style(self) -> DocumentStyle:
+        """Create the Toutiao news style"""
        toutiao_style = DocumentStyle(
            name="今日头条新闻风格",
            description="信息密度高,节奏紧凑,突出重点",
@@ -364,9 +395,10 @@ class StyleManager:
            line_spacing=1.3, space_before=8, space_after=6
        )

-        styles["今日头条新闻风格"] = toutiao_style
+        return toutiao_style

-        # 6. Bilibili creator video-script style - relaxed, lively, youth-oriented
+    def _create_bilibili_style(self) -> DocumentStyle:
+        """Create the Bilibili creator video-script style"""
        bilibili_style = DocumentStyle(
            name="B站UP主视频脚本风格",
            description="轻松活泼,适合年轻受众,有趣有料",
@@ -396,9 +428,10 @@ class StyleManager:
            line_spacing=1.3, space_before=8, space_after=6
        )

-        styles["B站UP主视频脚本风格"] = bilibili_style
+        return bilibili_style

-        # 7. Enterprise WeChat group-notice style - formal and serious
+    def _create_enterprise_style(self) -> DocumentStyle:
+        """Create the Enterprise WeChat group-notice style"""
        enterprise_style = DocumentStyle(
            name="企业微信群通知风格",
            description="正式严肃,信息传达清晰,商务风格",
@@ -423,9 +456,10 @@ class StyleManager:
            line_spacing=1.3, space_before=12, space_after=8
        )

-        styles["企业微信群通知风格"] = enterprise_style
+        return enterprise_style

-        # 8. Emotional chicken-soup style - warm and healing
+    def _create_emotional_style(self) -> DocumentStyle:
+        """Create the emotional chicken-soup style"""
        emotional_style = DocumentStyle(
            name="情感鸡汤文风格",
            description="温暖治愈,情感丰富,适合心灵鸡汤类内容",
@@ -460,9 +494,7 @@ class StyleManager:
            background_color="#FFF3E0"
        )

-        styles["情感鸡汤文风格"] = emotional_style
-
-        return styles
+        return emotional_style

    def _load_custom_styles(self) -> None:
        """Load custom styles"""

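The StyleManager change is a factory-method refactor: one monolithic _load_default_styles becomes a registry that delegates to per-style _create_*() builders, so each style is constructed and returned independently. A minimal sketch of the shape (hypothetical names, not the project's API):

class StyleRegistry:
    def load_defaults(self) -> dict:
        # Each builder owns exactly one style and returns it
        return {
            "style_a": self._create_style_a(),
            "style_b": self._create_style_b(),
        }

    def _create_style_a(self):
        return {"name": "style_a"}

    def _create_style_b(self):
        return {"name": "style_b"}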
438 test.py Normal file
@@ -0,0 +1,438 @@
import re
import random
from typing import List, Dict, Tuple, Optional
import jieba
import jieba.posseg as pseg


class EnhancedArticleRewriter:
    """
    Enhanced intelligent article rewriter
    Produces more natural sentence variation that follows human writing habits
    """

    def __init__(self):
        # Punctuation definitions
        self.sentence_endings = ['。', '!', '?', '…']
        self.pause_marks = [',', ';', ':', '、']

        # Sentence-length distribution (mimics human writing habits)
        self.sentence_length_distribution = {
            'short': (5, 15),        # short sentences
            'medium': (16, 30),      # medium sentences
            'long': (31, 50),        # long sentences
            'extra_long': (51, 80)   # extra-long sentences
        }

        # Connector lexicon (a richer set of connectives)
        self.connectors = {
            'sequence': ['随后', '接着', '然后', '紧接着', '继而', '进而'],
            'addition': ['并且', '同时', '此外', '另外', '再者', '况且', '而且'],
            'contrast': ['但是', '然而', '不过', '可是', '却', '反而', '相反'],
            'cause': ['因为', '由于', '因此', '所以', '故而', '从而'],
            'condition': ['如果', '假如', '倘若', '若是', '要是'],
            'concession': ['虽然', '尽管', '即使', '纵然', '固然'],
            'summary': ['总之', '综上', '总的来说', '概括地说', '简言之'],
            'example': ['比如', '例如', '譬如', '好比', '正如'],
            'emphasis': ['特别是', '尤其是', '更重要的是', '值得注意的是'],
            'explanation': ['也就是说', '换句话说', '具体来说', '准确地说']
        }

        # Sentence-pattern templates
        self.sentence_patterns = {
            'statement': ['{}'],  # declarative
            'emphasis_front': ['值得注意的是,{}', '需要强调的是,{}', '重要的是,{}'],
            'emphasis_back': ['{},这一点尤为重要', '{},这是关键所在'],
            'question_rhetorical': ['难道不是{}吗?', '{},不是吗?'],
            'parallel': ['不仅{},而且{}', '既{},又{}', '一方面{},另一方面{}'],
            'progressive': ['先是{},然后{}', '从{}到{}', '由{}发展到{}']
        }

        # Synonym / near-synonym replacement lexicon
        self.synonyms = {
            '发展': ['演进', '进步', '演变', '发展', '进化', '提升', '推进'],
            '改变': ['变化', '转变', '改变', '变革', '转换', '调整', '革新'],
            '重要': ['关键', '重要', '核心', '主要', '根本', '要紧', '关键性'],
            '影响': ['作用', '影响', '效应', '冲击', '波及', '涉及'],
            '提高': ['提升', '增强', '改善', '优化', '加强', '增进'],
            '显示': ['表明', '显示', '说明', '揭示', '体现', '反映', '展现'],
            '通过': ['利用', '运用', '借助', '凭借', '依靠', '经由'],
            '实现': ['达成', '实现', '完成', '达到', '做到', '落实'],
            '问题': ['难题', '问题', '挑战', '困难', '障碍', '瓶颈'],
            '方法': ['方式', '手段', '途径', '办法', '策略', '措施'],
            '需要': ['需要', '要求', '必须', '应该', '亟需', '急需'],
            '能够': ['能够', '可以', '能', '可', '得以', '足以'],
            '非常': ['十分', '相当', '特别', '格外', '极其', '异常', '颇为'],
            '很多': ['许多', '大量', '众多', '诸多', '不少', '大批'],
            '所有': ['全部', '一切', '所有', '整个', '全体', '各个'],
            '已经': ['已', '已经', '业已', '早已', '都已'],
            '正在': ['正', '正在', '在', '正处于', '目前正'],
            '越来越': ['日益', '愈发', '愈加', '更加', '日渐', '渐渐'],
            '不断': ['持续', '不断', '连续', '陆续', '继续', '频繁'],
            '各种': ['各类', '各种', '多种', '种种', '诸般', '多样'],
        }
    def _get_random_sentence_length_type(self) -> str:
        """Randomly pick a sentence-length type from a weighted distribution"""
        # Mimic human sentence-length habits: medium sentences dominate,
        # short and long come next, extra-long are rarest
        weights = {'short': 25, 'medium': 40, 'long': 25, 'extra_long': 10}
        types = list(weights.keys())
        probs = [weights[t] / 100 for t in types]
        return random.choices(types, weights=probs)[0]

    def _smart_split_merge_sentences(self, sentences: List[str]) -> List[str]:
        """Intelligently split and merge sentences to create a natural long/short rhythm"""
        if not sentences:
            return sentences

        result = []
        i = 0

        while i < len(sentences):
            # Pick a target sentence-length type
            target_type = self._get_random_sentence_length_type()
            min_len, max_len = self.sentence_length_distribution[target_type]

            current_sentence = sentences[i].strip()
            current_len = len(current_sentence)

            # If the current sentence is too long, try to split it
            if current_len > max_len:
                split_sentences = self._split_sentence_naturally(current_sentence, max_len)
                result.extend(split_sentences)

            # If it is too short, try merging it with the next sentence
            elif current_len < min_len and i + 1 < len(sentences):
                # Merge short sentences with 30% probability
                if random.random() < 0.3:
                    merged = self._merge_sentences(current_sentence, sentences[i + 1])
                    result.append(merged)
                    i += 1  # skip the next sentence
                else:
                    result.append(current_sentence)

            # Length is fine: keep it as-is
            else:
                result.append(current_sentence)

            i += 1

        return result
    def _split_sentence_naturally(self, sentence: str, max_length: int) -> List[str]:
        """Split a long sentence at natural points"""
        if len(sentence) <= max_length:
            return [sentence]

        # Preserve the sentence-ending punctuation
        ending = ''
        for mark in self.sentence_endings:
            if sentence.endswith(mark):
                ending = mark
                sentence = sentence[:-len(mark)]
                break

        # Prefer splitting at commas
        parts = []
        if ',' in sentence:
            segments = sentence.split(',')
            current = ""

            for i, segment in enumerate(segments):
                if not current:
                    current = segment
                elif len(current + ',' + segment) <= max_length:
                    current += ',' + segment
                else:
                    # Add an ending mark so the fragment becomes a complete sentence
                    if random.random() < 0.7:  # 70% chance of a period
                        parts.append(current + '。')
                    else:  # 30% chance of another ending
                        parts.append(current + random.choice(['。', '!', '']))
                    current = segment

            # Handle the final part
            if current:
                parts.append(current + ending)
        else:
            # No comma to split on: keep the sentence intact
            parts = [sentence + ending]

        return parts if parts else [sentence + ending]

    def _merge_sentences(self, sent1: str, sent2: str) -> str:
        """Merge two sentences intelligently"""
        # Strip the ending punctuation from the first sentence
        for mark in self.sentence_endings:
            if sent1.endswith(mark):
                sent1 = sent1[:-len(mark)]
                break

        # Choose how to join them
        merge_type = random.choice(['comma', 'connector', 'semicolon'])

        if merge_type == 'comma':
            return sent1 + ',' + sent2
        elif merge_type == 'connector':
            # Randomly pick a connector category
            conn_type = random.choice(list(self.connectors.keys()))
            connector = random.choice(self.connectors[conn_type])
            return sent1 + ',' + connector + sent2
        else:  # semicolon
            return sent1 + ';' + sent2
    def _replace_synonyms(self, text: str, intensity: float) -> str:
        """Synonym replacement"""
        words = list(jieba.cut(text))
        result = []

        for word in words:
            if word in self.synonyms and random.random() < intensity:
                # Pick a synonym (avoiding the original word)
                alternatives = [w for w in self.synonyms[word] if w != word]
                if alternatives:
                    result.append(random.choice(alternatives))
                else:
                    result.append(word)
            else:
                result.append(word)

        return ''.join(result)

    def _adjust_sentence_structure(self, sentence: str, intensity: float) -> str:
        """Adjust sentence structure so it reads more naturally"""
        if random.random() > intensity:
            return sentence

        # Preserve the sentence-ending punctuation
        ending = ''
        for mark in self.sentence_endings:
            if sentence.endswith(mark):
                ending = mark
                sentence = sentence[:-len(mark)]
                break

        # Randomly choose an adjustment type
        adjust_type = random.choice(['reorder', 'add_emphasis', 'change_pattern'])

        if adjust_type == 'reorder' and ',' in sentence:
            # Reorder the clauses
            parts = sentence.split(',')
            if len(parts) >= 2:
                # Smart reordering: not fully random, but a logical shuffle
                if len(parts) == 2:
                    # Two clauses: swap them directly
                    sentence = parts[1] + ',' + parts[0]
                else:
                    # Several clauses: move the middle one forward or back
                    mid_idx = len(parts) // 2
                    if random.random() < 0.5:
                        # middle clause to the front
                        parts = [parts[mid_idx]] + parts[:mid_idx] + parts[mid_idx + 1:]
                    else:
                        # middle clause to the back
                        parts = parts[:mid_idx] + parts[mid_idx + 1:] + [parts[mid_idx]]
                    sentence = ','.join(parts)

        elif adjust_type == 'add_emphasis':
            # Add emphasis
            if random.random() < 0.3:
                pattern = random.choice(self.sentence_patterns['emphasis_front'])
                sentence = pattern.format(sentence)
            elif random.random() < 0.3:
                pattern = random.choice(self.sentence_patterns['emphasis_back'])
                sentence = pattern.format(sentence)

        elif adjust_type == 'change_pattern':
            # Change the sentence pattern
            if ',' in sentence and random.random() < 0.4:
                parts = sentence.split(',', 1)
                if len(parts) == 2:
                    # Use a parallel or progressive pattern
                    if random.random() < 0.5:
                        sentence = f"不仅{parts[0]},而且{parts[1]}"
                    else:
                        sentence = f"{parts[0]},进而{parts[1]}"

        return sentence + ending
    def _add_natural_variations(self, sentence: str, intensity: float) -> str:
        """Add natural language variation"""
        if random.random() > intensity:
            return sentence

        variations = []

        # 20% chance to add a transition word
        if random.random() < 0.2:
            transition = random.choice(['其实', '事实上', '实际上', '确实', '显然'])
            variations.append(f"{transition},{sentence}")

        # 15% chance to add a degree adverb
        if random.random() < 0.15:
            adverb = random.choice(['更', '更加', '尤其', '特别', '格外'])
            # Naively insert the adverb before verbs such as 是, 有, 能
            for verb in ['是', '有', '能', '会', '要']:
                if verb in sentence:
                    sentence = sentence.replace(verb, f"{adverb}{verb}", 1)
                    break

        return variations[0] if variations else sentence
    def rewrite(self, text: str, config: Optional[Dict] = None) -> str:
        """
        Main entry point: rewrite an article

        Args:
            text: the input article
            config: configuration dict
                - intensity: rewrite strength, 0.0-1.0
                - preserve_meaning: whether to preserve the original meaning
                - natural_flow: whether to keep the text flowing naturally
                - vary_sentence_length: whether to vary sentence lengths
        """
        if config is None:
            config = {}

        # Set defaults
        config.setdefault('intensity', 0.6)
        config.setdefault('preserve_meaning', True)
        config.setdefault('natural_flow', True)
        config.setdefault('vary_sentence_length', True)

        intensity = config['intensity']

        # Process paragraph by paragraph
        paragraphs = [p.strip() for p in text.split('\n') if p.strip()]
        result_paragraphs = []

        for para in paragraphs:
            # Split into sentences
            sentences = self._split_sentences(para)

            # 1. Adjust sentence lengths first
            if config['vary_sentence_length']:
                sentences = self._smart_split_merge_sentences(sentences)

            # 2. Process each sentence
            processed_sentences = []
            for i, sent in enumerate(sentences):
                # Synonym replacement
                sent = self._replace_synonyms(sent, intensity * 0.5)

                # Sentence-structure adjustment
                sent = self._adjust_sentence_structure(sent, intensity * 0.7)

                # Natural variation
                sent = self._add_natural_variations(sent, intensity * 0.3)

                processed_sentences.append(sent)

            # 3. Paragraph reshaping (occasionally reorder sentences)
            if len(processed_sentences) > 3 and random.random() < intensity * 0.2:
                # ~20% chance to nudge the order (swap adjacent sentences only)
                idx = random.randint(0, len(processed_sentences) - 2)
                processed_sentences[idx], processed_sentences[idx + 1] = \
                    processed_sentences[idx + 1], processed_sentences[idx]

            result_paragraphs.append(''.join(processed_sentences))

        return '\n\n'.join(result_paragraphs)

    def _split_sentences(self, text: str) -> List[str]:
        """Improved sentence splitting"""
        # Handle several kinds of sentence-ending punctuation
        pattern = '([。!?…]+)'
        parts = re.split(pattern, text)

        sentences = []
        for i in range(0, len(parts) - 1, 2):
            if parts[i].strip():
                sentences.append(parts[i] + parts[i + 1])

        # Handle the trailing part
        if len(parts) % 2 == 1 and parts[-1].strip():
            sentences.append(parts[-1] + '。')  # append a default period

        return sentences
def demo():
    """Usage example"""
    sample_text = """
最近,晓蕾又上热搜了!

咋回事呢?原来,她和老公刘剑一起开了直播带货的副业。但特意声明:她早就离开了上海电视台的编制,也不拿电视台的工资。换句话说,现在卖东西,完全是私营业态。

这事儿一下子引爆了大家的八卦魂。毕竟,明星主持扎堆直播间,也不算新鲜事。但还是挺多人纳闷:这些当年的 "话筒头牌",是不是集体选择摆烂了?

其实,晓蕾和刘剑干脆落落大方,在直播间直接回应了这点。俩人意思很明确:“我们不是来拉低职业口碑的”。而且还耐心解释了自己转行的理由。
曾经的大佬,变成了烟火气

说到晓蕾,不了解点她背景都不好意思讨论人家。当年上视新闻部的 "当家花旦",光学历和气质,足够秒杀隔壁主持圈的八条街。而刘剑,早年可是 "台柱子",播音腔精致到令人耳膜怀孕。照理来说,这样一对,在编制铁饭碗里躺平一辈子没毛病。

可人家偏不。

晓蕾说过这样一句话:“其实,我就是个普通人。” 真的那么普通吗?她不这么说,没人敢忘了她的标杆履历啊!她离开台里后,居然一头扎进了童语言教育这个赛道,一干就是十年,让机构做到了业内小圈子的爆款水准。

而这次直播,打的商品也不混乱,主打性价比和实用属性,晓蕾每件商品还得亲测过。如果你觉得她自吹自擂,建议去看看她直播间的粉丝评论。大家的意思是:晓蕾推品 = 放心买。
刘剑这枚 “前一哥”,更狠!

说晓蕾牛,别忘了,刘剑十年前也上演了一场 “豪赌”。那个年代,辞去电视台稳定工作,和 “打水漂” 差不多。

可是刘剑敢把梭全下,为啥?因为他看中了播音考生和辅导课程的市场,那时还没有多少人扎堆干这块,他觉得这是个机会。

果然,就这么辞了职,工作的腰板从跟组织吃工资,摇身变成了名副其实的事业单位 —— 自己家老板。虽然后来也是磕磕绊绊,但终究从试验田里掘出了一片肥沃地。
主持人的 “下海”,是换方向走

有人觉得,曾经的新闻人、主持人 “跑去带货”,肯定是混不下去了。你要放在十年前,这种联想不稀奇,可现在不一样了。大环境变了,传统媒体是真的在互联网时代被打败得找不到调。

原来电视频道的观众,现在早转移到手机端,看知乎、刷短视频,甚至晚上蹲个带货直播会。你说新闻节目的高冷主播,现在换脸做带货主持,是不是 “落魄”?未必。

晓蕾夫妻这一波,实际上是转型很成功的范例。不管带啥网红货,他们俩把品质第一的逻辑摆明白了。这样的主播,不止卖产品,更卖信誉,靠着时间积攒了观众的信任。
直播间哪门子 LOW?明明是主战场

网友说得有趣:“谁嫌直播带货 LOW,谁就输定了。” 道理没跑儿,移动互联网成了咱生活重心,生意也跟着迁移。这是明显趋势,看不懂的还真不想赚钱了。

而且,做直播一点不轻松。站几个小时口播、随时照顾弹幕情绪,这比坐着念提词器辛苦多了。像晓蕾和刘剑这样的 “摸鱼资历”,能转过身来赚饭钱,这不是 “混”,是 “拼” 啊。

别说传统意义的职业崇拜消失殆尽,你觉得稳如狗的岗位,说散架就散。老一辈金饭碗情结,对于下一代新创别说香,而是种被淘汰跑赢速度内心创新积极点。

我不是电视台员工了,早就离职 10 年了。
"""

    rewriter = EnhancedArticleRewriter()

    print("=" * 60)
    print("原文:")
    print("=" * 60)
    print(sample_text)

    # Try several rewrite intensities
    for intensity in [0.3, 0.6, 0.9]:
        print(f"\n{'=' * 60}")
        print(f"改写强度: {intensity}")
        print("=" * 60)

        config = {
            'intensity': intensity,
            'preserve_meaning': True,
            'natural_flow': True,
            'vary_sentence_length': True
        }

        result = rewriter.rewrite(sample_text, config)
        print(result)

        # Sentence-length statistics
        sentences = re.split('[。!?…]+', result)
        lengths = [len(s) for s in sentences if s.strip()]
        if lengths:
            print(f"\n句子长度分布: 最短={min(lengths)}, 最长={max(lengths)}, 平均={sum(lengths) / len(lengths):.1f}")
            print(f"句子数量: {len(lengths)}")


if __name__ == '__main__':
    # Note: the jieba package must be installed
    # pip install jieba
    demo()
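test.py's rewriter is stochastic end to end (random.choices and random.random throughout), so two runs differ. Seeding the RNG first makes a run reproducible; a minimal sketch, assuming test.py is importable as a module:

import random
from test import EnhancedArticleRewriter  # assumption: test.py is on the import path

random.seed(42)  # fixed seed -> the same rewrite on every run
rewriter = EnhancedArticleRewriter()
print(rewriter.rewrite("这是一个测试句子。它会被随机改写。", {'intensity': 0.6}))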
174 test_1.py Normal file
@@ -0,0 +1,174 @@
import re
import jieba
import random
from typing import List


class HeavyHumanizer:
    """Heavy humanizing rewriter - keeps the logic, stays readable, meant to get past AI detection"""

    def __init__(self):
        jieba.initialize()
        # Human-style openers / transitions / asides
        self.openings = ['说到', '提到', '关于', '其实', '要说', '你知道吗', '顺便说']
        self.transitions = ['但是', '不过', '然而', '话说回来', '可惜的是', '偏偏']
        self.fillers = ['其实', '当然', '显然', '我觉得', '说起来', '顺便说']
        # Subjective expressions
        self.subjective = ['我认为', '我觉得', '在我看来', '就我所知', '据我了解']
        # Colloquial particles
        self.colloquial_particles = ['呢', '吧', '啊', '哦', '嘛', '哈', '呀']
        # High-risk word replacements
        self.high_risk = {
            '重要': ['关键', '核心', '主要'],
            '显著': ['明显', '突出', '很大'],
            '提升': ['提高', '增强', '改善'],
            '确保': ['保证', '做到', '维护'],
            '实施': ['执行', '开展', '推行']
        }
        # Sentence-ending punctuation
        self.sentence_endings = {'。', '!', '?', '.', '!', '?', '…', ';', ';'}
    def split_sentences(self, text: str) -> List[str]:
        """Split text at sentence-ending punctuation"""
        sentences = []
        current = ''
        for c in text:
            current += c
            if c in self.sentence_endings:
                sentences.append(current.strip())
                current = ''
        if current.strip():
            sentences.append(current.strip())
        return sentences

    def replace_high_risk_words(self, sentence: str) -> str:
        """Swap out high-risk AI-sounding words"""
        for k, v_list in self.high_risk.items():
            if k in sentence and random.random() < 0.8:
                sentence = sentence.replace(k, random.choice(v_list))
        return sentence

    def add_subjective_expressions(self, sentence: str) -> str:
        """Randomly add subjective expressions or colloquial particles"""
        if random.random() < 0.3:
            expr = random.choice(self.subjective)
            sentence = expr + ',' + sentence
        if random.random() < 0.2:
            particle = random.choice(self.colloquial_particles)
            if sentence.endswith('。'):
                sentence = sentence[:-1] + particle + '。'
        if random.random() < 0.15:
            filler = random.choice(self.fillers)
            sentence = filler + ',' + sentence
        return sentence

    def vary_sentence_length(self, sentences: List[str]) -> List[str]:
        """Shuffle sentence order or split long sentences to increase burstiness"""
        varied = []
        i = 0
        while i < len(sentences):
            s = sentences[i]
            # Split overly long sentences in moderation
            if len(s) > 50 and random.random() < 0.5:
                mid = len(s) // 2
                # Find the nearest comma before the midpoint (full- or half-width)
                comma_pos = max(s.rfind(',', 0, mid), s.rfind(',', 0, mid))
                if comma_pos > 5:
                    first = s[:comma_pos + 1].strip()
                    second = s[comma_pos + 1:].strip()
                    varied.extend([first, second])
                    i += 1
                    continue
            varied.append(s)
            i += 1
        # Shuffle the order while keeping logical blocks
        if random.random() < 0.3:
            random.shuffle(varied)
        return varied
    def create_paragraphs(self, sentences: List[str]) -> List[str]:
        """Form paragraphs from sentence lengths plus a random cut probability"""
        paragraphs = []
        current = []
        current_len = 0
        for s in sentences:
            current.append(s)
            current_len += len(s)
            if current_len > 80 and (random.random() < 0.4 or current_len > 150):
                paragraphs.append(''.join(current))
                current = []
                current_len = 0
        if current:
            paragraphs.append(''.join(current))
        return paragraphs

    def humanize_text(self, text: str) -> str:
        """Core humanizing rewrite"""
        # Clean up the text
        text = re.sub(r'\s+', '', text)
        sentences = self.split_sentences(text)

        # High-risk word replacement
        sentences = [self.replace_high_risk_words(s) for s in sentences]

        # Add subjective expressions, colloquialisms, asides
        sentences = [self.add_subjective_expressions(s) for s in sentences]

        # Increase long/short variation and burstiness
        sentences = self.vary_sentence_length(sentences)

        # Build paragraphs
        paragraphs = self.create_paragraphs(sentences)

        # Format the paragraphs
        formatted = '\n\n'.join([' ' + p for p in paragraphs])
        return formatted
# ================== Usage example ==================
if __name__ == "__main__":
    text = """
最近,晓蕾又上热搜了!

换句话说,现在卖东西,完全是私营业态。咋回事呢?但特意声明:她早就离开了上海电视台的编制,也不拿电视台的工资欸。原来,她和老公刘剑一起开了直播带货的副业。

这事儿一下子引爆了大家的八卦魂。毕竟,明星主持扎堆直播间,也不算新鲜事你说呢。

但还是挺多人纳闷:这些当年的 "话筒头牌",是不是集体选择摆烂了?

其实,晓蕾和刘剑干…俩人意思很明确:“我们不是来拉低职业口碑的”。而且还耐心解释了自己转行的理由,曾经的大佬,变成了烟火气。

说到晓蕾,不了解点她背景都不好意思讨论人家。当年上视新闻部的 "当家花旦",光学历和气质,足够秒杀隔壁主持圈的八条街。而刘剑,似乎早年可是 "台柱子",播音腔精致到令人耳膜怀孕嗯。照理来说,这样一对,在编制铁饭碗里躺平一辈子没毛病。

可人家偏不。

晓蕾说过这样一句话:“其实,我就是个普通人。” 真的那么普通吗?她不这么说,没人敢忘了她的标杆履历啊!她离开台里后,居然一头扎进了童语言教育这个赛道,一干就是十年,让机构做到了业内小圈子的爆款水准。

而这次直播,打的商品也不混乱,主打性价比和实用属性,晓蕾每件商品还得亲测过。如果你觉得她自吹自擂,建议去看看她直播间的粉丝评论。大家地意思是:晓蕾推品 = 放心买。

刘剑这枚 “前一哥”,更狠!

说晓蕾牛,看起来别忘了,刘剑十年前也上演了一场 “豪赌”。那个年代,辞去电视台稳定工作,和 “打水漂” 差不多。

可是刘剑敢把梭全下,为啥?因为他看中了播音考生和辅导课程的市场,那时还没有多少人扎堆干这块,他觉得这是个机会。

果然,就这么辞了职,工作的腰板从跟组织吃工资,摇身变成了名副其实的事业单位 —— 自己家老板。虽然后来也是磕磕绊绊,但终究从试验田里掘出了一片肥沃地。主持人的 “下海”,是换方向走。

有人觉得,曾经的新闻人、主持人 “跑去带货”,肯定是混不下去了。你要放在十年前,这种联想不稀奇,可现在不一样了。大环境变了,看起来传统媒体是真的在互联网时代被打败得找不到调。

原来电视频道的观众,现在早转移到手机端,看知乎、刷短视频,甚至晚上蹲个带货直播会。就像我说的,我认为,你说新闻节目的高冷主播,现在换脸做带货主持,是不是 “落魄”?未必。

其实,晓蕾夫妻这一波,实际上是转型很成功的范例。不管带啥网红货,可能他们俩把品质第一的逻辑摆明白了啊。这样的主播,不止卖产品,更卖信誉,靠着时间积攒了观众的信任嗯。也许,直播间哪门子 LOW?明明是主战场。

网友说得有趣:“谁嫌直播带货 LOW,谁就输定了。” 道理没跑儿,似乎移动互联网成了咱生活重心,生意也跟着迁移啊。

这是明显趋势,看不懂的还真不想赚钱了。

而且,似乎做直播一点不轻松。站几个小时口播、随时照顾弹幕情绪,这比坐着念提词器辛苦多了。其实,像晓蕾和刘剑这样的 “摸鱼资历”,能转过身来赚饭钱,这不是 “混”,是 “拼” 啊。

别说传统意义的职业崇拜消失殆尽,你觉得稳如狗的岗位,说散架就散你说呢。老一辈金饭碗情结,对于下一代新创别说香,而是种被淘汰跑赢速度内心创新积极点。

我不是电视台员工了,早就离职 10 年了。 """
    humanizer = HeavyHumanizer()
    result = humanizer.humanize_text(text)
    print(result)
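HeavyHumanizer.split_sentences walks the string character by character; near-equivalent behavior can come from a regex split that keeps the terminators attached (it groups runs of terminators, unlike the char-walk). A minimal sketch (hypothetical helper):

import re

def split_sentences_re(text: str) -> list:
    # Capture terminator runs so they stay glued to their sentence
    parts = re.split(r'([。!?.!?…;;]+)', text)
    sentences = [a + b for a, b in zip(parts[0::2], parts[1::2]) if (a + b).strip()]
    if len(parts) % 2 == 1 and parts[-1].strip():
        sentences.append(parts[-1].strip())
    return sentences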
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1 +0,0 @@
这是一个测试文本。它包含多个句子。每个句子都很短。但是我们需要测试分段排版功能。当文本长度超过最小段落长度时。应该被分割成多个段落。这样可以提高文档的可读性。让内容更加清晰易懂。
@@ -1,35 +0,0 @@
#!/usr/bin/env python3
"""Tests for the paragraph-splitting feature"""

import sys
import os
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from text_splitter import TextSplitter

# Test text
test_text = """这是一个测试文本。它包含多个句子。每个句子都很短。但是我们需要测试分段排版功能。
当文本长度超过最小段落长度时。应该被分割成多个段落。这样可以提高文档的可读性。
让内容更加清晰易懂。"""

def test_text_splitting():
    print("=== 测试分段排版功能 ===")
    print(f"原始文本长度: {len(test_text)} 字符")
    print(f"原始文本: {test_text}")
    print()

    # Create the splitter
    splitter = TextSplitter(min_length=50, max_length=200)

    # Split
    paragraphs = splitter.split_text(test_text)

    print(f"分段结果 ({len(paragraphs)} 个段落):")
    for i, paragraph in enumerate(paragraphs, 1):
        print(f"段落 {i} ({len(paragraph)} 字符): {paragraph}")

    print()
    print("=== 测试完成 ===")

if __name__ == "__main__":
    test_text_splitting()
140 text_splitter.py
@@ -1,140 +0,0 @@
import re

class TextSplitter:
    def __init__(self, min_length=100, max_length=300):
        """
        Initialize the text splitter
        :param min_length: minimum target paragraph length
        :param max_length: maximum target paragraph length
        """
        self.min_length = min_length
        self.max_length = max_length
        # Regex matching the punctuation used as split points (Chinese and English marks)
        # These marks usually terminate a complete sentence
        self.sentence_ending_punct = re.compile(r'([。?!.!?])')
    def split_text(self, text):
        """
        Split text into paragraphs that satisfy the length bounds, using punctuation only
        :param text: the raw text to split
        :return: the list of resulting paragraphs
        """
        if not text:
            return []

        # Measure the original text length
        original_length = len(text)
        print(f"原始文本长度: {original_length} 字符")

        # If the text is already within the minimum length, return it as a single paragraph
        if original_length <= self.min_length:
            return [text.strip()]

        # Split into complete sentences (keeping the punctuation)
        parts = self.sentence_ending_punct.split(text)
        sentences = []

        # Recombine so each mark stays attached to the text before it
        for i in range(0, len(parts)-1, 2):
            sentence = (parts[i] + parts[i+1]).strip()
            if sentence:  # skip empty sentences
                sentences.append(sentence)

        # No punctuation found: treat the whole text as one paragraph
        if not sentences:
            return [text.strip()]

        # Merge sentences into paragraphs, keeping them inside the length bounds
        paragraphs = []
        current_paragraph = ""

        for sentence in sentences:
            # Tentatively append the current sentence
            temp = current_paragraph + (" " if current_paragraph else "") + sentence

            # Would appending exceed the maximum length?
            if len(temp) > self.max_length:
                # Flush the current paragraph first if it is non-empty
                if current_paragraph:
                    paragraphs.append(current_paragraph)
                    current_paragraph = sentence
                else:
                    # A single sentence longer than max_length is accepted as-is (never split a sentence)
                    paragraphs.append(sentence)
                    current_paragraph = ""
            else:
                current_paragraph = temp

        # Append the final paragraph
        if current_paragraph:
            paragraphs.append(current_paragraph)

        # Merge any paragraph shorter than min_length into the next one
        i = 0
        while i < len(paragraphs) - 1:
            if len(paragraphs[i]) < self.min_length:
                # Merge this paragraph with the next
                paragraphs[i] = paragraphs[i] + " " + paragraphs[i+1]
                del paragraphs[i+1]
            else:
                i += 1

        print(f"分割后段落数量: {len(paragraphs)}")
        return paragraphs
# Usage example
if __name__ == "__main__":
    # Sample text
    sample_text = """
最近,晓蕾又上热搜了!

咋回事呢?原来,她和老公刘剑一起开了直播带货的副业。但特意声明:她早就离开了上海电视台的编制,也不拿电视台的工资。换句话说,现在卖东西,完全是私营业态。

这事儿一下子引爆了大家的八卦魂。毕竟,明星主持扎堆直播间,也不算新鲜事。但还是挺多人纳闷:这些当年的 "话筒头牌",是不是集体选择摆烂了?

其实,晓蕾和刘剑干脆落落大方,在直播间直接回应了这点。俩人意思很明确:“我们不是来拉低职业口碑的”。而且还耐心解释了自己转行的理由。
曾经的大佬,变成了烟火气

说到晓蕾,不了解点她背景都不好意思讨论人家。当年上视新闻部的 "当家花旦",光学历和气质,足够秒杀隔壁主持圈的八条街。而刘剑,早年可是 "台柱子",播音腔精致到令人耳膜怀孕。照理来说,这样一对,在编制铁饭碗里躺平一辈子没毛病。

可人家偏不。

晓蕾说过这样一句话:“其实,我就是个普通人。” 真的那么普通吗?她不这么说,没人敢忘了她的标杆履历啊!她离开台里后,居然一头扎进了童语言教育这个赛道,一干就是十年,让机构做到了业内小圈子的爆款水准。

而这次直播,打的商品也不混乱,主打性价比和实用属性,晓蕾每件商品还得亲测过。如果你觉得她自吹自擂,建议去看看她直播间的粉丝评论。大家的意思是:晓蕾推品 = 放心买。
刘剑这枚 “前一哥”,更狠!

说晓蕾牛,别忘了,刘剑十年前也上演了一场 “豪赌”。那个年代,辞去电视台稳定工作,和 “打水漂” 差不多。

可是刘剑敢把梭全下,为啥?因为他看中了播音考生和辅导课程的市场,那时还没有多少人扎堆干这块,他觉得这是个机会。

果然,就这么辞了职,工作的腰板从跟组织吃工资,摇身变成了名副其实的事业单位 —— 自己家老板。虽然后来也是磕磕绊绊,但终究从试验田里掘出了一片肥沃地。
主持人的 “下海”,是换方向走

有人觉得,曾经的新闻人、主持人 “跑去带货”,肯定是混不下去了。你要放在十年前,这种联想不稀奇,可现在不一样了。大环境变了,传统媒体是真的在互联网时代被打败得找不到调。

原来电视频道的观众,现在早转移到手机端,看知乎、刷短视频,甚至晚上蹲个带货直播会。你说新闻节目的高冷主播,现在换脸做带货主持,是不是 “落魄”?未必。

晓蕾夫妻这一波,实际上是转型很成功的范例。不管带啥网红货,他们俩把品质第一的逻辑摆明白了。这样的主播,不止卖产品,更卖信誉,靠着时间积攒了观众的信任。
直播间哪门子 LOW?明明是主战场

网友说得有趣:“谁嫌直播带货 LOW,谁就输定了。” 道理没跑儿,移动互联网成了咱生活重心,生意也跟着迁移。这是明显趋势,看不懂的还真不想赚钱了。

而且,做直播一点不轻松。站几个小时口播、随时照顾弹幕情绪,这比坐着念提词器辛苦多了。像晓蕾和刘剑这样的 “摸鱼资历”,能转过身来赚饭钱,这不是 “混”,是 “拼” 啊。

别说传统意义的职业崇拜消失殆尽,你觉得稳如狗的岗位,说散架就散。老一辈金饭碗情结,对于下一代新创别说香,而是种被淘汰跑赢速度内心创新积极点。

我不是电视台员工了,早就离职 10 年了。"""

    # Create a splitter instance with the target paragraph-length range
    splitter = TextSplitter(min_length=10, max_length=20)

    # Split the text
    paragraphs = splitter.split_text(sample_text)

    # Print the results
    print("\n分割结果:")
    for i, para in enumerate(paragraphs, 1):
        print(para)