From d3ac3238ed2374a99dd554a932f64b5559714dcb Mon Sep 17 00:00:00 2001
From: wsb1224 <shuobo1224@qq.com>
Date: Wed, 15 Oct 2025 17:54:51 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E5=8A=9F=E8=83=BD=EF=BC=9A?=
 =?UTF-8?q?=20=E6=AE=B5=E8=90=BD=E6=8E=A7=E5=88=B6=E5=8A=9F=E8=83=BD?=
 =?UTF-8?q?=EF=BC=8C=E5=8F=AF=E8=87=AA=E5=AE=9A=E4=B9=89=E6=8E=A7=E5=88=B6?=
 =?UTF-8?q?=E6=AF=8F=E4=B8=AA=E6=AE=B5=E8=90=BD=E6=9C=89=E5=A4=9A=E5=B0=91?=
 =?UTF-8?q?=E5=8F=A5=E8=AF=9D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md         | 10 +++++--
 config.py         | 47 +++++++++++++++++++++++++++++--
 gui_config.py     | 35 +++++++++++++++++++++--
 text_processor.py | 71 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 155 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 4e79276..7ea1df3 100644
--- a/README.md
+++ b/README.md
@@ -93,6 +93,7 @@ TxT2DOCX/
 - **编码检测** - 自动识别文件编码（UTF-8, GBK, GB2312等）
 - **错别字纠正** - 基于词典的智能错别字替换
 - **文本清洗** - 去除多余空白和格式化字符
+- **段落控制** - 控制每段最大句子数，自动分割过长段落
 - **Markdown解析** - 支持标题、列表、引用、代码块等
 
 ### 4. 图片处理功能
@@ -154,7 +155,7 @@ TxT2DOCX/
 
 支持的 Markdown 语法：
 
-```markdown
+```
 # 一级标题
 ## 二级标题  
 ### 三级标题
@@ -172,7 +173,7 @@ TxT2DOCX/
 
 `行内代码`
 
-```代码块```
+```代码块```
 
 ![图片](path/to/image.png)
 ```
@@ -242,6 +243,11 @@ TxT2DOCX/
 2. 在 `batch_processor.py` 中调用新功能
 3. 可选：在 GUI 中添加相应配置选项
 
+#### 段落句子数控制
+1. 在配置界面的"文字处理"选项卡中设置"每段最大句子数"
+2. 设置为0表示不限制，大于0的数值表示每段最多包含的句子数
+3. 程序会自动将超过限制的段落分割成多个段落
+
 #### 扩展图片处理
 1. 在 `image_processor.py` 中添加新的处理方法
 2. 支持新的图片格式或处理效果
diff --git a/config.py b/config.py
index f6c8a8a..40f0a7b 100644
--- a/config.py
+++ b/config.py
@@ -29,6 +29,7 @@ class Config:
         self.reverse_text_order = False  # 转换文字顺序开关
         self.replace_punctuation = False  # 是否替换标点符号
         self.add_disclaimer = False  # 是否添加免责声明
+        self.max_sentences_per_paragraph = 0  # 每段最大句子数，0表示不限制
         
         # 错别字处理配置
         self.enable_char_errors = False  # 是否启用错别字处理
@@ -88,6 +89,8 @@ class Config:
                 self.enable_char_errors = section.getboolean('enable_char_errors', self.enable_char_errors)
                 self.char_error_intensity = section.getfloat('char_error_intensity', self.char_error_intensity)
                 self.char_error_db_path = section.get('char_error_db_path', self.char_error_db_path)
+                # 新增段落句子数控制配置
+                self.max_sentences_per_paragraph = section.getint('max_sentences_per_paragraph', self.max_sentences_per_paragraph)
 
             # 加载图片处理配置
             if 'ImageProcessing' in config_parser:
@@ -145,7 +148,8 @@ class Config:
                 'add_disclaimer': str(self.add_disclaimer),
                 'enable_char_errors': str(self.enable_char_errors),
                 'char_error_intensity': str(self.char_error_intensity),
-                'char_error_db_path': self.char_error_db_path
+                'char_error_db_path': self.char_error_db_path,
+                'max_sentences_per_paragraph': str(self.max_sentences_per_paragraph)  # 新增配置项
             }
 
             # 保存图片处理配置
@@ -246,6 +250,8 @@ class Config:
             self.enable_char_errors = tp.get('enable_char_errors', self.enable_char_errors)
             self.char_error_intensity = tp.get('char_error_intensity', self.char_error_intensity)
             self.char_error_db_path = tp.get('char_error_db_path', self.char_error_db_path)
+            # 新增段落句子数控制配置
+            self.max_sentences_per_paragraph = tp.get('max_sentences_per_paragraph', self.max_sentences_per_paragraph)
 
         # 图片处理配置
         if 'image_processing' in config_dict:
@@ -268,8 +274,43 @@ class Config:
 
     def reset_to_defaults(self) -> None:
         """重置所有配置为默认值"""
-        self.__init__()
-
+        # File handling settings. NOTE(review): these defaults presumably mirror __init__ — keep both in sync.
+        self.txt_encoding = "utf-8"
+        self.match_pattern = "exact"  # exact: full match, prefix: prefix match, contains: substring match
+        self.output_location = "txt_folder"  # txt_folder or custom
+        
+        # Most recently used folder paths
+        self.last_txt_folder = ""
+        self.last_images_root = ""
+        self.last_output_root = ""
+        
+        # Text processing settings
+        self.reverse_text_order = False  # toggle: reverse text order
+        self.replace_punctuation = False  # whether to replace punctuation
+        self.add_disclaimer = False  # whether to add a disclaimer
+        self.max_sentences_per_paragraph = 0  # max sentences per paragraph; 0 means unlimited
+        
+        # Typo handling settings
+        self.enable_char_errors = False  # whether typo handling is enabled
+        self.char_error_intensity = 0.3  # typo intensity, 0.0-1.0
+        self.char_error_db_path = "data/error_chars.json"  # path to the typo dictionary
+        
+        # Image processing settings
+        self.image_sort_by = "name"  # name or time
+        self.image_resize = "none"  # none or width
+        self.image_width = 6  # inches
+        self.image_alignment = "center"  # left, center, right
+        self.image_strategy = "cycle"  # cycle, truncate, repeat_last
+        self.image_insert_position = "after_title"  # before_title, after_title (when titles exist)
+        self.image_insert_interval = 5  # without titles: insert one image every N paragraphs
+        
+        # Document format settings
+        self.line_spacing = 1.5
+        self.title_levels = 6  # maximum supported heading level
+        
+        # Layout style settings
+        self.current_style = "爆款文章风格"  # currently selected style
+        self.use_custom_style = False  # whether to use a custom style
 
 # 全局配置实例
 CONFIG_FILE_PATH = os.path.join(os.path.expanduser("~"), ".txt2md2docx.ini")
diff --git a/gui_config.py b/gui_config.py
index 5f24bb9..9922cf4 100644
--- a/gui_config.py
+++ b/gui_config.py
@@ -156,6 +156,30 @@ def _create_text_tab(parent):
     
     ttk.Separator(parent, orient='horizontal').pack(fill='x', padx=10, pady=15)
     
+    # 段落句子数控制
+    ttk.Label(parent, text='段落控制', font=('', 11, 'bold'), foreground='darkblue').pack(anchor='w', padx=10, pady=(0, 5))
+    
+    # 每段最大句子数
+    sentence_frame = ttk.Frame(parent)
+    sentence_frame.pack(fill='x', padx=10, pady=5)
+    ttk.Label(sentence_frame, text='每段最大句子数:', width=15).pack(side='left')
+    sentence_var = tk.IntVar(value=config.max_sentences_per_paragraph)
+    sentence_spin = ttk.Spinbox(sentence_frame, from_=0, to=100, textvariable=sentence_var, width=10)
+    sentence_spin.pack(side='left', padx=(0, 10))
+    ttk.Label(sentence_frame, text='(0表示不限制)').pack(side='left')
+    
+    def update_sentence_limit(*args):
+        """Copy a valid, non-negative spinbox value into the shared config."""
+        try:
+            config.max_sentences_per_paragraph = max(0, sentence_var.get())
+        except (tk.TclError, ValueError):
+            # Ignore transient invalid text (e.g. an empty field mid-edit).
+            pass
+    
+    sentence_var.trace('w', update_sentence_limit)
+    
+    ttk.Separator(parent, orient='horizontal').pack(fill='x', padx=10, pady=15)
+    
     # 免责声明
     disclaimer_var = tk.BooleanVar(value=config.add_disclaimer)
     ttk.Checkbutton(parent, text='添加免责声明', variable=disclaimer_var).pack(anchor='w', padx=10, pady=5)
@@ -177,7 +201,8 @@ def _create_text_tab(parent):
         'db_path': db_var,
         'reverse_text': reverse_var,
         'punctuation': punctuation_var,
-        'disclaimer': disclaimer_var
+        'disclaimer': disclaimer_var,
+        'max_sentences': sentence_var  # 添加返回值
     }
 
 
@@ -264,7 +289,8 @@ def _update_image_width(value):
     """更新图片宽度"""
     try:
         config.image_width = float(value)
-    except:
+    except (ValueError, tk.TclError):
+        # 如果输入无效，保持当前值不变
         pass
 
 
@@ -272,7 +298,8 @@ def _update_image_interval(value):
     """更新图片插入间隔"""
     try:
         config.image_insert_interval = int(value)
-    except:
+    except (ValueError, tk.TclError):
+        # 如果输入无效，保持当前值不变
         pass
 
 
@@ -298,6 +325,7 @@ def _reset_to_default(char_vars):
     config.image_strategy = default_config.image_strategy
     config.line_spacing = default_config.line_spacing
     config.title_levels = default_config.title_levels
+    config.max_sentences_per_paragraph = default_config.max_sentences_per_paragraph  # 添加这行
     
     # 更新界面变量
     if char_vars:
@@ -307,6 +335,7 @@ def _reset_to_default(char_vars):
         char_vars['reverse_text'].set(default_config.reverse_text_order)
         char_vars['punctuation'].set(default_config.replace_punctuation)
         char_vars['disclaimer'].set(default_config.add_disclaimer)
+        char_vars['max_sentences'].set(default_config.max_sentences_per_paragraph)  # 添加这行
     
     messagebox.showinfo('信息', '配置已重置为默认值')
 
diff --git a/text_processor.py b/text_processor.py
index eb2620a..afd9640 100644
--- a/text_processor.py
+++ b/text_processor.py
@@ -149,6 +149,10 @@ class TextProcessor:
         # 应用错别字处理
         processed_text = self.apply_char_errors(processed_text)
 
+        # 控制段落句子数
+        if config.max_sentences_per_paragraph > 0:
+            processed_text = self.limit_sentences_per_paragraph(processed_text, config.max_sentences_per_paragraph)
+
         # 最后进行标点符号替换
         if config.replace_punctuation:
             processed_text = self.replace_periods(processed_text)
@@ -285,6 +289,73 @@ class TextProcessor:
             "truncated": truncated
         }
 
+    def limit_sentences_per_paragraph(self, text: str, max_sentences: int) -> str:
+        """
+        Split paragraphs that contain more than ``max_sentences`` sentences.
+
+        Every line of ``text`` is treated as one paragraph. A sentence ends at
+        terminal punctuation; repeated terminators (e.g. "？！") and closing
+        quotes or brackets right after them stay attached to that sentence,
+        and an ASCII '.' between two digits (e.g. "3.14") is not an ending.
+
+        Args:
+            text: Input text, one paragraph per line.
+            max_sentences: Maximum sentences per paragraph; <= 0 disables.
+
+        Returns:
+            str: The text with over-long paragraphs split into several lines.
+        """
+        if not text or max_sentences <= 0:
+            return text
+
+        sentence_endings = '。！？.!?'
+        # Unambiguous closing marks that belong to the sentence they follow.
+        # Straight ASCII quotes are left out: they may also open a quotation.
+        absorbed = sentence_endings + '”’」』）)】》'
+
+        processed_paragraphs = []
+        for paragraph in text.split('\n'):
+            # Blank lines pass through unchanged.
+            if not paragraph.strip():
+                processed_paragraphs.append(paragraph)
+                continue
+
+            sentences = []
+            start = pos = 0
+            length = len(paragraph)
+            while pos < length:
+                char = paragraph[pos]
+                pos += 1
+                if char not in sentence_endings:
+                    continue
+                # A '.' between two digits is a decimal point, not an ending.
+                if (char == '.' and 1 < pos < length
+                        and paragraph[pos - 2].isdigit()
+                        and paragraph[pos].isdigit()):
+                    continue
+                # Keep repeated terminators and closing marks with the sentence.
+                while pos < length and paragraph[pos] in absorbed:
+                    pos += 1
+                sentences.append(paragraph[start:pos])
+                start = pos
+
+            # Trailing text without terminal punctuation counts as a sentence.
+            if paragraph[start:].strip():
+                sentences.append(paragraph[start:])
+
+            # Paragraphs within the limit are kept byte-for-byte.
+            if len(sentences) <= max_sentences:
+                processed_paragraphs.append(paragraph)
+                continue
+
+            # Regroup the sentences into chunks of at most max_sentences.
+            for i in range(0, len(sentences), max_sentences):
+                chunk = ''.join(sentences[i:i + max_sentences]).strip()
+                processed_paragraphs.append(chunk)
+
+        return '\n'.join(processed_paragraphs)
+        return '\n'.join(processed_paragraphs)
+
 
 # 创建全局文本处理器实例
 text_processor = TextProcessor()