654 lines
28 KiB
Python
654 lines
28 KiB
Python
"""
|
||
DOCX文档生成模块
|
||
|
||
负责将解析后的Markdown结构转换为DOCX文档,包括文本格式化、图片插入和样式设置。
|
||
"""
|
||
|
||
import os
|
||
import re
|
||
from typing import List, Dict, Any, Callable, Optional
|
||
from docx import Document
|
||
from docx.document import Document as DocxDocument
|
||
from docx.shared import Inches, Pt, RGBColor
|
||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||
from docx.enum.style import WD_STYLE_TYPE
|
||
|
||
from config import config
|
||
from text_processor import text_processor
|
||
from image_processor import ImageProcessor
|
||
from markdown_parser import MarkdownParser
|
||
from style_manager import style_manager
|
||
|
||
|
||
# Disclaimer text. _add_disclaimer() writes it into the document when
# config.add_disclaimer is set. The surrounding backticks are part of the
# literal and are passed through text_processor.process_text_content() as-is.
DISCLAIMER_TEXT = """`[免责声明]文章的时间、过程、图片均来自于网络,文章旨在传播正能量,均无低俗等不良引导,请观众勿对号入座,并上升到人身攻击等方面。观众理性看待本事件,切勿留下主观臆断的恶意评论,互联网不是法外之地。本文如若真实性存在争议、事件版权或图片侵权问题,请及时联系作者,我们将予以删除。`"""
|
||
|
||
|
||
class DocxGenerator:
|
||
"""DOCX文档生成器类"""
|
||
|
||
def __init__(self):
|
||
"""初始化DOCX生成器"""
|
||
self.temp_files = [] # 跟踪临时文件以便清理
|
||
self.current_document_style = None # 当前使用的文档样式
|
||
self.paragraph_count = 0 # 段落计数器,用于无标题文章的图片插入控制
|
||
|
||
def generate(self, sections: List[Dict[str, Any]], image_files: List[str],
|
||
output_path: str, progress_callback: Optional[Callable] = None) -> bool:
|
||
"""
|
||
生成DOCX文档
|
||
|
||
Args:
|
||
sections: 解析后的文档章节列表
|
||
image_files: 图片文件路径列表
|
||
output_path: 输出文件路径
|
||
progress_callback: 进度回调函数
|
||
|
||
Returns:
|
||
bool: 是否生成成功
|
||
|
||
Raises:
|
||
Exception: 生成失败时
|
||
"""
|
||
try:
|
||
doc = Document()
|
||
self._setup_document_styles(doc)
|
||
|
||
total_sections = len(sections)
|
||
image_index = 0
|
||
image_count = len(image_files)
|
||
|
||
for i, section in enumerate(sections):
|
||
if progress_callback:
|
||
progress = int((i / total_sections) * 100)
|
||
section_title = section['content'][:30] + "..." if len(section['content']) > 30 else section['content']
|
||
progress_callback(progress, f"处理章节: {section_title}")
|
||
|
||
# 添加章节内容
|
||
image_index = self._add_section_to_doc(doc, section, image_files, image_index, image_count, output_path)
|
||
|
||
# 添加免责声明
|
||
if config.add_disclaimer:
|
||
self._add_disclaimer(doc)
|
||
|
||
# 保存文档
|
||
doc.save(output_path)
|
||
|
||
if progress_callback:
|
||
progress_callback(100, "转换完成!")
|
||
|
||
return True
|
||
|
||
except Exception as e:
|
||
raise Exception(f"生成DOCX失败: {str(e)}")
|
||
finally:
|
||
# 清理临时文件
|
||
self._cleanup_temp_files()
|
||
|
||
def _setup_document_styles(self, doc) -> None:
|
||
"""
|
||
设置文档样式
|
||
|
||
Args:
|
||
doc: DOCX文档对象
|
||
"""
|
||
try:
|
||
# 获取当前选中的样式
|
||
current_style = style_manager.get_style(config.current_style)
|
||
if not current_style:
|
||
print(f"警告: 找不到样式 '{config.current_style}',使用默认样式")
|
||
return
|
||
|
||
self.current_document_style = current_style
|
||
print(f"应用文档样式: {current_style.name}")
|
||
|
||
except Exception as e:
|
||
print(f"设置文档样式时出错: {e}")
|
||
|
||
def _add_section_to_doc(self, doc: DocxDocument, section: Dict[str, Any],
                        image_files: List[str], image_index: int, image_count: int,
                        output_path: str) -> int:
    """
    Add one parsed section (heading, body elements, images) to the document.

    A section whose level lies in 1..config.title_levels is rendered as a
    real heading. Any other section except the '前置内容' placeholder gets
    its text as a plain paragraph styled like a heading. All elements of
    the section are then appended, with images interleaved according to
    config.image_insert_position (titled sections) or
    config.image_insert_interval (untitled sections).

    Args:
        doc: DOCX document object.
        section: Section data with 'level', 'content' and optional 'elements'.
        image_files: Image file paths.
        image_index: Index of the next image to insert.
        image_count: Total number of images.
        output_path: Output file path (used to locate temporary files).

    Returns:
        int: The updated image index.
    """
    level = section['level']
    is_titled = 0 < level <= config.title_levels
    doc_style = getattr(self, 'current_document_style', None)
    heading_styles = doc_style.heading_styles if doc_style else None
    image_position = getattr(config, 'image_insert_position', None)

    if is_titled:
        heading_text = text_processor.process_text_content(section['content'])
        para = doc.add_heading(level=level)
        para.clear()
        # Render the heading text exactly once. Previously a raw run AND the
        # inline-formatted runs were both added, duplicating the heading text.
        self._apply_inline_formatting(para, heading_text)
        heading_runs = para.runs

        if heading_styles:
            if level in heading_styles:
                heading_style = heading_styles[level]

                if heading_style.font:
                    font = heading_style.font
                    for run in heading_runs:
                        # Keep emphasis produced by inline markup; otherwise
                        # the heading style decides bold/italic.
                        keep_bold = bool(run.font.bold)
                        keep_italic = bool(run.font.italic)
                        run.font.name = font.name
                        run.font.size = Pt(font.size)
                        run.font.bold = True if keep_bold else font.bold
                        run.font.italic = True if keep_italic else font.italic
                        if font.color != "#000000":
                            run.font.color.rgb = RGBColor.from_string(font.color.replace('#', ''))

                if heading_style.paragraph:
                    para_style = heading_style.paragraph
                    if para_style.line_spacing > 0:
                        para.paragraph_format.line_spacing = para_style.line_spacing
                    if para_style.space_before > 0:
                        para.paragraph_format.space_before = Pt(para_style.space_before)
                    if para_style.space_after > 0:
                        para.paragraph_format.space_after = Pt(para_style.space_after)
                    if para_style.first_line_indent > 0:
                        # Indent is configured in characters; 12pt per character.
                        para.paragraph_format.first_line_indent = Pt(para_style.first_line_indent * 12)

                    # Unknown alignment values fall back to left alignment.
                    if para_style.alignment == "center":
                        para.alignment = WD_ALIGN_PARAGRAPH.CENTER
                    elif para_style.alignment == "right":
                        para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
                    elif para_style.alignment == "justify":
                        para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
                    else:
                        para.alignment = WD_ALIGN_PARAGRAPH.LEFT
        else:
            # No document style: size shrinks with the heading level, always bold.
            default_size = Pt(18 - level * 2 if level <= 6 else 10)
            for run in heading_runs:
                run.font.size = default_size
                run.font.bold = True

        # NOTE(review): the heading paragraph already exists at this point, so
        # with "before_title" the image lands directly after the heading, not
        # before it. Left unchanged; confirm the intended placement.
        if image_count > 0 and image_index < image_count:
            if image_position == "before_title":
                image_index = self._insert_section_image(
                    doc, image_files, image_index, image_count, output_path)

    elif section['content'] != '前置内容':
        heading_text = text_processor.process_text_content(section['content'])
        para = doc.add_paragraph()
        run = para.add_run(heading_text)

        if heading_styles:
            if level in heading_styles:
                heading_style = heading_styles[level]
                if heading_style.font:
                    run.font.name = heading_style.font.name
                    run.font.size = Pt(heading_style.font.size)
                    run.font.bold = heading_style.font.bold
                    run.font.italic = heading_style.font.italic
                    if heading_style.font.color != "#000000":
                        run.font.color.rgb = RGBColor.from_string(
                            heading_style.font.color.replace('#', ''))
        else:
            run.font.size = Pt(14)
            run.font.bold = True

        para.paragraph_format.space_after = Pt(12)

    elements = section.get('elements', [])
    if not elements:
        return image_index

    # Titled sections get at most one image, after their first non-empty
    # element. A flag is used instead of `break` so the remaining elements
    # of the section are still written to the document.
    section_image_done = False

    for element in elements:
        self._add_element_to_doc(doc, element)

        if element['type'] == 'empty':
            continue

        if is_titled:
            if image_position == "after_title" and not section_image_done:
                image_index = self._insert_section_image(
                    doc, image_files, image_index, image_count, output_path)
                section_image_done = True
        else:
            # Untitled content: insert an image every N non-empty elements.
            self.paragraph_count += 1
            if image_count > 0 and image_index < image_count:
                image_insert_interval = getattr(config, 'image_insert_interval', 5)
                # A non-positive interval disables interval-based insertion
                # instead of raising ZeroDivisionError.
                if image_insert_interval > 0 and self.paragraph_count % image_insert_interval == 0:
                    image_index = self._insert_section_image(
                        doc, image_files, image_index, image_count, output_path)

    return image_index
|
||
|
||
def _add_element_to_doc(self, doc: DocxDocument, element: Dict[str, Any]) -> None:
    """
    Append one parsed element to the document.

    Dispatches on element['type']; element types not handled below are
    ignored.

    Args:
        doc: DOCX document object.
        element: Element data; 'type' is required, while 'content',
            'language' and 'rows' are read depending on the type.
    """
    kind = element['type']
    text = text_processor.process_text_content(element.get('content', ''))

    if kind == 'paragraph':
        self._add_formatted_paragraph(doc, text)
        return

    # List items: element type -> (built-in paragraph style, list-style key).
    list_kinds = {
        'unordered_list': ('List Bullet', 'unordered'),
        'ordered_list': ('List Number', 'ordered'),
    }
    if kind in list_kinds:
        builtin_style, list_type = list_kinds[kind]
        item = doc.add_paragraph(style=builtin_style)
        self._apply_inline_formatting(item, text)
        self._apply_list_style(item, list_type)
        return

    if kind == 'blockquote':
        quote = doc.add_paragraph(style='Quote')
        self._apply_inline_formatting(quote, text)
        self._apply_quote_style(quote)
        return

    if kind == 'code_block':
        # Code is inserted from the raw content, not the processed text.
        self._add_code_block(doc, element.get('content', ''), element.get('language', ''))
        return

    if kind == 'table':
        self._add_table_to_doc(doc, element.get('rows', []))
        return

    if kind == 'horizontal_rule':
        self._add_horizontal_rule(doc)
        return

    if kind == 'empty':
        doc.add_paragraph()
|
||
|
||
def _apply_list_style(self, paragraph, list_type: str) -> None:
|
||
"""
|
||
应用列表样式到段落
|
||
|
||
Args:
|
||
paragraph: DOCX段落对象
|
||
list_type: 列表类型 ('unordered' 或 'ordered')
|
||
"""
|
||
if not (hasattr(self, 'current_document_style') and self.current_document_style):
|
||
return
|
||
|
||
list_style = None
|
||
if list_type == 'unordered' and self.current_document_style.unordered_list:
|
||
list_style = self.current_document_style.unordered_list
|
||
elif list_type == 'ordered' and self.current_document_style.ordered_list:
|
||
list_style = self.current_document_style.ordered_list
|
||
|
||
if list_style and list_style.paragraph:
|
||
if list_style.paragraph.space_before > 0:
|
||
paragraph.paragraph_format.space_before = Pt(list_style.paragraph.space_before)
|
||
if list_style.paragraph.space_after > 0:
|
||
paragraph.paragraph_format.space_after = Pt(list_style.paragraph.space_after)
|
||
|
||
def _apply_quote_style(self, paragraph) -> None:
|
||
"""
|
||
应用引用块样式到段落
|
||
|
||
Args:
|
||
paragraph: DOCX段落对象
|
||
"""
|
||
if not (hasattr(self, 'current_document_style') and self.current_document_style and self.current_document_style.quote_block):
|
||
return
|
||
|
||
quote_style = self.current_document_style.quote_block
|
||
if quote_style.paragraph:
|
||
if quote_style.paragraph.line_spacing > 0:
|
||
paragraph.paragraph_format.line_spacing = quote_style.paragraph.line_spacing
|
||
if quote_style.paragraph.space_before > 0:
|
||
paragraph.paragraph_format.space_before = Pt(quote_style.paragraph.space_before)
|
||
if quote_style.paragraph.space_after > 0:
|
||
paragraph.paragraph_format.space_after = Pt(quote_style.paragraph.space_after)
|
||
if quote_style.paragraph.first_line_indent > 0:
|
||
paragraph.paragraph_format.first_line_indent = Pt(quote_style.paragraph.first_line_indent * 12)
|
||
|
||
# 设置对齐方式
|
||
if quote_style.paragraph.alignment == "center":
|
||
paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||
elif quote_style.paragraph.alignment == "right":
|
||
paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
|
||
elif quote_style.paragraph.alignment == "justify":
|
||
paragraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
|
||
|
||
def _add_formatted_paragraph(self, doc: DocxDocument, content: str) -> None:
|
||
"""
|
||
添加带格式的段落
|
||
|
||
Args:
|
||
doc: DOCX文档对象
|
||
content: 段落内容
|
||
"""
|
||
if not content or not content.strip():
|
||
doc.add_paragraph()
|
||
return
|
||
|
||
para = doc.add_paragraph()
|
||
self._apply_inline_formatting(para, content)
|
||
|
||
# 应用样式中的段落格式
|
||
if hasattr(self, 'current_document_style') and self.current_document_style:
|
||
if self.current_document_style.body_paragraph:
|
||
body_para = self.current_document_style.body_paragraph
|
||
if body_para.line_spacing > 0:
|
||
para.paragraph_format.line_spacing = body_para.line_spacing
|
||
if body_para.space_before > 0:
|
||
para.paragraph_format.space_before = Pt(body_para.space_before)
|
||
if body_para.space_after > 0:
|
||
para.paragraph_format.space_after = Pt(body_para.space_after)
|
||
if body_para.first_line_indent > 0:
|
||
para.paragraph_format.first_line_indent = Pt(body_para.first_line_indent * 12) # 字符转磅
|
||
|
||
# 设置对齐方式
|
||
if body_para.alignment == "center":
|
||
para.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||
elif body_para.alignment == "right":
|
||
para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
|
||
elif body_para.alignment == "justify":
|
||
para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
|
||
else:
|
||
para.alignment = WD_ALIGN_PARAGRAPH.LEFT
|
||
elif config.line_spacing > 0:
|
||
para.paragraph_format.line_spacing = config.line_spacing
|
||
|
||
def _apply_inline_formatting(self, paragraph, text: str) -> None:
    """
    Append ``text`` to ``paragraph`` as runs, honouring inline markup.

    The spans reported by MarkdownParser.extract_inline_formatting() are
    walked in order. Text between spans becomes plain body-styled runs;
    each span becomes a run with its markers removed and the matching
    emphasis applied. Spans of an unknown type add no run, but the cursor
    still moves past them.

    Args:
        paragraph: DOCX paragraph object.
        text: Text to format (already processed by the caller).
    """
    def add_body_run(run_text):
        # Every run except inline code starts from the body font.
        new_run = paragraph.add_run(run_text)
        self._apply_body_font_style(new_run)
        return new_run

    spans = MarkdownParser.extract_inline_formatting(text)

    # No markup at all: the whole text is one body-styled run.
    if not spans:
        add_body_run(text)
        return

    cursor = 0

    for span in spans:
        # Plain text preceding this span.
        if span['start'] > cursor:
            add_body_run(text[cursor:span['start']])

        kind = span['type']

        if kind == 'bold':
            stripped = re.sub(r'\*\*(.+?)\*\*|__(.+?)__', r'\1\2', text[span['start']:span['end']])
            add_body_run(stripped).bold = True

        elif kind == 'italic':
            stripped = re.sub(r'(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)|_(.+?)_', r'\1\2',
                              text[span['start']:span['end']])
            add_body_run(stripped).italic = True

        elif kind == 'code':
            stripped = re.sub(r'`([^`]+)`', r'\1', text[span['start']:span['end']])
            code_run = paragraph.add_run(stripped)
            doc_style = getattr(self, 'current_document_style', None)
            # The code font of the active style wins over the built-in default.
            if doc_style and doc_style.code_block:
                code_font = doc_style.code_block.font
                if code_font:
                    code_run.font.name = code_font.name
                    code_run.font.size = Pt(code_font.size)
                    if code_font.color != "#000000":
                        code_run.font.color.rgb = RGBColor.from_string(code_font.color.replace('#', ''))
            else:
                code_run.font.name = 'Courier New'
                code_run.font.size = Pt(10)

        elif kind == 'strikethrough':
            stripped = re.sub(r'~~(.+?)~~', r'\1', text[span['start']:span['end']])
            add_body_run(stripped).font.strike = True

        elif kind == 'link':
            # Only the link text is shown, as blue underlined text.
            link_run = add_body_run(span['text'])
            link_run.font.color.rgb = RGBColor(0, 0, 255)
            link_run.underline = True

        cursor = span['end']

    # Plain text following the last span.
    if cursor < len(text):
        add_body_run(text[cursor:])
|
||
|
||
def _apply_body_font_style(self, run) -> None:
|
||
"""
|
||
应用正文字体样式到run
|
||
|
||
Args:
|
||
run: DOCX run对象
|
||
"""
|
||
if hasattr(self, 'current_document_style') and self.current_document_style and self.current_document_style.body_font:
|
||
body_font = self.current_document_style.body_font
|
||
run.font.name = body_font.name
|
||
run.font.size = Pt(body_font.size)
|
||
run.font.bold = body_font.bold
|
||
run.font.italic = body_font.italic
|
||
if body_font.color != "#000000":
|
||
run.font.color.rgb = RGBColor.from_string(body_font.color.replace('#', ''))
|
||
|
||
def _add_code_block(self, doc: DocxDocument, content: str, language: str) -> None:
    """
    Add a code block as a single 'No Spacing' paragraph.

    Args:
        doc: DOCX document object.
        content: Code text, inserted verbatim as one run.
        language: Programming language of the block (currently unused).
    """
    block = doc.add_paragraph(style='No Spacing')
    code_run = block.add_run(content)

    doc_style = getattr(self, 'current_document_style', None)
    if not (doc_style and doc_style.code_block):
        # Built-in fallback look for code.
        code_run.font.name = 'Courier New'
        code_run.font.size = Pt(10)
        block.paragraph_format.space_before = Pt(6)
        block.paragraph_format.space_after = Pt(6)
        return

    code_style = doc_style.code_block

    if code_style.font:
        font_spec = code_style.font
        code_run.font.name = font_spec.name
        code_run.font.size = Pt(font_spec.size)
        code_run.font.bold = font_spec.bold
        code_run.font.italic = font_spec.italic
        # Black is treated as "no explicit colour".
        if font_spec.color != "#000000":
            code_run.font.color.rgb = RGBColor.from_string(font_spec.color.replace('#', ''))

    if code_style.paragraph:
        spacing = code_style.paragraph
        if spacing.space_before > 0:
            block.paragraph_format.space_before = Pt(spacing.space_before)
        if spacing.space_after > 0:
            block.paragraph_format.space_after = Pt(spacing.space_after)
|
||
|
||
def _add_table_to_doc(self, doc: DocxDocument, rows: List[List[str]]) -> None:
|
||
"""
|
||
添加表格到文档
|
||
|
||
Args:
|
||
doc: DOCX文档对象
|
||
rows: 表格行数据
|
||
"""
|
||
if not rows:
|
||
return
|
||
|
||
table = doc.add_table(rows=len(rows), cols=len(rows[0]))
|
||
table.style = 'Table Grid'
|
||
|
||
for i, row_data in enumerate(rows):
|
||
row_cells = table.rows[i].cells
|
||
for j, cell_data in enumerate(row_data):
|
||
if j < len(row_cells):
|
||
processed_text = text_processor.process_text_content(cell_data)
|
||
cell_para = row_cells[j].paragraphs[0]
|
||
cell_para.clear()
|
||
run = cell_para.add_run(processed_text)
|
||
|
||
# 应用表格样式
|
||
if hasattr(self, 'current_document_style') and self.current_document_style and self.current_document_style.table_style:
|
||
table_style = self.current_document_style.table_style
|
||
if table_style.font:
|
||
run.font.name = table_style.font.name
|
||
run.font.size = Pt(table_style.font.size)
|
||
run.font.bold = table_style.font.bold
|
||
run.font.italic = table_style.font.italic
|
||
if table_style.font.color != "#000000":
|
||
run.font.color.rgb = RGBColor.from_string(table_style.font.color.replace('#', ''))
|
||
|
||
if table_style.paragraph:
|
||
para_style = table_style.paragraph
|
||
if para_style.space_before > 0:
|
||
cell_para.paragraph_format.space_before = Pt(para_style.space_before)
|
||
if para_style.space_after > 0:
|
||
cell_para.paragraph_format.space_after = Pt(para_style.space_after)
|
||
|
||
def _add_horizontal_rule(self, doc: DocxDocument) -> None:
    """
    Add a horizontal rule to the document.

    The rule is a centered paragraph holding one run of 100 underlined
    spaces.

    Args:
        doc: DOCX document object.
    """
    rule = doc.add_paragraph()
    rule.alignment = WD_ALIGN_PARAGRAPH.CENTER
    # A long run of underlined spaces serves as the line.
    line = rule.add_run(" " * 100)
    line.font.underline = True
|
||
|
||
def _insert_section_image(self, doc: DocxDocument, image_files: List[str],
|
||
image_index: int, image_count: int, output_path: str) -> int:
|
||
"""
|
||
为章节插入图片
|
||
|
||
Args:
|
||
doc: DOCX文档对象
|
||
image_files: 图片文件列表
|
||
image_index: 当前图片索引
|
||
image_count: 图片总数
|
||
output_path: 输出文件路径
|
||
|
||
Returns:
|
||
int: 更新后的图片索引
|
||
"""
|
||
if image_count > 0 and image_index < image_count:
|
||
try:
|
||
self._insert_image(doc, image_files[image_index], output_path)
|
||
image_index += 1
|
||
|
||
# 根据策略处理图片不足的情况
|
||
if image_index >= image_count:
|
||
if config.image_strategy == "cycle":
|
||
image_index = 0
|
||
elif config.image_strategy == "truncate":
|
||
image_index = image_count
|
||
# repeat_last策略:保持当前索引-1,下次还用最后一张
|
||
|
||
except Exception as e:
|
||
# 插入失败时添加错误提示
|
||
para = doc.add_paragraph()
|
||
run = para.add_run(f"[图片插入失败: {str(e)}]")
|
||
run.font.color.rgb = RGBColor(255, 0, 0) # 红色
|
||
|
||
return image_index
|
||
|
||
def _insert_image(self, doc: DocxDocument, image_path: str, output_path: str) -> None:
    """
    Insert one image into the document as its own paragraph.

    The image is first passed through
    ImageProcessor.optimize_image_for_docx() and
    ImageProcessor.process_image(). Intermediate files are written to a
    "temp_images" directory next to the output file and are registered in
    self.temp_files for later cleanup.

    Args:
        doc: DOCX document object.
        image_path: Path of the image file.
        output_path: Output file path (used to locate temporary files).

    Raises:
        Exception: If any step fails; the original error is attached as
            the exception's cause.
    """
    try:
        temp_dir = os.path.join(os.path.dirname(output_path), "temp_images")
        os.makedirs(temp_dir, exist_ok=True)
        optimized_image_path = ImageProcessor.optimize_image_for_docx(image_path, temp_dir)

        # An optimized copy written into our scratch directory is ours to
        # delete. Previously it was never registered and stayed behind after
        # every run. Files outside the scratch directory are never touched.
        if (optimized_image_path != image_path
                and os.path.dirname(os.path.abspath(optimized_image_path)) == os.path.abspath(temp_dir)
                and optimized_image_path not in self.temp_files):
            self.temp_files.append(optimized_image_path)

        # Orientation correction and size computation.
        img, width = ImageProcessor.process_image(optimized_image_path)

        if config.image_resize == "width":
            # The processed image has to be written out before it can be embedded.
            img_path = os.path.join(temp_dir, f"temp_img_{hash(image_path)}.png")
            img.save(img_path)
            self.temp_files.append(img_path)
        else:
            img_path = optimized_image_path

        # The picture lives in a run of its own paragraph so it can be aligned.
        para = doc.add_paragraph()
        run = para.runs[0] if para.runs else para.add_run()
        run.add_picture(img_path, width=Inches(width))
        para.alignment = ImageProcessor.get_image_alignment()

    except Exception as e:
        # Chain the original error so its traceback stays available.
        raise Exception(f"插入图片失败: {str(e)}") from e
|
||
|
||
def _add_disclaimer(self, doc: DocxDocument) -> None:
    """
    Append the disclaimer to the document.

    Writes a literal "---" paragraph as separator, then the processed
    DISCLAIMER_TEXT as a 10pt paragraph with single line spacing.

    Args:
        doc: DOCX document object.
    """
    doc.add_paragraph("---")

    notice = doc.add_paragraph()
    notice_run = notice.add_run(text_processor.process_text_content(DISCLAIMER_TEXT))
    notice_run.font.size = Pt(10)
    notice.paragraph_format.line_spacing = 1.0
|
||
|
||
def _cleanup_temp_files(self) -> None:
|
||
"""清理临时文件"""
|
||
for temp_file in self.temp_files:
|
||
try:
|
||
if os.path.exists(temp_file):
|
||
os.remove(temp_file)
|
||
except Exception as e:
|
||
print(f"清理临时文件失败 {temp_file}: {e}")
|
||
self.temp_files.clear()
|
||
|
||
|
||
# Module-level generator instance shared by all callers of generate() below.
docx_generator = DocxGenerator()
|
||
|
||
|
||
# Function kept for compatibility with the old module-level interface.
def generate(sections: List[Dict[str, Any]], image_files: List[str],
             output_path: str, progress_callback: Optional[Callable] = None) -> bool:
    """Generate a DOCX document via the shared generator (legacy interface)."""
    return docx_generator.generate(
        sections=sections,
        image_files=image_files,
        output_path=output_path,
        progress_callback=progress_callback,
    )