import os
import json
from typing import List, Dict, Optional
from datetime import datetime

from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH

from exceptions import ExportError


class Exporter:
    """Export generated text and its source segments to Markdown or DOCX.

    All files are written into ``export_folder``. Every ``export_*`` method
    returns the path of the file it wrote and raises ``ExportError`` (chained
    to the underlying exception) when anything goes wrong.
    """

    def __init__(self, export_folder: str = "exports"):
        """Remember the target folder and create it if it does not exist.

        Args:
            export_folder: Directory that receives every exported file.
        """
        self.export_folder = export_folder
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(export_folder, exist_ok=True)

    @staticmethod
    def _default_name(prefix: str, extension: str) -> str:
        """Return ``<prefix>_<YYYYmmdd_HHMMSS><extension>`` for the current time."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        return f"{prefix}_{timestamp}{extension}"

    def _write_text(self, file_name: str, content: str) -> str:
        """Write *content* as UTF-8 into the export folder and return the path."""
        file_path = os.path.join(self.export_folder, file_name)
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(content)
        return file_path

    def _save_document(self, doc, file_name: str) -> str:
        """Save a python-docx document into the export folder and return the path."""
        file_path = os.path.join(self.export_folder, file_name)
        doc.save(file_path)
        return file_path

    def export_to_markdown(self, query: str, generated_text: str,
                           source_segments: List[Dict],
                           file_name: Optional[str] = None) -> str:
        """Export generated text and source segments to a Markdown file.

        Args:
            query: The query used to generate the text.
            generated_text: The generated text.
            source_segments: Source segments used; each item is read as
                ``segment['metadata']['file_name']``,
                ``segment['metadata']['segment_id']`` and ``segment['content']``.
            file_name: File name (including extension) inside the export
                folder. A timestamped ``.md`` name is generated when omitted
                or empty.

        Returns:
            Path to the exported file.

        Raises:
            ExportError: If building or writing the file fails (for example a
                segment lacks an expected key, or the file cannot be written).
        """
        try:
            if not file_name:
                file_name = self._default_name("生成结果", ".md")

            # Collect the pieces and join once instead of repeated `+=`.
            parts = [
                "# 文案生成结果\n\n",
                f"## 查询\n{query}\n\n",
                f"## 生成文案\n{generated_text}\n\n",
                "## 源文档片段\n\n",
            ]
            for i, segment in enumerate(source_segments, 1):
                metadata = segment['metadata']
                parts.append(f"### 片段 {i}\n")
                parts.append(f"**文件名**: {metadata['file_name']}\n\n")
                parts.append(f"**段落ID**: {metadata['segment_id']}\n\n")
                parts.append(f"**内容**:\n{segment['content']}\n\n")
                parts.append("---\n\n")

            return self._write_text(file_name, "".join(parts))
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise ExportError(f"导出Markdown文件失败: {e}") from e

    def export_to_docx(self, query: str, generated_text: str,
                       source_segments: List[Dict],
                       file_name: Optional[str] = None) -> str:
        """Export generated text and source segments to a DOCX file.

        Args:
            query: The query used to generate the text.
            generated_text: The generated text.
            source_segments: Source segments used; each item is read as
                ``segment['metadata']['file_name']``,
                ``segment['metadata']['segment_id']`` and ``segment['content']``.
            file_name: File name (including extension) inside the export
                folder. A timestamped ``.docx`` name is generated when omitted
                or empty.

        Returns:
            Path to the exported file.

        Raises:
            ExportError: If building or saving the document fails.
        """
        try:
            if not file_name:
                file_name = self._default_name("生成结果", ".docx")

            doc = Document()

            # Centered document title.
            title = doc.add_heading('文案生成结果', 0)
            title.alignment = WD_ALIGN_PARAGRAPH.CENTER

            doc.add_heading('查询', level=1)
            doc.add_paragraph(query)

            doc.add_heading('生成文案', level=1)
            doc.add_paragraph(generated_text)

            doc.add_heading('源文档片段', level=1)
            for i, segment in enumerate(source_segments, 1):
                metadata = segment['metadata']
                doc.add_heading(f'片段 {i}', level=2)
                doc.add_paragraph(f'文件名: {metadata["file_name"]}')
                doc.add_paragraph(f'段落ID: {metadata["segment_id"]}')
                doc.add_paragraph('内容:')
                doc.add_paragraph(segment['content'])
                doc.add_paragraph('---')

            return self._save_document(doc, file_name)
        except Exception as e:
            # Same contract as export_to_markdown: one exception type for
            # every export failure, with the cause preserved.
            raise ExportError(f"导出DOCX文件失败: {e}") from e

    def export_batch_to_markdown(self, results: List[Dict],
                                 batch_name: Optional[str] = None) -> str:
        """Export a batch of results to a single Markdown file.

        Args:
            results: Result dictionaries. ``query`` and ``generated_text`` are
                optional (rendered as ``N/A`` when missing); ``source_segments``
                is optional, and when present each segment is read as
                ``segment['metadata']['segment_id']``.
            batch_name: File name (including extension) inside the export
                folder. A timestamped ``.md`` name is generated when omitted
                or empty.

        Returns:
            Path to the exported file.

        Raises:
            ExportError: If building or writing the file fails.
        """
        try:
            if not batch_name:
                batch_name = self._default_name("批量生成结果", ".md")

            generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            parts = [
                "# 批量文案生成结果\n\n",
                f"## 生成时间\n{generated_at}\n\n",
                "## 结果列表\n\n",
            ]
            for i, result in enumerate(results, 1):
                parts.append(f"## 结果 {i}\n\n")
                parts.append(f"**查询**: {result.get('query', 'N/A')}\n\n")
                parts.append(
                    f"**生成文案**:\n{result.get('generated_text', 'N/A')}\n\n"
                )

                # Only the segment ids are listed in the batch report.
                source_segments = result.get('source_segments', [])
                if source_segments:
                    parts.append("**源文档片段**:\n")
                    parts.extend(
                        f"- 片段 {j}: {segment['metadata']['segment_id']}\n"
                        for j, segment in enumerate(source_segments, 1)
                    )

                parts.append("\n---\n\n")

            return self._write_text(batch_name, "".join(parts))
        except Exception as e:
            raise ExportError(f"批量导出Markdown文件失败: {e}") from e

    def export_batch_to_docx(self, results: List[Dict],
                             batch_name: Optional[str] = None) -> str:
        """Export a batch of results to a single DOCX file.

        Args:
            results: Result dictionaries. ``query`` and ``generated_text`` are
                optional (rendered as ``N/A`` when missing); ``source_segments``
                is optional, and when present each segment is read as
                ``segment['metadata']['segment_id']``.
            batch_name: File name (including extension) inside the export
                folder. A timestamped ``.docx`` name is generated when omitted
                or empty.

        Returns:
            Path to the exported file.

        Raises:
            ExportError: If building or saving the document fails.
        """
        try:
            if not batch_name:
                batch_name = self._default_name("批量生成结果", ".docx")

            doc = Document()

            # Centered document title.
            title = doc.add_heading('批量文案生成结果', 0)
            title.alignment = WD_ALIGN_PARAGRAPH.CENTER

            doc.add_heading('生成时间', level=1)
            doc.add_paragraph(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

            doc.add_heading('结果列表', level=1)
            for i, result in enumerate(results, 1):
                doc.add_heading(f'结果 {i}', level=2)
                doc.add_paragraph(f'查询: {result.get("query", "N/A")}')

                doc.add_paragraph('生成文案:')
                doc.add_paragraph(result.get('generated_text', 'N/A'))

                source_segments = result.get('source_segments', [])
                if source_segments:
                    doc.add_paragraph('源文档片段:')
                    for j, segment in enumerate(source_segments, 1):
                        paragraph = doc.add_paragraph()
                        # Bold label followed by the plain segment id.
                        paragraph.add_run(f'片段 {j}: ').bold = True
                        paragraph.add_run(
                            f'{segment["metadata"]["segment_id"]}'
                        )

                # Visual separator between results.
                doc.add_paragraph('---')

            return self._save_document(doc, batch_name)
        except Exception as e:
            raise ExportError(f"批量导出DOCX文件失败: {e}") from e