# Listing metadata carried over from the file viewer: 216 lines, 7.6 KiB, Python
import json
import os
from datetime import datetime
from typing import List, Dict, Optional

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Inches

from exceptions import ExportError


class Exporter:
    """Write generation results to Markdown or DOCX files in one folder.

    Every public ``export_*`` method returns the path of the file it wrote
    and raises ``ExportError`` (chained to the underlying exception) when
    building or saving the export fails.
    """

    def __init__(self, export_folder: str = "exports"):
        """Remember the export folder and make sure it exists.

        Args:
            export_folder: Directory that receives every exported file.
        """
        self.export_folder = export_folder
        # exist_ok=True replaces a separate os.path.exists() check, which
        # could race with another process creating the same folder.
        os.makedirs(export_folder, exist_ok=True)

    @staticmethod
    def _default_name(prefix: str, extension: str) -> str:
        """Return ``<prefix>_<YYYYmmdd_HHMMSS><extension>`` for the current local time."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        return f"{prefix}_{timestamp}{extension}"

    def _write_text(self, file_name: str, text: str) -> str:
        """Write *text* as UTF-8 into the export folder and return the file path."""
        file_path = os.path.join(self.export_folder, file_name)
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(text)
        return file_path

    def _save_document(self, doc, file_name: str) -> str:
        """Save a python-docx document into the export folder and return the file path."""
        file_path = os.path.join(self.export_folder, file_name)
        doc.save(file_path)
        return file_path

    @staticmethod
    def _new_document(title_text: str):
        """Create a DOCX document whose level-0 heading is centered."""
        doc = Document()
        title = doc.add_heading(title_text, 0)
        title.alignment = WD_ALIGN_PARAGRAPH.CENTER
        return doc

    def export_to_markdown(self, query: str, generated_text: str,
                           source_segments: List[Dict],
                           file_name: Optional[str] = None) -> str:
        """Export generated text and its source segments to a Markdown file.

        Args:
            query: The query used to generate the text.
            generated_text: The generated text.
            source_segments: Source segments used; each one is read through
                ``segment['metadata']['file_name']``,
                ``segment['metadata']['segment_id']`` and ``segment['content']``.
            file_name: Optional file name; a timestamped ``.md`` name is
                generated when it is empty or ``None``.

        Returns:
            Path to the exported file.

        Raises:
            ExportError: If building the content or writing the file fails.
        """
        try:
            if not file_name:
                file_name = self._default_name("生成结果", ".md")

            # Collect the pieces and join once instead of repeated ``+=``.
            parts = [
                "# 文案生成结果\n\n",
                f"## 查询\n{query}\n\n",
                f"## 生成文案\n{generated_text}\n\n",
                "## 源文档片段\n",
            ]
            for i, segment in enumerate(source_segments, 1):
                metadata = segment["metadata"]
                parts.append(
                    f"### 片段 {i}\n"
                    f"**文件名**: {metadata['file_name']}\n\n"
                    f"**段落ID**: {metadata['segment_id']}\n\n"
                    f"**内容**:\n{segment['content']}\n\n"
                    "---\n\n"
                )

            return self._write_text(file_name, "".join(parts))
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise ExportError(f"导出Markdown文件失败: {e}") from e

    def export_to_docx(self, query: str, generated_text: str,
                       source_segments: List[Dict],
                       file_name: Optional[str] = None) -> str:
        """Export generated text and its source segments to a DOCX file.

        Args:
            query: The query used to generate the text.
            generated_text: The generated text.
            source_segments: Source segments used; each one is read through
                ``segment['metadata']['file_name']``,
                ``segment['metadata']['segment_id']`` and ``segment['content']``.
            file_name: Optional file name; a timestamped ``.docx`` name is
                generated when it is empty or ``None``.

        Returns:
            Path to the exported file.

        Raises:
            ExportError: If building or saving the document fails.
        """
        try:
            if not file_name:
                file_name = self._default_name("生成结果", ".docx")

            doc = self._new_document('文案生成结果')

            # Query section
            doc.add_heading('查询', level=1)
            doc.add_paragraph(query)

            # Generated text section
            doc.add_heading('生成文案', level=1)
            doc.add_paragraph(generated_text)

            # Source segments section
            doc.add_heading('源文档片段', level=1)
            for i, segment in enumerate(source_segments, 1):
                metadata = segment["metadata"]
                doc.add_heading(f'片段 {i}', level=2)
                doc.add_paragraph(f'文件名: {metadata["file_name"]}')
                doc.add_paragraph(f'段落ID: {metadata["segment_id"]}')
                doc.add_paragraph('内容:')
                doc.add_paragraph(segment['content'])
                doc.add_paragraph('---')

            return self._save_document(doc, file_name)
        except Exception as e:
            # Same failure contract as export_to_markdown.
            raise ExportError(f"导出DOCX文件失败: {e}") from e

    def export_batch_to_markdown(self, results: List[Dict],
                                 batch_name: Optional[str] = None) -> str:
        """Export a batch of results to a single Markdown file.

        Args:
            results: Result dictionaries. ``query`` and ``generated_text``
                fall back to ``'N/A'`` when missing; ``source_segments`` is
                optional, and each listed segment is read through
                ``segment['metadata']['segment_id']``.
            batch_name: Optional file name; a timestamped ``.md`` name is
                generated when it is empty or ``None``.

        Returns:
            Path to the exported file.

        Raises:
            ExportError: If building the content or writing the file fails.
        """
        try:
            if not batch_name:
                batch_name = self._default_name("批量生成结果", ".md")

            generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            parts = [
                "# 批量文案生成结果\n\n",
                f"## 生成时间\n{generated_at}\n\n",
                "## 结果列表\n",
            ]
            for i, result in enumerate(results, 1):
                parts.append(f"## 结果 {i}\n\n")
                parts.append(f"**查询**: {result.get('query', 'N/A')}\n\n")
                parts.append(
                    f"**生成文案**:\n{result.get('generated_text', 'N/A')}\n\n"
                )

                # Source segments are optional per result.
                source_segments = result.get('source_segments', [])
                if source_segments:
                    parts.append("**源文档片段**:\n")
                    parts.extend(
                        f"- 片段 {j}: {segment['metadata']['segment_id']}\n"
                        for j, segment in enumerate(source_segments, 1)
                    )

                parts.append("\n---\n\n")

            return self._write_text(batch_name, "".join(parts))
        except Exception as e:
            # Same failure contract as export_to_markdown.
            raise ExportError(f"批量导出Markdown文件失败: {e}") from e

    def export_batch_to_docx(self, results: List[Dict],
                             batch_name: Optional[str] = None) -> str:
        """Export a batch of results to a single DOCX file.

        Args:
            results: Result dictionaries. ``query`` and ``generated_text``
                fall back to ``'N/A'`` when missing; ``source_segments`` is
                optional, and each listed segment is read through
                ``segment['metadata']['segment_id']``.
            batch_name: Optional file name; a timestamped ``.docx`` name is
                generated when it is empty or ``None``.

        Returns:
            Path to the exported file.

        Raises:
            ExportError: If building or saving the document fails.
        """
        try:
            if not batch_name:
                batch_name = self._default_name("批量生成结果", ".docx")

            doc = self._new_document('批量文案生成结果')

            # Generation time
            doc.add_heading('生成时间', level=1)
            doc.add_paragraph(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

            # Results list
            doc.add_heading('结果列表', level=1)
            for i, result in enumerate(results, 1):
                doc.add_heading(f'结果 {i}', level=2)
                doc.add_paragraph(f'查询: {result.get("query", "N/A")}')

                doc.add_paragraph('生成文案:')
                doc.add_paragraph(result.get('generated_text', 'N/A'))

                # Source segments are optional per result.
                source_segments = result.get('source_segments', [])
                if source_segments:
                    doc.add_paragraph('源文档片段:')
                    for j, segment in enumerate(source_segments, 1):
                        paragraph = doc.add_paragraph()
                        # Bold label followed by the plain segment id.
                        paragraph.add_run(f'片段 {j}: ').bold = True
                        paragraph.add_run(f'{segment["metadata"]["segment_id"]}')

                # Separator between results
                doc.add_paragraph('---')

            return self._save_document(doc, batch_name)
        except Exception as e:
            # Same failure contract as export_to_markdown.
            raise ExportError(f"批量导出DOCX文件失败: {e}") from e