nodebookls/exporter.py
2025-10-29 13:56:24 +08:00

216 lines
7.6 KiB
Python

import os
import json
from typing import List, Dict, Optional
from datetime import datetime
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from exceptions import ExportError
class Exporter:
    """Write generated copy and its source segments to Markdown or DOCX files.

    Every file is written inside ``export_folder``. All four export methods
    report failures uniformly by raising ``ExportError`` (chained to the
    underlying exception).
    """

    def __init__(self, export_folder: str = "exports"):
        """Remember the output folder and make sure it exists.

        Args:
            export_folder: Directory that receives every exported file.
        """
        self.export_folder = export_folder
        # exist_ok=True avoids the check-then-create race of a separate
        # os.path.exists() test when two exporters start at the same time.
        os.makedirs(export_folder, exist_ok=True)

    @staticmethod
    def _timestamped_name(prefix: str, suffix: str) -> str:
        """Return ``<prefix>_<YYYYmmdd_HHMMSS><suffix>`` using the local time."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        return f"{prefix}_{timestamp}{suffix}"

    def _target_path(self, name: str) -> str:
        """Return the path of ``name`` inside the export folder."""
        # NOTE(review): ``name`` is joined as-is, so an absolute path or a
        # ".." component would land outside export_folder -- confirm that
        # callers never pass untrusted file names here.
        return os.path.join(self.export_folder, name)

    def _write_text(self, name: str, text: str) -> str:
        """Write ``text`` as UTF-8 to ``name`` in the export folder; return its path."""
        file_path = self._target_path(name)
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(text)
        return file_path

    def export_to_markdown(self, query: str, generated_text: str,
                           source_segments: List[Dict],
                           file_name: Optional[str] = None) -> str:
        """Export generated text and its source segments to a Markdown file.

        Args:
            query: The query used to generate the text.
            generated_text: The generated text.
            source_segments: Segments used as sources. Each one is indexed as
                ``segment['metadata']['file_name']``,
                ``segment['metadata']['segment_id']`` and ``segment['content']``.
                ``None`` is treated as an empty list.
            file_name: Optional file name; a timestamped ``.md`` name is
                generated when omitted or empty.

        Returns:
            Path to the exported file.

        Raises:
            ExportError: If building or writing the file fails.
        """
        try:
            if not file_name:
                file_name = self._timestamped_name("生成结果", ".md")

            # Collect the pieces and join once instead of repeated "+=".
            parts = [
                "# 文案生成结果\n",
                "## 查询\n",
                f"{query}\n",
                "## 生成文案\n",
                f"{generated_text}\n",
                "## 源文档片段\n",
            ]
            for i, segment in enumerate(source_segments or [], 1):
                metadata = segment['metadata']
                parts.append(f"### 片段 {i}\n")
                parts.append(f"**文件名**: {metadata['file_name']}\n\n")
                parts.append(f"**段落ID**: {metadata['segment_id']}\n\n")
                parts.append(f"**内容**:\n{segment['content']}\n\n")
                parts.append("---\n\n")

            return self._write_text(file_name, "".join(parts))
        except Exception as e:
            raise ExportError(f"导出Markdown文件失败: {e}") from e

    def export_to_docx(self, query: str, generated_text: str,
                       source_segments: List[Dict],
                       file_name: Optional[str] = None) -> str:
        """Export generated text and its source segments to a DOCX file.

        Args:
            query: The query used to generate the text.
            generated_text: The generated text.
            source_segments: Segments used as sources, indexed the same way as
                in ``export_to_markdown``. ``None`` is treated as an empty list.
            file_name: Optional file name; a timestamped ``.docx`` name is
                generated when omitted or empty.

        Returns:
            Path to the exported file.

        Raises:
            ExportError: If building or saving the document fails.
        """
        try:
            if not file_name:
                file_name = self._timestamped_name("生成结果", ".docx")

            doc = Document()

            # Centered document title.
            title = doc.add_heading('文案生成结果', 0)
            title.alignment = WD_ALIGN_PARAGRAPH.CENTER

            doc.add_heading('查询', level=1)
            doc.add_paragraph(query)

            doc.add_heading('生成文案', level=1)
            doc.add_paragraph(generated_text)

            doc.add_heading('源文档片段', level=1)
            for i, segment in enumerate(source_segments or [], 1):
                metadata = segment["metadata"]
                doc.add_heading(f'片段 {i}', level=2)
                doc.add_paragraph(f'文件名: {metadata["file_name"]}')
                doc.add_paragraph(f'段落ID: {metadata["segment_id"]}')
                doc.add_paragraph('内容:')
                doc.add_paragraph(segment['content'])
                doc.add_paragraph('---')

            file_path = self._target_path(file_name)
            doc.save(file_path)
            return file_path
        except Exception as e:
            raise ExportError(f"导出DOCX文件失败: {e}") from e

    def export_batch_to_markdown(self, results: List[Dict],
                                 batch_name: Optional[str] = None) -> str:
        """Export a batch of results to a single Markdown file.

        Args:
            results: Result dictionaries. The optional keys ``query``,
                ``generated_text`` and ``source_segments`` are read; missing
                text fields are rendered as ``N/A``. ``None`` is treated as an
                empty list.
            batch_name: Optional file name; a timestamped ``.md`` name is
                generated when omitted or empty.

        Returns:
            Path to the exported file.

        Raises:
            ExportError: If building or writing the file fails.
        """
        try:
            if not batch_name:
                batch_name = self._timestamped_name("批量生成结果", ".md")

            generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            parts = [
                "# 批量文案生成结果\n",
                "## 生成时间\n",
                f"{generated_at}\n",
                "## 结果列表\n",
            ]
            for i, result in enumerate(results or [], 1):
                parts.append(f"## 结果 {i}\n\n")
                parts.append(f"**查询**: {result.get('query', 'N/A')}\n\n")
                parts.append(
                    f"**生成文案**:\n{result.get('generated_text', 'N/A')}\n\n"
                )

                # Only segment ids are listed in batch mode, and only when
                # the result actually carries segments.
                source_segments = result.get('source_segments', [])
                if source_segments:
                    parts.append("**源文档片段**:\n")
                    for j, segment in enumerate(source_segments, 1):
                        parts.append(
                            f"- 片段 {j}: {segment['metadata']['segment_id']}\n"
                        )

                parts.append("\n---\n\n")

            return self._write_text(batch_name, "".join(parts))
        except Exception as e:
            raise ExportError(f"批量导出Markdown文件失败: {e}") from e

    def export_batch_to_docx(self, results: List[Dict],
                             batch_name: Optional[str] = None) -> str:
        """Export a batch of results to a single DOCX file.

        Args:
            results: Result dictionaries, read the same way as in
                ``export_batch_to_markdown``. ``None`` is treated as an empty
                list.
            batch_name: Optional file name; a timestamped ``.docx`` name is
                generated when omitted or empty.

        Returns:
            Path to the exported file.

        Raises:
            ExportError: If building or saving the document fails.
        """
        try:
            if not batch_name:
                batch_name = self._timestamped_name("批量生成结果", ".docx")

            doc = Document()

            # Centered document title.
            title = doc.add_heading('批量文案生成结果', 0)
            title.alignment = WD_ALIGN_PARAGRAPH.CENTER

            doc.add_heading('生成时间', level=1)
            doc.add_paragraph(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

            doc.add_heading('结果列表', level=1)
            for i, result in enumerate(results or [], 1):
                doc.add_heading(f'结果 {i}', level=2)
                doc.add_paragraph(f'查询: {result.get("query", "N/A")}')

                doc.add_paragraph('生成文案:')
                doc.add_paragraph(result.get('generated_text', 'N/A'))

                source_segments = result.get('source_segments', [])
                if source_segments:
                    doc.add_paragraph('源文档片段:')
                    for j, segment in enumerate(source_segments, 1):
                        paragraph = doc.add_paragraph()
                        # Bold label followed by the plain segment id.
                        paragraph.add_run(f'片段 {j}: ').bold = True
                        paragraph.add_run(f'{segment["metadata"]["segment_id"]}')

                # Visual separator between results.
                doc.add_paragraph('---')

            file_path = self._target_path(batch_name)
            doc.save(file_path)
            return file_path
        except Exception as e:
            raise ExportError(f"批量导出DOCX文件失败: {e}") from e