nodebookls/exporter.py

216 lines
7.6 KiB
Python
Raw Permalink Normal View History

2025-10-29 13:56:24 +08:00
import os
import json
from typing import List, Dict, Optional
from datetime import datetime
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from exceptions import ExportError
class Exporter:
    """Write generated copy and its source segments to Markdown or DOCX files.

    Every export method writes one file inside ``export_folder``, returns the
    path of that file, and raises ``ExportError`` if anything goes wrong while
    building or writing it.

    A *source segment* is a dict from which the following keys are read:
    ``segment["content"]``, ``segment["metadata"]["file_name"]`` and
    ``segment["metadata"]["segment_id"]`` (batch exports only read
    ``segment_id``).
    """

    def __init__(self, export_folder: str = "exports"):
        """Remember the output folder and make sure it exists.

        Args:
            export_folder: Directory that will receive the exported files.
        """
        self.export_folder = export_folder
        # exist_ok=True replaces the exists()/makedirs() pair, which could
        # fail if another process created the folder between the two calls.
        os.makedirs(export_folder, exist_ok=True)

    def _resolve_path(self, file_name: Optional[str], prefix: str,
                      extension: str) -> str:
        """Return the output path, generating a timestamped name if none is given."""
        if not file_name:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            file_name = f"{prefix}_{timestamp}{extension}"
        return os.path.join(self.export_folder, file_name)

    @staticmethod
    def _write_text(file_path: str, content: str) -> None:
        """Write ``content`` to ``file_path`` as UTF-8 text, replacing any existing file."""
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(content)

    @staticmethod
    def _new_document(title_text: str):
        """Create a DOCX document that starts with a centered level-0 heading."""
        doc = Document()
        title = doc.add_heading(title_text, 0)
        title.alignment = WD_ALIGN_PARAGRAPH.CENTER
        return doc

    def export_to_markdown(self, query: str, generated_text: str,
                           source_segments: List[Dict], file_name: Optional[str] = None) -> str:
        """Export one generated text and its source segments as Markdown.

        Args:
            query: The query used to generate the text.
            generated_text: The generated text.
            source_segments: Source segments used for the generation.
            file_name: Optional file name; a timestamped ``.md`` name is
                generated when omitted or empty.

        Returns:
            Path to the exported file.

        Raises:
            ExportError: If the content cannot be built or the file cannot be
                written (the original exception is attached as the cause).
        """
        try:
            file_path = self._resolve_path(file_name, "生成结果", ".md")

            # Collect fragments and join once instead of growing a string.
            parts = [
                "# 文案生成结果\n",
                "## 查询\n",
                f"{query}\n",
                "## 生成文案\n",
                f"{generated_text}\n",
                "## 源文档片段\n",
            ]
            for i, segment in enumerate(source_segments, 1):
                metadata = segment["metadata"]
                parts.append(f"### 片段 {i}\n")
                parts.append(f"**文件名**: {metadata['file_name']}\n\n")
                parts.append(f"**段落ID**: {metadata['segment_id']}\n\n")
                parts.append(f"**内容**:\n{segment['content']}\n\n")
                parts.append("---\n\n")

            self._write_text(file_path, "".join(parts))
            return file_path
        except Exception as e:
            raise ExportError(f"导出Markdown文件失败: {str(e)}") from e

    def export_to_docx(self, query: str, generated_text: str,
                       source_segments: List[Dict], file_name: Optional[str] = None) -> str:
        """Export one generated text and its source segments as a DOCX file.

        Args:
            query: The query used to generate the text.
            generated_text: The generated text.
            source_segments: Source segments used for the generation.
            file_name: Optional file name; a timestamped ``.docx`` name is
                generated when omitted or empty.

        Returns:
            Path to the exported file.

        Raises:
            ExportError: If the document cannot be built or saved (the
                original exception is attached as the cause).
        """
        try:
            file_path = self._resolve_path(file_name, "生成结果", ".docx")
            doc = self._new_document('文案生成结果')

            doc.add_heading('查询', level=1)
            doc.add_paragraph(query)

            doc.add_heading('生成文案', level=1)
            doc.add_paragraph(generated_text)

            doc.add_heading('源文档片段', level=1)
            for i, segment in enumerate(source_segments, 1):
                metadata = segment["metadata"]
                doc.add_heading(f'片段 {i}', level=2)
                doc.add_paragraph(f'文件名: {metadata["file_name"]}')
                doc.add_paragraph(f'段落ID: {metadata["segment_id"]}')
                doc.add_paragraph('内容:')
                doc.add_paragraph(segment['content'])
                doc.add_paragraph('---')

            doc.save(file_path)
            return file_path
        except Exception as e:
            # Same contract as export_to_markdown: callers only need to
            # handle ExportError.
            raise ExportError(f"导出DOCX文件失败: {str(e)}") from e

    def export_batch_to_markdown(self, results: List[Dict], batch_name: Optional[str] = None) -> str:
        """Export a batch of results into a single Markdown file.

        Args:
            results: Result dicts. ``query`` and ``generated_text`` fall back
                to ``'N/A'`` when missing; ``source_segments`` falls back to
                an empty list.
            batch_name: Optional file name; a timestamped ``.md`` name is
                generated when omitted or empty.

        Returns:
            Path to the exported file.

        Raises:
            ExportError: If the content cannot be built or the file cannot be
                written (the original exception is attached as the cause).
        """
        try:
            file_path = self._resolve_path(batch_name, "批量生成结果", ".md")
            generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

            parts = [
                "# 批量文案生成结果\n",
                "## 生成时间\n",
                f"{generated_at}\n",
                "## 结果列表\n",
            ]
            for i, result in enumerate(results, 1):
                parts.append(f"## 结果 {i}\n\n")
                parts.append(f"**查询**: {result.get('query', 'N/A')}\n\n")
                parts.append(f"**生成文案**:\n{result.get('generated_text', 'N/A')}\n\n")

                # Only the segment ids are listed in batch exports.
                source_segments = result.get('source_segments', [])
                if source_segments:
                    parts.append("**源文档片段**:\n")
                    parts.extend(
                        f"- 片段 {j}: {segment['metadata']['segment_id']}\n"
                        for j, segment in enumerate(source_segments, 1)
                    )

                # One separator per result, whether or not it had segments.
                parts.append("\n---\n\n")

            self._write_text(file_path, "".join(parts))
            return file_path
        except Exception as e:
            raise ExportError(f"批量导出Markdown文件失败: {str(e)}") from e

    def export_batch_to_docx(self, results: List[Dict], batch_name: Optional[str] = None) -> str:
        """Export a batch of results into a single DOCX file.

        Args:
            results: Result dicts. ``query`` and ``generated_text`` fall back
                to ``'N/A'`` when missing; ``source_segments`` falls back to
                an empty list.
            batch_name: Optional file name; a timestamped ``.docx`` name is
                generated when omitted or empty.

        Returns:
            Path to the exported file.

        Raises:
            ExportError: If the document cannot be built or saved (the
                original exception is attached as the cause).
        """
        try:
            file_path = self._resolve_path(batch_name, "批量生成结果", ".docx")
            doc = self._new_document('批量文案生成结果')

            doc.add_heading('生成时间', level=1)
            doc.add_paragraph(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

            doc.add_heading('结果列表', level=1)
            for i, result in enumerate(results, 1):
                doc.add_heading(f'结果 {i}', level=2)
                doc.add_paragraph(f'查询: {result.get("query", "N/A")}')

                doc.add_paragraph('生成文案:')
                doc.add_paragraph(result.get('generated_text', 'N/A'))

                # Only the segment ids are listed in batch exports.
                source_segments = result.get('source_segments', [])
                if source_segments:
                    doc.add_paragraph('源文档片段:')
                    for j, segment in enumerate(source_segments, 1):
                        paragraph = doc.add_paragraph()
                        paragraph.add_run(f'片段 {j}: ').bold = True
                        paragraph.add_run(f'{segment["metadata"]["segment_id"]}')

                # One separator per result, whether or not it had segments.
                doc.add_paragraph('---')

            doc.save(file_path)
            return file_path
        except Exception as e:
            raise ExportError(f"批量导出DOCX文件失败: {str(e)}") from e