import json
import re
from typing import List, Dict, Optional, Tuple

from config import settings
from model_manager import model_router
from style_templates import style_manager
from exceptions import GenerationError


class TextGenerator:
    def __init__(self):
        self.client = self._initialize_client()

    def _initialize_client(self):
        """Initialize the API client. Generation goes through the model router, so no separate client is needed."""
        return None

    def generate_text(self,
                      context: str,
                      style: str = "通用文案",
                      min_length: int = 50,
                      max_length: int = 200,
                      history: Optional[List[Dict]] = None) -> str:
        """
        Generate text based on context and style.

        Args:
            context: Retrieved context information
            style: Writing style (e.g., 小红书种草风, 官方通告, 知乎科普)
            min_length: Minimum text length
            max_length: Maximum text length
            history: Conversation history

        Returns:
            Generated text
        """
        # Ensure at least one model provider is configured
        if not model_router.providers:
            raise GenerationError("未配置任何模型提供商")

        try:
            # Get the style template and its sampling temperature
            template_info = style_manager.get_template(style)
            prompt_template = template_info["template"]
            temperature = template_info["temperature"]

            # Build the prompt from the template
            prompt = prompt_template.format(
                context=context,
                min_length=min_length,
                max_length=max_length
            )

            # Prepare messages
            messages = []

            # Add history if provided, keeping only the last 2 rounds
            if history:
                recent_history = history[-4:]  # 2 rounds = 4 messages (user/assistant pairs)
                messages.extend(recent_history)

            # Add the current prompt
            messages.append({"role": "user", "content": prompt})

            # Calculate max tokens from the requested max length
            # (cast to int in case MAX_TOKENS_FACTOR is fractional)
            max_tokens = int(max_length * settings.MAX_TOKENS_FACTOR)

            # Generate text using the model router
            content = model_router.generate_text(
                model=settings.GENERATION_MODEL,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens
            )

            return content

        except Exception as e:
            raise GenerationError(f"文本生成失败: {str(e)}")

    def score_generation(self, generated_text: str, context: str, query: str) -> Dict:
        """
        Score the quality of generated text using GPT-4.

        Args:
            generated_text: The generated text to score
            context: The source context
            query: The original query

        Returns:
            Dictionary containing score and feedback
        """
        # Ensure at least one model provider is configured
        if not model_router.providers:
            return {
                "score": 0,
                "feedback": "未配置任何模型提供商"
            }

        try:
            # Create the scoring prompt
            prompt = f"""请对以下生成的文本进行评分,评估其与原始查询和上下文的一致性:

原始查询: {query}

上下文信息: {context}

生成文本: {generated_text}

请从以下维度进行评分(满分100分):
1. 相关性(30分):生成内容与查询的相关程度
2. 准确性(30分):生成内容与上下文信息的一致程度
3. 完整性(20分):是否充分回答了查询
4. 流畅性(20分):语言表达是否自然流畅

请提供:
- 总分(0-100)
- 各维度得分
- 简要反馈意见
- 改进建议

请以以下JSON格式返回结果:
{{
    "total_score": 85,
    "dimensions": {{
        "relevance": 25,
        "accuracy": 28,
        "completeness": 18,
        "fluency": 14
    }},
    "feedback": "生成内容与查询相关,但可以更详细...",
    "suggestions": "建议增加更多具体示例..."
}}
"""

            # Generate the score using the model router with GPT-4
            content = model_router.generate_text(
                model="gpt-4",
                messages=[
                    {"role": "system", "content": "你是一个专业的文本质量评估专家。"},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.1,
                max_tokens=500
            )

            if content:
                try:
                    # Try to parse the response as JSON
                    score_data = json.loads(content)
                    return score_data
                except json.JSONDecodeError:
                    # If JSON parsing fails, return the raw response as feedback
                    return {
                        "score": 0,
                        "feedback": content
                    }
            else:
                return {
                    "score": 0,
                    "feedback": "评分生成失败"
                }

        except Exception as e:
            return {
                "score": 0,
                "feedback": f"评分生成错误: {str(e)}"
            }

    def detect_hallucination_keywords(self, text: str) -> List[str]:
        """
        Detect hallucination keywords in generated text.

        Args:
            text: Generated text

        Returns:
            List of detected hallucination keywords
        """
        # Hedging and unattributed-claim phrases that often signal hallucinated content
        hallucination_keywords = [
            "据悉", "据报道", "研究表明", "据专家称", "有消息称",
            "据了解", "据分析", "据预测", "据估计", "据透露",
            "可能", "也许", "大概", "似乎", "看起来",
            "普遍认为", "大多数人认为", "通常情况下"
        ]

        detected = []
        for keyword in hallucination_keywords:
            if keyword in text:
                detected.append(keyword)

        return detected

    def detect_hallucination_entities(self, text: str, context: str) -> List[str]:
        """
        Detect hallucinated entities that don't appear in the context.

        Args:
            text: Generated text
            context: Source context

        Returns:
            List of potentially hallucinated entities
        """
        # This is a simplified implementation; a real application could use
        # proper named-entity recognition instead of this regex approximation.

        # Extract candidate entities (alphanumeric or CJK tokens of 2+ characters) from the generated text
        generated_entities = re.findall(r'[A-Za-z0-9\u4e00-\u9fff]{2,}', text)

        # Extract entities from the context (as a set for fast membership checks)
        context_entities = set(re.findall(r'[A-Za-z0-9\u4e00-\u9fff]{2,}', context))

        # Common function words to exclude from the results
        common_words = {"可以", "能够", "通过", "进行", "提供", "支持", "包括",
                        "以及", "或者", "但是", "然而", "因此", "所以"}

        # Collect entities that appear in the generated text but not in the context
        hallucinated_entities = []
        for entity in generated_entities:
            if entity not in context_entities and len(entity) > 2:
                if entity not in common_words:
                    hallucinated_entities.append(entity)

        return hallucinated_entities

    def comprehensive_hallucination_check(self, text: str, context: str) -> Tuple[List[str], List[str]]:
        """
        Comprehensive hallucination detection.

        Args:
            text: Generated text
            context: Source context

        Returns:
            Tuple of (hallucination_keywords, hallucinated_entities)
        """
        keywords = self.detect_hallucination_keywords(text)
        entities = self.detect_hallucination_entities(text, context)

        return keywords, entities