nodebookls/logger.py
2025-10-29 13:56:24 +08:00

166 lines
6.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import json
import logging
from datetime import datetime
from typing import Dict, Any, List, Optional
import csv
class GenerationLogger:
    """Persist text-generation events to three sinks under ``log_folder``.

    * ``generation_log.json`` — full structured entries (a JSON list of dicts)
    * ``generation_log.csv``  — flattened tabular view of the same entries
    * ``generation.log``      — standard :mod:`logging` output
    """

    # Single source of truth for the CSV schema (previously duplicated in
    # __init__ and clear_logs, which risked the two copies drifting apart).
    _CSV_HEADER = [
        'timestamp', 'query', 'generated_text', 'style',
        'total_score', 'relevance', 'accuracy', 'completeness', 'fluency',
        'hallucination_keywords', 'hallucination_entities'
    ]

    def __init__(self, log_folder: str = "logs"):
        """Create the log folder (if needed) and initialize all three sinks.

        Args:
            log_folder: Directory where all log files are written.
        """
        self.log_folder = log_folder
        # exist_ok avoids the check-then-create race of exists()+makedirs().
        os.makedirs(log_folder, exist_ok=True)

        # Structured log file paths.
        self.log_file = os.path.join(log_folder, "generation_log.json")
        self.csv_file = os.path.join(log_folder, "generation_log.csv")

        # Standard Python logger writing to generation.log.
        self.logger = logging.getLogger('generation_logger')
        self.logger.setLevel(logging.INFO)
        # getLogger returns a shared singleton, so guard against attaching a
        # duplicate handler when this class is instantiated more than once.
        if not self.logger.handlers:
            handler = logging.FileHandler(
                os.path.join(log_folder, 'generation.log'), encoding='utf-8')
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)

        # Create the CSV file with a header row on first use only, so an
        # existing log is appended to rather than clobbered.
        if not os.path.exists(self.csv_file):
            self._write_csv_header()

    def _write_csv_header(self):
        """Create (or overwrite) the CSV file and write the header row."""
        with open(self.csv_file, 'w', newline='', encoding='utf-8') as f:
            csv.writer(f).writerow(self._CSV_HEADER)

    def log_generation(self, query: str, generated_text: str, style: str,
                       source_segments: List[Any], score: Optional[Dict[str, Any]] = None,
                       hallucination_warnings: Optional[Dict[str, List[Any]]] = None):
        """
        Log a generation event to all three sinks.

        Args:
            query: The query used for generation
            generated_text: The generated text
            style: The style used for generation
            source_segments: The source segments used (only the count is stored)
            score: Optional score information; expected shape is
                ``{"total_score": ..., "dimensions": {...}}`` — missing keys
                are tolerated and logged as blanks in the CSV
            hallucination_warnings: Optional warnings; expected shape is
                ``{"keywords": [...], "entities": [...]}``
        """
        # Truncate the query in the plain-text log to keep lines readable.
        self.logger.info(f"Generation request: {query[:50]}...")

        log_entry = {
            "timestamp": datetime.now().isoformat(),
            "query": query,
            "generated_text": generated_text,
            "style": style,
            "source_segments_count": len(source_segments),
            "score": score,
            "hallucination_warnings": hallucination_warnings
        }
        self._write_json_log(log_entry)
        self._write_csv_log(log_entry)

    def _write_json_log(self, log_entry: Dict[str, Any]):
        """Append *log_entry* to the JSON log file (stored as one JSON list).

        Note: this reads and rewrites the whole file on every call, which is
        O(total entries) — acceptable for modest log volumes, but the file
        format (a single JSON array) is relied on by get_logs().
        """
        logs = []
        if os.path.exists(self.log_file):
            with open(self.log_file, 'r', encoding='utf-8') as f:
                try:
                    logs = json.load(f)
                except json.JSONDecodeError:
                    # Corrupt/partial file: start fresh rather than crash.
                    logs = []
        logs.append(log_entry)
        with open(self.log_file, 'w', encoding='utf-8') as f:
            json.dump(logs, f, ensure_ascii=False, indent=2)

    def _write_csv_log(self, log_entry: Dict[str, Any]):
        """Append one flattened row for *log_entry* to the CSV file."""
        # Hoist the optional nested dicts once instead of re-testing
        # log_entry["score"] on every column.
        score = log_entry["score"] or {}
        dimensions = score.get("dimensions", {})
        warnings = log_entry["hallucination_warnings"] or {}
        with open(self.csv_file, 'a', newline='', encoding='utf-8') as f:
            csv.writer(f).writerow([
                log_entry["timestamp"],
                log_entry["query"],
                log_entry["generated_text"],
                log_entry["style"],
                score.get("total_score", ""),
                dimensions.get("relevance", ""),
                dimensions.get("accuracy", ""),
                dimensions.get("completeness", ""),
                dimensions.get("fluency", ""),
                # str() so non-string warning items don't crash the join.
                ", ".join(map(str, warnings.get("keywords", []))),
                ", ".join(map(str, warnings.get("entities", [])))
            ])

    def get_logs(self, limit: int = 100) -> List[Dict[str, Any]]:
        """
        Get recent logs.

        Args:
            limit: Maximum number of logs to return

        Returns:
            List of log entries (most recent *limit* entries); empty list
            if the log file is missing or unparseable.
        """
        if not os.path.exists(self.log_file):
            return []
        with open(self.log_file, 'r', encoding='utf-8') as f:
            try:
                logs = json.load(f)
                return logs[-limit:]
            except json.JSONDecodeError:
                return []

    def clear_logs(self):
        """Clear all logs and re-create empty JSON/CSV files."""
        if os.path.exists(self.log_file):
            os.remove(self.log_file)
        if os.path.exists(self.csv_file):
            os.remove(self.csv_file)
        # Empty the plain-text log in place: flush any buffered records
        # first, then truncate from the start (the original truncated
        # without flushing, so buffered lines could reappear afterwards).
        for handler in self.logger.handlers:
            if isinstance(handler, logging.FileHandler) and handler.stream:
                handler.flush()
                handler.stream.seek(0)
                handler.stream.truncate()
        # Re-create the log folder in case it was removed externally.
        os.makedirs(self.log_folder, exist_ok=True)
        # Re-create an empty JSON log and a CSV with just the header row.
        with open(self.log_file, 'w', encoding='utf-8') as f:
            json.dump([], f)
        self._write_csv_header()

    def log_info(self, info_msg: str):
        """Write an informational message to the plain-text log."""
        self.logger.info(info_msg)
# Module-level singleton shared across the application.
# NOTE(review): instantiating here creates the log folder and files as an
# import side effect — importing this module touches the filesystem.
logger = GenerationLogger()