#!/usr/bin/env python
"""Command-line interface for batch article processing."""

import argparse
import logging
import sys
from pathlib import Path
from typing import Optional

from config_manager import config_manager
from main_process import link_to_text
from src.services.web_scraping import web_scraping_service

logger = logging.getLogger(__name__)


def setup_logging(verbose: bool = False) -> None:
    """Configure root logging with a single stream handler.

    Args:
        verbose: When true the level is DEBUG, otherwise INFO.
    """
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[logging.StreamHandler()],
    )


def _positive_int(value: str) -> int:
    """Parse *value* as an integer >= 1; intended as an argparse ``type=``.

    Raises:
        argparse.ArgumentTypeError: If *value* is not an integer or is < 1.
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"无效的整数: {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"线程数必须是正整数: {value!r}")
    return number


def process_excel_file(
    excel_path: str,
    num_threads: int = 1,
    ai_service: str = "coze",
    generation_type: str = "文章",
    template_name: Optional[str] = None
) -> bool:
    """Run the batch pipeline and report whether every item succeeded.

    Args:
        excel_path: Path of the Excel file holding the article links.
        num_threads: Number of worker threads handed to ``link_to_text``.
        ai_service: Name of the AI service handed to ``link_to_text``.
        generation_type: Generation type handed to ``link_to_text``.
        template_name: Name of the template to use.

    Returns:
        True when every result is counted as successful, False otherwise
        (including when any exception is raised).
    """
    # NOTE(review): excel_path and template_name are accepted but not
    # forwarded -- link_to_text is called with current_template=None and
    # without any path. Its signature is not visible from this file, so the
    # call is left as it was; confirm how it locates the workbook/template.
    try:
        results = link_to_text(
            num_threads=num_threads,
            ai_service=ai_service,
            current_template=None,
            generation_type=generation_type,
            app=None
        )

        # A result counts as a success only if it is a tuple with at least
        # two items whose second item is truthy.
        success_count = sum(
            1 for r in results
            if isinstance(r, tuple) and len(r) >= 2 and r[1]
        )
        total_count = len(results)

        logger.info("处理完成: %s/%s 成功", success_count, total_count)
        return success_count == total_count
    except Exception as e:
        logger.error("处理失败: %s", e)
        # Keep the one-line error for normal runs; the traceback is only
        # emitted when --verbose enables DEBUG.
        logger.debug("异常详情", exc_info=True)
        return False


def process_single_link(
    link: str,
    ai_service: str = "coze",
    generation_type: str = "文章"
) -> bool:
    """Fetch one article and log its title, content length and image count.

    Args:
        link: URL of the article.
        ai_service: AI service name (accepted but not used by this function).
        generation_type: Generation type (accepted but not used by this
            function).

    Returns:
        True when both a title and content were obtained, False otherwise
        (including when any exception is raised).
    """
    try:
        title, content, images = web_scraping_service.get_cached_content(link)

        if not title or not content:
            logger.error("无法提取文章内容")
            return False

        # Guard in case the scraper yields None instead of an empty list for
        # an article without images (not confirmed from this file); a missing
        # image list should not turn a successful extraction into a failure.
        image_count = len(images) if images is not None else 0

        logger.info("标题: %s", title)
        logger.info("内容长度: %s", len(content))
        logger.info("图片数量: %s", image_count)
        return True
    except Exception as e:
        logger.error("处理失败: %s", e)
        logger.debug("异常详情", exc_info=True)
        return False


def main() -> int:
    """Parse the command line, run the requested mode, return an exit code.

    Returns:
        0 on success, 1 on failure or unexpected error, 130 when interrupted
        by the user.
    """
    parser = argparse.ArgumentParser(
        description="文章批量处理工具 - 命令行模式",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
  # 处理Excel文件
  %(prog)s --excel 文章链接.xlsx --threads 3

  # 处理单个链接
  %(prog)s --link https://www.toutiao.com/article/123

  # 使用特定生成类型
  %(prog)s --excel 文章链接.xlsx --type 短篇
        """
    )

    # Input options: exactly one of --excel / --link is required.
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument(
        '--excel', '-e',
        type=str,
        help='Excel文件路径(包含文章链接)'
    )
    input_group.add_argument(
        '--link', '-l',
        type=str,
        help='单个文章链接'
    )

    # Processing options.
    parser.add_argument(
        '--threads', '-t',
        # Reject 0 and negative values at parse time instead of letting them
        # reach the worker pool.
        type=_positive_int,
        default=1,
        help='线程数(默认: 1)'
    )
    parser.add_argument(
        '--service', '-s',
        type=str,
        default='coze',
        choices=['coze'],
        help='AI服务(默认: coze)'
    )
    parser.add_argument(
        '--type', '-T',
        type=str,
        default='文章',
        choices=['短篇', '文章'],
        help='生成类型(默认: 文章)'
    )

    # Miscellaneous options.
    parser.add_argument(
        '--template',
        type=str,
        help='使用的模板名称'
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='显示详细日志'
    )
    parser.add_argument(
        '--config',
        type=str,
        help='配置文件路径(默认: config.ini)'
    )

    args = parser.parse_args()

    setup_logging(args.verbose)

    try:
        # Done inside the try so a broken config file is reported through the
        # same error path as any other failure rather than as a raw traceback.
        if args.config:
            # NOTE(review): this writes a private attribute of
            # config_manager; switch to a public setter if one exists.
            config_manager._config_file = args.config
            config_manager.reload()

        # `is not None` so that an explicitly empty --excel value is not
        # silently routed to the single-link branch with link=None.
        if args.excel is not None:
            success = process_excel_file(
                excel_path=args.excel,
                num_threads=args.threads,
                ai_service=args.service,
                generation_type=args.type,
                template_name=args.template
            )
        else:
            success = process_single_link(
                link=args.link,
                ai_service=args.service,
                generation_type=args.type
            )

        return 0 if success else 1

    except KeyboardInterrupt:
        logger.info("用户中断")
        return 130
    except Exception as e:
        logger.error("未预期的错误: %s", e)
        logger.debug("异常详情", exc_info=True)
        return 1


if __name__ == '__main__':
    sys.exit(main())