Files

209 lines
4.9 KiB
Python
Raw Permalink Normal View History

2026-03-25 15:17:18 +08:00
#!/usr/bin/env python
"""
命令行接口 - 支持批量处理文章
"""
import argparse
import logging
import sys
from pathlib import Path
from typing import Optional
from config_manager import config_manager
from main_process import link_to_text
from src.services.web_scraping import web_scraping_service
# Module-level logger named after this module; every function in this file
# logs through it. Output formatting and level come from the root logger
# configuration done in setup_logging().
logger = logging.getLogger(__name__)
def setup_logging(verbose: bool = False) -> None:
    """Configure root logging for the command-line tool.

    Args:
        verbose: When true the root level is DEBUG, otherwise INFO.
    """
    if verbose:
        threshold = logging.DEBUG
    else:
        threshold = logging.INFO

    # basicConfig only takes effect while the root logger has no handlers yet.
    stream_handler = logging.StreamHandler()
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[stream_handler],
        level=threshold,
    )
def process_excel_file(
    excel_path: str,
    num_threads: int = 1,
    ai_service: str = "coze",
    generation_type: str = "文章",
    template_name: Optional[str] = None
) -> bool:
    """Run the batch link pipeline and report whether every item succeeded.

    Args:
        excel_path: Path of the Excel file that holds the article links.
            Not used by this function's body (see the review note below).
        num_threads: Thread count forwarded to ``link_to_text``.
        ai_service: AI service identifier forwarded to ``link_to_text``.
        generation_type: Generation type forwarded to ``link_to_text``.
        template_name: Name of the template to use. Not used by this
            function's body (see the review note below).

    Returns:
        True when every returned result counts as successful, i.e. it is a
        tuple of length >= 2 whose second element is truthy. An empty result
        list therefore also yields True. False when any result fails the
        check or when an exception is raised.
    """
    # NOTE(review): excel_path and template_name are accepted but never
    # forwarded -- link_to_text receives no path and is always called with
    # current_template=None. Confirm how link_to_text locates its input and
    # what type current_template expects before wiring these through.
    try:
        results = link_to_text(
            num_threads=num_threads,
            ai_service=ai_service,
            current_template=None,
            generation_type=generation_type,
            app=None
        )
        success_count = sum(
            1 for r in results
            if isinstance(r, tuple) and len(r) >= 2 and r[1]
        )
        total_count = len(results)
        logger.info(f"处理完成: {success_count}/{total_count} 成功")
        return success_count == total_count
    except Exception as e:
        # Same one-line message as before; the traceback is attached only
        # when DEBUG logging is enabled so default output stays unchanged.
        logger.error(
            f"处理失败: {e}",
            exc_info=logger.isEnabledFor(logging.DEBUG),
        )
        return False
def process_single_link(
    link: str,
    ai_service: str = "coze",
    generation_type: str = "文章"
) -> bool:
    """Fetch a single article and log a summary of what was extracted.

    Args:
        link: Article URL passed to ``web_scraping_service.get_cached_content``.
        ai_service: AI service identifier. Not used by this function's body.
        generation_type: Generation type. Not used by this function's body.

    Returns:
        True when both a title and content were obtained; False when either
        is empty or when an exception is raised.
    """
    try:
        title, content, images = web_scraping_service.get_cached_content(link)
        if not title or not content:
            logger.error("无法提取文章内容")
            return False
        logger.info(f"标题: {title}")
        logger.info(f"内容长度: {len(content)}")
        # Tolerate a missing image collection (e.g. None) so an otherwise
        # successful extraction is not reported as a failure.
        image_count = len(images) if images else 0
        logger.info(f"图片数量: {image_count}")
        return True
    except Exception as e:
        # Same one-line message as before; the traceback is attached only
        # when DEBUG logging is enabled so default output stays unchanged.
        logger.error(
            f"处理失败: {e}",
            exc_info=logger.isEnabledFor(logging.DEBUG),
        )
        return False
def main(argv: Optional[list] = None) -> int:
    """Parse command-line arguments and run the requested processing mode.

    Args:
        argv: Argument strings to parse, without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Returns:
        Exit code: 0 when processing succeeded, 1 when it failed or an
        unexpected error occurred, 130 when interrupted from the keyboard.

    Raises:
        SystemExit: Raised by argparse for ``--help`` and invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description="文章批量处理工具 - 命令行模式",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "\n"
            "示例:\n"
            "# 处理Excel文件\n"
            "%(prog)s --excel 文章链接.xlsx --threads 3\n"
            "# 处理单个链接\n"
            "%(prog)s --link https://www.toutiao.com/article/123\n"
            "# 使用特定生成类型\n"
            "%(prog)s --excel 文章链接.xlsx --type 短篇\n"
        ),
    )

    # Input options: exactly one of --excel / --link must be given.
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument(
        '--excel', '-e',
        type=str,
        help='Excel文件路径包含文章链接'
    )
    input_group.add_argument(
        '--link', '-l',
        type=str,
        help='单个文章链接'
    )

    # Processing options.
    parser.add_argument(
        '--threads', '-t',
        type=int,
        default=1,
        help='线程数(默认: 1)'
    )
    parser.add_argument(
        '--service', '-s',
        type=str,
        default='coze',
        choices=['coze'],
        help='AI服务(默认: coze)'
    )
    parser.add_argument(
        '--type', '-T',
        type=str,
        default='文章',
        choices=['短篇', '文章'],
        help='生成类型(默认: 文章)'
    )

    # Miscellaneous options.
    parser.add_argument(
        '--template',
        type=str,
        help='使用的模板名称'
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='显示详细日志'
    )
    parser.add_argument(
        '--config',
        type=str,
        help='配置文件路径(默认: config.ini)'
    )

    args = parser.parse_args(argv)
    setup_logging(args.verbose)

    if args.config:
        # NOTE(review): this writes a private attribute of config_manager;
        # prefer a public setter if config_manager offers one.
        config_manager._config_file = args.config
        config_manager.reload()

    try:
        if args.excel:
            success = process_excel_file(
                excel_path=args.excel,
                num_threads=args.threads,
                ai_service=args.service,
                generation_type=args.type,
                template_name=args.template
            )
        else:
            success = process_single_link(
                link=args.link,
                ai_service=args.service,
                generation_type=args.type
            )
        return 0 if success else 1
    except KeyboardInterrupt:
        logger.info("用户中断")
        return 130
    except Exception as e:
        # Same one-line message as before; the traceback is attached only
        # when DEBUG logging (--verbose) is enabled.
        logger.error(
            f"未预期的错误: {e}",
            exc_info=logger.isEnabledFor(logging.DEBUG),
        )
        return 1
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())