209 lines
4.9 KiB
Python
209 lines
4.9 KiB
Python
#!/usr/bin/env python
|
||
"""
|
||
命令行接口 - 支持批量处理文章
|
||
"""
|
||
import argparse
|
||
import logging
|
||
import sys
|
||
from pathlib import Path
|
||
from typing import Optional
|
||
|
||
from config_manager import config_manager
|
||
from main_process import link_to_text
|
||
from src.services.web_scraping import web_scraping_service
|
||
|
||
|
||
# Module-level logger named after this module; root handlers/level are set up
# by setup_logging() via logging.basicConfig.
logger = logging.getLogger(__name__)
|
||
|
||
|
||
def setup_logging(verbose: bool = False) -> None:
    """Configure the root logger to write formatted records to a stream handler.

    Args:
        verbose: When true the root level is DEBUG, otherwise INFO.

    Note:
        ``logging.basicConfig`` is a no-op if the root logger already has
        handlers, so only the first effective call takes hold.
    """
    if verbose:
        threshold = logging.DEBUG
    else:
        threshold = logging.INFO

    # StreamHandler() with no argument writes to sys.stderr.
    console = logging.StreamHandler()
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[console],
        level=threshold,
    )
|
||
|
||
|
||
def process_excel_file(
    excel_path: str,
    num_threads: int = 1,
    ai_service: str = "coze",
    generation_type: str = "文章",
    template_name: Optional[str] = None
) -> bool:
    """Run the batch pipeline and report whether every item succeeded.

    Args:
        excel_path: Path of the Excel file holding the article links.
        num_threads: Number of worker threads handed to ``link_to_text``.
        ai_service: Name of the AI service handed to ``link_to_text``.
        generation_type: Generation type handed to ``link_to_text``.
        template_name: Name of the template to use.

    Returns:
        True when the number of successful results equals the total number
        of results; False otherwise, or when any exception is raised.

    NOTE(review): ``excel_path`` and ``template_name`` are not forwarded to
    ``link_to_text`` here (``current_template`` is always None). Presumably
    the pipeline locates its input elsewhere; confirm against ``link_to_text``.
    """
    try:
        outcomes = link_to_text(
            num_threads=num_threads,
            ai_service=ai_service,
            current_template=None,
            generation_type=generation_type,
            app=None,
        )

        # A result counts as a success only if it is a tuple with at least
        # two items whose second item is truthy.
        succeeded = 0
        for outcome in outcomes:
            if isinstance(outcome, tuple) and len(outcome) >= 2 and outcome[1]:
                succeeded += 1
        total = len(outcomes)

        logger.info(f"处理完成: {succeeded}/{total} 成功")
    except Exception as exc:
        logger.error(f"处理失败: {exc}")
        return False

    return succeeded == total
|
||
|
||
|
||
def process_single_link(
    link: str,
    ai_service: str = "coze",
    generation_type: str = "文章"
) -> bool:
    """Fetch a single article and log a short summary of what was extracted.

    Args:
        link: URL of the article to fetch.
        ai_service: Accepted for a signature parallel to
            ``process_excel_file``; not used by this function.
        generation_type: Accepted for a signature parallel to
            ``process_excel_file``; not used by this function.

    Returns:
        True when both a title and content were extracted; False when
        either is missing or when any exception is raised.
    """
    try:
        scraped = web_scraping_service.get_cached_content(link)

        # NOTE(review): the return contract of get_cached_content is not
        # visible here. A None result is handled explicitly so it is reported
        # as an extraction failure instead of a tuple-unpacking error.
        if scraped is None:
            logger.error("无法提取文章内容")
            return False

        title, content, images = scraped

        if not title or not content:
            logger.error("无法提取文章内容")
            return False

        logger.info(f"标题: {title}")
        logger.info(f"内容长度: {len(content)}")
        # An article without images must not fail just because the image
        # list came back as None; count it as zero images.
        logger.info(f"图片数量: {len(images or ())}")

        return True

    except Exception as e:
        logger.error(f"处理失败: {e}")
        return False
|
||
|
||
|
||
def _positive_int(raw: str) -> int:
    """argparse ``type=`` converter that accepts only integers >= 1."""
    try:
        number = int(raw)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {raw!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"线程数必须是大于等于 1 的整数: {raw!r}")
    return number


def main(argv: Optional[list] = None) -> int:
    """Parse command-line arguments and run the requested processing mode.

    Args:
        argv: Argument list to parse, without the program name. When None
            (the default) argparse reads ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success, 1 on failure or unexpected error,
        130 when interrupted by the user.

    Raises:
        SystemExit: Raised by argparse (status 2) on invalid arguments,
            including a ``--threads`` value below 1, and on ``--help``.
    """
    parser = argparse.ArgumentParser(
        description="文章批量处理工具 - 命令行模式",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
# 处理Excel文件
%(prog)s --excel 文章链接.xlsx --threads 3

# 处理单个链接
%(prog)s --link https://www.toutiao.com/article/123

# 使用特定生成类型
%(prog)s --excel 文章链接.xlsx --type 短篇
"""
    )

    # Input options: exactly one of --excel / --link must be given.
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument(
        '--excel', '-e',
        type=str,
        help='Excel文件路径(包含文章链接)'
    )
    input_group.add_argument(
        '--link', '-l',
        type=str,
        help='单个文章链接'
    )

    # Processing options.
    parser.add_argument(
        '--threads', '-t',
        # Reject 0 and negative counts at parse time rather than letting them
        # reach the worker pool.
        type=_positive_int,
        default=1,
        help='线程数(默认: 1)'
    )
    parser.add_argument(
        '--service', '-s',
        type=str,
        default='coze',
        choices=['coze'],
        help='AI服务(默认: coze)'
    )
    parser.add_argument(
        '--type', '-T',
        type=str,
        default='文章',
        choices=['短篇', '文章'],
        help='生成类型(默认: 文章)'
    )

    # Miscellaneous options.
    parser.add_argument(
        '--template',
        type=str,
        help='使用的模板名称'
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='显示详细日志'
    )
    parser.add_argument(
        '--config',
        type=str,
        help='配置文件路径(默认: config.ini)'
    )

    args = parser.parse_args(argv)

    setup_logging(args.verbose)

    if args.config:
        # NOTE(review): this pokes a private attribute of config_manager before
        # reloading; confirm whether a public way to switch files exists.
        config_manager._config_file = args.config
        config_manager.reload()

    try:
        if args.excel:
            success = process_excel_file(
                excel_path=args.excel,
                num_threads=args.threads,
                ai_service=args.service,
                generation_type=args.type,
                template_name=args.template
            )
        else:
            success = process_single_link(
                link=args.link,
                ai_service=args.service,
                generation_type=args.type
            )

        return 0 if success else 1

    except KeyboardInterrupt:
        logger.info("用户中断")
        # 130 = 128 + SIGINT, the conventional shell status for Ctrl-C.
        return 130
    except Exception as e:
        logger.error(f"未预期的错误: {e}")
        return 1
|
||
|
||
|
||
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == '__main__':
    raise SystemExit(main())