#!/usr/bin/env python
"""Command-line interface with support for batch article processing."""
|
|||
|
|
import argparse
|
|||
|
|
import logging
|
|||
|
|
import sys
|
|||
|
|
from pathlib import Path
|
|||
|
|
from typing import Optional
|
|||
|
|
|
|||
|
|
from config_manager import config_manager
|
|||
|
|
from main_process import link_to_text
|
|||
|
|
from src.services.web_scraping import web_scraping_service
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def setup_logging(verbose: bool = False) -> None:
    """Configure logging through ``logging.basicConfig``.

    Args:
        verbose: When true the level is DEBUG, otherwise INFO.
    """
    if verbose:
        threshold = logging.DEBUG
    else:
        threshold = logging.INFO

    console = logging.StreamHandler()
    logging.basicConfig(
        handlers=[console],
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=threshold,
    )
|
|||
|
|
|
|||
|
|
|
|||
|
|
def process_excel_file(
    excel_path: str,
    num_threads: int = 1,
    ai_service: str = "coze",
    generation_type: str = "文章",
    template_name: Optional[str] = None
) -> bool:
    """Run the batch pipeline and report whether every item succeeded.

    Delegates to ``link_to_text`` and counts a result as successful when it
    is a tuple of at least two elements whose second element is truthy.

    Args:
        excel_path: Path of the Excel file holding the links.
            NOTE(review): this value is not forwarded to ``link_to_text``;
            presumably the pipeline locates its input elsewhere — confirm.
        num_threads: Number of worker threads, forwarded as-is.
        ai_service: AI service name, forwarded as-is.
        generation_type: Generation type, forwarded as-is.
        template_name: Template name. It is not forwarded: the pipeline is
            always called with ``current_template=None``. A warning is
            logged when a value is supplied so the drop is not silent.

    Returns:
        True when every result counted as successful (an empty result list
        also yields True); False otherwise, or when any exception is raised.
    """
    if template_name is not None:
        # Surface the ignored option instead of dropping it silently.
        logger.warning(
            f"模板 {template_name!r} 未生效: "
            "批量处理当前始终以 current_template=None 调用 link_to_text"
        )

    try:
        results = link_to_text(
            num_threads=num_threads,
            ai_service=ai_service,
            current_template=None,
            generation_type=generation_type,
            app=None
        )

        success_count = sum(
            1 for r in results
            if isinstance(r, tuple) and len(r) >= 2 and r[1]
        )
        total_count = len(results)

        logger.info(f"处理完成: {success_count}/{total_count} 成功")

        return success_count == total_count

    except Exception as e:
        # logger.exception keeps the traceback, which the plain error call lost.
        logger.exception(f"处理失败: {e}")
        return False
|
|||
|
|
|
|||
|
|
|
|||
|
|
def process_single_link(
    link: str,
    ai_service: str = "coze",
    generation_type: str = "文章"
) -> bool:
    """Fetch one article and log a summary of what was extracted.

    Only the scraping step happens here: the title, content length and
    image count are logged. ``ai_service`` and ``generation_type`` are
    accepted but not used by this function.

    Args:
        link: Article URL passed to
            ``web_scraping_service.get_cached_content``.
        ai_service: AI service name (currently unused).
        generation_type: Generation type (currently unused).

    Returns:
        True when both a title and content were extracted; False when
        either is missing or when any exception is raised.
    """
    try:
        title, content, images = web_scraping_service.get_cached_content(link)

        if not title or not content:
            logger.error("无法提取文章内容")
            return False

        logger.info(f"标题: {title}")
        logger.info(f"内容长度: {len(content)}")
        # A missing image collection (None) must not turn a successful
        # extraction into a failure, so it is counted as zero images.
        logger.info(f"图片数量: {len(images or [])}")

        return True

    except Exception as e:
        # logger.exception keeps the traceback, which the plain error call lost.
        logger.exception(f"处理失败: {e}")
        return False
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _positive_int(value: str) -> int:
    """Parse *value* for argparse as an integer >= 1, rejecting anything else."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"线程数必须是整数: {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"线程数必须大于等于 1: {value!r}")
    return number


def main(argv: Optional[list] = None) -> int:
    """Parse command-line arguments and run the requested processing mode.

    Exactly one of ``--excel`` or ``--link`` must be given. Invalid
    arguments make argparse exit with status 2 before any work starts.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        0 when processing succeeded, 1 when it failed or an unexpected
        error occurred, 130 when interrupted with Ctrl-C.
    """
    parser = argparse.ArgumentParser(
        description="文章批量处理工具 - 命令行模式",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
  # 处理Excel文件
  %(prog)s --excel 文章链接.xlsx --threads 3

  # 处理单个链接
  %(prog)s --link https://www.toutiao.com/article/123

  # 使用特定生成类型
  %(prog)s --excel 文章链接.xlsx --type 短篇
        """
    )

    # Input options: exactly one source must be chosen.
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument(
        '--excel', '-e',
        type=str,
        help='Excel文件路径(包含文章链接)'
    )
    input_group.add_argument(
        '--link', '-l',
        type=str,
        help='单个文章链接'
    )

    # Processing options.
    parser.add_argument(
        '--threads', '-t',
        # Reject 0 and negative counts here instead of handing them to the
        # worker pipeline.
        type=_positive_int,
        default=1,
        help='线程数(默认: 1)'
    )
    parser.add_argument(
        '--service', '-s',
        type=str,
        default='coze',
        choices=['coze'],
        help='AI服务(默认: coze)'
    )
    parser.add_argument(
        '--type', '-T',
        type=str,
        default='文章',
        choices=['短篇', '文章'],
        help='生成类型(默认: 文章)'
    )

    # Miscellaneous options.
    parser.add_argument(
        '--template',
        type=str,
        help='使用的模板名称'
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='显示详细日志'
    )
    parser.add_argument(
        '--config',
        type=str,
        help='配置文件路径(默认: config.ini)'
    )

    args = parser.parse_args(argv)

    setup_logging(args.verbose)

    if args.config:
        # NOTE(review): this writes a private attribute of config_manager;
        # prefer a public setter if the config module offers one.
        config_manager._config_file = args.config
        config_manager.reload()

    try:
        if args.excel:
            success = process_excel_file(
                excel_path=args.excel,
                num_threads=args.threads,
                ai_service=args.service,
                generation_type=args.type,
                template_name=args.template
            )
        else:
            success = process_single_link(
                link=args.link,
                ai_service=args.service,
                generation_type=args.type
            )

        return 0 if success else 1

    except KeyboardInterrupt:
        logger.info("用户中断")
        return 130
    except Exception as e:
        # Last-resort boundary: keep the traceback so the failure is debuggable.
        logger.exception(f"未预期的错误: {e}")
        return 1
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Script entry point: the return value of main() becomes the exit status.
if __name__ == '__main__':
    raise SystemExit(main())
|