ArticleReplaceBatch/GzhArticle/getLink.py
2025-04-13 14:58:16 +08:00

30 lines
1.0 KiB
Python

"""
获取页面内容
"""
def wechat_extract_content(page):
    """
    Extract the title and body text of a WeChat article page.

    Args:
        page: Object exposing ``text_content(selector)``
            (presumably a Playwright ``Page`` -- confirm against caller).

    Returns:
        tuple: ``(title_text, article_text)`` -- the values returned by
        ``page.text_content`` for ``#activity-name`` and ``#js_content``,
        queried in that order.
    """
    # Title element first, then the article body container.
    selectors = ('#activity-name', '#js_content')
    return tuple(page.text_content(css) for css in selectors)
def toutiao_extract_content(page):
    """
    Extract the text content of a Toutiao page.

    Both returned values are read from the same ``article`` element, so the
    title and the article text are identical. The element is queried only
    once and the result is reused for both positions of the returned tuple.

    Args:
        page: Object exposing ``text_content(selector)``
            (presumably a Playwright ``Page`` -- confirm against caller).

    Returns:
        tuple: ``(title_text, article_text)``; both entries are the value
        returned by ``page.text_content`` for the article selector.
    """
    # Single definition of the selector that used to be repeated verbatim.
    article_selector = (
        '#root > div.wtt-detail-container > div.main > div:nth-child(1)'
        ' > div > div > div > div > article'
    )
    article_text = page.text_content(article_selector)
    # NOTE(review): the title has always been read from the article selector
    # itself; if a dedicated title element exists, query it here instead.
    title_text = article_text
    return title_text, article_text