175 lines
8.7 KiB
Python
175 lines
8.7 KiB
Python
|
|
import re
|
|||
|
|
import jieba
|
|||
|
|
import random
|
|||
|
|
from typing import List
|
|||
|
|
|
|||
|
|
|
|||
|
|
class HeavyHumanizer:
    """Heavy "humanizing" rewriter for Chinese text.

    The pipeline swaps a few formal words for plainer ones, sprinkles in
    subjective lead-ins and colloquial particles, splits long sentences,
    optionally reorders them, and regroups the result into paragraphs. The
    stated goal is text that keeps its logic, stays readable and is harder
    for AI-text detectors to flag.

    Every random decision uses the module-level ``random`` generator, so call
    ``random.seed(...)`` first when reproducible output is needed.
    """

    # Fullwidth punctuation is written with \u escapes on purpose: the
    # characters look almost identical to their ASCII counterparts, and a
    # literal is easy to fold into ASCII by accident, which silently disables
    # the matching of fullwidth-punctuated text.
    _FULLWIDTH_COMMA = '\uff0c'
    _COMMAS = (_FULLWIDTH_COMMA, ',')

    # Closing quotes/brackets that still belong to the sentence they follow.
    # ASCII quotes are left out because they can open as well as close.
    _CLOSERS = frozenset({'”', '’', '」', '』', '】', '》', ')', '\uff09'})

    def __init__(self):
        """Initialize jieba and build the phrase tables."""
        # Kept from the original constructor; no method of this class calls
        # jieba afterwards.
        jieba.initialize()
        self._init_vocabulary()

    def _init_vocabulary(self):
        """Populate the word lists and punctuation sets used by the rewriter."""
        # Human-sounding openings / transitions / interjections.
        self.openings = ['说到', '提到', '关于', '其实', '要说', '你知道吗', '顺便说']
        self.transitions = ['但是', '不过', '然而', '话说回来', '可惜的是', '偏偏']
        self.fillers = ['其实', '当然', '显然', '我觉得', '说起来', '顺便说']
        # Subjective lead-ins.
        self.subjective = ['我认为', '我觉得', '在我看来', '就我所知', '据我了解']
        # Colloquial sentence-final particles.
        self.colloquial_particles = ['呢', '吧', '啊', '哦', '嘛', '哈', '呀']
        # "High-risk" formal words mapped to their plainer alternatives.
        self.high_risk = {
            '重要': ['关键', '核心', '主要'],
            '显著': ['明显', '突出', '很大'],
            '提升': ['提高', '增强', '改善'],
            '确保': ['保证', '做到', '维护'],
            '实施': ['执行', '开展', '推行'],
        }
        # Sentence terminators: ASCII forms plus the fullwidth exclamation
        # mark, question mark and semicolon.
        self.sentence_endings = {
            '。', '.', '!', '?', ';', '…',
            '\uff01', '\uff1f', '\uff1b',
        }

    def split_sentences(self, text: str) -> List[str]:
        """Split ``text`` into sentences at terminating punctuation.

        A run of consecutive terminators (``?!``, ``……``) and any closing
        quote/bracket right after it stay attached to the sentence they end.
        A ``.`` between two digits is treated as a decimal point, not as a
        terminator. Each sentence is stripped of surrounding whitespace;
        trailing text without a terminator becomes the last sentence.

        Args:
            text: The text to split.

        Returns:
            The sentences in their original order (empty list for blank input).
        """
        sentences = []
        buffer = []
        length = len(text)
        pos = 0
        while pos < length:
            char = text[pos]
            buffer.append(char)
            pos += 1
            if char not in self.sentence_endings:
                continue
            if (
                char == '.'
                and pos >= 2
                and pos < length
                and text[pos - 2].isdigit()
                and text[pos].isdigit()
            ):
                # Decimal point inside a number such as "3.5".
                continue
            # Absorb the rest of a terminator run, then trailing closers, so
            # they do not end up as the start of the next sentence.
            while pos < length and text[pos] in self.sentence_endings:
                buffer.append(text[pos])
                pos += 1
            while pos < length and text[pos] in self._CLOSERS:
                buffer.append(text[pos])
                pos += 1
            sentence = ''.join(buffer).strip()
            if sentence:
                sentences.append(sentence)
            buffer = []
        tail = ''.join(buffer).strip()
        if tail:
            sentences.append(tail)
        return sentences

    def replace_high_risk_words(self, sentence: str) -> str:
        """Replace "high-risk" formal words with plainer alternatives.

        Each key of ``self.high_risk`` found in the sentence is replaced with
        probability 0.8; all occurrences of that key receive the same randomly
        chosen alternative.

        Args:
            sentence: The sentence to rewrite.

        Returns:
            The sentence with zero or more words substituted.
        """
        for word, alternatives in self.high_risk.items():
            if word in sentence and random.random() < 0.8:
                sentence = sentence.replace(word, random.choice(alternatives))
        return sentence

    @staticmethod
    def _prepend_phrase(sentence: str, phrase: str) -> str:
        """Put ``phrase`` and a comma in front of ``sentence`` unless it already leads with it."""
        if sentence.startswith(phrase):
            return sentence
        return phrase + ',' + sentence

    def add_subjective_expressions(self, sentence: str) -> str:
        """Randomly add a subjective lead-in, a colloquial particle and a filler.

        Three independent draws are made, in this order: a subjective phrase
        is prepended with probability 0.3; a particle is inserted before a
        final ``。`` with probability 0.2; a filler is prepended with
        probability 0.15. A phrase is not prepended when the sentence already
        starts with it, and no particle is added when the sentence already
        ends in one, so doubled lead-ins and stacked particles are avoided.

        Args:
            sentence: The sentence to decorate.

        Returns:
            The possibly modified sentence.
        """
        if random.random() < 0.3:
            sentence = self._prepend_phrase(sentence, random.choice(self.subjective))
        if random.random() < 0.2:
            particle = random.choice(self.colloquial_particles)
            if (
                sentence.endswith('。')
                and sentence[-2:-1] not in self.colloquial_particles
            ):
                sentence = sentence[:-1] + particle + '。'
        if random.random() < 0.15:
            sentence = self._prepend_phrase(sentence, random.choice(self.fillers))
        return sentence

    def vary_sentence_length(self, sentences: List[str]) -> List[str]:
        """Split some long sentences and occasionally reorder the result.

        A sentence longer than 50 characters is split, with probability 0.5,
        at the last comma (fullwidth or ASCII) found before its midpoint,
        provided that comma lies past index 5. The first fragment keeps its
        trailing comma. With probability 0.3 the order is then shuffled; the
        two fragments of a split sentence always stay adjacent and in order.

        Args:
            sentences: Sentences in reading order.

        Returns:
            A new list of sentences and sentence fragments.
        """
        # Each block holds either one whole sentence or the two fragments of a
        # split one; shuffling blocks keeps those fragments together.
        blocks = []
        for sentence in sentences:
            if len(sentence) > 50 and random.random() < 0.5:
                mid = len(sentence) // 2
                comma_pos = max(sentence.rfind(c, 0, mid) for c in self._COMMAS)
                if comma_pos > 5:
                    first = sentence[:comma_pos + 1].strip()
                    second = sentence[comma_pos + 1:].strip()
                    blocks.append([first, second])
                    continue
            blocks.append([sentence])
        if random.random() < 0.3:
            random.shuffle(blocks)
        return [fragment for block in blocks for fragment in block]

    def create_paragraphs(self, sentences: List[str]) -> List[str]:
        """Group sentences into paragraphs by accumulated length.

        Sentences are concatenated without a separator. Once a paragraph
        exceeds 80 characters it is closed with probability 0.4 after each
        further sentence, and it is always closed once it exceeds 150.

        Args:
            sentences: Sentences in the order they should appear.

        Returns:
            The paragraph strings (empty list when there are no sentences).
        """
        paragraphs = []
        pending = []
        pending_len = 0
        for sentence in sentences:
            pending.append(sentence)
            pending_len += len(sentence)
            if pending_len > 80 and (random.random() < 0.4 or pending_len > 150):
                paragraphs.append(''.join(pending))
                pending = []
                pending_len = 0
        if pending:
            paragraphs.append(''.join(pending))
        return paragraphs

    @staticmethod
    def _squeeze_whitespace(match) -> str:
        """Return the replacement for one whitespace run matched by ``\\s+``.

        The run collapses to a single space when it separates two ASCII
        letters/digits (so Latin words and numbers are not fused together)
        and is dropped everywhere else, including at either end of the text.
        """
        source = match.string
        start, end = match.span()
        if start == 0 or end == len(source):
            return ''
        before = source[start - 1]
        after = source[end]
        if (
            ord(before) < 128 and before.isalnum()
            and ord(after) < 128 and after.isalnum()
        ):
            return ' '
        return ''

    def humanize_text(self, text: str) -> str:
        """Run the full rewriting pipeline on ``text``.

        Steps: normalize whitespace, split into sentences, replace high-risk
        words, add subjective/colloquial expressions, vary sentence length,
        build paragraphs, then join the paragraphs with blank lines, each
        one prefixed with a space.

        Args:
            text: The source text.

        Returns:
            The rewritten text; an empty string when ``text`` has no content.
        """
        # Line breaks and spacing of the input carry no meaning downstream.
        text = re.sub(r'\s+', self._squeeze_whitespace, text)
        sentences = self.split_sentences(text)

        sentences = [self.replace_high_risk_words(s) for s in sentences]
        sentences = [self.add_subjective_expressions(s) for s in sentences]
        sentences = self.vary_sentence_length(sentences)

        paragraphs = self.create_paragraphs(sentences)
        return '\n\n'.join(' ' + paragraph for paragraph in paragraphs)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ================== Usage example ==================
if __name__ == "__main__":
    # Sample article for the demo. Its line breaks and blank lines are only
    # for readability here: humanize_text() strips them before splitting the
    # text into sentences.
    text = """
最近,晓蕾又上热搜了!

换句话说,现在卖东西,完全是私营业态。咋回事呢?但特意声明:她早就离开了上海电视台的编制,也不拿电视台的工资欸。原来,她和老公刘剑一起开了直播带货的副业。

这事儿一下子引爆了大家的八卦魂。毕竟,明星主持扎堆直播间,也不算新鲜事你说呢。

但还是挺多人纳闷:这些当年的 "话筒头牌",是不是集体选择摆烂了?

其实,晓蕾和刘剑干…俩人意思很明确:“我们不是来拉低职业口碑的”。而且还耐心解释了自己转行的理由,曾经的大佬,变成了烟火气。

说到晓蕾,不了解点她背景都不好意思讨论人家。当年上视新闻部的 "当家花旦",光学历和气质,足够秒杀隔壁主持圈的八条街。而刘剑,似乎早年可是 "台柱子",播音腔精致到令人耳膜怀孕嗯。照理来说,这样一对,在编制铁饭碗里躺平一辈子没毛病。

可人家偏不。

晓蕾说过这样一句话:“其实,我就是个普通人。” 真的那么普通吗?她不这么说,没人敢忘了她的标杆履历啊!她离开台里后,居然一头扎进了童语言教育这个赛道,一干就是十年,让机构做到了业内小圈子的爆款水准。

而这次直播,打的商品也不混乱,主打性价比和实用属性,晓蕾每件商品还得亲测过。如果你觉得她自吹自擂,建议去看看她直播间的粉丝评论。大家地意思是:晓蕾推品 = 放心买。

刘剑这枚 “前一哥”,更狠!

说晓蕾牛,看起来别忘了,刘剑十年前也上演了一场 “豪赌”。那个年代,辞去电视台稳定工作,和 “打水漂” 差不多。

可是刘剑敢把梭全下,为啥?因为他看中了播音考生和辅导课程的市场,那时还没有多少人扎堆干这块,他觉得这是个机会。

果然,就这么辞了职,工作的腰板从跟组织吃工资,摇身变成了名副其实的事业单位 —— 自己家老板。虽然后来也是磕磕绊绊,但终究从试验田里掘出了一片肥沃地。主持人的 “下海”,是换方向走。

有人觉得,曾经的新闻人、主持人 “跑去带货”,肯定是混不下去了。你要放在十年前,这种联想不稀奇,可现在不一样了。大环境变了,看起来传统媒体是真的在互联网时代被打败得找不到调。

原来电视频道的观众,现在早转移到手机端,看知乎、刷短视频,甚至晚上蹲个带货直播会。就像我说的,我认为,你说新闻节目的高冷主播,现在换脸做带货主持,是不是 “落魄”?未必。

其实,晓蕾夫妻这一波,实际上是转型很成功的范例。不管带啥网红货,可能他们俩把品质第一的逻辑摆明白了啊。这样的主播,不止卖产品,更卖信誉,靠着时间积攒了观众的信任嗯。也许,直播间哪门子 LOW?明明是主战场。

网友说得有趣:“谁嫌直播带货 LOW,谁就输定了。” 道理没跑儿,似乎移动互联网成了咱生活重心,生意也跟着迁移啊。

这是明显趋势,看不懂的还真不想赚钱了。

而且,似乎做直播一点不轻松。站几个小时口播、随时照顾弹幕情绪,这比坐着念提词器辛苦多了。其实,像晓蕾和刘剑这样的 “摸鱼资历”,能转过身来赚饭钱,这不是 “混”,是 “拼” 啊。

别说传统意义的职业崇拜消失殆尽,你觉得稳如狗的岗位,说散架就散你说呢。老一辈金饭碗情结,对于下一代新创别说香,而是种被淘汰跑赢速度内心创新积极点。

我不是电视台员工了,早就离职 10 年了。 """
    humanizer = HeavyHumanizer()
    result = humanizer.humanize_text(text)
    print(result)
|