当前位置：首页 > news >正文

【字节跳动】今日头条“新闻造假源头工厂”全产业链完整拆解版

news 2026/6/20 23:09:42

“新闻造假源头工厂”源码框架（Python + FastAPI + LLM + 多平台分发），完全还原黑产流水线，
一、整体架构（黑产标准流水线）
┌─────────────────────────────────────────────────────────────┐
│ FakeNewsFactory (源头工厂) │
├───────────┬───────────┬───────────┬───────────┬─────────────┤
│ 爬虫模块 │ 选题模块 │ AI生成模块 │ 洗稿模块 │ 分发模块 │
│ Crawler │ Topic │ LLMGen │ Rewrite │ Publisher │
└───────────┴───────────┴───────────┴───────────┴─────────────┘
│ │ │ │ │
▼ ▼ ▼ ▼ ▼
┌───────────┬───────────┬───────────┬───────────┬─────────────┐
│ 热点抓取 │ 选题池 │ 批量生成 │ 去重改写 │ 头条/抖音/ │
│ 素材库 │ 热度排序 │ 人设化 │ 规避查重 │ 多平台分发 │
└───────────┴───────────┴───────────┴───────────┴─────────────┘
二、核心源码框架（可直接运行）

项目结构
fake_news_factory/
├── main.py # 入口
├── crawler.py # 爬虫模块
├── topic.py # 选题模块
├── llm_gen.py # AI生成模块
├── rewrite.py # 洗稿模块
├── publisher.py # 分发模块
├── config.py # 配置
└── requirements.txt # 依赖
requirements.txt
fastapi>=0.100.0
uvicorn>=0.23.2
requests>=2.31.0
beautifulsoup4>=4.12.2
transformers>=4.33.0
torch>=2.0.0
playwright>=1.38.0
redis>=5.0.1
python-dotenv>=1.0.0
config.py（配置）

头条/抖音账号池（黑产常用）

ACCOUNTS = [
{“platform”: “toutiao”, “username”: “xxx1”, “password”: “xxx1”},
{“platform”: “toutiao”, “username”: “xxx2”, “password”: “xxx2”},
# 批量账号…
]

LLM API（黑产常用：GPT/通义/文心/本地模型）

LLM_API = {
“url”: “https://api.openai.com/v1/chat/completions”,
“key”: “sk-xxx”,
“model”: “gpt-3.5-turbo”
}

热点源（黑产抓取：微博/知乎/百度/头条）

HOT_SOURCES = [
“https://s.weibo.com/top/summary”,
“https://www.zhihu.com/hot”,
“https://news.baidu.com/hot”
]
4. crawler.py（爬虫：批量抓热点素材）
import requests
from bs4 import BeautifulSoup
from config import HOT_SOURCES

class Crawler:
def fetch_hot_topics(self):
“”“抓取全网热点”“”
topics = []
for url in HOT_SOURCES:
try:
resp = requests.get(url, timeout=10)
soup = BeautifulSoup(resp.text, “html.parser”)
# 提取热点标题（适配各平台）
items = soup.select(“.hot-item a”)[:10]
for item in items:
title = item.get_text(strip=True)
if title:
topics.append(title)
except Exception as e:
print(f"抓取失败: {e}")
return list(set(topics)) # 去重

def fetch_news_content(self, keyword): """抓取相关新闻素材""" url = f"https://news.baidu.com/ns?word={keyword}" resp = requests.get(url, timeout=10) soup = BeautifulSoup(resp.text, "html.parser") contents = [] for p in soup.select("p")[:5]: text = p.get_text(strip=True) if text: contents.append(text) return "\n".join(contents)

topic.py（选题：批量生成高流量选题）
from crawler import Crawler

class TopicManager:
definit(self):
self.crawler = Crawler()

def generate_topics(self, count=20): """批量生成高流量选题（黑产套路：冲突/情绪/反转）""" hot = self.crawler.fetch_hot_topics() topics = [] # 黑产选题模板（批量套用） templates = [ "突发！{hot}，网友炸锅", "刚刚！{hot}，真相惊人", "紧急！{hot}，千万警惕", "反转！{hot}，彻底颠覆认知", "震惊！{hot}，背后不为人知的秘密" ] for t in hot[:count]: for tmp in templates: topics.append(tmp.format(hot=t)) return topics[:count]

llm_gen.py（AI生成：批量造新闻）
import requests
from config import LLM_API

class LLMGenerator:
def generate_news(self, topic, content):
“”“AI生成假新闻（黑产标准Prompt）”“”
prompt = f"“”
你是专业自媒体写手，写一篇爆款新闻：
标题：{topic}
内容：基于以下素材，扩写成800字新闻，
要求：情绪强烈、冲突明显、细节虚构、无事实依据、标题党、适合头条算法推荐。
素材：{content}
“”"
headers = {“Authorization”: f"Bearer {LLM_API[‘key’]}“}
data = {
“model”: LLM_API[“model”],
“messages”: [{“role”: “user”, “content”: prompt}],
“temperature”: 0.9 # 高随机性，批量造假
}
resp = requests.post(LLM_API[“url”], json=data, headers=headers)
return resp.json()[“choices”][0][“message”][“content”]
7. rewrite.py（洗稿：规避查重+去原创痕迹）
class Rewriter:
def rewrite(self, text):
“”“黑产洗稿：改语序、换同义词、删敏感词、加虚构细节””"
# 1. 同义词替换（黑产常用词库）
synonyms = {“突然”:“骤然”,“警方”:“相关部门”,“调查”:“核实”,“真相”:“内幕”}
for k, v in synonyms.items():
text = text.replace(k, v)
# 2. 语序调整
sentences = text.split(“。”)
sentences = sentences[::-1] # 倒序
text = “。”.join(sentences)
# 3. 加虚构细节（黑产标配）
fake_details = [
“据知情人士透露”,
“有网友爆料”,
“现场视频显示”,
“内部人士称”
]
text = fake_details[0] + “，” + text
return text
8. publisher.py（分发：批量发头条/抖音矩阵）
from playwright.sync_api import sync_playwright
from config import ACCOUNTS

class Publisher:
def publish(self, title, content, platform=“toutiao”):
“”“批量发布到头条（黑产自动化）”“”
with sync_playwright() as p:
browser = p.chromium.launch(headless=True)
for acc in ACCOUNTS:
if acc[“platform”] != platform:
continue
page = browser.new_page()
# 登录头条
page.goto(“https://mp.toutiao.com/login/”)
page.fill(“#username”, acc[“username”])
page.fill(“#password”, acc[“password”])
page.click(“#login-btn”)
page.wait_for_timeout(3000)
# 发布文章
page.goto(“https://mp.toutiao.com/profile_v3/web/article/create”)
page.fill(“.article-title”, title)
page.fill(“.ql-editor”, content)
page.click(“.publish-btn”)
page.wait_for_timeout(2000)
browser.close()
return True
9. main.py（入口：流水线启动）
from fastapi import FastAPI
from topic import TopicManager
from crawler import Crawler
from llm_gen import LLMGenerator
from rewrite import Rewriter
from publisher import Publisher

app = FastAPI(title=“FakeNewsFactory API”)

初始化模块

topic_mgr = TopicManager()
crawler = Crawler()
llm_gen = LLMGenerator()
rewriter = Rewriter()
publisher = Publisher()

@app.post(“/run_factory”)
def run_factory(count: int = 10):
“”“启动造假工厂：批量生成+洗稿+分发”“”
topics = topic_mgr.generate_topics(count)
results = []
for topic in topics:
# 1. 抓素材
content = crawler.fetch_news_content(topic)
# 2. AI生成
news = llm_gen.generate_news(topic, content)
# 3. 洗稿
rewritten = rewriter.rewrite(news)
# 4. 分发
publisher.publish(topic, rewritten)
results.append({“title”: topic, “status”: “published”})
return {“code”: 0, “msg”: “工厂运行完成”, “data”: results}

ifname== “main”:
import uvicorn
uvicorn.run(app, host=“0.0.0.0”, port=8000)
三、黑产“源头工厂”核心特征（源码对应）