当前位置: 首页 > news >正文

样本TCR库相似性计算Morisita–Horn

样本TCR Morisita–Horn聚类热图

 

image

 

样本TCR杰卡德聚类热图

 

# 同一样本不同区域之间,TCR库的 Morisita–Horn 相似度可达 0.7 左右;不同样本之间的相似度则较低。热图中仍能看到聚类簇,但聚类效果不如杰卡德(Jaccard)指数。

# Morisita–Horn 指数可以用 vegan 包的 vegdist(method = "horn") 计算,也可以用 horn_morisita 函数计算

R: The Morisita index and Horn-Morisita index

 

生信分析 8+T细胞受体多样性分析 - 知乎

1、数据准备

 

image

 

2、代码

 

####################数据读取#########################

####加载R包
library(dplyr)
library(ggplot2)
library(ComplexHeatmap)
library(circlize)
library(data.table)
library(readxl)

####构建函数

####数据读取

#### WGD information
# Read the per-sample WGD table (tab-separated, quoting disabled, short rows
# padded), rename its first column to "sample" and keep only the sample id
# and the WGD call.
wgd_path <- "E:\\课题\\胰腺癌\\1-数据处理\\WES\\13-WGD\\4-结果整合\\0-SampleWGD.txt"
WGD <- read.table(
  wgd_path,
  header = TRUE,
  sep = "\t",
  quote = "",
  fill = TRUE,
  stringsAsFactors = FALSE,
  check.names = FALSE
)
names(WGD)[1] <- "sample"
WGD <- WGD[, c("sample", "WGD")]

#### KRAS information
# Read the non-synonymous mutation table and derive two sample sets:
#   KRAS        - samples with at least one row whose geneSymbol is "KRAS"
#   KRAS_sample - every sample present in the mutation table, followed by the
#                 WGD samples that are absent from it
mutation_tbl <- read.table(
  "E:\\课题\\胰腺癌\\1-数据处理\\WES\\10-突变过滤\\4-结果整合\\3-non-synonymous-mutation.txt",
  header = TRUE,
  sep = "\t",
  quote = "",
  fill = TRUE,
  stringsAsFactors = FALSE,
  check.names = FALSE
)
mutation_tbl <- unique(mutation_tbl[, c("patientID", "geneSymbol")])
mutated_samples <- unique(mutation_tbl$patientID)

# %in% never yields NA, so rows with a missing gene symbol are skipped.
KRAS <- unique(mutation_tbl$patientID[mutation_tbl$geneSymbol %in% "KRAS"])
KRAS_sample <- union(mutated_samples, WGD$sample)

#### Primary vs. recurrent tumours
# Sheet 2 of the clinical workbook carries its real column names in the first
# data row, so that row is promoted to the header and then dropped.
clinical_sheet <- read_xlsx("E:\\课题\\胰腺癌\\0-原始数据\\1-临床信息\\胰腺癌数据编号及临床信息-20250912.xlsx", sheet = 2)
colnames(clinical_sheet) <- clinical_sheet[1, ]

# Drop rows coded "2" (or missing), fold the two free-text entries into code
# "1", then label code "1" as "Recurrence" and everything else as "Primary".
Recurrence <- clinical_sheet[-1, ] %>%
  select(sample = patientID, Recurrence) %>%
  filter(!is.na(Recurrence), Recurrence != "2") %>%
  mutate(
    Recurrence = gsub("本次入院即为胰腺癌肝转移", "1", Recurrence),
    Recurrence = gsub("1,入院时已经是胰腺癌肝转移", "1", Recurrence),
    Recurrence = ifelse(Recurrence == "1", "Recurrence", "Primary")
  )

#### TNM stage
# Same workbook sheet as the recurrence labels: the first data row holds the
# column names.
stage_sheet <- read_xlsx("E:\\课题\\胰腺癌\\0-原始数据\\1-临床信息\\胰腺癌数据编号及临床信息-20250912.xlsx", sheet = 2)
colnames(stage_sheet) <- stage_sheet[1, ]

# Collapse sub-stages: III/IV -> III, IIA/IIB -> II, IA/IB -> I.
# NOTE(review): the patterns are unanchored, so the outcome depends on the
# order of the three substitutions; keep them in this sequence.
TNM <- stage_sheet[-1, ] %>%
  select(patientID, TNM_stage) %>%
  mutate(
    TNM_stage = gsub("III|IV", "III", TNM_stage),
    TNM_stage = gsub("IIA|IIB", "II", TNM_stage),
    TNM_stage = gsub("IA|IB", "I", TNM_stage)
  ) %>%
  filter(!is.na(TNM_stage))
colnames(TNM) <- c("sample", "TNM")

# TCR clonotype tables (one file per sequenced sample)
filepath <- list.files(
  path = "E:\\课题\\胰腺癌\\runtime\\0-原始数据\\5-BulkTCR\\data\\",
  pattern = ".clonotypes.TRB.txt$",
  full.names = TRUE
)

# Sample sheet
# Keep rows that have a BulkTCR(T) id, discarding those where both WGS(T) and
# BulkRNA(T) are missing.
sample_info <- read_xlsx("E:\\课题\\胰腺癌\\0-原始数据\\1-临床信息\\胰腺癌数据编号及临床信息-20250912.xlsx")
has_tcr <- !is.na(sample_info$`BulkTCR(T)`)
sample_info <- sample_info[has_tcr, ]
lacks_wgs_and_rna <- sample_info$`WGS(T)` %in% NA & sample_info$`BulkRNA(T)` %in% NA
sample_info <- sample_info[!lacks_wgs_and_rna, ]
sample_info <- unique(sample_info[, c("patientID", "BulkTCR(T)")])

# Strip everything from the first "-" onwards in the TCR id.
sample_info$`BulkTCR(T)` <- gsub("-.*", "", sample_info$`BulkTCR(T)`)

# Lookup vector: names are TCR ids, values are patient ids.
clin <- sample_info$patientID
names(clin) <- sample_info$`BulkTCR(T)`


#################### 1 - Collect the CDR3 clonotypes of every sample ####################
# For each clonotype file whose id is listed in sample_info, sum cloneCount per
# CDR3 amino-acid sequence and tag the rows with the patient id taken from
# `clin`. The per-file tables are stacked once at the end and written to disk.

# Literal file suffix, anchored at the end, so the dots are not treated as
# regex wildcards.
tcr_ids <- sub("\\.clonotypes\\.TRB\\.txt$", "", basename(filepath))

# Preallocate one slot per file instead of growing a data frame with rbind().
per_sample <- vector("list", length(filepath))

# seq_along() yields an empty sequence when no file was found, whereas
# 1:length(filepath) would iterate over c(1, 0).
for (i in seq_along(filepath)) {
  if (tcr_ids[i] %in% sample_info$`BulkTCR(T)`) {
    TCR <- read.table(
      filepath[i],
      header = TRUE,
      sep = "\t",
      stringsAsFactors = FALSE,
      fill = TRUE,
      check.names = FALSE
    )
    TCR <- TCR %>%
      select(cloneCount, aaSeqCDR3) %>%
      group_by(aaSeqCDR3) %>%
      summarise(cloneCount = sum(cloneCount), .groups = "drop")
    TCR$sample <- unname(clin[tcr_ids[i]])
    per_sample[[i]] <- TCR[, c("sample", "aaSeqCDR3", "cloneCount")]
  }
  print(i) # progress indicator
}

# Files that were skipped leave NULL slots; drop them before stacking.
per_sample <- Filter(Negate(is.null), per_sample)
if (length(per_sample) == 0L) {
  warning("No clonotype file matched an id in sample_info; the output table is empty.", call. = FALSE)
}
data <- rbindlist(per_sample)

fwrite(data, "E:\\课题\\胰腺癌\\5-文章结果\\TCR\\3-output\\1_sampleTCR_CDR3seq_count.txt", quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)

#################### 2 - Morisita-Horn index between samples ####################
# Reshape the long clonotype table into a CDR3 x sample count matrix, compute
# the Horn distance between samples and store 1 - distance as the similarity.
data <- fread("E:\\课题\\胰腺癌\\5-文章结果\\TCR\\3-output\\1_sampleTCR_CDR3seq_count.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE, quote = "", fill = TRUE, check.names = FALSE)

mat <- dcast(
  data,
  aaSeqCDR3 ~ sample,
  value.var = "cloneCount",
  fun.aggregate = sum,
  fill = 0
)

# data.table does not keep row names, so move the CDR3 column into the matrix
# row names during the conversion itself.
mat <- as.matrix(mat, rownames = "aaSeqCDR3")

# vegan is not attached by this script, hence the namespace-qualified call.
# vegdist() compares rows, so samples must be on the rows.
dist_mh <- vegan::vegdist(t(mat), method = "horn")
sim_mh <- 1 - as.matrix(dist_mh)

# fwrite() coerces a matrix to a data.table and, depending on the data.table
# version, may discard its row names. Write the sample labels as an explicit
# first column named "V1", which is the column the downstream reader uses.
fwrite(
  as.data.table(sim_mh, keep.rownames = "V1"),
  "E:\\课题\\胰腺癌\\5-文章结果\\TCR\\3-output\\1_sampleTCR_Morisita–Horn指数.txt",
  quote = FALSE,
  sep = "\t",
  col.names = TRUE
)

#################### 3 - Clustering of samples by Morisita-Horn similarity ####################

# Reload the similarity table written above. Despite its name, `jaccard_matrix`
# holds Morisita-Horn similarities here. The first column (V1) carries the
# sample labels: it becomes the row names and is then removed.
jaccard_matrix <- as.data.frame(
  fread(
    "E:\\课题\\胰腺癌\\5-文章结果\\TCR\\3-output\\1_sampleTCR_Morisita–Horn指数.txt",
    header = TRUE,
    sep = "\t",
    stringsAsFactors = FALSE,
    quote = "",
    fill = TRUE,
    check.names = FALSE
  )
)
row.names(jaccard_matrix) <- jaccard_matrix$V1
jaccard_matrix <- jaccard_matrix[, -1]

#### Annotation labels
# One named character vector per annotation track, in the column order of the
# similarity matrix. Every vector starts as "Unknown"; labels are then written
# from lowest to highest priority so that the last assignment wins.
sample_ids <- colnames(jaccard_matrix)
unknown_labels <- rep("Unknown", length(sample_ids))

# KRAS: "KRAS" takes precedence over "no KRAS".
group_KRAS <- unknown_labels
group_KRAS[sample_ids %in% KRAS_sample] <- "no KRAS"
group_KRAS[sample_ids %in% KRAS] <- "KRAS"
names(group_KRAS) <- sample_ids

# WGD: any call other than "no WGD" counts as "WGD" and takes precedence.
wgd_positive <- WGD$sample[which(WGD$WGD != "no WGD")]
wgd_negative <- WGD$sample[which(WGD$WGD == "no WGD")]
group_WGD <- unknown_labels
group_WGD[sample_ids %in% wgd_negative] <- "no WGD"
group_WGD[sample_ids %in% wgd_positive] <- "WGD"
names(group_WGD) <- sample_ids

# Recurrence: "Recurrence" takes precedence over "Primary".
recurrent_ids <- Recurrence$sample[which(Recurrence$Recurrence == "Recurrence")]
primary_ids <- Recurrence$sample[which(Recurrence$Recurrence == "Primary")]
group_Recurrence <- unknown_labels
group_Recurrence[sample_ids %in% primary_ids] <- "Primary"
group_Recurrence[sample_ids %in% recurrent_ids] <- "Recurrence"
names(group_Recurrence) <- sample_ids

# TNM: stage I takes precedence over II, which takes precedence over III.
group_TNM <- unknown_labels
for (stage in c("III", "II", "I")) {
  group_TNM[sample_ids %in% TNM$sample[which(TNM$TNM == stage)]] <- stage
}
names(group_TNM) <- sample_ids

# Column annotation: four label tracks (KRAS, WGD, Recurrence, TNM), each with
# its own discrete palette; "Unknown" is drawn in light grey on every track.
top_palette <- list(
  KRAS = c("KRAS" = "#AABCDB", "no KRAS" = "#C0D6EA", "Unknown" = "grey90"),
  WGD = c("no WGD" = "#EFC99B", "WGD" = "#E8B574", "Unknown" = "grey90"),
  TNM = c("I" = "#B5A8CA", "II" = "#9A8AB4", "III" = "#826BA2", "Unknown" = "grey90"),
  Recurrence = c("Primary" = "#DBDBA7", "Recurrence" = "#C0C05A", "Unknown" = "grey90")
)

Top_Annotation <- HeatmapAnnotation(
  KRAS = group_KRAS,
  WGD = group_WGD,
  Recurrence = group_Recurrence,
  TNM = group_TNM,
  col = top_palette,
  # one height per track
  annotation_height = unit(rep(1, 4), "mm")
)

#### Left (row) annotation
# Same four label tracks and palettes as the column annotation, drawn along
# the rows of the heatmap.
left_palette <- list(
  KRAS = c("KRAS" = "#AABCDB", "no KRAS" = "#C0D6EA", "Unknown" = "grey90"),
  WGD = c("no WGD" = "#EFC99B", "WGD" = "#E8B574", "Unknown" = "grey90"),
  TNM = c("I" = "#B5A8CA", "II" = "#9A8AB4", "III" = "#826BA2", "Unknown" = "grey90"),
  Recurrence = c("Primary" = "#DBDBA7", "Recurrence" = "#C0C05A", "Unknown" = "grey90")
)

left_Annotation <- rowAnnotation(
  KRAS = group_KRAS,
  WGD = group_WGD,
  Recurrence = group_Recurrence,
  TNM = group_TNM,
  col = left_palette,
  # controls the width of the left annotation tracks (one value per track)
  annotation_width = unit(rep(5, 4), "mm")
)

#### Colour mapping and main heatmap
# Heatmap() expects a matrix; convert explicitly instead of relying on its
# implicit (and warned-about) data-frame coercion.
mat <- as.matrix(jaccard_matrix)

# Cluster the samples on 1 - similarity. The clustering_distance_* arguments
# accept a method name or a function, not a dist object, so the tree is built
# up front and handed to cluster_rows / cluster_columns instead. The matrix is
# symmetric, so one tree serves both axes.
sample_clust <- hclust(as.dist(1 - mat))

# Blank the self-similarity diagonal so it is drawn in na_col.
diag(mat) <- NA

# Piecewise-linear colour scale; values above the last break take the darkest
# colour.
col_fun <- colorRamp2(
  c(0, 0.005, 0.01, 0.02, 0.05, 0.25),
  c("white", "#deebf7", "#9ecae1", "#6baed6", "#3182bd", "#08306b")
)

# Alternative scales kept for reference:
# col_fun <- circlize::colorRamp2(
# c(0.75, 0.90, 0.95, 0.98, 0.99, 0.992, 0.994),
# rev(c("white", "#deebf7", "#9ecae1", "#6baed6", "#3182bd", "#08519c", "#08306b"))
# )

# col_fun <- colorRamp2(
# c(0, 0.005, 0.01, 0.02, 0.05, 0.25),
# c("white", "#FAE5D8", "#FACEB7", "#E6866A", "#BE3137", "#720320")
# )

main_ht <- Heatmap(
  mat,
  name = "MAPs",
  cluster_rows = sample_clust,
  cluster_columns = sample_clust,
  show_row_names = FALSE,
  show_column_names = FALSE,
  show_heatmap_legend = TRUE, # keep the colour legend
  col = col_fun,

  # row_order = Allele_order$Var1,

  na_col = "white",
  rect_gp = gpar(col = "grey90", lwd = 1),

  top_annotation = Top_Annotation,
  left_annotation = left_Annotation

  # Optional per-cell text labels, disabled:
  # cell_fun = function(j,i,x,y,w,h,fill){
  # if(!is.na(data_use[i,j])){
  # grid.text(
  # data_use[i,j],
  # x,y,
  # gp = gpar(fontsize = 8)
  # )
  # }
  # }
)

# Render the heatmap into a PDF file.
pdf("E:\\课题\\胰腺癌\\5-文章结果\\TCR\\2-图\\样本TCR Morisita–Horn聚类热图.pdf", width = 13, height = 12, onefile = FALSE)

# draw() renders explicitly; a bare `main_ht` is only auto-printed at the
# interactive top level and would leave the PDF empty under source().
# The device is closed even if drawing fails.
tryCatch(
  draw(main_ht),
  finally = dev.off()
)

 

http://www.gsyq.cn/news/1488925.html

相关文章:

  • 如何让机器人在未知环境中实时构建3D地图?RTAB-Map技术深度解析
  • 幻兽帕鲁存档转换终极指南:安全编辑游戏数据的完整解决方案
  • VC++6.0开发的轻量级网络资产探测工具:支持主机发现、端口扫描、服务识别与常见漏洞初筛
  • 2026 泉州本地人必选防水补漏 TOP5|卫生间免砸砖、屋顶 / 外墙 / 地下室防水|同城上门 1–2h|2026 年 6 月最新调研 - 吉林同城获客
  • AI-Shoujo HF Patch终极指南:一站式游戏增强解决方案 [特殊字符]
  • 2.初识网络代码——python基础代码
  • 计算机小程序毕设实战-ssm基于springboot+微信小程序的中小学生个性化阅读平台小程序的设计与实现【完整源码+LW+部署说明+演示视频,全bao一条龙等】
  • 若依框架导出Excel合并行功能详解:从注解配置到源码改造的完整指南
  • 手机 Vibe Coding 半年,终于从能跑到真爽了
  • 终极杀戮尖塔模组管理器:3步开启无限游戏可能
  • 合并采集数据图片进展AI识别
  • 蓝牙LE纽扣电池供电设计:峰值电流抑制硬件方案与KW47软件优化
  • Navicat无限试用终极指南:macOS用户必备的14天限制破解方案
  • 网盘限速太折磨?试试这个神奇的网盘直链提取工具
  • 信创环境避坑实录:在飞腾2000+银河麒麟V10上,用Docker 19.03.9部署达梦8.1数据库
  • BetterNCM-Installer:网易云音乐插件一键安装的终极解决方案
  • 2026年铝型材厂家推荐榜:广东/深圳工业铝型材、散热器/异型铝型材、定制开模与精密挤压实力品牌深度解析 - 品牌发掘
  • 软件工程导论期末自救指南:一张思维导图+一套高频考点速查表,3天搞定复习
  • RT600低功耗模式实战:从原理到测量,打造超长续航嵌入式系统
  • 深度解析Mac Mouse Fix:让10美元鼠标在macOS上超越触控板的革命性方案
  • 2026年 3,5-二硝基苯甲酸/硝基苯甲酸源头厂家推荐:高纯度合成与精细化工领域实力工厂精选 - 品牌发掘
  • 终极iOS越狱指南:3步完成palera1n工具安装与配置
  • 2026年6月最新 北京门窗定制品牌排行:硬核实力与落地案例解析 - 奔跑123
  • 基于MC68HC11E9的步进电机控制系统:从汇编编程到硬件驱动全解析
  • 2026年在职心理学博士优选机构盘点(含学制学费、报考条件) - 品牌测评鉴赏家
  • Skill的实现方式:让 Agent 学会“开挂“
  • Confluence介绍
  • 力扣刷题#11:LeetCode128最长连续序列_刷题笔记
  • 氛围感满分!在厦门,拍一套治愈一辈子的海景婚纱照 - 奔跑123
  • 国产PCB厂家综合实力排行,这5家值得关注