当前位置: 首页 > news >正文

可视化结构域序列并提取序列

1、可视化

点击查看代码
import html
import json
import os

from Bio import AlignIO

# ====== User parameters ======
alignment_file = "比对.fa"  # input alignment file (fasta/clustal)
alignment_format = "fasta"
html_output = "msa_ruvc_all.html"

# Endpoints of the conservation background gradient, as RRGGBB hex strings
# without a leading "#".
# NOTE: despite the "blue" names, the values are white (low conservation)
# and light yellow (high conservation).
light_blue = "ffffff"
dark_blue = "ffff66"

# RUVC1/2/3 definitions, keyed by sequence id.  Each value is a list of
# (start, end) positions in the ungapped sequence, 1-based.
RUVC1 = {
    "FnCas12a_6I1K_1": [(892, 953)],
    "LbCas12a_5ID6_1": [(809, 858)],
    "LbCas12a_6NME_1": [(808, 872)],
    "Lb2Cas12a_8I54_1": [(792, 852)],
    "ReChb_Cas12a_1": [(853, 914)],
}
RUVC2 = {
    "FnCas12a_6I1K_1": [(971, 1078)],
    "LbCas12a_5ID6_1": [(890, 1011)],
    "LbCas12a_6NME_1": [(890, 997)],
    "Lb2Cas12a_8I54_1": [(869, 992)],
    "ReChb_Cas12a_1": [(930, 1044)],
}
RUVC3 = {
    "FnCas12a_6I1K_1": [(1254, 1300)],
    "LbCas12a_5ID6_1": [(1138, 1228)],
    "LbCas12a_6NME_1": [(1179, 1228)],
    "Lb2Cas12a_8I54_1": [(1151, 1206)],
    "ReChb_Cas12a_1": [(1215, 1261)],
}

# RUVC styling
RUVC_color = "#ff0000"  # every RUVC region uses the same text colour
RUVC_italic = False  # whether RUVC residues are rendered in italics

# ====== Read the alignment ======
alignment = AlignIO.read(alignment_file, alignment_format)
seq_len = alignment.get_alignment_length()

# Conservation of each alignment column: the share of non-gap residues that
# equal the most frequent residue in that column.  A column made up only of
# gaps scores 0.0.
conservation = []
for col_idx in range(seq_len):
    column_chars = (rec.seq[col_idx] for rec in alignment)
    residues = [sym for sym in column_chars if sym != "-"]
    if residues:
        top_count = max(residues.count(sym) for sym in set(residues))
        conservation.append(top_count / len(residues))
    else:
        conservation.append(0.0)

# ====== Helper functions ======
def hex_to_rgb(hexstr):
    """Split an ``RRGGBB`` hex string (no leading ``#``) into three ints."""
    red, green, blue = (int(hexstr[k:k + 2], 16) for k in (0, 2, 4))
    return red, green, blue


def rgb_to_hex(r, g, b):
    """Format three integer channels as a lowercase ``rrggbb`` hex string."""
    return "".join(format(channel, "02x") for channel in (r, g, b))


# RGB channels of the two gradient endpoints.
lr, lg, lb = hex_to_rgb(light_blue)
dr, dg, db = hex_to_rgb(dark_blue)

# Map ungapped coordinates to alignment coordinates
def build_ruvc_aligned(ruvc_dict, alignment):
    """Translate ungapped residue ranges into alignment column ranges.

    Args:
        ruvc_dict: Maps a sequence id to a list of ``(start, end)`` tuples,
            given as 1-based inclusive positions in the ungapped sequence.
        alignment: Iterable of records exposing ``.id`` and ``.seq``; gaps
            are the ``"-"`` character.

    Returns:
        A dict mapping each sequence id to a list of ``(start_col, end_col)``
        tuples, 0-based inclusive alignment column indices.  Ids that end up
        with no usable range are omitted.
    """
    result = {}
    for rec in alignment:
        ranges = ruvc_dict.get(rec.id)
        if not ranges:
            continue
        # mapping[k] is the alignment column of the (k + 1)-th residue.
        mapping = [col for col, ch in enumerate(str(rec.seq)) if ch != "-"]
        n_residues = len(mapping)
        aligned_ranges = []
        for start, end in ranges:
            # Clamp both ends into 1..n_residues.  Without the lower clamp a
            # non-positive coordinate would become a negative list index and
            # silently wrap around to the end of the sequence.
            start = max(start, 1)
            end = min(end, n_residues)
            if start > end:
                # Range lies entirely outside the sequence, or is inverted.
                continue
            aligned_ranges.append((mapping[start - 1], mapping[end - 1]))
        if aligned_ranges:
            result[rec.id] = aligned_ranges
    return result


RUVC1_aligned = build_ruvc_aligned(RUVC1, alignment)
RUVC2_aligned = build_ruvc_aligned(RUVC2, alignment)
RUVC3_aligned = build_ruvc_aligned(RUVC3, alignment)

# Check whether a position belongs to any RUVC region
def in_ruvc(seq_id, pos, aa):
    """Tell whether an aligned residue falls inside any RUVC range.

    Args:
        seq_id: Sequence id used to look up ranges in the aligned RUVC maps.
        pos: Alignment column index of the residue.
        aa: The character at that column; ``"-"`` (gap) is never in a domain.

    Returns:
        True if ``pos`` lies within an inclusive ``(start, end)`` range
        recorded for ``seq_id`` in RUVC1, RUVC2 or RUVC3; otherwise False.
    """
    if aa == "-":
        return False
    domain_maps = (RUVC1_aligned, RUVC2_aligned, RUVC3_aligned)
    return any(
        lo <= pos <= hi
        for domain_map in domain_maps
        for lo, hi in domain_map.get(seq_id, [])
    )


# ====== Generate the HTML ======
# The background colour depends only on the alignment column, so compute it
# once per column instead of once per residue.
# Background gradient: white -> fluorescent yellow.
column_bg = []
for cons in conservation:
    r = int(round(lr + (dr - lr) * cons))
    g = int(round(lg + (dg - lg) * cons))
    b = int(round(lb + (db - lb) * cons))
    column_bg.append("#" + rgb_to_hex(r, g, b))

# Static CSS and JavaScript of the page.  ``\\n`` is deliberate: the browser
# must receive a literal backslash-n inside the JS template literal.
page_style_and_script = (
    "<style>"
    "body{font-family:Consolas,monospace;padding:16px}"
    "table{border-collapse:collapse;}"
    "td.id{vertical-align:top;padding:4px 8px;white-space:nowrap;}"
    "td.seq{vertical-align:top;padding:4px 8px;white-space:pre;font-size:16px;}"
    "span.res{display:inline-block;padding:0 1px;cursor:pointer;}"
    "#zoom-controls{margin-bottom:10px;}"
    "</style>"
    "<script>"
    "let currentZoom = 1.0;"
    "function zoomIn(){currentZoom += 0.1; updateZoom();}"
    "function zoomOut(){currentZoom = Math.max(0.5, currentZoom - 0.1); updateZoom();}"
    "function updateZoom(){document.querySelectorAll('td.seq').forEach(e=>{e.style.fontSize = (16 * currentZoom) + 'px';});}"
    "function showInfo(seqId, alignedPos, ungappedPos, aa){alert(`序列: ${seqId}\\n氨基酸: ${aa}\\n比对位置: ${alignedPos+1}\\n原序列位置: ${ungappedPos}`);}"
    "</script></head><body>"
)

with open(html_output, "w", encoding="utf-8") as out:
    out.write("<!doctype html><html lang='zh-CN'><head><meta charset='utf-8'>\n")
    out.write("<title>MSA - RUVC 高亮</title>\n")
    out.write(page_style_and_script)
    # The file name is user-supplied text; escape it before embedding.
    file_label = html.escape(os.path.basename(alignment_file))
    out.write(f"<h2>多序列比对(RUVC 高亮) — {file_label}</h2>")
    out.write(
        "<div id='zoom-controls'>"
        "<button onclick='zoomIn()'>放大</button>"
        "<button onclick='zoomOut()'>缩小</button>"
        "</div><div style='overflow-x:auto'><table>"
    )

    for rec in alignment:
        seq_id = rec.id
        # Ids come from the alignment file and may contain quotes or markup.
        # json.dumps yields a valid JS string literal; html.escape then makes
        # it safe inside the double-quoted onclick attribute.
        seq_id_js = html.escape(json.dumps(seq_id), quote=True)
        spans = []
        ungapped_pos = 0
        for i, aa in enumerate(str(rec.seq)):
            if aa == "-":
                bg, color, style_extra = "#ffffff", "#000000", ""
                ungapped_js = "null"
            else:
                ungapped_pos += 1
                ungapped_js = ungapped_pos
                bg = column_bg[i]
                # RUVC annotation
                if in_ruvc(seq_id, i, aa):
                    color = RUVC_color
                    style_extra = "font-style:italic;" if RUVC_italic else ""
                else:
                    color, style_extra = "#000000", ""
            aa_js = html.escape(json.dumps(aa), quote=True)
            spans.append(
                f"<span class='res' style='background-color:{bg};color:{color};{style_extra}' "
                f"onclick=\"showInfo({seq_id_js}, {i}, {ungapped_js}, {aa_js})\">"
                f"{html.escape(aa)}</span>"
            )
        out.write("<tr>")
        out.write(f"<td class='id'>{html.escape(seq_id)}</td>")
        out.write("<td class='seq'>")
        out.write("".join(spans))
        out.write("</td></tr>\n")

    out.write("</table></div></body></html>\n")

print(f"已生成:{html_output}")

2、提取结构域区域序列

点击查看代码
from Bio import AlignIO# ====== 用户参数 ======
alignment_file = "比对.fa"  # input multiple sequence alignment file
alignment_format = "fasta"
output_fasta = "Cas12a_RUVC1.fasta"  # output file
# Alignment columns to extract (1-based, inclusive on both ends).
start_pos = 1610
end_pos = 1865

# ====== Read the alignment ======
alignment = AlignIO.read(alignment_file, alignment_format)
seq_len = alignment.get_alignment_length()
print(f"总比对长度:{seq_len}")

# Reject a non-positive or inverted range up front: start_pos - 1 would
# otherwise become a negative slice index and silently select the wrong
# columns, or nothing at all.
if not 1 <= start_pos <= end_pos:
    raise ValueError(
        f"区间 {start_pos}-{end_pos} 无效:需满足 1 <= start_pos <= end_pos"
    )
if end_pos > seq_len:
    raise ValueError(f"区间 {end_pos} 超出比对长度 {seq_len}")

# ====== Extract the range and strip gap characters ======
# Convert the 1-based inclusive column range into a 0-based Python slice,
# cut it out of every aligned sequence, and drop the gap characters.
region = slice(start_pos - 1, end_pos)
records = [
    (rec.id, str(rec.seq)[region].replace("-", ""))
    for rec in alignment
]

# ====== Write the result as FASTA ======
with open(output_fasta, "w", encoding="utf-8") as fasta_out:
    fasta_out.writelines(
        f">{name}\n{residues}\n" for name, residues in records
    )

print(f"✅ 已生成文件:{output_fasta}")
print(f"提取区间:{start_pos}-{end_pos},共 {len(records)} 条序列。")
http://www.gsyq.cn/news/41757.html

相关文章:

  • 2025年11月国际连锁酒店投资加盟推荐:专业评价与选择指南
  • vue 安装后端调试接口 - 东方不败-
  • 动态规划经典题
  • 2025年11月连锁酒店加盟品牌推荐榜单:权威解析五大品牌投资价值对比
  • sql 常用命令
  • 今日依旧是java的基础知识内容
  • Russian Per GDP
  • 2025年11月智能AI客服品牌推荐:综合实力排名榜
  • 近期小细节总结
  • 2025年石墨增碳剂搅拌机源头厂家权威推荐:塑料粉混合机/厚型防火涂料搅拌机混合机/灌浆料搅拌机设备厂商精选
  • 2025年粉末涂料最新权威排行榜TOP10,行业新启航
  • 2025年11月学习机品牌对比榜:从早教到高中全场景机型盘点
  • 2025 年合金管公司最新推荐榜权威发布:12Cr1MoVG/15CrMoG/P22 合金管优质企业榜单及选择指南
  • re-catch
  • 2025 年颗粒灌装机源头厂家最新推荐榜:涵盖全自动、双头、四头、杂粮等多类型设备,助力企业精准选优质生产商
  • 激光驱动器技术:精密控制与创新应用
  • 2025年南京苹果售后维修点推荐:高性价比门店名单与选择策略
  • springboot 下的Thymeleaf 前端渲染引擎
  • 2025年泉州苹果售后维修点推荐:主流门店列表与选择方案
  • 利用keil +RASC给瑞萨RA8D1编译烧写软件
  • Gitee:中国DevSecOps实践的标杆与未来
  • MyEMS:智能化能源管理的创新实践者
  • 2025年抖音SEO排名推荐:市场报告揭示的头部解决方案
  • 2025年精密精轧管生产厂家权威推荐榜单:不锈钢管精轧管/冷拔管精轧管/精轧焊管源头厂家精选
  • 还在担心文件泄露?这个浏览器里的格式转换工具,给了另一种答案
  • 2025年11月效果图公司推荐榜:权威评测五强排名与横向对比
  • psql -U PostgreSQL端命令
  • Linux V4L2框架详解:Camera软件架构与驱动达成
  • 简单接口并发测试
  • 2025 年白炭黑生产厂家最新推荐排行榜:涵盖微珠 / 疏水 / 气相法 / 沉淀法等多类型产品,权威测评选出优质企业供下游参考二氧化硅/胶粉用白炭黑公司推荐