1. 病情说明:
机制难寻,肠道菌群
这句话足以说明菌群研究的必要性,毕竟很多工作都表明了菌群与疾病、发育、神经等的紧密联系。另外,从目前国内科研服务相关的企业的战略布局来看,微生物单细胞组和单细胞三代全长是重要的开发方向,这些企业的鼻子可是很灵的。如果5年前,没能抓住单细胞转录组,今天又没有抓住单细胞空间转录组,那两三年后的单细胞全长转录组、微生物单细胞组和单细胞表观组一定不能再错过了。那我们就先来看看怎么把菌株-通路-代谢物-宿主靶点联系起来的关键工具——HUMAnN。
HUMAnN采用比对泛基因组的方法鉴定群体的已知物种,并进一步翻译检索未分类的序列,最终定量基因家族和通路。结果同时获得功能通路中具体物种组成,建立起了物种与功能的联系,可进一步研究功能组成的贡献者。
通过humann的一通分析,我们可以拿到这些代谢通路的丰度差异结果噢!
2. 治疗方案
import time
import threading
import argparse
import os
from pickle import FALSE
import threading
import shutil
import datetime
import json
import time
import subprocess
import pandas as pd
method_path = 'plot_scripts'  # location of the downstream plotting scripts
refer_path = 'DB'  # root directory of the bundled HUMAnN reference databases

# 1: check function
def humann_check(
    data_path: str = None,  ## path of kneaddout
    input_type: str = None,  ## choose {fastq,fastq.gz,fasta,fasta.gz,sam,bam,blastm8,genetable,biom}
    search_mode: str = None,  ## default [uniref90], uniref50,uniref90
    alignment_type: str = None,  ## software to use for translated alignment,{usearch,rapsearch,diamond}
    pathway_type: str = None,  ## [DEFAULT: metacyc] {metacyc,unipathway} the database to use for pathway computations
    out_path: str = None,  ## outpath of humann_check
):
    """Validate the knead stage output and record the HUMAnN run parameters.

    Reads ``knead_call_file.txt`` from *data_path*, resolves the reference
    database paths for *search_mode*, and writes a 9-row parameter file
    ``humann_check_file.txt`` into *out_path*.  The row order of that file
    is relied upon by ``humann_call``, so it must not change.

    Raises:
        SystemExit: when the knead output file is missing, or when
            *search_mode* is not one of ``uniref90``/``uniref50``
            (the original code fell through and crashed later with a
            NameError on ``protein_database``).

    Returns:
        int: 0 on success.
    """
    ## check knead file
    knead_file = f'{data_path}/knead_call_file.txt'
    if not os.path.exists(knead_file):
        raise SystemExit('No knead file! Please run knead module!')
    knead_param = pd.read_csv(knead_file, header=None)
    cleandata_path = knead_param.at[0, 0]
    sampleinfo = knead_param.at[1, 0]

    # exist_ok=True already tolerates a pre-existing directory, so the
    # original `if not exists / else pass` wrapper was redundant.
    os.makedirs(out_path, exist_ok=True)

    nucleotide_database = os.path.abspath(refer_path + '/full_chocophlan.v201901_v31')
    protein_db_names = {
        'uniref90': 'uniref90_annotated_v201901b',
        'uniref50': 'uniref50_annotated_v201901b',
    }
    if search_mode not in protein_db_names:
        raise SystemExit(f'Unknown search_mode: {search_mode} (use uniref90 or uniref50)')
    protein_database = os.path.abspath(refer_path + '/' + protein_db_names[search_mode])

    ## output: one parameter per row, read back by index in humann_call
    out_path = os.path.abspath(out_path)
    check_file_path = f'{out_path}/humann_check_file.txt'
    check_file = pd.DataFrame([
        cleandata_path,
        sampleinfo,
        input_type,
        search_mode,
        alignment_type,
        pathway_type,
        out_path,
        nucleotide_database,
        protein_database,
    ])
    check_file.to_csv(check_file_path,
                      header=None, index=None, sep='\t', mode='w')
    return 0
def execCmd(cmd):
    """Run a shell command synchronously, logging start/end timestamps.

    The command's exit status is deliberately ignored (matching the original
    ``os.system`` behaviour), and any Python-level failure is caught and
    reported so a single bad command cannot kill the whole pipeline.
    """
    try:
        print("命令{}开始运行{}".format(cmd, datetime.datetime.now()))
        # shell=True is required: callers build compound commands joined
        # with '&&' and use shell redirection.
        # (Removed the unconditional time.sleep(5) that merely delayed
        # every command; narrowed the bare `except:` to Exception.)
        subprocess.run(cmd, shell=True)
        print("命令{}结束运行{}".format(cmd, datetime.datetime.now()))
    except Exception:
        print("{}\t运行失败".format(cmd))
def humann_call(
    out_path: str = None  # outpath of humann
):
    """Run ``humann`` once for every sample recorded in the sample sheet.

    Reads the parameter file written by ``humann_check`` from *out_path*,
    builds one humann command line per sample, executes the commands one at
    a time, then writes ``humann_call_file.txt`` (output dir + sample sheet
    path) for the downstream post-processing step.

    Raises:
        SystemExit: if ``humann_check_file.txt`` is missing.

    Returns:
        int: 0 on completion.
    """
    ## check humann_check file
    humann_check_path = f'{out_path}/humann_check_file.txt'
    if not os.path.exists(humann_check_path):
        raise SystemExit(' No humann_check_file ! Please run humann_check !')
    params = pd.read_csv(humann_check_path, header=None, delimiter='\t')
    cleandata_path = params.at[0, 0]
    sampleinfo = params.at[1, 0]
    inputtype = params.at[2, 0]
    # Rows 3-5 (search mode, aligner, pathway db) are not used on the humann
    # command line here, so they are not read back.
    out_path = params.at[6, 0]
    nucleotide_database = params.at[7, 0]
    protein_database = params.at[8, 0]

    cmds = []
    # The original left the sample sheet file handle open; use `with`.
    with open(sampleinfo) as handle:
        for line in handle:
            if line.startswith("sample"):  # skip the header row
                continue
            fields = line.strip().split()
            if not fields:  # tolerate trailing blank lines
                continue
            sample = fields[0]
            cmd = (
                f'/root/miniconda3/envs/Rdoc/bin/humann -i {cleandata_path}/{sample}/{sample}.{inputtype} '
                f'--output {out_path} --nucleotide-database {nucleotide_database} '
                f'--protein-database {protein_database} '
                f'--input-format {inputtype} --threads 30 --memory-use maximum '
            )
            cmds.append(cmd)

    # The original started a thread and join()ed it immediately, which is
    # sequential execution with extra overhead (plus a 2 s sleep per sample);
    # run the commands directly instead.
    for cmd in cmds:
        execCmd(cmd)

    humann_out_path = os.path.abspath(out_path)
    sampleinfo = os.path.abspath(sampleinfo)
    call_file_path = f'{out_path}/humann_call_file.txt'
    call_file = pd.DataFrame([humann_out_path, sampleinfo])
    call_file.to_csv(call_file_path,
                     header=None, index=None, sep='\t', mode='w')
    return 0
# NOTE(fix): removed the stray top-level `humann_call(out_path)` that ran at
# import time, before `out_path` was defined (NameError); the real invocation
# happens in the driver section at the bottom of the file.
def humann_post(
    out_path: str = None  # directory containing the per-sample humann output
):
    """Merge, renormalise and split the per-sample HUMAnN result tables.

    Joins pathabundance / pathcoverage / genefamilies tables into
    ``humann_merge_out``, renormalises abundances to relative abundance,
    and splits the merged tables into stratified/unstratified versions
    under ``humann_final_out``.

    Returns:
        int: 0 on completion.

    (Removed: a dead commented-out ``humann_call_file`` check and a
    ``time.sleep(10)`` — ``execCmd`` is synchronous, so there is nothing
    to wait for.)
    """
    merge_path = out_path + '/humann_merge_out'
    # Merge only once: if the merge directory already exists we assume the
    # joined tables are present and go straight to renormalisation.
    if not os.path.exists(merge_path):
        os.makedirs(merge_path, exist_ok=True)
        cmd = f'''/root/miniconda3/envs/Rdoc/bin/humann_join_tables -s --input {out_path} --file_name pathabundance --output {merge_path}/humann_pathabundance.tsv && \
/root/miniconda3/envs/Rdoc/bin/humann_join_tables -s --input {out_path} --file_name pathcoverage --output {merge_path}/humann_pathcoverage.tsv && \
/root/miniconda3/envs/Rdoc/bin/humann_join_tables -s --input {out_path} --file_name genefamilies --output {merge_path}/humann_genefamilies.tsv
'''
        execCmd(cmd)

    ## renormalise to relative abundance (pathcoverage is not renormalised)
    cmd_norm = f'''/root/miniconda3/envs/Rdoc/bin/humann_renorm_table --input {merge_path}/humann_pathabundance.tsv \
--units relab --output {merge_path}/humann_pathabundance_relab.tsv && \
/root/miniconda3/envs/Rdoc/bin/humann_renorm_table --input {merge_path}/humann_genefamilies.tsv \
--units relab --output {merge_path}/humann_genefamilies_relab.tsv
'''
    execCmd(cmd_norm)

    ## split stratified vs unstratified tables into the final directory
    ## (humann_split_stratified_table creates final_path itself)
    final_path = out_path + '/humann_final_out'
    cmd_split = f'''/root/miniconda3/envs/Rdoc/bin/humann_split_stratified_table --input {merge_path}/humann_pathabundance_relab.tsv --output {final_path} && \
/root/miniconda3/envs/Rdoc/bin/humann_split_stratified_table --input {merge_path}/humann_genefamilies_relab.tsv --output {final_path} && \
/root/miniconda3/envs/Rdoc/bin/humann_split_stratified_table --input {merge_path}/humann_pathcoverage.tsv --output {final_path}
'''
    execCmd(cmd_split)
    return 0
if __name__ == '__main__':
    # Example driver parameters; guarded so importing this module for its
    # functions no longer launches a full HUMAnN run as a side effect.
    data_path = 'kneaddout2'  # kneaddata (quality-control stage) output dir
    input_type = 'fastq'
    search_mode = 'uniref90'
    alignment_type = 'diamond'
    pathway_type = 'metacyc'
    out_path = 'humann_out'
    humann_check(data_path, input_type, search_mode, alignment_type, pathway_type, out_path)
    humann_call(out_path)
3. 补充说明
下一篇可以画画图了,毕竟我们拿到的这些表格连自己都惊艳不了,又怎么能在组会上搏老板一笑呢。分析环境和测试数据我都整理好啦,一键部署到自己的服务器噢,持续关注,因为我会持续更新的。