从fasta中抽取需要的序列:
Python方法一:(这种方式最慢)
由于平时工作比较忙,工作中遇到的问题都没有及时记录,现在有空时间了所以整理一下一些常见的问题及技巧供大家参考:
!/usr/bin/python
import sys
def Fasta(inputfile) :
f = open(inputfile,"r")
fastadic = {}
while True :
line = f.readline().rstrip()
if len(line) == 0 :
break
else :
pass
if line.startswith(">") :
name = line.split()[0]
fastadic[name] = ""
else :
fastadic[name]+=line
f.close()
return fastadic
def usage():
print ("1,fasta\n2,list")
if len(sys.argv) != 3 :
usage()
sys.exit()
print (sys.argv[0])
fastafile = sys.argv[1]
listfile = sys.argv[2]
dic = Fasta(fastafile)
ff=open(listfile,"r")
while 1:
line1 = ff.readline().rstrip()
if len(line1) == 0 :
break
else :
pass
array = line1.split("\t")
if ">"+array[0] in dic.keys() :
print (">"+array[0]+"\n"+dic[">"+array[0]])
else :
print ("Error:"+line1)
ff.close()
Python方法二:(次慢)
!/usr/bin/python
import sys
def Fasta(inputfile) :
f = open(inputfile,"r")
fastadic = {}
while True :
line = f.readline().rstrip()
if len(line) == 0 :
break
else :
pass
if line.startswith(">") :
name = line.split()[0]
fastadic[name] = []
else :
fastadic[name].append(line)
f.close()
return fastadic
def usage():
print ("1,fasta\n2,list")
if len(sys.argv) != 3 :
usage()
sys.exit()
print (sys.argv[0])
fastafile = sys.argv[1]
listfile = sys.argv[2]
dic = Fasta(fastafile)
ff=open(listfile,"r")
while 1:
line1 = ff.readline().rstrip()
if len(line1) == 0 :
break
else :
pass
array = line1.split("\t")
if ">"+array[0] in dic.keys() :
print (">"+array[0]+"\n"+"".join(dic[">"+array[0]]))
else :
print ("Error:"+line1)
ff.close()
Perl 方法三(最快)
!/usr/bin/perl -w
die "perl ARGV[0] ;
while (<I>) {
chomp;
if (/^>/) {
@name = split/\s+/,fasta{fasta{_ ;
}
}
close I;
open T,_ ;
if (exists tax[0]}) {
print ">".fasta{">".$tax[0]}, "\n" ;
}
}
close T;