提取特定长度的reads,初始版本,只适合小文件,25000条reads需要十几分钟:
import glob
import gzip
length=[]
with gzip.open('test.fq.gz','rt') as fp:
i = 0
for line in fp:
i += 1
if i % 4 == 2:
if len(line)-1==20:
length.append(i)
output_fasta = open("temp.fq",'w')
def WRIRE():
N= 0
with gzip.open('test.fq.gz','rt') as fp:
for line in fp:
N += 1
if N == i-1:
output_fasta.write(line)
elif N == i:
output_fasta.write(line)
elif N == i+1:
output_fasta.write(line)
elif N == i+2:
output_fasta.write(line)
for i in length:
WRIRE()
output_fasta.close()
end = time.time()
print("used %s s" % str(end - start))