with pysam.AlignmentFile(bamfile_path, "rb",require_index=False) as bamfile:
# Iterate over each read
for read in bamfile:
idx += 1
gene = read.get_tag(gene_tag) if read.has_tag(gene_tag) else None
bc=read.get_tag(bc_tag) if read.has_tag(gene_tag) else None
UMI = read.get_tag(umi_tag) if read.has_tag(umi_tag) else None
for sub_prop in sub_props:
random_num = random.randint(0, 99)
if random_num < sub_prop * 100:
# Check if the tag is present in the read
if (gene is not None) and (bc is not None) and (UMI is not None):
# Add the tag value to the list
if gene not in gene_count_dict[sub_prop]:
gene_count_dict[sub_prop][gene] = {}
gene_count_dict[sub_prop][gene].setdefault(bc, set()).add(UMI)
res_cnt[sub_prop]["transcriptomic"] += 1
这里我们关注到:
if gene not in gene_count_dict[sub_prop]:
gene_count_dict[sub_prop][gene] = {}
gene_count_dict[sub_prop][gene].setdefault(bc, set()).add(UMI)
这段代码的功能是:
检查 gene 是否已经在 gene_count_dict[sub_prop] 字典中。如果不在,为这个基因创建一个新的空字典。
然后,使用 .setdefault() 方法检查 bc(条形码)是否已经作为键存在于 gene 的字典中。如果不存在,设置其值为一个新的空集合。
最后,将 UMI 添加到与 bc 对应的集合中。`