记录一下ROH热点计算和绘图的过程

进入目标文件夹


cd /public/ejye/other/GraduateData/zzhsnp/single_pop/ROH/ROH2022.2.17/pure20220217

分群体的单个个体


ls *.vcf |tr "\t" "\n"|while read id ;do bgzip $id;done    #批量压缩

ls *.vcf.gz |tr "\t" "\n"|while read id ;do tabix -p vcf $id;done    #批量建立索引

ls *.vcf.gz |tr "\t" "\n"|sed "s/\.purein\.vcf\.gz//g"|while read id ;do zcat $id.purein.vcf.gz |grep -v "##"|grep "#"|cut -f10- |tr "\t" "\n"|while read i;do bcftools view $id.purein.vcf.gz -s $i -Oz -o $id/$i.vcf.gz;done;done           #批量提取单个样本

计算每个个体的ROH

BN 62

BRA 70

Laos 53

LX 65

RJF 114

TLF 72

WL 152


ls *.vcf.gz |tr "\t" "\n"|sed "s/\.purein\.vcf\.gz//g"|while read id ;do mkdir -p $id/single_sample_ROH ;done   #批量新建文件夹

cd BN

ls *.vcf.gz |tr "\t" "\n"|sed "s/\.vcf\.gz//g"|while read id ;do plink --vcf $id.vcf.gz  --chr-set 33 --homozyg --homozyg-window-snp 10 --homozyg-window-het 3 --homozyg-window-missing 5 --homozyg-window-threshold 0.05 --homozyg-snp 62 --homozyg-kb 300 --homozyg-density 50 --homozyg-gap 1000 --out single_sample_ROH/$id ; done

head -n1 BN01_BN01.hom>BN_all.hom.txt

cat *.hom|grep -v "KB" >>BN_all.hom.txt

cd ../BRA

ls *.vcf.gz |tr "\t" "\n"|sed "s/\.vcf\.gz//g"|while read id ;do plink --vcf $id.vcf.gz  --chr-set 33 --homozyg --homozyg-window-snp 10 --homozyg-window-het 3 --homozyg-window-missing 5 --homozyg-window-threshold 0.05 --homozyg-snp 70 --homozyg-kb 300 --homozyg-density 50 --homozyg-gap 1000 --out single_sample_ROH/$id ; done

head -n1 BRA01_BRA01.hom >BRA_all.hom.txt

cat *.hom|grep -v "KB" >>BRA_all.hom.txt

cd ../Laos

ls *.vcf.gz |tr "\t" "\n"|sed "s/\.vcf\.gz//g"|while read id ;do plink --vcf $id.vcf.gz  --chr-set 33 --homozyg --homozyg-window-snp 10 --homozyg-window-het 3 --homozyg-window-missing 5 --homozyg-window-threshold 0.05 --homozyg-snp 53 --homozyg-kb 300 --homozyg-density 50 --homozyg-gap 1000 --out single_sample_ROH/$id ; done

head -n1 Laos01_Laos01.hom >Laos_all.hom.txt

cat *.hom|grep -v "KB" >>Laos_all.hom.txt

cd ../LX

ls *.vcf.gz |tr "\t" "\n"|sed "s/\.vcf\.gz//g"|while read id ;do plink --vcf $id.vcf.gz  --chr-set 33 --homozyg --homozyg-window-snp 10 --homozyg-window-het 3 --homozyg-window-missing 5 --homozyg-window-threshold 0.05 --homozyg-snp 65 --homozyg-kb 300 --homozyg-density 50 --homozyg-gap 1000 --out single_sample_ROH/$id ; done

head -n1 LX01_LX01.hom >LX_all.hom.txt

cat *.hom|grep -v "KB" >>LX_all.hom.txt

cd ../RJF

ls *.vcf.gz |tr "\t" "\n"|sed "s/\.vcf\.gz//g"|while read id ;do plink --vcf $id.vcf.gz  --chr-set 33 --homozyg --homozyg-window-snp 10 --homozyg-window-het 3 --homozyg-window-missing 5 --homozyg-window-threshold 0.05 --homozyg-snp 114 --homozyg-kb 300 --homozyg-density 50 --homozyg-gap 1000 --out single_sample_ROH/$id ; done

head -n1 RJF01_RJF01.hom >RJF_all.hom.txt

cat *.hom|grep -v "KB" >>RJF_all.hom.txt

cd ../TLF

ls *.vcf.gz |tr "\t" "\n"|sed "s/\.vcf\.gz//g"|while read id ;do plink --vcf $id.vcf.gz  --chr-set 33 --homozyg --homozyg-window-snp 10 --homozyg-window-het 3 --homozyg-window-missing 5 --homozyg-window-threshold 0.05 --homozyg-snp 72 --homozyg-kb 300 --homozyg-density 50 --homozyg-gap 1000 --out single_sample_ROH/$id ; done

head -n1 TLF01_TLF01.hom >TLF_all.hom.txt

cat *.hom|grep -v "KB" >>TLF_all.hom.txt

cd ../WL

ls *.vcf.gz |tr "\t" "\n"|sed "s/\.vcf\.gz//g"|while read id ;do plink --vcf $id.vcf.gz  --chr-set 33 --homozyg --homozyg-window-snp 10 --homozyg-window-het 3 --homozyg-window-missing 5 --homozyg-window-threshold 0.05 --homozyg-snp 152 --homozyg-kb 300 --homozyg-density 50 --homozyg-gap 1000 --out single_sample_ROH/$id ; done

head -n1 WL01_WL01.hom >WL_all.hom.txt

cat *.hom|grep -v "KB" >>WL_all.hom.txt

统计每个样本检测出来的总ROH数目


cat *.log|grep -wo "found\ [0-9]\{2,3\}"|grep -wo "[0-9]\{2,3\}"

分长中短聚类


setwd("C:/Users/TE/Desktop/叶尔江ROH分析/ROH样本分布")

library(mclust)

data <- read.table("BN_all.hom.txt",header=TRUE)

mod2 <- Mclust(data[,9],G=3)

summary(mod2,parameters=TRUE)

a <- as.data.frame(mod2[15])

write.table(a,file="BN.class.txt",row.names=FALSE,col.names=TRUE,quote=FALSE)

计算每个样本每个类别的数目


cat BRA.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat BRA.txt|grep -w "$id"|awk '{if($1==1)print $0}'|wc -l;done

cat BRA.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat BRA.txt|grep -w "$id"|awk '{if($1==2)print $0}'|wc -l;done

cat BRA.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat BRA.txt|grep -w "$id"|awk '{if($1==3)print $0}'|wc -l;done

cat BN.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat BN.txt|grep -w "$id"|awk '{if($1==1)print $0}'|wc -l;done

cat BN.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat BN.txt|grep -w "$id"|awk '{if($1==2)print $0}'|wc -l;done

cat BN.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat BN.txt|grep -w "$id"|awk '{if($1==3)print $0}'|wc -l;done

cat Laos.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat Laos.txt|grep -w "$id"|awk '{if($1==1)print $0}'|wc -l;done

cat Laos.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat Laos.txt|grep -w "$id"|awk '{if($1==2)print $0}'|wc -l;done

cat Laos.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat Laos.txt|grep -w "$id"|awk '{if($1==3)print $0}'|wc -l;done

cat LX.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat LX.txt|grep -w "$id"|awk '{if($1==1)print $0}'|wc -l;done

cat LX.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat LX.txt|grep -w "$id"|awk '{if($1==2)print $0}'|wc -l;done

cat LX.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat LX.txt|grep -w "$id"|awk '{if($1==3)print $0}'|wc -l;done

cat RJF.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat RJF.txt|grep -w "$id"|awk '{if($1==1)print $0}'|wc -l;done

cat RJF.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat RJF.txt|grep -w "$id"|awk '{if($1==2)print $0}'|wc -l;done

cat RJF.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat RJF.txt|grep -w "$id"|awk '{if($1==3)print $0}'|wc -l;done

cat TLF.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat TLF.txt|grep -w "$id"|awk '{if($1==1)print $0}'|wc -l;done

cat TLF.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat TLF.txt|grep -w "$id"|awk '{if($1==2)print $0}'|wc -l;done

cat TLF.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat TLF.txt|grep -w "$id"|awk '{if($1==3)print $0}'|wc -l;done

cat WL.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat WL.txt|grep -w "$id"|awk '{if($1==1)print $0}'|wc -l;done

cat WL.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat WL.txt|grep -w "$id"|awk '{if($1==2)print $0}'|wc -l;done

cat WL.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat WL.txt|grep -w "$id"|awk '{if($1==3)print $0}'|wc -l;done

计算每个样本每个类别的总长度


cat BRA.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat BRA.txt|grep -w "$id"|awk '{if($1==1){s+=$10}}END{print s}';done

cat BRA.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat BRA.txt|grep -w "$id"|awk '{if($1==2){s+=$10}}END{print s}';done

cat BRA.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat BRA.txt|grep -w "$id"|awk '{if($1==3){s+=$10}}END{print s}';done

cat BN.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat BN.txt|grep -w "$id"|awk '{if($1==1){s+=$10}}END{print s}';done

cat BN.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat BN.txt|grep -w "$id"|awk '{if($1==2){s+=$10}}END{print s}';done

cat BN.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat BN.txt|grep -w "$id"|awk '{if($1==3){s+=$10}}END{print s}';done

cat Laos.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat Laos.txt|grep -w "$id"|awk '{if($1==1){s+=$10}}END{print s}';done

cat Laos.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat Laos.txt|grep -w "$id"|awk '{if($1==2){s+=$10}}END{print s}';done

cat Laos.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat Laos.txt|grep -w "$id"|awk '{if($1==3){s+=$10}}END{print s}';done

cat LX.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat LX.txt|grep -w "$id"|awk '{if($1==1){s+=$10}}END{print s}';done

cat LX.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat LX.txt|grep -w "$id"|awk '{if($1==2){s+=$10}}END{print s}';done

cat LX.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat LX.txt|grep -w "$id"|awk '{if($1==3){s+=$10}}END{print s}';done

cat RJF.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat RJF.txt|grep -w "$id"|awk '{if($1==1){s+=$10}}END{print s}';done

cat RJF.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat RJF.txt|grep -w "$id"|awk '{if($1==2){s+=$10}}END{print s}';done

cat RJF.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat RJF.txt|grep -w "$id"|awk '{if($1==3){s+=$10}}END{print s}';done

cat TLF.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat TLF.txt|grep -w "$id"|awk '{if($1==1){s+=$10}}END{print s}';done

cat TLF.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat TLF.txt|grep -w "$id"|awk '{if($1==2){s+=$10}}END{print s}';done

cat TLF.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat TLF.txt|grep -w "$id"|awk '{if($1==3){s+=$10}}END{print s}';done

cat WL.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat WL.txt|grep -w "$id"|awk '{if($1==1){s+=$10}}END{print s}';done

cat WL.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat WL.txt|grep -w "$id"|awk '{if($1==2){s+=$10}}END{print s}';done

cat WL.txt|sed "1d"|cut -f2|sort|uniq|while read id;do cat WL.txt|grep -w "$id"|awk '{if($1==3){s+=$10}}END{print s}';done

计算ROH重叠区域和热点区域


cd /public/ejye/other/GraduateData/zzhsnp/single_pop/ROH/ROH2022.2.17/pure20220217

plink --vcf BN.purein.vcf.gz  --chr-set 33 --homozyg group --homozyg-window-snp 10 --homozyg-window-het 3 --homozyg-window-missing 5 --homozyg-window-threshold 0.05 --homozyg-snp 65 --homozyg-kb 300 --homozyg-density 50 --homozyg-gap 1000 --out BN/BN

cat  BN/BN.hom.overlap|grep "CON"   #查看重叠区域，定义在所有样本都检测到的区域为热点区域（最多允许10%个体的缺失）

plink --vcf BRA.purein.vcf.gz  --chr-set 33 --homozyg group --homozyg-window-snp 10 --homozyg-window-het 3 --homozyg-window-missing 5 --homozyg-window-threshold 0.05 --homozyg-snp 65 --homozyg-kb 300 --homozyg-density 50 --homozyg-gap 1000 --out BRA/BRA

cat  NRA/BRA.hom.overlap|grep "CON" 

plink --vcf Laos.purein.vcf.gz  --chr-set 33 --homozyg group --homozyg-window-snp 10 --homozyg-window-het 3 --homozyg-window-missing 5 --homozyg-window-threshold 0.05 --homozyg-snp 65 --homozyg-kb 300 --homozyg-density 50 --homozyg-gap 1000 --out Laos/Laos

cat  Laos/Laos.hom.overlap|grep "CON" 

plink --vcf LX.purein.vcf.gz  --chr-set 33 --homozyg group --homozyg-window-snp 10 --homozyg-window-het 3 --homozyg-window-missing 5 --homozyg-window-threshold 0.05 --homozyg-snp 65 --homozyg-kb 300 --homozyg-density 50 --homozyg-gap 1000 --out LX/LX

cat  LX/LX.hom.overlap|grep "CON" 

plink --vcf RJF.purein.vcf.gz  --chr-set 33 --homozyg group --homozyg-window-snp 10 --homozyg-window-het 3 --homozyg-window-missing 5 --homozyg-window-threshold 0.05 --homozyg-snp 65 --homozyg-kb 300 --homozyg-density 50 --homozyg-gap 1000 --out RJF/RJF

cat  RJF/RJF.hom.overlap|grep "CON" 

plink --vcf TLF.purein.vcf.gz  --chr-set 33 --homozyg group --homozyg-window-snp 10 --homozyg-window-het 3 --homozyg-window-missing 5 --homozyg-window-threshold 0.05 --homozyg-snp 65 --homozyg-kb 300 --homozyg-density 50 --homozyg-gap 1000 --out TLF/TLF

cat  TLF/TLF.hom.overlap|grep "CON" 

plink --vcf WL.purein.vcf.gz  --chr-set 33 --homozyg group --homozyg-window-snp 10 --homozyg-window-het 3 --homozyg-window-missing 5 --homozyg-window-threshold 0.05 --homozyg-snp 65 --homozyg-kb 300 --homozyg-density 50 --homozyg-gap 1000 --out WL/WL

cat  WL/WL.hom.overlap|grep "CON"

统计snp在ROH区域出现的频数


cd /public/ejye/other/GraduateData/zzhsnp/single_pop/ROH/ROH2022.2.17/pure20220217

vi TLF.position

#文件包含每个ROH片段的染色体 起始位置和终止位置三列，数据来自计算ROH得到的hom文件

bcftools filter -R TLF.position  TLF.purein.vcf.gz >TLF.snp.ROH.vcf

gzip -d TLF.purein.vcf.gz

cat TLF.purein.vcf|grep -v "#"|cut -f1,2>1.tmp

cat  TLF.snp.ROH.vcf|grep -v "#"|cut -f1,2>>1.tmp   #合并所有的snp和在ROH中的snp

cat 1.tmp|sort -k1,1n -k2,2n |uniq -c >2.tmp      #排序，求每个位点出现的次数

cat 2.tmp|awk '{print$2"\t"$3"\t"$1-1}'>TLF.manhattan.txt  #整理得到画图文件

rm -f *.tmp

画曼哈顿图


R

mydata<-read.table("TLF.manhattan.txt",header = TRUE,sep="\t")

library(doBy)

library(plyr)

library(ggplot2)

library(cowplot)

mydata$SNP<-seq(1,nrow(mydata),1)

mydata$P<-mydata$number*100/21

n<- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33)

for (i in n){mydata$SNP[which(mydata$CHR==i)]<-mydata$SNP[which(mydata$CHR==i)]+10000*(i-1)}

chr<- summaryBy(SNP~CHR, mydata, FUN = median)

data1=subset(mydata,P>=90)

p2<-ggplot(mydata,aes(SNP,P)) +

geom_point( data = mydata,pch=20,show.legend = F,  size=1,aes(color=as.factor(mydata$CHR))) + 

  geom_point(data = data1,pch=20,show.legend = F, alpha=0.8, size=2,color="#e7b005") + 

 scale_color_manual(values = rep(c("#D3D3D3", "#666666"), 32 ))+

      theme_bw(base_size = 16)+

  theme(panel.grid = element_blank(), 

        axis.line = element_line(colour = 'black'),

        panel.background = element_rect(fill = "transparent"),

        axis.title.y = element_text(face = "bold"))+

 labs(x ='Chromsome', y = expression("Percentage")) +

 scale_x_continuous(breaks = chr$SNP.median,labels = chr$CHR,expand=(c(0.02,0)))+

scale_y_continuous(breaks =seq(-5,100,20),limits=c(-1,100),expand=(c(0.02,0.02)))+

geom_hline(yintercept =90,col="#fd348a",lty=1,lwd=0.25,alpha=0.5)+

  theme(axis.text.x = element_text(angle = 270,size=9.5,face="bold", hjust = 0.5, vjust = 0.5))+theme(plot.title = element_text(hjust = 0.5,size=11,face="bold"))

ggsave(p2,filename="TLF.snp.png",width = 12,height = 4)

mydata<-read.table("TLF.manhattan.txt",header = TRUE,sep="\t")

library(doBy)

library(plyr)

library(ggplot2)

library(cowplot)

mydata$SNP<-seq(1,nrow(mydata),1)

n<- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33)

for (i in n){mydata$SNP[which(mydata$CHR==i)]<-mydata$SNP[which(mydata$CHR==i)]+10000*(i-1)}

chr<- summaryBy(SNP~CHR, mydata, FUN = median)

data1=subset(mydata,number>=20)

p2<-ggplot(mydata,aes(SNP,number)) +

geom_point( data = mydata,pch=20,show.legend = F,  size=1,aes(color=as.factor(mydata$CHR))) + 

  geom_point(data = data1,pch=20,show.legend = F, alpha=0.8, size=2,color="#e7b005") + 

 scale_color_manual(values = rep(c("#D3D3D3", "#666666"), 32 ))+

      theme_bw(base_size = 16)+

  theme(panel.grid = element_blank(), 

        axis.line = element_line(colour = 'black'),

        panel.background = element_rect(fill = "transparent"),

        axis.title.y = element_text(face = "bold"))+

 labs(x ='Chromsome', y = expression("Percentage")) +

 scale_x_continuous(breaks = chr$SNP.median,labels = chr$CHR,expand=(c(0.02,0)))+

scale_y_continuous(breaks =seq(-1,22,5),limits=c(-1,22),expand=(c(0.02,0.02)))+

geom_hline(yintercept =90,col="#fd348a",lty=1,lwd=0.25,alpha=0.5)+

  theme(axis.text.x = element_text(angle = 270,size=9.5,face="bold", hjust = 0.5, vjust = 0.5))+theme(plot.title = element_text(hjust = 0.5,size=11,face="bold"))

ggsave(p2,filename="TLF.snp.number.png",width = 16,height = 4)

125个鸡连锁不平衡过滤+计算ROH


cd /public/ejye/other/GraduateData/zzhsnp/single_pop/ROH/chicken125

plink --vcf chicken125.zk.vcf.gz --maf 0.05 --recode vcf --out chicken125.qc --chr-set 33

cat chicken125.qc.vcf|"#" >chicken125.vcf

cat chicken125.qc.vcf|grep -v "#"|awk '$3=$1":"$2{print$0}'|tr " " "\t" >>chicken125.vcf

plink --vcf chicken125.vcf --recode --out chicken125  --chr-set  33

plink -file chicken125 --indep-pairwise 50 5 0.5 --out chicken125 --chr-set 33

plink  --threads 10 --file chicken125 --extract  chicken125.prune.in  --recode vcf --out chicken125.prunein --chr-set 33

###########################

plink --vcf chicken125.prunein.vcf.gz  --chr-set 33 --homozyg group --homozyg-window-snp 10 --homozyg-window-het 3 --homozyg-window-missing 5 --homozyg-window-threshold 0.05 --homozyg-snp 50 --homozyg-kb 300 --homozyg-density 50 --homozyg-gap 1000 --out chicken125

cat chicken125.hom|awk '{print$4"\t"$7"\t"$8}'|sed "1d">chicken125.ROH.region

tabix -p vcf chicken125.prunein.vcf.gz

bcftools filter -R chicken125.ROH.region chicken125.prunein.vcf.gz |grep -v "#"|cut -f1,2>1.tmp

zcat chicken125.prunein.vcf.gz|grep -v "#"|cut -f1,2>>1.tmp

cat 1.tmp|sort -k1,1n -k2,2n |uniq -c >2.tmp      

cat 2.tmp|awk '{print$2"\t"$3"\t"$1-1}'>chicken.manhattan.txt 

rm -f *.tmp

--------------------------------------------------------------

mydata<-read.table("chicken.manhattan.txt",header = TRUE,sep="\t")

library(doBy)

library(plyr)

library(ggplot2)

library(cowplot)

mydata$SNP<-seq(1,nrow(mydata),1)

mydata$P<-mydata$number*100/125

n<- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33)

for (i in n){mydata$SNP[which(mydata$CHR==i)]<-mydata$SNP[which(mydata$CHR==i)]+50000*(i-1)}

chr<- summaryBy(SNP~CHR, mydata, FUN = median)

data1=subset(mydata,P>=90)

p2<-ggplot(mydata,aes(SNP,P)) +

geom_point( data = mydata,pch=20,show.legend = F,  size=1.4,aes(color=as.factor(mydata$CHR))) + 

  geom_point(data = data1,pch=20,show.legend = F, alpha=0.8, size=2,color="#e7b005") + 

 scale_color_manual(values = rep(c("#D3D3D3", "#666666"), 32 ))+

      theme_bw(base_size = 16)+

  theme(panel.grid = element_blank(), 

        axis.line = element_line(colour = 'black'),

        panel.background = element_rect(fill = "transparent"),

        axis.title.y = element_text(face = "bold"))+

 labs(x ='Chromsome', y = expression("Percentage")) +

 scale_x_continuous(breaks = chr$SNP.median,labels = chr$CHR,expand=(c(0.02,0.02)))+

scale_y_continuous(breaks =seq(0,100,20),limits=c(-1,100),expand=(c(0.02,0.02)))+

geom_hline(yintercept =90,col="#fd348a",lty=2,lwd=0.5,alpha=0.8)+

  theme(axis.text.x = element_text(angle = 270,size=9.5,face="bold", hjust = 0.5, vjust = 0.5))+theme(plot.title = element_text(hjust = 0.5,size=11,face="bold"))

ggsave(p2,filename="chicken.snp.png",width = 16,height = 4)

改变ROH的检测阈值条件


cd /public/ejye/other/GraduateData/zzhsnp/single_pop/ROH/chicken125

plink --vcf chicken125.prunein.vcf.gz  --chr-set 33 --homozyg group --homozyg-window-snp 10 --homozyg-window-het 2 --homozyg-window-missing 3 --homozyg-window-threshold 0.05 --homozyg-snp 50 --homozyg-kb 500 --homozyg-density 50 --homozyg-gap 1000 --out chicken125

cat chicken125.hom|awk '{print$4"\t"$7"\t"$8}'|sed "1d">chicken125.ROH.region

tabix -p vcf chicken125.prunein.vcf.gz

bcftools filter -R chicken125.ROH.region chicken125.prunein.vcf.gz |grep -v "#"|cut -f1,2>1.tmp

zcat chicken125.prunein.vcf.gz|grep -v "#"|cut -f1,2>>1.tmp

cat 1.tmp|sort -k1,1n -k2,2n |uniq -c >2.tmp      

cat 2.tmp|awk '{print$2"\t"$3"\t"$1-1}'>chicken.manhattan.txt 

rm -f *.tmp

--------------------------------------------------------------

mydata<-read.table("chicken.manhattan.txt",header = TRUE,sep="\t")

library(doBy)

library(plyr)

library(ggplot2)

library(cowplot)

mydata$SNP<-seq(1,nrow(mydata),1)

mydata$P<-mydata$number*100/125

n<- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33)

for (i in n){mydata$SNP[which(mydata$CHR==i)]<-mydata$SNP[which(mydata$CHR==i)]+50000*(i-1)}

chr<- summaryBy(SNP~CHR, mydata, FUN = median)

data1=subset(mydata,P>=quantile(mydata$P,0.99))

p2<-ggplot(mydata,aes(SNP,P)) +

geom_point( data = mydata,pch=20,show.legend = F,  size=1.4,aes(color=as.factor(mydata$CHR))) + 

  geom_point(data = data1,pch=20,show.legend = F, alpha=0.6, size=2.4,color="#e7b005") + 

 scale_color_manual(values = rep(c("#D3D3D3", "#666666"), 32 ))+

      theme_bw(base_size = 16)+

  theme(panel.grid = element_blank(), 

        axis.line = element_line(colour = 'black'),

        panel.background = element_rect(fill = "transparent"),

        axis.title.y = element_text(face = "bold"))+

 labs(x ='Chromsome', y = expression("Percentage")) +

 scale_x_continuous(breaks = chr$SNP.median,labels = chr$CHR,expand=(c(0.02,0.02)))+

scale_y_continuous(breaks =seq(0,100,20),limits=c(-1,100),expand=(c(0.02,0.02)))+

geom_hline(yintercept =quantile(mydata$P,0.99),col="#fd348a",lty=2,lwd=0.5,alpha=0.8)+

  theme(axis.text.x = element_text(angle = 270,size=9.5,face="bold", hjust = 0.5, vjust = 0.5))+theme(plot.title = element_text(hjust = 0.5,size=11,face="bold"))

ggsave(p2,filename="chicken.snp.png",width = 14,height = 4)

------------------------------------------------------------------

data1=subset(mydata,P>=50)

p2<-ggplot(mydata,aes(SNP,P)) +

geom_point( data = mydata,pch=20,show.legend = F,  size=1.4,aes(color=as.factor(mydata$CHR))) + 

  geom_point(data = data1,pch=20,show.legend = F, alpha=0.6, size=2.4,color="#e7b005") + 

 scale_color_manual(values = rep(c("#D3D3D3", "#666666"), 32 ))+

      theme_bw(base_size = 16)+

  theme(panel.grid = element_blank(), 

        axis.line = element_line(colour = 'black'),

        panel.background = element_rect(fill = "transparent"),

        axis.title.y = element_text(face = "bold"))+

 labs(x ='Chromsome', y = expression("Percentage")) +

 scale_x_continuous(breaks = chr$SNP.median,labels = chr$CHR,expand=(c(0.02,0.02)))+

scale_y_continuous(breaks =seq(0,100,20),limits=c(-1,100),expand=(c(0.02,0.02)))+

geom_hline(yintercept =50,col="#fd348a",lty=2,lwd=0.5,alpha=0.8)+

  theme(axis.text.x = element_text(angle = 270,size=9.5,face="bold", hjust = 0.5, vjust = 0.5))+theme(plot.title = element_text(hjust = 0.5,size=11,face="bold"))

ggsave(p2,filename="chicken.snp50.png",width = 14,height = 4)

画分布热图

1号染色体


cd /mnt/c/Users/TE/Desktop/叶尔江ROH分析/125只鸡/严格条件

echo "sample chr start end" |tr " " "\t">chicken125.chr1.roh.hetmap

cat chicken125.hom|awk '{if($4==1)print$0}'|awk '{if(($7>=70000000&&$7<=80000000)||($8>=70000000&&$8<=80000000)){print$1"\t"$4"\t"$7"\t"$8}}'|awk '{if($3<70000000){print $1"\t"$2"\t"70000001"\t"$4}else if($4>80000000){print $1"\t"$2"\t"$3"\t"79999999}else {print$0}}'>>chicken125.chr1.roh.hetmap

然后把缺失的样本起始和终止坐标用NA表示，并在每个群体的最后行加上该群体的代码99，并使坐标充满整个区域以区分不同的群体

library(ggplot2)

data=read.table(file="chicken125.chr1.roh.hetmap",header = T,sep = "\t")

ggplot(data)+scale_x_continuous(breaks =seq(70,80,2),limits=c(70,80),expand=(c(0,0)))+scale_y_continuous(expand=(c(0.01,0.01)))+theme_bw(base_size = 16)+theme(panel.grid = element_blank(),axis.line = element_line(colour = 'black'),panel.background = element_rect(fill = "transparent"),axis.text.y =element_blank(),axis.ticks.y=element_blank(),axis.text.x = element_text(size=9.5,face="bold", hjust = 0.5, vjust = 0.5))+labs(x ='Chromsome 1 Position (Mb)', y = expression("")) +geom_rect(aes(xmin=data$start/1000000, xmax=data$end/1000000, ymin=as.numeric(data$sample)-0.4,ymax=as.numeric(data$sample)+0.18),fill='#72e972')+geom_vline(xintercept =74.247989,col="gray",lty=2,lwd=1.2,alpha=1)

ggsave("chr1rohhet.pdf",width=8,height=12)

25号染色体


cd /mnt/c/Users/TE/Desktop/叶尔江ROH分析/125只鸡/严格条件

echo "sample chr start end" |tr " " "\t">chicken125.chr25.roh.hetmap

cat chicken125.hom|awk '{if($4==25)print$0}'|awk '{if(($7>=0&&$7<=4000000)||($8>=0&&$8<=4000000)){print$1"\t"$4"\t"$7"\t"$8}}'|awk '{if($3<=0){print $1"\t"$2"\t"1"\t"$4}else if($4>4000000){print $1"\t"$2"\t"$3"\t"3999999}else {print$0}}'>>chicken125.chr25.roh.hetmap

然后把缺失的样本起始和终止坐标用NA表示，并在每个群体的最后行加上该群体的代码99，并使坐标充满整个区域以区分不同的群体

library(ggplot2)

data=read.table(file="chicken125.chr25.roh.hetmap",header = T,sep = "\t")

ggplot(data)+scale_x_continuous(breaks =seq(0,4,1),limits=c(0,4),expand=(c(0,0)))+scale_y_continuous(expand=(c(0.01,0.01)))+theme_bw(base_size = 16)+theme(panel.grid = element_blank(),axis.line = element_line(colour = 'black'),panel.background = element_rect(fill = "transparent"),axis.text.y =element_blank(),axis.ticks.y=element_blank(),axis.text.x = element_text(size=9.5,face="bold", hjust = 0.5, vjust = 0.5))+labs(x ='Chromsome 25 Position (Mb)', y = expression("")) +geom_rect(aes(xmin=data$start/1000000, xmax=data$end/1000000, ymin=as.numeric(data$sample)-0.4,ymax=as.numeric(data$sample)+0.18),fill='#72e972')+geom_vline(xintercept =0.736312,col="gray",lty=2,lwd=1.2,alpha=1)+geom_vline(xintercept =1.438820,col="gray",lty=2,lwd=1.2,alpha=1)

ggsave("chr25rohhet.pdf",width=8,height=12)

22号染色体


cd /mnt/c/Users/TE/Desktop/叶尔江ROH分析/125只鸡/严格条件

echo "sample chr start end" |tr " " "\t">chicken125.chr22.roh.hetmap

cat chicken125.hom|awk '{if($4==22)print$0}'|awk '{if(($7>=0&&$7<=6000000)||($8>=0&&$8<=6000000)){print$1"\t"$4"\t"$7"\t"$8}}'|awk '{if($3<=0){print $1"\t"$2"\t"1"\t"$4}else if($4>6000000){print $1"\t"$2"\t"$3"\t"5999999}else {print$0}}'>>chicken125.chr22.roh.hetmap

然后把缺失的样本起始和终止坐标用NA表示，并在每个群体的最后行加上该群体的代码99，并使坐标充满整个区域以区分不同的群体

library(ggplot2)

data=read.table(file="chicken125.chr22.roh.hetmap",header = T,sep = "\t")

ggplot(data)+scale_x_continuous(breaks =seq(0,6,1),limits=c(0,6),expand=(c(0,0)))+scale_y_continuous(expand=(c(0.01,0.01)))+theme_bw(base_size = 16)+theme(panel.grid = element_blank(),axis.line = element_line(colour = 'black'),panel.background = element_rect(fill = "transparent"),axis.text.y =element_blank(),axis.ticks.y=element_blank(),axis.text.x = element_text(size=9.5,face="bold", hjust = 0.5, vjust = 0.5))+labs(x ='Chromsome 22 Position (Mb)', y = expression("")) +geom_rect(aes(xmin=data$start/1000000, xmax=data$end/1000000, ymin=as.numeric(data$sample)-0.4,ymax=as.numeric(data$sample)+0.18),fill='#72e972')+geom_vline(xintercept =3.062142,col="gray",lty=2,lwd=1.2,alpha=1)

ggsave("chr22rohhet.pdf",width=8,height=12)

2号染色体


cd /mnt/c/Users/TE/Desktop/叶尔江ROH分析/125只鸡/严格条件

echo "sample chr start end" |tr " " "\t">chicken125.chr2.roh.hetmap

cat chicken125.hom|awk '{if($4==2)print$0}'|awk '{if(($7>=50000000&&$7<=58000000)||($8>=50000000&&$8<=58000000)){print$1"\t"$4"\t"$7"\t"$8}}'|awk '{if($3<=50000000){print $1"\t"$2"\t"50000001"\t"$4}else if($4>58000000){print $1"\t"$2"\t"$3"\t"57999999}else {print$0}}'>>chicken125.chr2.roh.hetmap

然后把缺失的样本起始和终止坐标用NA表示，并在每个群体的最后行加上该群体的代码99，并使坐标充满整个区域以区分不同的群体

library(ggplot2)

data=read.table(file="chicken125.chr2.roh.hetmap",header = T,sep = "\t")

ggplot(data)+scale_x_continuous(breaks =seq(50,58,2),limits=c(50,58),expand=(c(0,0)))+scale_y_continuous(expand=(c(0.01,0.01)))+theme_bw(base_size = 16)+theme(panel.grid = element_blank(),axis.line = element_line(colour = 'black'),panel.background = element_rect(fill = "transparent"),axis.text.y =element_blank(),axis.ticks.y=element_blank(),axis.text.x = element_text(size=9.5,face="bold", hjust = 0.5, vjust = 0.5))+labs(x ='Chromsome 2 Position (Mb)', y = expression("")) +geom_rect(aes(xmin=data$start/1000000, xmax=data$end/1000000, ymin=as.numeric(data$sample)-0.4,ymax=as.numeric(data$sample)+0.18),fill='#72e972')+geom_vline(xintercept =53.490610,col="gray",lty=2,lwd=1.2,alpha=1)+geom_vline(xintercept =53.805665,col="gray",lty=2,lwd=1.2,alpha=1)

ggsave("chr2rohhet.pdf",width=8,height=12)

最后编辑于：2022.02.22 21:19:25

人面猴
序言：七十年代末，一起剥皮案震惊了整个滨河市，随后出现的几起案子，更是在滨河造成了极大的恐慌，老刑警刘岩，带你破解...
沈念sama阅读 199,636评论 5赞 468
死咒
序言：滨河连续发生了三起死亡事件，死亡现场离奇诡异，居然都是意外死亡，警方通过查阅死者的电脑和手机，发现死者居然都...
沈念sama阅读 83,890评论 2赞 376
救了他两次的神仙让他今天三更去死
文/潘晓璐我一进店门，熙熙楼的掌柜王于贵愁眉苦脸地迎上来，“玉大人，你说我怎么就摊上这事。” “怎么了？”我有些...
开封第一讲书人阅读 146,680评论 0赞 330
道士缉凶录：失踪的卖姜人
文/不坏的土叔我叫张陵，是天一观的道长。经常有香客问我，道长，这世上最难降的妖魔是什么？我笑而不...
开封第一讲书人阅读 53,766评论 1赞 271
港岛之恋（遗憾婚礼）
正文为了忘掉前任，我火速办了婚礼，结果婚礼上，老公的妹妹穿的比我还像新娘。我一直安慰自己，他们只是感情好，可当我...
茶点故事阅读 62,665评论 5赞 359
恶毒庶女顶嫁案：这布局不是一般人想出来的
文/花漫我一把揭开白布。她就那样静静地躺着，像睡着了一般。火红的嫁衣衬着肌肤如雪。梳的纹丝不乱的头发上，一...
开封第一讲书人阅读 48,045评论 1赞 276
城市分裂传说
那天，我揣着相机与录音，去河边找鬼。笑死，一个胖子当着我的面吹牛，可吹牛的内容都是我干的。我是一名探鬼主播，决...
沈念sama阅读 37,515评论 3赞 390
双鸳鸯连环套：你想象不到人心有多黑
文/苍兰香墨我猛地睁开眼，长吁一口气：“原来是场噩梦啊……” “哼！你这毒妇竟也来了？” 一声冷哼从身侧响起，我...
开封第一讲书人阅读 36,182评论 0赞 254
万荣杀人案实录
序言：老挝万荣一对情侣失踪，失踪者是张志新（化名）和其女友刘颖，没想到半个月后，有当地人在树林里发现了一具尸体，经...
沈念sama阅读 40,334评论 1赞 294
护林员之死
正文独居荒郊野岭守林人离奇死亡，尸身上长有42处带血的脓包…… 初始之章·张勋以下内容为张勋视角年9月15日...
茶点故事阅读 35,274评论 2赞 317
白月光启示录
正文我和宋清朗相恋三年，在试婚纱的时候发现自己被绿了。大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
茶点故事阅读 37,319评论 1赞 329
活死人
序言：一个原本活蹦乱跳的男人离奇死亡，死状恐怖，灵堂内的尸体忽然破棺而出，到底是诈尸还是另有隐情，我是刑警宁泽，带...
沈念sama阅读 33,002评论 3赞 315
日本核电站爆炸内幕
正文年R本政府宣布，位于F岛的核电站，受9级特大地震影响，放射性物质发生泄漏。R本人自食恶果不足惜，却给世界环境...
茶点故事阅读 38,599评论 3赞 303
男人毒药：我在死后第九天来索命
文/蒙蒙一、第九天我趴在偏房一处隐蔽的房顶上张望。院中可真热闹，春花似锦、人声如沸。这庄子的主人今日做“春日...
开封第一讲书人阅读 29,675评论 0赞 19
一桩弑父案，背后竟有这般阴谋
文/苍兰香墨我抬头看了看天上的太阳。三九已至，却和暖如春，着一层夹袄步出监牢的瞬间，已是汗流浃背。一阵脚步声响...
开封第一讲书人阅读 30,917评论 1赞 255
情欲美人皮
我被黑心中介骗来泰国打工，没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留，地道东北人。一个月前我还...
沈念sama阅读 42,309评论 2赞 345
代替公主和亲
正文我出身青楼，却偏偏与公主长得像，于是被迫代替她去往敌国和亲。传闻我的和亲对象是个残疾皇子，可洞房花烛夜当晚...
茶点故事阅读 41,885评论 2赞 341

记录一下ROH热点计算和绘图的过程

画分布热图

推荐阅读更多精彩内容