六、多个重复bam的merge
samtools merge out.bam input1.last.bam input2.last.bam input3.last.bam(#时间会特别久)
七、片段长度分布图
samtools view out.bam | \
awk -F'\t' 'function abs(x){return ((x < 0.0) ? -x : x)} {print $1"\t"abs($9)}' | \
sort | uniq | cut -f2 > fragment..txt
(#接下来在R中进行)
library(tidyverse)
getwd()
data <- read.table("D:/R/ATAC-seq/fragment.duroc.txt")
# 去除含零行
data <- tbl_df(data) %>% filter(V1!=0)
# 设置插入片段长度的阈值,过滤掉太长的片段
length_cutoff <- 1200
fragment <- data$V1[data$V1 <= length_cutoff]
######
##Part1:基础语法画图
######
# 利用直方图统计频数分布,设置柱子个数
breaks_num <- 500
res <- hist(fragment, breaks = breaks_num, plot = FALSE)
# 添加坐标原点
plot(x = c(0, res$breaks),
y = c(0, 0, res$counts) / 10^2,
type = "l", col = "red",full="red",
xlab = "Fragment length(bp)",
ylab = expression(Normalized ~ read ~ density ~ 10^2),
main = "Sample Fragment sizes")
######
##Part2:ggplot2 画图及其拼图
######
## 不同数据分布
library("ggplot2")
DATA <- data.frame(x1 = c(0, res$breaks),y1=c(0, 0, res$counts) / 10^2)
p1 <- ggplot(DATA,aes(x =x1,y = y1 ))+
geom_line(col="red")+
xlab("Fragment length(bp)")+
ylab(expression(Normalized ~ read ~ count ~ 10^2))+
ggtitle("Sample Fragment sizes")+
theme_classic()
## 画小图
DATA2 <- data.frame(x1 = c(0, res$breaks),y1=log10(c(0, 0, res$counts) / 10^2)+1)
p2 <- ggplot(DATA2,aes(x =x1,y = y1 ))+
geom_line(col="red")+
xlab("Fragment length(bp)")+
ylab(expression(Normalized ~ read ~ count ~ (log)))+
ggtitle("Sample Fragment sizes")+
theme_classic()
## 小图插入右上角
library(cowplot)
ggdraw() +
draw_plot(p1, 0, 0, 1, 1) +
draw_plot(p2, 0.5, 0.52, 0.5, 0.4) +
draw_plot_label(c("A", "B"), c(0, 0.5), c(1, 0.92), size = 15)
八、peak注释可视化
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install(version = "3.11")
BiocManager::install("ChIPseeker")
BiocManager::install("org.Ss.eg.db") #此处为猪的物种包
BiocManager::install("clusterProfiler")
BiocManager::install("ReactomePA")
BiocManager::install("DOSE")
BiocManager::install("Biobase")
BiocManager::install("RMariaDB")
BiocManager::install("GenomicFeatures")
BiocManager::install("IRanges")
setwd("wwh/4chipseeker")
library("Biobase")
library("RMariaDB")
library("GenomicFeatures")
ss11 <- makeTxDbFromGFF("wwh/ATAC_en/Sus_scrofa.Sscrofa11.1.106.gff3.gz")
library("ChIPseeker")
library("org.Ss.eg.db")
library("clusterProfiler")
library("UpSetR")
library("ggupset")
library("ggplot2")
library("ggplotify")
library("IRanges")
luchuan1 <- readPeakFile("wuwh/ATAC_en/peaks/luchuan1_peaks.narrowPeak")
luchuan2 <- readPeakFile("/wuwh/ATAC_en/peaks/luchuan2_peaks.narrowPeak")
luchuan3 <- readPeakFile("/wuwh/ATAC_en/peaks/luchuan3_peaks.narrowPeak")
duroc2 <- readPeakFile("/wuwh/ATAC_en/peaks/duroc2_peaks.narrowPeak")
duroc3 <- readPeakFile("/wuwh/ATAC_en/peaks/duroc3_peaks.narrowPeak")
duroc1 <- readPeakFile("/wuwh/ATAC_en/peaks/duroc1_peaks.narrowPeak")
pdf(file="peak1.pdf",width=10,height=20 )
covplot(luchuan1,weightCol=5)
dev.off()
pdf(file="peak2.pdf",width=10,height=20 )
covplot(duroc1,weightCol=5)
dev.off()
#Heatmap of ChIP binding to TSS regions
peaks <- list(luchuan1=luchuan1,luchuan2=luchuan2,luchuan3=luchuan3,duroc1=duroc1,duroc2=duroc2,duroc3=duroc3)
pdf(file="heatmap.pdf",width=10,height=20)
peakHeatmap(peaks, weightCol="V5", TxDb=ss11, upstream=3000, downstream=3000, color=rainbow(length(peaks)))
dev.off()
promoter <- getPromoters(TxDb=ss11, upstream=3000, downstream=3000)
tagMatrix <- getTagMatrix(duroc1, windows=promoter)
pdf(file="AvgProf.pdf")
plotAvgProf(tagMatrix, xlim=c(-3000, 3000),xlab="Genomic Region (5'->3')", ylab = "Read Count Frequency")
dev.off()
promoter <- getPromoters(TxDb=gg7, upstream=3000, downstream=3000)
tagMatrix <- getTagMatrix(nc2_peak, windows=promoter)
pdf(file="AvgProf2.pdf")
plotAvgProf(tagMatrix, xlim=c(-3000, 3000),xlab="Genomic Region (5'->3')", ylab = "Read Count Frequency")
dev.off()
peakAnnoList <- lapply(peaks, annotatePeak, tssRegion = c(-3000, 3000), TxDb = ss11, level = "transcript")
pdf(file="bar.pdf")
plotAnnoBar(peakAnnoList)
dev.off()
pdf(file="tos.pdf")
plotDistToTSS(peakAnnoList)
dev.off()
pdf(file="1u.pdf")
upsetplot(peakAnnoList$luchuan1)
dev.off()
pdf(file="2u.pdf")
upsetplot(peakAnnoList$duroc1)
dev.off()
pdf(file="1v.pdf")
vennpie(peakAnnoList$luchuan1)
dev.off()
pdf(file="2v.pdf")
vennpie(peakAnnoList$duroc1)
dev.off()