library(GenomicRanges)
library(chromVAR)
- Output files:
# Output 1 (named file1 here): peakN left-joined with peakOverlap on Histone
# and peakType, plus peakReprodRate = peakReprod / peakN * 100. The same
# expression is assigned to `peakReprod` later in this file.
# NOTE(review): peakN and peakOverlap are only assigned further down, so this
# line cannot run at this position as written -- presumably it is a summary of
# the outputs rather than live code; confirm.
file1 = left_join(peakN, peakOverlap, by = c("Histone", "peakType")) %>% mutate(peakReprodRate = peakReprod/peakN * 100)
;
# Output 2 (named file2 here): inPeakData left-joined with alignResult on
# Histone and Replicate, plus frip = inPeakN / MappedFragNum_hg38 * 100. The
# same expression is assigned to `frip` later in this file.
# NOTE(review): inPeakData is only assigned further down and alignResult is not
# defined in the visible part of the file -- confirm where it comes from.
file2 = left_join(inPeakData, alignResult, by = c("Histone", "Replicate")) %>% mutate(frip = inPeakN/MappedFragNum_hg38 * 100)
Number of peaks called
# Tally the SEACR peaks of every non-IgG sample.
#   peakN     : one row per (sample, peakType) holding the number of rows read
#               from the sample's .peaks.stringent.bed file.
#   peakWidth : one row per peak, with width = |V3 - V2| of the bed record.
# Sample names in sampleList are split on "_"; the first piece is stored as
# Histone and the second as Replicate.
peakN <- c()
peakWidth <- c()
peakType <- c("top0.05", "top0.01")
for (hist in sampleList) {
  histInfo <- strsplit(hist, "_")[[1]]

  # Samples whose first name component is "IgG" are not tallied.
  if (histInfo[1] == "IgG") {
    next
  }

  for (type in peakType) {
    bedFile <- paste0(projPath, "\\seacr\\", hist, ".seacr_", type, ".peaks.stringent.bed")
    peakInfo <- read.table(bedFile, header = FALSE, fill = TRUE)
    peakInfo <- mutate(peakInfo, width = abs(V3 - V2))

    # Append this sample's peak count as a single row.
    countRow <- data.frame(
      peakN = nrow(peakInfo),
      peakType = type,
      Histone = histInfo[1],
      Replicate = histInfo[2]
    )
    peakN <- rbind(peakN, countRow)

    # Append one row per peak so the width distribution can be plotted later.
    widthRows <- data.frame(
      width = peakInfo$width,
      peakType = type,
      Histone = histInfo[1],
      Replicate = histInfo[2]
    )
    peakWidth <- rbind(peakWidth, widthRows)
  }
}

# Print the per-sample peak counts with the columns in display order.
select(peakN, Histone, Replicate, peakType, peakN)
Reproducibility of the peak across biological replicates
# Reproducibility of peaks across replicates.
# For every (peakType, Histone) pair, the peaks of the first replicate are
# successively restricted to those that overlap at least one peak in each
# following replicate. The size of the surviving set is stored as peakReprod
# and, after joining onto peakN by Histone and peakType, divided by each
# row's peakN to give peakReprodRate (in percent).
histL <- c("A", "W")
repL <- paste0("rep", 1:2)
peakType <- c("top0.05", "top0.01")
peakOverlap <- c()
for (type in peakType) {
  for (hist in histL) {
    overlap.gr <- GRanges()
    # Explicit first-replicate flag: testing length(overlap.gr) > 0 instead
    # would silently restart from the next replicate's full peak set whenever
    # the running intersection is empty.
    isFirstRep <- TRUE
    for (rep in repL) {
      peakInfo <- read.table(
        paste0(projPath, "\\seacr\\", hist, "_", rep, ".seacr_", type, ".peaks.stringent.bed"),
        header = FALSE, fill = TRUE
      )
      peakInfo.gr <- GRanges(
        peakInfo$V1,
        IRanges(start = peakInfo$V2, end = peakInfo$V3),
        strand = "*"
      )
      if (isFirstRep) {
        overlap.gr <- peakInfo.gr
        isFirstRep <- FALSE
      } else {
        # subsetByOverlaps() keeps each peak at most once. Indexing with the
        # query hits of findOverlaps() repeats a peak for every partner it
        # overlaps, which inflates the reproduced-peak count.
        overlap.gr <- subsetByOverlaps(overlap.gr, peakInfo.gr)
      }
    }
    peakOverlap <- rbind(
      peakOverlap,
      data.frame(peakReprod = length(overlap.gr), Histone = hist, peakType = type)
    )
  }
}

peakReprod <- left_join(peakN, peakOverlap, by = c("Histone", "peakType")) %>%
  mutate(peakReprodRate = peakReprod / peakN * 100)

# Print the reproducibility table with the columns in display order.
peakReprod %>%
  select(Histone, Replicate, peakType, peakN, peakReprodNum = peakReprod, peakReprodRate)
FRagment proportion in Peaks regions (FRiPs).
# Fraction of fragments in peaks (FRiP).
# For every (peakType, Histone, Replicate) combination the sample's SEACR
# peaks are turned into a GRanges object, chromVAR::getCounts() counts the
# fragments of the matching BAM file over those peaks, and the first column
# of the count matrix is summed into inPeakN.
histL <- c("A", "W")
repL <- paste0("rep", 1:2)
peakType <- c("top0.05", "top0.01")
bamDir <- "E:\\bam"
inPeakData <- c()

## overlap with bam file to get count
for (type in peakType) {
  for (hist in histL) {
    for (rep in repL) {
      # Peak file and BAM file share the "<Histone>_<Replicate>" stem.
      sampleName <- paste0(hist, "_", rep)

      peakRes <- read.table(
        paste0(projPath, "\\seacr\\", sampleName, ".seacr_", type, ".peaks.stringent.bed"),
        header = FALSE, fill = TRUE
      )
      peak.gr <- GRanges(
        seqnames = peakRes$V1,
        IRanges(start = peakRes$V2, end = peakRes$V3),
        strand = "*"
      )

      bamFile <- paste0(bamDir, "\\", sampleName, ".sortedbw.rmDup.bam")
      fragment_counts <- getCounts(
        bamFile, peak.gr,
        paired = TRUE, by_rg = FALSE, format = "bam"
      )
      inPeakN <- sum(counts(fragment_counts)[, 1])

      newRow <- data.frame(
        inPeakN = inPeakN,
        Histone = hist,
        Replicate = rep,
        peakType = type
      )
      inPeakData <- rbind(inPeakData, newRow)
    }
  }
}

# FRiP (percent) = fragments in peaks / MappedFragNum_hg38 * 100.
# Author's note: FRiP should perhaps be computed against the number of reads
# remaining after cleaning instead.
frip <- left_join(inPeakData, alignResult, by = c("Histone", "Replicate")) %>%
  mutate(frip = inPeakN / MappedFragNum_hg38 * 100)

# Print the FRiP table with the columns in display order.
select(
  frip,
  Histone, Replicate, SequencingDepth, MappedFragNum_hg38, AlignmentRate_hg38,
  FragInPeakNum = inPeakN, FRiPs = frip
)
# Four summary panels, combined into a 2 x 2 grid with one shared legend.

# Panel A: peak count per Histone, one facet per peakType; points are
# coloured by Replicate.
fig7A <- ggplot(peakN, aes(x = Histone, y = peakN, fill = Histone)) +
  geom_boxplot() +
  geom_jitter(aes(color = Replicate), position = position_jitter(0.15)) +
  facet_grid(~peakType) +
  scale_fill_viridis(discrete = TRUE, begin = 0.1, end = 0.55, option = "magma", alpha = 0.8) +
  scale_color_viridis(discrete = TRUE, begin = 0.1, end = 0.9) +
  theme_bw(base_size = 18) +
  labs(x = "", y = "Number of Peaks")

# Panel B: peak width distribution on a log-transformed y axis, faceted by
# Replicate (rows) and peakType (columns).
fig7B <- ggplot(peakWidth, aes(x = Histone, y = width, fill = Histone)) +
  geom_violin() +
  facet_grid(Replicate~peakType) +
  scale_fill_viridis(discrete = TRUE, begin = 0.1, end = 0.55, option = "magma", alpha = 0.8) +
  scale_color_viridis(discrete = TRUE, begin = 0.1, end = 0.9) +
  scale_y_continuous(trans = "log", breaks = c(400, 3000, 22000)) +
  theme_bw(base_size = 18) +
  labs(x = "", y = "Width of Peaks")

# Panel C: percentage of reproduced peaks as bars, labelled with the rate
# rounded to two decimals.
fig7C <- ggplot(
  peakReprod,
  aes(x = Histone, y = peakReprodRate, fill = Histone, label = round(peakReprodRate, 2))
) +
  geom_bar(stat = "identity") +
  geom_text(vjust = 0.1) +
  facet_grid(Replicate~peakType) +
  scale_fill_viridis(discrete = TRUE, begin = 0.1, end = 0.55, option = "magma", alpha = 0.8) +
  scale_color_viridis(discrete = TRUE, begin = 0.1, end = 0.9) +
  theme_bw(base_size = 18) +
  labs(x = "", y = "% of Peaks Reproduced")

# Panel D: FRiP percentage per Histone; points are coloured by Replicate.
fig7D <- ggplot(
  frip,
  aes(x = Histone, y = frip, fill = Histone, label = round(frip, 2))
) +
  geom_boxplot() +
  geom_jitter(aes(color = Replicate), position = position_jitter(0.15)) +
  scale_fill_viridis(discrete = TRUE, begin = 0.1, end = 0.55, option = "magma", alpha = 0.8) +
  scale_color_viridis(discrete = TRUE, begin = 0.1, end = 0.9) +
  theme_bw(base_size = 18) +
  labs(x = "", y = "% of Fragments in Peaks")

ggarrange(
  fig7A, fig7B, fig7C, fig7D,
  ncol = 2, nrow = 2,
  common.legend = TRUE, legend = "bottom"
)