R| Agilent芯片分析-limma

Aglient的芯片在科研界也是一大宠儿，通常根据其染色分为单通道和多通道两种。最为奇葩的是Aglient芯片的许多表达矩阵下载后发现有空值、负值，因此就要求我们从原始数据开始着手。下面就一起学习下吧。

核心函数：

read.maimages(raw_datas, source = "agilent", green.only = T, other.columns = "gIsWellAboveBG")

1.单通道芯片

以下以GSE23558为例，是《aglient芯片原始数据处理》的学习笔记。

1.1 数据下载及读取

rm(list = ls())
library(tidyverse)
library(limma)
library(GEOquery)
library(AnnoProbe)
gse="GSE23558"
#setwd(gse)
#geoChina(gse)
load("D:/jianshu/microarry-analysis/GSE23558/GSE23558_eSet.Rdata") #提取原始数据

# 分组信息
pd <- pData(gset[[1]])
raw_dir <- "D:/jianshu/microarry-analysis/GSE23558/GSE23558_RAW"
raw_datas <- paste0(raw_dir,"/",list.files(raw_dir))
raw_order <- str_extract(raw_datas,"GSM\\d*")
pd <- pd[match(raw_order,rownames(pd)),]
pd <- pd %>% 
  select(geo_accession,`tissue:ch1`)
colnames(pd) <- c("id","type")
pd$type <- case_when(pd$type=="Oral Tumor"~"tumor",
                     T~"normal")
pd$type <- factor(pd$type,levels = c("normal","tumor"))
group_list <- pd$type
names(group_list) <- pd$id

#原始数据读取
data.raw <- read.maimages(raw_datas,
                          source = "agilent",
                          green.only = T,
                          other.columns = "gIsWellAboveBG")

## Read D:/jianshu/microarry-analysis/GSE23558/GSE23558_RAW/GSM577914.txt.gz 
## Read D:/jianshu/microarry-analysis/GSE23558/GSE23558_RAW/GSM577915.txt.gz 
## Read D:/jianshu/microarry-analysis/GSE23558/GSE23558_RAW/GSM577916.txt.gz 
......
## Read D:/jianshu/microarry-analysis/GSE23558/GSE23558_RAW/GSM577943.txt.gz 
## Read D:/jianshu/microarry-analysis/GSE23558/GSE23558_RAW/GSM577944.txt.gz 
## Read D:/jianshu/microarry-analysis/GSE23558/GSE23558_RAW/GSM577945.txt.gz

1.2 背景矫正和标准化

data.bg <- backgroundCorrect(data.raw,method = "normexp")

## Array 1 corrected
## Array 2 corrected
## Array 3 corrected
.....
## Array 29 corrected
## Array 30 corrected
## Array 31 corrected
## Array 32 corrected

data.norm <- normalizeBetweenArrays(data.bg,method = "quantile")

1.3 基因过滤

去掉对照探针、未匹配到genesymbol探针、去表达探针（至少在一般样本中高于背景）、重复探针。

ctrl <- data.norm$genes$ControlType==1L
Nosymbol <- is.na(data.norm$genes$GeneName)
IsExpr <- rowSums(data.norm$other$gIsWellAboveBG>0)>= nrow(pd)/2
Isdup <- duplicated(data.norm$genes$GeneName)
data.filt <- data.norm[!ctrl&!Nosymbol&IsExpr&!Isdup,]
dim(data.filt)

## [1] 20650    32

过滤后剩余2万零650个探针。

1.4 表达矩阵

data.exp <- data.filt@.Data[[1]]
library(RColorBrewer)
colors <- brewer.pal(12,"Set3")
boxplot(data.exp,col=colors,las=3)

image.png

colnames(data.exp) <- str_extract(colnames(data.exp),"GSM\\d*")

1.5 获得基因名

anno <- data.filt$genes
nrow(anno);nrow(data.exp)

## [1] 20650

## [1] 20650

rownames(data.exp)=anno$GeneName
ids <- unique(anno$GeneName)
data.exp <- data.exp[!duplicated(anno$GeneName),]

其实，整个过程相当于对作者上传的标准化矩阵进行了修复。

1.6 差异分析

design <- model.matrix(~group_list)
fit <- lmFit(data.exp,design)
fit1 <- eBayes(fit,trend = T,robust=T)
summary(decideTests(fit))

##        (Intercept) group_listtumor
## Down             0            2090
## NotSig           0           16887
## Up           20650            1673

options(digits = 4)
deg <- topTable(fit1,coef = 2,n=dim(data.exp)[1])
boxplot(data.exp[rownames(deg)[1],]~group_list)

image.png

2.双通道芯片

rm(list = ls())
gse="GSE29609"
library(limma)
library(AnnoProbe)
#setwd(gse)
#geoChina(gse)
load("D:/jianshu/microarry-analysis/GSE29609_eSet.Rdata")
pd <- Biobase::pData(gset[[1]])
raw_dir <- "D:/jianshu/microarry-analysis/GSE29609/GSE29609_RAW"

raw_datas <- paste0(raw_dir,"/",list.files(raw_dir,pattern = "GSM\\d*"))
raw_order <- str_extract(raw_datas,"GSM\\d*")
pd <- pd[match(raw_order,rownames(pd)),]

#原始数据读取
data.raw <- read.maimages(raw_datas,
                          source = "agilent",
                          green.only = F,
                          other.columns = "gIsWellAboveBG")

## Read D:/jianshu/microarry-analysis/GSE29609/GSE29609_RAW/GSM733579_US22502565_251239115211_S01_A01_GE2_44k_1005.txt.gz 
## Read D:/jianshu/microarry-analysis/GSE29609/GSE29609_RAW/GSM733580_US22502565_251239125482_S01_A01_GE2_44k_1005.txt.gz 
## Read D:/jianshu/microarry-analysis/GSE29609/GSE29609_RAW/GSM733581_US22502565_251239144561_S01_A01_GE2_44k_1005.txt.gz 
......
## Read D:/jianshu/microarry-analysis/GSE29609/GSE29609_RAW/GSM733615_US22502565_251239125485_S01_A01_GE2_44k_1005.txt.gz 
## Read D:/jianshu/microarry-analysis/GSE29609/GSE29609_RAW/GSM733616_US22502565_251239115213_S01_A01_GE2_44k_1005.txt.gz 
## Read D:/jianshu/microarry-analysis/GSE29609/GSE29609_RAW/GSM733617_US22502565_251239144552_S01_A01_GE2_44k_1005.txt.gz

2.1 背景矫正、标准化

data.bg <- backgroundCorrect(data.raw,method = "normexp")

## Array 1 corrected
## Array 2 corrected
## Array 3 corrected
......
## Array 37 corrected
## Array 38 corrected
## Array 39 corrected
## Array 1 corrected
## Array 2 corrected
## Array 3 corrected
......
## Array 37 corrected
## Array 38 corrected
## Array 39 corrected

data.norm <- normalizeBetweenArrays(data.bg,method = "quantile")

ctrl <- data.norm$genes$ControlType==1L
Nosymbol <- is.na(data.norm$genes$GeneName)
#IsExpr <- rowSums(data.norm$other$gIsWellAboveBG>0)>= nrow(pd)/2
Isdup <- duplicated(data.norm$genes$GeneName)
data.filt <- data.norm[!ctrl&!Nosymbol&!Isdup,]
dim(data.filt)

## [1] 31036    39

data.exp <- data.filt@.Data[[4]]
library(RColorBrewer)
colors <- brewer.pal(12,"Set3")
boxplot(data.exp,col=colors,las=3)

image.png

疑问：双通道芯片，我是按照单通道的处理的，不知道是否正确？希望得到大神的指导，万分感谢。

参考链接：

aglient芯片原始数据处理

双通道芯片MA和density

人面猴
序言：七十年代末，一起剥皮案震惊了整个滨河市，随后出现的几起案子，更是在滨河造成了极大的恐慌，老刑警刘岩，带你破解...
沈念sama阅读 203,547评论 6赞 477
死咒
序言：滨河连续发生了三起死亡事件，死亡现场离奇诡异，居然都是意外死亡，警方通过查阅死者的电脑和手机，发现死者居然都...
沈念sama阅读 85,399评论 2赞 381
救了他两次的神仙让他今天三更去死
文/潘晓璐我一进店门，熙熙楼的掌柜王于贵愁眉苦脸地迎上来，“玉大人，你说我怎么就摊上这事。” “怎么了？”我有些...
开封第一讲书人阅读 150,428评论 0赞 337
道士缉凶录：失踪的卖姜人
文/不坏的土叔我叫张陵，是天一观的道长。经常有香客问我，道长，这世上最难降的妖魔是什么？我笑而不...
开封第一讲书人阅读 54,599评论 1赞 274
港岛之恋（遗憾婚礼）
正文为了忘掉前任，我火速办了婚礼，结果婚礼上，老公的妹妹穿的比我还像新娘。我一直安慰自己，他们只是感情好，可当我...
茶点故事阅读 63,612评论 5赞 365
恶毒庶女顶嫁案：这布局不是一般人想出来的
文/花漫我一把揭开白布。她就那样静静地躺着，像睡着了一般。火红的嫁衣衬着肌肤如雪。梳的纹丝不乱的头发上，一...
开封第一讲书人阅读 48,577评论 1赞 281
城市分裂传说
那天，我揣着相机与录音，去河边找鬼。笑死，一个胖子当着我的面吹牛，可吹牛的内容都是我干的。我是一名探鬼主播，决...
沈念sama阅读 37,941评论 3赞 395
双鸳鸯连环套：你想象不到人心有多黑
文/苍兰香墨我猛地睁开眼，长吁一口气：“原来是场噩梦啊……” “哼！你这毒妇竟也来了？” 一声冷哼从身侧响起，我...
开封第一讲书人阅读 36,603评论 0赞 258
万荣杀人案实录
序言：老挝万荣一对情侣失踪，失踪者是张志新（化名）和其女友刘颖，没想到半个月后，有当地人在树林里发现了一具尸体，经...
沈念sama阅读 40,852评论 1赞 297
护林员之死
正文独居荒郊野岭守林人离奇死亡，尸身上长有42处带血的脓包…… 初始之章·张勋以下内容为张勋视角年9月15日...
茶点故事阅读 35,605评论 2赞 321
白月光启示录
正文我和宋清朗相恋三年，在试婚纱的时候发现自己被绿了。大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
茶点故事阅读 37,693评论 1赞 329
活死人
序言：一个原本活蹦乱跳的男人离奇死亡，死状恐怖，灵堂内的尸体忽然破棺而出，到底是诈尸还是另有隐情，我是刑警宁泽，带...
沈念sama阅读 33,375评论 4赞 318
日本核电站爆炸内幕
正文年R本政府宣布，位于F岛的核电站，受9级特大地震影响，放射性物质发生泄漏。R本人自食恶果不足惜，却给世界环境...
茶点故事阅读 38,955评论 3赞 307
男人毒药：我在死后第九天来索命
文/蒙蒙一、第九天我趴在偏房一处隐蔽的房顶上张望。院中可真热闹，春花似锦、人声如沸。这庄子的主人今日做“春日...
开封第一讲书人阅读 29,936评论 0赞 19
一桩弑父案，背后竟有这般阴谋
文/苍兰香墨我抬头看了看天上的太阳。三九已至，却和暖如春，着一层夹袄步出监牢的瞬间，已是汗流浃背。一阵脚步声响...
开封第一讲书人阅读 31,172评论 1赞 259
情欲美人皮
我被黑心中介骗来泰国打工，没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留，地道东北人。一个月前我还...
沈念sama阅读 43,970评论 2赞 349
代替公主和亲
正文我出身青楼，却偏偏与公主长得像，于是被迫代替她去往敌国和亲。传闻我的和亲对象是个残疾皇子，可洞房花烛夜当晚...
茶点故事阅读 42,414评论 2赞 342

R| Agilent芯片分析-limma

核心函数：

1.单通道芯片

1.1 数据下载及读取

1.2 背景矫正和标准化

1.3 基因过滤

1.4 表达矩阵

1.5 获得基因名

1.6 差异分析

2.双通道芯片

2.1 背景矫正、标准化

疑问：双通道芯片，我是按照单通道的处理的，不知道是否正确？希望得到大神的指导，万分感谢。

推荐阅读更多精彩内容