环境与生态统计||探索性数据分析

rm(list=ls())
source("D:\\Users\\Administrator\\Desktop\\RStudio\\Environmental_and_Ecological_Statistics_with_R\\DataCode\\Code\\FrontMatter.R")

##################
### Chapter 3  ###
##################
plotDIRch3 <- paste (plotDIR, "chapter3", "figures", sep="/")
if(file_test("-d", plotDIRch3)){cat("plotDIRch3 already exists")}else{dir.create(plotDIRch3,recursive = TRUE)}

##### Everglades reference sites
wca2tp <- read.csv(paste(dataDIR, "WCA2TP.csv", sep="/"), header=T)
wca2tp$TP <- 1000*wca2tp$RESULT
TP.reference <- wca2tp[wca2tp$Type=="R",]
TP.reference$SITE <- as.vector( TP.reference$SITE)
TP.reference$Month <- ordered(months(as.Date(TP.reference$Date, "%m/%d/%y"), T),
  levels=c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"))
展示分布图形
par(mfrow=c(1,2), mar=c(2.5,2.5,.5,1), mgp=c(1.25, 0.125, 0), las=1, tck=0.01)
hist(log(TP.reference$TP), dens=-1, xlab="TP (ppb)",
     ylab="", main="", nclass=20)
hist(log(TP.reference$TP), dens=-1, xlab="TP (ppb)",
     ylab="", main="", nclass=10)

直方图会受分组个数的影响。

分位数图
TP.example<- c(0.21, 0.35, 0.50, 0.64, 0.79,0.90, 1.00, 1.01, 1.12, 5.66)
#postscript(file=paste(plotDIR,"evergQplot.eps", sep="/"),
#           height=2, width=3, horizontal=F)
#tikz(file=paste(plotDIRch3,"evergQplot.tex", sep="/"),
#           height=2, width=3, standAlone=F)
par(mar=c(2.5,2.5,.5,1), mgp=c(1.25, 0.125, 0), las=1, tck=0.01)
plot((1:length(TP.example) -0.5)/length(TP.example), TP.example, xlab="f", ylab="TP (ppb)")
Tukey 的箱线图
## Figure \ref{fig:clevelandBox} 3.6
data <-
  c(0.9, 1.6, 2.26305, 2.55052, 2.61059, 2.69284, 2.78511, 2.80955,
    2.94647, 2.96043, 3.05728, 3.15748, 3.18033, 3.20021,
    3.20156, 3.24435, 3.33231, 3.34176, 3.3762, 3.39578, 3.4925,
    3.55195, 3.56207, 3.65149, 3.72746, 3.73338, 3.73869,
    3.80469, 3.85224, 3.91386, 3.93034, 4.02351, 4.03947,
    4.05481, 4.10111, 4.26249, 4.28782, 4.37586, 4.48811,
    4.6001, 4.65677, 4.66167, 4.73211, 4.80803, 4.9812, 5.17246,
    5.3156, 5.35086, 5.36848, 5.48167, 5.68, 5.98848, 6.2, 7.1,
    7.4)

## explaining the box plot
uq <- quantile(data,.75)
lq <- quantile(data,.25)
r <- 1.5*(uq-lq)
h <- c(lq-r,1.6,lq,uq,6.2,uq+r)
writing <- c("lower quartile - 1.5 r",
             "lower adjacent value",
             "lower quartile",
             "upper quartile",
             "upper adjacent value",
             "upper quartile + 1.5 r")

n <- length(data)

#postscript(file=paste(plotDIR, "explainBox.eps", sep="/"),
#           width=4.75, height=2.25, horizontal=F)
#tikz(file=paste(plotDIRch3, "explainBox.tex", sep="/"),
#          width=4.75, height=2.25, standAlone=F)
par(mfrow=c(1,2), mar=c(2.5,2.5,.25,0.125), mgp=c(1.25, 0.125, 0),
    las=1, tck=0.01)
            #layout(matrix(c(1,2), nrow=1), width=c(1,1.5))
boxplot(data, rep(NA, length(data)), rep(NA, length(data)), ylab = "Data")
usr <- par("usr")
x <- usr[1] + (usr[2] - usr[1]) * 1/3
at <- c(0.9, 1.6, 3.2, 3.8, 4.65, 6.2, 7.2)
arrows(rep(x * 1.15, 7), at, rep(x, 7), at, length=0.075)
mtext("Main Title",1,1,cex=.8)
text(rep(x * 1.2, 7), at, adj = 0, cex=0.7,
     labels = c("outside value", "lower adjacent value",
       "lower quartile", "median", "upper quartile",
       "upper adjacent value", "outside values"))

plot(((1:n)-0.5)/n, data, xlab="f-value", ylab="Data")
abline(h = h, lwd =2, col = gray(0.85))
text(rep(0,3), h[4:6], writing[4:6], adj=0, cex=0.75)
text(rep(1,3), h[1:3], writing[1:3], adj=1, cex=0.75)
points(((1:n)-0.5)/n, data)
比较分布的图形
## Figure \ref{fig:addVmlt} 3.7
### Q-Q plot for additive and multiplicative shifts
x.data <- rnorm(500)
y.data <- rnorm(500, 2)
#postscript(file=paste(plotDIR,"additive.eps", sep="/"),
#           height=4, width=3.5, horizontal=F)
#tikz(file=paste(plotDIRch3,"additive.tex", sep="/"),
#     height=4, width=3.5, standAlone=F)
layout(matrix(c(1,3,2,3), nrow=2), heights=c(1,2), respect=T)
par(mar=c(2.5,2.5,.5,0.5), mgp=c(1.25, 0.125, 0), las=1, tck=0.01)
hist(x.data, xlim=range(c(x.data,y.data)), xlab="x", ylab="", main="", prob=T)
curve(dnorm(x), add=T, from=-3, to=3)
hist(y.data, xlim=range(c(x.data,y.data)), xlab="y", ylab="", main="", prob=T)
curve(dnorm(x, mean=2), add=T, from=-1, to=5)
qqplot(x.data, y.data, xlim=range(c(x.data,y.data)),
       ylim=range(c(x.data,y.data)), xlab="x", ylab="y")
abline(0,1, col=gray(0.5))
abline(2,1, col=gray(0.5))
x.data2 <- exp(rnorm(100))
y.data2 <- exp(rnorm(100, 1))

#postscript(file=paste(plotDIR, "multiplicative.eps", sep="/"),
#           height=4, width=3.5, horizontal=F)
#tikz(file=paste(plotDIRch3, "multiplicative.tex", sep="/"),
#     height=4, width=3.5, standAlone=F)
layout(matrix(c(1,3,2,3), nrow=2), heights=c(1,2), respect=T)
par(mar=c(2.5,2.5,.5,0.5), mgp=c(1.25, 0.125, 0), las=1, tck=0.01)
hist(x.data2, xlim=range(c(x.data2,y.data2)),
     xlab="x", ylab="", main="", prob=T, nclass=10)
curve(dlnorm(x), add=T, from=exp(-3), to=exp(3))
hist(y.data2, xlim=range(c(x.data2,y.data2)),
     xlab="y", ylab="", main="", prob=T, nclass=10)
curve(dlnorm(x, mean=1), add=T, from=exp(-2), to=exp(4))
qqplot(log(x.data2), log(y.data2),  xlab="x", ylab="y")
abline(0,1, col=gray(0.5))
layout(rbind(c(1,2,0,4,5), c(3,3,0,6,6)), widths=c(1,1,lcm(0.5), 1,1),
       heights=c(1,2), respect=T)
par(mar=c(2.5,2.5,.5,0.5), mgp=c(1.25, 0.125, 0), las=1, tck=0.01)

hist(x.data, xlim=range(c(x.data,y.data)), xlab="x", ylab="", main="", prob=T)
curve(dnorm(x), add=T, from=-3, to=3)
hist(y.data, xlim=range(c(x.data,y.data)), xlab="y", ylab="", main="", prob=T)
curve(dnorm(x, mean=2), add=T, from=-1, to=5)
qqplot(x.data, y.data, xlim=range(c(x.data,y.data)),
       ylim=range(c(x.data,y.data)),xlab="x", ylab="y")
abline(0,1, col=gray(0.5))
abline(2,1, col=gray(0.5))

hist(x.data2, xlim=range(c(x.data2,y.data2)),
     xlab="x", ylab="", main="", prob=T, nclass=10)
curve(dlnorm(x), add=T, from=exp(-3), to=exp(3))
hist(y.data2, xlim=range(c(x.data2,y.data2)),
     xlab="y", ylab="", main="", prob=T, nclass=10)
curve(dlnorm(x, mean=1), add=T, from=exp(-2), to=exp(4))
qqplot(x.data2, y.data2, xlim=range(c(x.data2,y.data2)),
       ylim=range(c(x.data2,y.data2)), xlab="x", ylab="y")
abline(0,1, col=gray(0.5))
识别变量间的依存关系的图形
## Car.test.frame in package rpart
packages(rpart)
names(car.test.frame)
#postscript(file=paste(plotDIR, "carsTest.eps", sep="/"),
#           width=4.75, height=2, horizontal=F)
#tikz(file=paste(plotDIRch3, "carsTest.tex", sep="/"),
 #          width=4.75, height=2, standAlone=F)
par(mfrow=c(1,2), mar=c(2.5,2.5,.5,0.5),
    mgp=c(1.25, 0.125, 0), las=1, tck=0.01)
plot(Mileage ~ Weight, data=car.test.frame,
     xlab="Weight (lb)", ylab="Mileage (mpg)")
abline(lsfit(car.test.frame$Weight, car.test.frame$Mileage))

plot(1/Mileage ~ Weight, data=car.test.frame,
     xlab="Weight (lb)", ylab="Mileage (mpg)")
abline(lsfit(car.test.frame$Weight, car.test.frame$Mileage),
       col=gray(0.5), lty=2)
cars.lo <- loess(I(1/Mileage) ~ Weight, data = car.test.frame)
oo <- order(cars.lo$x)
lines(cars.lo$x[oo], cars.lo$fitted[oo])
散点图矩阵
## Car.test.frame in package rpart

panel.hist = function(x, col.hist="grey", ...)
  {
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(usr[1:2], 0, 1.5) )
    h <- hist(x, plot = FALSE)
    breaks <- h$breaks; nB <- length(breaks)
    y <- h$counts; y <- y/max(y)
    rect(breaks[-nB], 0, breaks[-1],y, col=col.hist)
  }

#postscript(file=paste(plotDIR, "airquality.eps", sep="/"),
#           height=4.5, width=4.5, horizontal=F)
#tikz(file=paste(plotDIRch3, "airquality.tex", sep="/"),
#           height=4.5, width=4.5, standAlone=F)
pairs(Ozone~Solar.R+Wind+Temp, data=airquality,
      panel=function(x, y, ...){
        points(x, y,  ...)
        panel.smooth(x, y, col.smooth=1, ...)
      }, col=gray(0.5), cex=0.5, diag.panel=panel.hist)
### the iris data
Snames <- dimnames(iris3)[[3]]
iris.df <- rbind(iris3[,,1],iris3[,,2],iris3[,,3])
iris.df <- as.data.frame(iris.df)
iris.df$Species <- factor(rep(Snames,rep(50,3)))
#pairs(iris.df[1:4],main = "Anderson?s Iris Data",
#pch = c("<", "+", ">")[unclass(iris$Species)],
#col = c(gray(0.25),gray(0.5),gray(0.25))[unclass(iris$Species)])

#postscript(file=paste(plotDIR, "irisPairs.eps", sep="/"),
#           height=4.5, width=4.5, horizontal=F)
#tikz(file=paste(plotDIRch3, "irisPairs.tex", sep="/"),
 #          height=4.5, width=4.5, standAlone=F)
pairs(iris.df[1:4],main = "The Iris Data",
      pch = c(2, 4, 6)[unclass(iris$Species)],
      col = c("#BFBFBF", "#808080", "#404040")[unclass(iris$Species)])
变量转换
nadb <- read.csv(paste(dataDIR, "nadb.csv", sep="/"), header=T)
par(mfrow=c(1,2), mar=c(2.5,2.5,.5,0.5), mgp=c(1.5, 0.5, 0))
plot(TPOut ~ PLI, data=nadb,
     xlab="P Loading", ylab="P Concentration", cex=0.5)
plot(TPOut ~ PLI, data=nadb,
     xlab="P Loading", ylab="P Concentration", log="x", axes=F, cex=0.5)
axis(1, at=c(0.01, 1, 100), label=c("0.01", "1", "100"))
axis(2)
box()
  • 幂转换
powerT <- function(y, lambda1=c(-1,-1/2,-1/4), lambda2 = c(1/4, 1/2, 1), layout1=2){
  nt <- length(lambda1)+length(lambda2)+1
  transformed <- cbind(outer(y,lambda1,"^"),log(y),(outer(y,lambda2,"^")))
  y.power <- data.frame(transformed=c(transformed),
                        lambda = factor(rep(round(c(lambda1,0,lambda2), 2),
                          rep(length(y),nt))))
  ans <- qqmath(~transformed | lambda,
                data=y.power,
                prepanel = prepanel.qqmathline,
                panel = function(x, ...) {
                  panel.grid(h = 0)
                  panel.qqmath(x, col=gray(0.5))
                  panel.qqmathline(x, distribution = qnorm)
                }, aspect=1, scale = list(y = "free"),
                layout=c(layout1,ceiling(nt/layout1)),
                xlab = "Unit Normal Quantile",
                ylab = "y")
  ans
}
trellis.par.set(theme = canonical.theme("postscript", col=FALSE))
powerT(y=nadb$PLI)
  • 双变量散点图
pmdata<-read.table(paste(dataDIR, "PM-RAW-DATA.txt", sep="/"), header=T)
pmdata$log.value<-log(pmdata$value)
xyplot(log.value~AvgTemp, panel=function(x,y,...){
  panel.grid()
  panel.xyplot(x,y, col=grey(0.65), cex=0.5, ...)
  panel.loess(x,y,span=1, degree=1,col=1,...)
}, scales=list(x=list(cex=0.75), y=list(cex=0.75)),
       par.settings=trellis.par.temp,
       data=pmdata, xlab="Average Daily Temperature (F)", ylab="Log PM2.5")
## traditional coplot
coplot(sqrt(Ozone) ~ Solar.R|Wind*Temp , data=airquality,
       given.values=list(co.intervals(airquality$Wind, 3, 0.25),
         co.intervals(airquality$Temp, 3, 0.25)),
       panel=function(x, y, ...)
       panel.smooth(x, y, span=1, ...),
       ylab="Log Ozone",
       xlab=c("Solar Radiation", "Wind","Temperature"))

WindSpeed <- equal.count(airquality$Wind, 3, 0.25)
Temperature <- equal.count(airquality$Temp, 3, 0.25)
SolarR <- equal.count(airquality$Solar.R, 3, 0.25)
print(
xyplot(sqrt(Ozone) ~ SolarR|WindSpeed*Temperature,
       data=airquality,
       panel=function(x,y,...){
#            panel.loess(x, y, span=1, degree=1, ...)
            panel.grid()
            panel.lmline(x, y, col="grey",...)
            panel.xyplot(x, y, col=1, cex=0.5, ...)
       },
       ylab=list(label="Sqrt Ozone", cex=0.6),
       xlab=list(label="Solar Radiation", cex=0.6),
       scales=list(x=list(alternating=c(1, 2, 1))),
#       between=list(y=1),
       par.strip.text=list(cex=0.4), aspect=1,
       par.settings=list(axis.text=list(cex=0.4)))
)
print(
      xyplot(sqrt(Ozone) ~ Wind|SolarR*Temperature,
             data=airquality,
             panel=function(x,y,...){
#            panel.loess(x, y, span=1, degree=1, ...)
               panel.grid()
               panel.lmline(x, y, col="grey",...)
               panel.xyplot(x, y, col=1, cex=0.5, ...)
             },
             ylab=list(label="Sqrt Ozone", cex=0.6),
             xlab=list(label="Wind Speed", cex=0.6),
             scales=list(x=list(alternating=c(1, 2, 1))),
                                        #       between=list(y=1),
             par.strip.text=list(cex=0.4), aspect=1,
             par.settings=list(axis.text=list(cex=0.4)))
      )
print(
xyplot(sqrt(Ozone) ~ Temp|WindSpeed*SolarR,
       data=airquality,
       panel=function(x,y,...){
#            panel.loess(x, y, span=1, degree=1, ...)
            panel.grid()
            panel.lmline(x, y, col="grey",...)
            panel.xyplot(x, y, col=1, cex=0.5, ...)
       },
       ylab=list(label="Sqrt Ozone", cex=0.6),
       xlab=list(label="Temperature", cex=0.6),
       scales=list(x=list(alternating=c(1, 2, 1))),
#       between=list(y=1),
       par.strip.text=list(cex=0.4), aspect=1,
       par.settings=list(axis.text=list(cex=0.4)))
)
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 202,607评论 5 476
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 85,047评论 2 379
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 149,496评论 0 335
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 54,405评论 1 273
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 63,400评论 5 364
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 48,479评论 1 281
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 37,883评论 3 395
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 36,535评论 0 256
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 40,743评论 1 295
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 35,544评论 2 319
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 37,612评论 1 329
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 33,309评论 4 318
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 38,881评论 3 306
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 29,891评论 0 19
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 31,136评论 1 259
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 42,783评论 2 349
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 42,316评论 2 342

推荐阅读更多精彩内容