环境与生态统计||探索性数据分析

rm(list=ls())
source("D:\\Users\\Administrator\\Desktop\\RStudio\\Environmental_and_Ecological_Statistics_with_R\\DataCode\\Code\\FrontMatter.R")

##################
### Chapter 3  ###
##################
plotDIRch3 <- paste (plotDIR, "chapter3", "figures", sep="/")
if(file_test("-d", plotDIRch3)){cat("plotDIRch3 already exists")}else{dir.create(plotDIRch3,recursive = TRUE)}

##### Everglades reference sites
wca2tp <- read.csv(paste(dataDIR, "WCA2TP.csv", sep="/"), header=T)
wca2tp$TP <- 1000*wca2tp$RESULT
TP.reference <- wca2tp[wca2tp$Type=="R",]
TP.reference$SITE <- as.vector( TP.reference$SITE)
TP.reference$Month <- ordered(months(as.Date(TP.reference$Date, "%m/%d/%y"), T),
  levels=c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"))

展示分布图形

par(mfrow=c(1,2), mar=c(2.5,2.5,.5,1), mgp=c(1.25, 0.125, 0), las=1, tck=0.01)
hist(log(TP.reference$TP), dens=-1, xlab="TP (ppb)",
     ylab="", main="", nclass=20)
hist(log(TP.reference$TP), dens=-1, xlab="TP (ppb)",
     ylab="", main="", nclass=10)

直方图会受分组个数的影响。

分位数图

TP.example<- c(0.21, 0.35, 0.50, 0.64, 0.79,0.90, 1.00, 1.01, 1.12, 5.66)
#postscript(file=paste(plotDIR,"evergQplot.eps", sep="/"),
#           height=2, width=3, horizontal=F)
#tikz(file=paste(plotDIRch3,"evergQplot.tex", sep="/"),
#           height=2, width=3, standAlone=F)
par(mar=c(2.5,2.5,.5,1), mgp=c(1.25, 0.125, 0), las=1, tck=0.01)
plot((1:length(TP.example) -0.5)/length(TP.example), TP.example, xlab="f", ylab="TP (ppb)")

Tukey 的箱线图

## Figure \ref{fig:clevelandBox} 3.6
data <-
  c(0.9, 1.6, 2.26305, 2.55052, 2.61059, 2.69284, 2.78511, 2.80955,
    2.94647, 2.96043, 3.05728, 3.15748, 3.18033, 3.20021,
    3.20156, 3.24435, 3.33231, 3.34176, 3.3762, 3.39578, 3.4925,
    3.55195, 3.56207, 3.65149, 3.72746, 3.73338, 3.73869,
    3.80469, 3.85224, 3.91386, 3.93034, 4.02351, 4.03947,
    4.05481, 4.10111, 4.26249, 4.28782, 4.37586, 4.48811,
    4.6001, 4.65677, 4.66167, 4.73211, 4.80803, 4.9812, 5.17246,
    5.3156, 5.35086, 5.36848, 5.48167, 5.68, 5.98848, 6.2, 7.1,
    7.4)

## explaining the box plot
uq <- quantile(data,.75)
lq <- quantile(data,.25)
r <- 1.5*(uq-lq)
h <- c(lq-r,1.6,lq,uq,6.2,uq+r)
writing <- c("lower quartile - 1.5 r",
             "lower adjacent value",
             "lower quartile",
             "upper quartile",
             "upper adjacent value",
             "upper quartile + 1.5 r")

n <- length(data)

#postscript(file=paste(plotDIR, "explainBox.eps", sep="/"),
#           width=4.75, height=2.25, horizontal=F)
#tikz(file=paste(plotDIRch3, "explainBox.tex", sep="/"),
#          width=4.75, height=2.25, standAlone=F)
par(mfrow=c(1,2), mar=c(2.5,2.5,.25,0.125), mgp=c(1.25, 0.125, 0),
    las=1, tck=0.01)
            #layout(matrix(c(1,2), nrow=1), width=c(1,1.5))
boxplot(data, rep(NA, length(data)), rep(NA, length(data)), ylab = "Data")
usr <- par("usr")
x <- usr[1] + (usr[2] - usr[1]) * 1/3
at <- c(0.9, 1.6, 3.2, 3.8, 4.65, 6.2, 7.2)
arrows(rep(x * 1.15, 7), at, rep(x, 7), at, length=0.075)
mtext("Main Title",1,1,cex=.8)
text(rep(x * 1.2, 7), at, adj = 0, cex=0.7,
     labels = c("outside value", "lower adjacent value",
       "lower quartile", "median", "upper quartile",
       "upper adjacent value", "outside values"))

plot(((1:n)-0.5)/n, data, xlab="f-value", ylab="Data")
abline(h = h, lwd =2, col = gray(0.85))
text(rep(0,3), h[4:6], writing[4:6], adj=0, cex=0.75)
text(rep(1,3), h[1:3], writing[1:3], adj=1, cex=0.75)
points(((1:n)-0.5)/n, data)

比较分布的图形

## Figure \ref{fig:addVmlt} 3.7
### Q-Q plot for additive and multiplicative shifts
x.data <- rnorm(500)
y.data <- rnorm(500, 2)
#postscript(file=paste(plotDIR,"additive.eps", sep="/"),
#           height=4, width=3.5, horizontal=F)
#tikz(file=paste(plotDIRch3,"additive.tex", sep="/"),
#     height=4, width=3.5, standAlone=F)
layout(matrix(c(1,3,2,3), nrow=2), heights=c(1,2), respect=T)
par(mar=c(2.5,2.5,.5,0.5), mgp=c(1.25, 0.125, 0), las=1, tck=0.01)
hist(x.data, xlim=range(c(x.data,y.data)), xlab="x", ylab="", main="", prob=T)
curve(dnorm(x), add=T, from=-3, to=3)
hist(y.data, xlim=range(c(x.data,y.data)), xlab="y", ylab="", main="", prob=T)
curve(dnorm(x, mean=2), add=T, from=-1, to=5)
qqplot(x.data, y.data, xlim=range(c(x.data,y.data)),
       ylim=range(c(x.data,y.data)), xlab="x", ylab="y")
abline(0,1, col=gray(0.5))
abline(2,1, col=gray(0.5))

x.data2 <- exp(rnorm(100))
y.data2 <- exp(rnorm(100, 1))

#postscript(file=paste(plotDIR, "multiplicative.eps", sep="/"),
#           height=4, width=3.5, horizontal=F)
#tikz(file=paste(plotDIRch3, "multiplicative.tex", sep="/"),
#     height=4, width=3.5, standAlone=F)
layout(matrix(c(1,3,2,3), nrow=2), heights=c(1,2), respect=T)
par(mar=c(2.5,2.5,.5,0.5), mgp=c(1.25, 0.125, 0), las=1, tck=0.01)
hist(x.data2, xlim=range(c(x.data2,y.data2)),
     xlab="x", ylab="", main="", prob=T, nclass=10)
curve(dlnorm(x), add=T, from=exp(-3), to=exp(3))
hist(y.data2, xlim=range(c(x.data2,y.data2)),
     xlab="y", ylab="", main="", prob=T, nclass=10)
curve(dlnorm(x, mean=1), add=T, from=exp(-2), to=exp(4))
qqplot(log(x.data2), log(y.data2),  xlab="x", ylab="y")
abline(0,1, col=gray(0.5))

layout(rbind(c(1,2,0,4,5), c(3,3,0,6,6)), widths=c(1,1,lcm(0.5), 1,1),
       heights=c(1,2), respect=T)
par(mar=c(2.5,2.5,.5,0.5), mgp=c(1.25, 0.125, 0), las=1, tck=0.01)

hist(x.data, xlim=range(c(x.data,y.data)), xlab="x", ylab="", main="", prob=T)
curve(dnorm(x), add=T, from=-3, to=3)
hist(y.data, xlim=range(c(x.data,y.data)), xlab="y", ylab="", main="", prob=T)
curve(dnorm(x, mean=2), add=T, from=-1, to=5)
qqplot(x.data, y.data, xlim=range(c(x.data,y.data)),
       ylim=range(c(x.data,y.data)),xlab="x", ylab="y")
abline(0,1, col=gray(0.5))
abline(2,1, col=gray(0.5))

hist(x.data2, xlim=range(c(x.data2,y.data2)),
     xlab="x", ylab="", main="", prob=T, nclass=10)
curve(dlnorm(x), add=T, from=exp(-3), to=exp(3))
hist(y.data2, xlim=range(c(x.data2,y.data2)),
     xlab="y", ylab="", main="", prob=T, nclass=10)
curve(dlnorm(x, mean=1), add=T, from=exp(-2), to=exp(4))
qqplot(x.data2, y.data2, xlim=range(c(x.data2,y.data2)),
       ylim=range(c(x.data2,y.data2)), xlab="x", ylab="y")
abline(0,1, col=gray(0.5))

识别变量间的依存关系的图形

## Car.test.frame in package rpart
packages(rpart)
names(car.test.frame)
#postscript(file=paste(plotDIR, "carsTest.eps", sep="/"),
#           width=4.75, height=2, horizontal=F)
#tikz(file=paste(plotDIRch3, "carsTest.tex", sep="/"),
 #          width=4.75, height=2, standAlone=F)
par(mfrow=c(1,2), mar=c(2.5,2.5,.5,0.5),
    mgp=c(1.25, 0.125, 0), las=1, tck=0.01)
plot(Mileage ~ Weight, data=car.test.frame,
     xlab="Weight (lb)", ylab="Mileage (mpg)")
abline(lsfit(car.test.frame$Weight, car.test.frame$Mileage))

plot(1/Mileage ~ Weight, data=car.test.frame,
     xlab="Weight (lb)", ylab="Mileage (mpg)")
abline(lsfit(car.test.frame$Weight, car.test.frame$Mileage),
       col=gray(0.5), lty=2)
cars.lo <- loess(I(1/Mileage) ~ Weight, data = car.test.frame)
oo <- order(cars.lo$x)
lines(cars.lo$x[oo], cars.lo$fitted[oo])

散点图矩阵

## Car.test.frame in package rpart

panel.hist = function(x, col.hist="grey", ...)
  {
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(usr[1:2], 0, 1.5) )
    h <- hist(x, plot = FALSE)
    breaks <- h$breaks; nB <- length(breaks)
    y <- h$counts; y <- y/max(y)
    rect(breaks[-nB], 0, breaks[-1],y, col=col.hist)
  }

#postscript(file=paste(plotDIR, "airquality.eps", sep="/"),
#           height=4.5, width=4.5, horizontal=F)
#tikz(file=paste(plotDIRch3, "airquality.tex", sep="/"),
#           height=4.5, width=4.5, standAlone=F)
pairs(Ozone~Solar.R+Wind+Temp, data=airquality,
      panel=function(x, y, ...){
        points(x, y,  ...)
        panel.smooth(x, y, col.smooth=1, ...)
      }, col=gray(0.5), cex=0.5, diag.panel=panel.hist)

### the iris data
Snames <- dimnames(iris3)[[3]]
iris.df <- rbind(iris3[,,1],iris3[,,2],iris3[,,3])
iris.df <- as.data.frame(iris.df)
iris.df$Species <- factor(rep(Snames,rep(50,3)))
#pairs(iris.df[1:4],main = "Anderson?s Iris Data",
#pch = c("<", "+", ">")[unclass(iris$Species)],
#col = c(gray(0.25),gray(0.5),gray(0.25))[unclass(iris$Species)])

#postscript(file=paste(plotDIR, "irisPairs.eps", sep="/"),
#           height=4.5, width=4.5, horizontal=F)
#tikz(file=paste(plotDIRch3, "irisPairs.tex", sep="/"),
 #          height=4.5, width=4.5, standAlone=F)
pairs(iris.df[1:4],main = "The Iris Data",
      pch = c(2, 4, 6)[unclass(iris$Species)],
      col = c("#BFBFBF", "#808080", "#404040")[unclass(iris$Species)])

变量转换

nadb <- read.csv(paste(dataDIR, "nadb.csv", sep="/"), header=T)
par(mfrow=c(1,2), mar=c(2.5,2.5,.5,0.5), mgp=c(1.5, 0.5, 0))
plot(TPOut ~ PLI, data=nadb,
     xlab="P Loading", ylab="P Concentration", cex=0.5)
plot(TPOut ~ PLI, data=nadb,
     xlab="P Loading", ylab="P Concentration", log="x", axes=F, cex=0.5)
axis(1, at=c(0.01, 1, 100), label=c("0.01", "1", "100"))
axis(2)
box()

幂转换

powerT <- function(y, lambda1=c(-1,-1/2,-1/4), lambda2 = c(1/4, 1/2, 1), layout1=2){
  nt <- length(lambda1)+length(lambda2)+1
  transformed <- cbind(outer(y,lambda1,"^"),log(y),(outer(y,lambda2,"^")))
  y.power <- data.frame(transformed=c(transformed),
                        lambda = factor(rep(round(c(lambda1,0,lambda2), 2),
                          rep(length(y),nt))))
  ans <- qqmath(~transformed | lambda,
                data=y.power,
                prepanel = prepanel.qqmathline,
                panel = function(x, ...) {
                  panel.grid(h = 0)
                  panel.qqmath(x, col=gray(0.5))
                  panel.qqmathline(x, distribution = qnorm)
                }, aspect=1, scale = list(y = "free"),
                layout=c(layout1,ceiling(nt/layout1)),
                xlab = "Unit Normal Quantile",
                ylab = "y")
  ans
}
trellis.par.set(theme = canonical.theme("postscript", col=FALSE))
powerT(y=nadb$PLI)

双变量散点图

pmdata<-read.table(paste(dataDIR, "PM-RAW-DATA.txt", sep="/"), header=T)
pmdata$log.value<-log(pmdata$value)
xyplot(log.value~AvgTemp, panel=function(x,y,...){
  panel.grid()
  panel.xyplot(x,y, col=grey(0.65), cex=0.5, ...)
  panel.loess(x,y,span=1, degree=1,col=1,...)
}, scales=list(x=list(cex=0.75), y=list(cex=0.75)),
       par.settings=trellis.par.temp,
       data=pmdata, xlab="Average Daily Temperature (F)", ylab="Log PM2.5")

## traditional coplot
coplot(sqrt(Ozone) ~ Solar.R|Wind*Temp , data=airquality,
       given.values=list(co.intervals(airquality$Wind, 3, 0.25),
         co.intervals(airquality$Temp, 3, 0.25)),
       panel=function(x, y, ...)
       panel.smooth(x, y, span=1, ...),
       ylab="Log Ozone",
       xlab=c("Solar Radiation", "Wind","Temperature"))

WindSpeed <- equal.count(airquality$Wind, 3, 0.25)
Temperature <- equal.count(airquality$Temp, 3, 0.25)
SolarR <- equal.count(airquality$Solar.R, 3, 0.25)

print(
xyplot(sqrt(Ozone) ~ SolarR|WindSpeed*Temperature,
       data=airquality,
       panel=function(x,y,...){
#            panel.loess(x, y, span=1, degree=1, ...)
            panel.grid()
            panel.lmline(x, y, col="grey",...)
            panel.xyplot(x, y, col=1, cex=0.5, ...)
       },
       ylab=list(label="Sqrt Ozone", cex=0.6),
       xlab=list(label="Solar Radiation", cex=0.6),
       scales=list(x=list(alternating=c(1, 2, 1))),
#       between=list(y=1),
       par.strip.text=list(cex=0.4), aspect=1,
       par.settings=list(axis.text=list(cex=0.4)))
)

print(
      xyplot(sqrt(Ozone) ~ Wind|SolarR*Temperature,
             data=airquality,
             panel=function(x,y,...){
#            panel.loess(x, y, span=1, degree=1, ...)
               panel.grid()
               panel.lmline(x, y, col="grey",...)
               panel.xyplot(x, y, col=1, cex=0.5, ...)
             },
             ylab=list(label="Sqrt Ozone", cex=0.6),
             xlab=list(label="Wind Speed", cex=0.6),
             scales=list(x=list(alternating=c(1, 2, 1))),
                                        #       between=list(y=1),
             par.strip.text=list(cex=0.4), aspect=1,
             par.settings=list(axis.text=list(cex=0.4)))
      )

print(
xyplot(sqrt(Ozone) ~ Temp|WindSpeed*SolarR,
       data=airquality,
       panel=function(x,y,...){
#            panel.loess(x, y, span=1, degree=1, ...)
            panel.grid()
            panel.lmline(x, y, col="grey",...)
            panel.xyplot(x, y, col=1, cex=0.5, ...)
       },
       ylab=list(label="Sqrt Ozone", cex=0.6),
       xlab=list(label="Temperature", cex=0.6),
       scales=list(x=list(alternating=c(1, 2, 1))),
#       between=list(y=1),
       par.strip.text=list(cex=0.4), aspect=1,
       par.settings=list(axis.text=list(cex=0.4)))
)

人面猴
序言：七十年代末，一起剥皮案震惊了整个滨河市，随后出现的几起案子，更是在滨河造成了极大的恐慌，老刑警刘岩，带你破解...
沈念sama阅读 202,607评论 5赞 476
死咒
序言：滨河连续发生了三起死亡事件，死亡现场离奇诡异，居然都是意外死亡，警方通过查阅死者的电脑和手机，发现死者居然都...
沈念sama阅读 85,047评论 2赞 379
救了他两次的神仙让他今天三更去死
文/潘晓璐我一进店门，熙熙楼的掌柜王于贵愁眉苦脸地迎上来，“玉大人，你说我怎么就摊上这事。” “怎么了？”我有些...
开封第一讲书人阅读 149,496评论 0赞 335
道士缉凶录：失踪的卖姜人
文/不坏的土叔我叫张陵，是天一观的道长。经常有香客问我，道长，这世上最难降的妖魔是什么？我笑而不...
开封第一讲书人阅读 54,405评论 1赞 273
港岛之恋（遗憾婚礼）
正文为了忘掉前任，我火速办了婚礼，结果婚礼上，老公的妹妹穿的比我还像新娘。我一直安慰自己，他们只是感情好，可当我...
茶点故事阅读 63,400评论 5赞 364
恶毒庶女顶嫁案：这布局不是一般人想出来的
文/花漫我一把揭开白布。她就那样静静地躺着，像睡着了一般。火红的嫁衣衬着肌肤如雪。梳的纹丝不乱的头发上，一...
开封第一讲书人阅读 48,479评论 1赞 281
城市分裂传说
那天，我揣着相机与录音，去河边找鬼。笑死，一个胖子当着我的面吹牛，可吹牛的内容都是我干的。我是一名探鬼主播，决...
沈念sama阅读 37,883评论 3赞 395
双鸳鸯连环套：你想象不到人心有多黑
文/苍兰香墨我猛地睁开眼，长吁一口气：“原来是场噩梦啊……” “哼！你这毒妇竟也来了？” 一声冷哼从身侧响起，我...
开封第一讲书人阅读 36,535评论 0赞 256
万荣杀人案实录
序言：老挝万荣一对情侣失踪，失踪者是张志新（化名）和其女友刘颖，没想到半个月后，有当地人在树林里发现了一具尸体，经...
沈念sama阅读 40,743评论 1赞 295
护林员之死
正文独居荒郊野岭守林人离奇死亡，尸身上长有42处带血的脓包…… 初始之章·张勋以下内容为张勋视角年9月15日...
茶点故事阅读 35,544评论 2赞 319
白月光启示录
正文我和宋清朗相恋三年，在试婚纱的时候发现自己被绿了。大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
茶点故事阅读 37,612评论 1赞 329
活死人
序言：一个原本活蹦乱跳的男人离奇死亡，死状恐怖，灵堂内的尸体忽然破棺而出，到底是诈尸还是另有隐情，我是刑警宁泽，带...
沈念sama阅读 33,309评论 4赞 318
日本核电站爆炸内幕
正文年R本政府宣布，位于F岛的核电站，受9级特大地震影响，放射性物质发生泄漏。R本人自食恶果不足惜，却给世界环境...
茶点故事阅读 38,881评论 3赞 306
男人毒药：我在死后第九天来索命
文/蒙蒙一、第九天我趴在偏房一处隐蔽的房顶上张望。院中可真热闹，春花似锦、人声如沸。这庄子的主人今日做“春日...
开封第一讲书人阅读 29,891评论 0赞 19
一桩弑父案，背后竟有这般阴谋
文/苍兰香墨我抬头看了看天上的太阳。三九已至，却和暖如春，着一层夹袄步出监牢的瞬间，已是汗流浃背。一阵脚步声响...
开封第一讲书人阅读 31,136评论 1赞 259
情欲美人皮
我被黑心中介骗来泰国打工，没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留，地道东北人。一个月前我还...
沈念sama阅读 42,783评论 2赞 349
代替公主和亲
正文我出身青楼，却偏偏与公主长得像，于是被迫代替她去往敌国和亲。传闻我的和亲对象是个残疾皇子，可洞房花烛夜当晚...
茶点故事阅读 42,316评论 2赞 342

环境与生态统计||探索性数据分析

展示分布图形

分位数图

Tukey 的箱线图

比较分布的图形

识别变量间的依存关系的图形

散点图矩阵

变量转换

推荐阅读更多精彩内容