参考书籍:《R语言医学数据分析实战》
数据结构
向量
# Create vectors of the three basic types
x1 <- c(1, 2, 3, 4, 5)                # numeric
x2 <- c("one", "two", "three")        # character
x3 <- c(TRUE, FALSE, TRUE, TRUE)      # logical
# Create vectors that follow a pattern
x4 <- seq_len(5)
x5 <- seq(2, 10, by = 2)
x6 <- rep("a", 4)
x7 <- paste("a", 1:5, sep = " ")      # paste0() joins without a separator
# Drop the elements at the given positions (negative index = exclude)
x1[-c(1, 2)]
# Arithmetic in R is vectorised: the sum is computed element by element
x1 + x5
# Common summary functions
length(x1)
mean(x1)
var(x1)
因子
# Convert a numeric code vector into a factor
sex <- c(1, 2, 1, 1, 2, 1, 2)
sex.f <- factor(sex, levels = c(1, 2), labels = c("Male", "Female"))
# Inspect the factor's levels
levels(sex.f)
# [1] "Male"   "Female"
# Printing sex.f itself would show the values followed by the levels:
# [1] Male   Female Male   Male   Female Male   Female
# Levels: Male Female
# In statistical models R treats the first level as the reference group,
# so changing the order of the levels changes the reference group.
# Option A: relevel(). It is applied while "Male" is still the first level,
# so the call really does move "Female" to the front.
sex.f1 <- relevel(sex.f, ref = "Female")
# Option B: rebuild the factor with levels and labels reordered.
# Note: both vectors must be reordered together, otherwise codes and
# labels no longer match.
sex.f <- factor(sex, levels = c(2, 1), labels = c("Female", "Male"))
# Ordered factor: pass ordered = TRUE
status <- c(1, 2, 2, 3, 1, 2, 2)
status.f <- factor(status, levels = c(1, 2, 3),
                   labels = c("Poor", "Improved", "Excellent"),
                   ordered = TRUE)
status.f
# [1] Poor      Improved  Improved  Excellent Poor      Improved  Improved
# Levels: Poor < Improved < Excellent
矩阵
# Create a matrix (values are filled column by column by default)
M <- matrix(1:6, nrow = 2)
M
# Fill the values row by row instead
M <- matrix(1:6, nrow = 2, byrow = TRUE)
M
# Matrix multiplication: a 3 x 2 matrix times a 2 x 3 matrix
mat1 <- matrix(1:6, nrow = 3)
mat1
mat2 <- matrix(5:10, nrow = 2)
mat2
dim(mat1) # dimensions of the matrix (rows, columns)
mat1 %*% mat2
# Transpose
t(mat1)
# Determinant and inverse
mat3 <- matrix(1:4, nrow = 2)
det(mat3)
solve(mat3)
# Row-wise sums and means
# (colSums() / colMeans() are the column-wise counterparts)
rowSums(mat1)
rowMeans(mat1)
# Access elements: rows 1-2 of columns 1-2
mat1[1:2, 1:2]
数组
# Create a 3 x 4 x 2 array from the integers 1 to 24
A <- array(1:24, dim = c(3, 4, 2))
A
# Create an array of the same shape with a name on every dimension
dim1 <- paste0("A", 1:3)
dim2 <- paste0("B", 1:4)
dim3 <- paste0("C", 1:2)
array(1:24, dim = c(3, 4, 2), dimnames = list(dim1, dim2, dim3))
列表
# Create a named list whose elements differ in type and length
list1 <- list(
  a = 1,
  b = seq_len(5),
  c = c("red", "blue", "green")
)
list1
# Extract a single element by name
list1[["a"]]
数据框
# One vector per patient-level variable
ID <- seq_len(5)
sex <- c("male", "female", "male", "female", "male")
age <- c(25, 34, 38, 28, 52)
pain <- c(1, 3, 2, 2, 3)
# Recode the numeric pain score as a labelled factor
pain.f <- factor(
  pain,
  levels = seq_len(3),
  labels = c("mild", "medium", "severe")
)
# Bind the vectors column-wise into a data frame
patients <- data.frame(ID = ID, sex = sex, age = age, pain.f = pain.f)
patients
# Extract one column by name
patients[["ID"]]
数据类型的判断与转换函数
小总结
1.列表和数据框都用$引用对象或变量
2.在R中,数组和矩阵差不多,只不过矩阵是2维的,而数组的维数通常大于2
3.创建数据框的时候经常用data.frame把几个向量“拼起来”
数据获取
小练习
# 2-2
# Generate the index numbers first, then map them to letters
# (`LETTERS` holds the upper-case letters, `letters` the lower-case ones).
x <- letters[1:10]
x
#  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j"
# seq_len(10) is the idiomatic way to get 1..10; avoid seq(1:10), which
# relies on seq() treating a vector argument like seq_along().
y <- seq_len(10)
y
#  [1]  1  2  3  4  5  6  7  8  9 10
z <- rep(1, 10)
z
#  [1] 1 1 1 1 1 1 1 1 1 1
# Combine the three vectors column-wise into a data frame
data <- data.frame(x, y, z)
data
#    x  y z
# 1  a  1 1
# 2  b  2 1
# 3  c  3 1
# 4  d  4 1
# 5  e  5 1
# 6  f  6 1
# 7  g  7 1
# 8  h  8 1
# 9  i  9 1
# 10 j 10 1
# 2-3
# Attach the survival package, then inspect `lung`
# (presumably the data set shipped with survival; it is not defined here).
library("survival")
# Compact overview of the object's structure
utils::str(lung)
# First rows of the data
utils::head(lung)
# 2-4
# Draw 1000 random values from a normal distribution with mean 168 and
# standard deviation 10, then plot their histogram.
data <- rnorm(n = 1000, mean = 168, sd = 10)
hist(data)
# 2-5
# Load the built-in iris data set and inspect its structure
data("iris")
str(iris)
# Write the data to a CSV file and read it back.
# write.csv() also writes the row names by default, so the file read back
# has an extra leading column (named "X" by read.csv()); pass
# row.names = FALSE to write.csv() if that column is not wanted.
write.csv(iris, "iris.csv")
data.csv <- read.csv("iris.csv", header = TRUE)
head(data.csv)
str(data.csv)