# R包学习
# 几个重要包
# dplyr
# - mutate (增加列,格式参考:mutate(test, new = Sepal.Length * Sepal.Width))
# Setup ----
# Attach dplyr before any of its verbs are called.
library(dplyr)

# Small demo table: rows 1-2, 51-52 and 101-102 of the built-in iris data.
test <- iris[c(1:2, 51:52, 101:102), ]

# View() opens a data viewer window, so only call it in an interactive session.
if (interactive()) {
  View(test)
}
colnames(test)

# mutate(): add columns computed from existing ones ----
# The results are printed, not assigned, so `test` itself stays unchanged.
mutate(test, Sepal.volume = Sepal.Length * Sepal.Width)
mutate(test, Petal.volume = Petal.Length * Petal.Width)
# A new column must be given as a `name = expression` pair.
mutate(test, new = Sepal.Length * Sepal.Width)

# select(): keep columns by position or by name ----
select(test, c(1, 3))
select(test, Sepal.Length)
# filter(): return the rows that match the given conditions ----
# Usage: filter(.data = , condition_1, condition_2)
# Several conditions can be passed comma-separated (condition1, condition2)
# or combined with `&` (condition1 & condition2).
table(test[["Species"]])
iris %>% filter(Sepal.Length > 5, Sepal.Width < 3.5)
iris %>% filter(Sepal.Length > 5, Species == "setosa")
test %>% filter(Species == "setosa")
test %>% filter(Species == "versicolor")
test %>% filter(Species == "virginica")

# filter_all() / filter_if() / filter_at() ----
# The non-numeric Species column is dropped first so that the numeric
# comparisons below apply to every remaining column.
# NOTE(review): these scoped verbs are superseded in current dplyr; consider
# filter(if_all(...)) / filter(if_any(...)) if the installed version has them.
iris_data <- select(iris, -Species)

# Rows where every column is below 6.
filter_all(iris_data, all_vars(. < 6))

# Rows where at least one column is above 3.
filter_all(iris_data, any_vars(. > 3))

# Rows where any column whose name starts with "Sep" is above 3.
filter_at(iris_data, vars(starts_with("Sep")), any_vars(. > 3))

# Built-in mtcars data: rows where at least one column is above 150.
mtcars %>% filter_all(any_vars(. > 150))

# Rows where any column whose name starts with "d" is divisible by 2.
mtcars %>% filter_at(vars(starts_with("d")), any_vars((. %% 2) == 0))

# Combining conditions with `&`, and matching against a set with %in%.
test %>% filter(Species == "setosa" & Sepal.Length > 5)
test %>% filter(Species %in% c("setosa", "versicolor"))
# arrange(): sort the whole table by one or several columns ----
test %>% arrange(Sepal.Length)        # ascending order is the default
test %>% arrange(desc(Sepal.Length))  # desc() gives descending order
test %>% arrange(Sepal.Length, desc(Sepal.Width))

# summarise(): collapse the data to summary values ----
# Most useful together with group_by(): group by Species first, then compute
# the mean and standard deviation of Sepal.Length within each group.
test %>% group_by(Species)
summarise(
  group_by(test, Species),
  mean(Sepal.Length),
  sd(Sepal.Length)
)

# Two handy dplyr techniques ----
# 1. The pipe operator %>% (keyboard shortcut: Cmd/Ctrl + Shift + M).
#    Same computation as the nested call above, written as a pipeline.
test %>%
  group_by(Species) %>%
  summarise(mean(Sepal.Length), sd(Sepal.Length))

# 2. count(): tally the unique values of a column.
test %>% count(Species)
# Relational data with dplyr: joins ----
# NOTE(review): this changes a session-wide option. Both data.frame() calls
# below already pass stringsAsFactors explicitly, so it looks redundant here;
# it is kept in case code outside this section relies on it.
options(stringsAsFactors = FALSE)

# Two small tables that share the key column `x`.
test1 <- data.frame(
  x = c("b", "e", "f", "x"),
  z = c("A", "B", "C", "D"),
  stringsAsFactors = FALSE
)
test1

test2 <- data.frame(
  x = c("a", "b", "c", "d", "e", "f"),
  y = c(1, 2, 3, 4, 5, 6),
  stringsAsFactors = FALSE
)
test2

# 1. inner_join(): intersection, only keys present in both tables.
inner_join(test1, test2, by = "x")

# 2. left_join(): keep every row of the first table.
left_join(test1, test2, by = "x")

# 3. full_join(): keep the keys of both tables.
#    The result is stored in F1 and not printed.
F1 <- full_join(test1, test2, by = "x")
# Swapping the inputs gives a result that is not the same as F1:
# F2 <- full_join(test2, test1, by = "x")

# 4. semi_join(): all rows of x that have a match in y.
semi_join(x = test1, y = test2, by = "x")

# 5. anti_join(): all rows of x that have no match in y.
anti_join(x = test2, y = test1, by = "x")
# Simple binding of tables ----
# test1 and test2 have the same columns (x, y); test3 has a single column z
# with as many rows as test1. These reuse (overwrite) the names test1/test2.
test1 <- data.frame(
  x = c(1, 2, 3, 4),
  y = c(10, 20, 30, 40)
)
test1

test2 <- data.frame(
  x = c(5, 6),
  y = c(50, 60)
)
test2

test3 <- data.frame(
  z = c(100, 200, 300, 400)
)
test3

# Stack test2 underneath test1.
test1 %>% bind_rows(test2)

# Put test3 next to test1, column-wise.
test1 %>% bind_cols(test3)