# Setup: install nycflights13 only when it is missing, then attach the
# packages before anything that uses them.
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}
library(nycflights13)
library(tidyverse)

# Help search across the package, and the help page of the data set.
??nycflights13
?flights

# Inspect the data: print it, open the viewer, check its class.
flights
View(flights)
class(flights)

# Flights that arrived at least two hours (120 minutes) late.
filter(flights, arr_delay >= 120)
# Variable types
# int: integer; dbl: double-precision floating point (real number);
# dttm: date-time; lgl: logical (only TRUE and FALSE); fctr: factor;
# date: date
# filter(): pick rows
# arrange(): sort rows
# select(): pick columns
# mutate(): create new variables from existing ones
# summarise(): summary statistics
# Filtering rows with filter()
# Flights that departed on January 1st (printed only, not stored).
flights %>% filter(month == 1, day == 1)

# Store the same subset, then print it by evaluating its name.
jan1 <- flights %>% filter(month == 1, day == 1)
jan1

# Wrapping the assignment in parentheses stores and prints in one step.
(dec25 <- flights %>% filter(month == 12, day == 25))
dec25
# Comparison operators
# >, >=, <, <=, !=, ==
# Flights that departed in November or December, written as a logical OR.
flights %>% filter(month == 11 | month == 12)

# Keep the subset and open it in the data viewer.
a <- flights %>% filter(month == 11 | month == 12)
View(a)

# The same selection expressed as set membership.
flights %>% filter(month %in% c(11, 12))
# A three-row tibble with one missing value.
df <- tibble(x = c(1, NA, 3))

# Condition on x only.
df %>% filter(x > 1)

# Same condition, but rows where x is missing are requested explicitly.
df %>% filter(is.na(x) | x > 1)
# Sort by year, then month, then day.
flights %>% arrange(year, month, day)

# Descending order of departure delay.
flights %>% arrange(desc(dep_delay))
# Missing values are sorted to the end, whether ascending or descending.
# A small tibble with one missing value, sorted both ways.
df <- tibble(x = c(5, 2, NA))
df %>% arrange(x)
df %>% arrange(desc(x))

# Sort on "is dep_time missing?" in descending order first, then on dep_time.
flights %>% arrange(desc(is.na(dep_time)), dep_time)
# select(): pick columns
# Pick individual columns by name.
flights %>% select(year, month, day)

# Pick a contiguous range of columns.
flights %>% select(year:day)

# Range plus negation: every column except year through day.
flights %>% select(-(year:day))
# Selection helpers must be called inside a selecting function such as
# select(); called on their own at top level they raise an error.
select(flights, starts_with("abc"))   # names that begin with "abc"
select(flights, ends_with("eyz"))     # names that end with "eyz"
select(flights, contains("ijk"))      # names that contain "ijk"
# The backslash is doubled: in an R string "\1" is the octal escape for
# character code 1, whereas the regex back-reference needs a literal "\1".
select(flights, matches("(.)\\1"))    # names matching a regular expression
select(flights, num_range("x", 1:3))  # matches x1, x2, x3
# Rename the column tailnum to tail_num.
flights %>% rename(tail_num = tailnum)

# Move time_hour and air_time to the front; everything() stands for the
# remaining columns.
flights %>% select(time_hour, air_time, everything())
# Exercises
# Column names held in a character vector.
vars <- c(
  "year", "month", "day",
  "dep_delay", "arr_delay"
)

# one_of() selects the columns named in that vector.
flights %>% select(one_of(vars))
# `vars` is a character vector. Written this way, select() does not need each column name spelled out separately.
# contains() ignores case by default; ignore.case = FALSE makes the match
# case-sensitive.
flights %>% select(contains("YEAR", ignore.case = FALSE))
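# 5. mutate(): create new columns
# mutate() appends the new columns to the end of the data frame and returns a new data frame. transmute() keeps only the newly created columns. New columns are computed from existing ones (addition, subtraction, multiplication, division, and so on).
# Create gain and speed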
# Narrow table: the date columns, every column ending in "delay", plus
# distance and air_time.
flights_sml <- flights %>%
  select(year:day, ends_with("delay"), distance, air_time)
flights_sml

# Add two derived columns.
# NOTE(review): the * 60 presumably converts a per-minute rate to per-hour,
# i.e. air_time is in minutes -- confirm against ?flights.
flights_sml %>%
  mutate(
    gain = dep_delay - arr_delay,
    speed = distance / air_time * 60
  )
# A newly created column can be used right away to build another new column.
# gain and hours are created first and then reused for gain_per_hour within
# the same mutate() call.
flights_sml %>%
  mutate(
    gain = dep_delay - arr_delay,
    hours = air_time / 60,
    gain_per_hour = gain / hours
  )
# transmute() keeps only the new columns.
# Same three derived columns, computed from the full flights table.
flights %>%
  transmute(
    gain = dep_delay - arr_delay,
    hours = air_time / 60,
    gain_per_hour = gain / hours
  )
# summarise(): grouped summaries
# Mean departure delay over all flights; na.rm = TRUE leaves missing delays
# out of the mean.
summarise(flights, delay = mean(dep_delay, na.rm = TRUE))
# group_by() makes summarise() much more useful.
# Group the flights by calendar day ...
by_day <- flights %>% group_by(year, month, day)

# ... and compute the mean departure delay within each group.
by_day %>% summarise(delay = mean(dep_delay, na.rm = TRUE))
# The pipe
# Piping simplifies the code: the data frame name does not have to be written repeatedly.
# Level 1 -- create an intermediate object
# Two separate steps: the grouped table is stored in an intermediate
# object, which is then passed to summarise().
by_day <- group_by(flights, year, month, day)
# Mean departure delay per group; na.rm = TRUE leaves out missing delays.
summarise(by_day, delay = mean(dep_delay, na.rm = TRUE))
# Level 2 -- nested calls, no intermediate object
# group_by() is nested directly inside summarise(); read from the inside out.
summarise(
  group_by(flights, year, month, day),
  delay = mean(dep_delay, na.rm = TRUE)
)
# Level 3 -- the pipe links the two steps; the data frame name is still inside the parentheses
# The grouped result is piped into summarise(); flights is still passed as an
# ordinary argument to group_by().
group_by(flights, year, month, day) %>%
  summarise(delay = mean(dep_delay, na.rm = TRUE))
# Level 4 -- the data frame itself starts the pipeline
# Fully piped form: one step per line, starting from the data frame.
flights %>%
  group_by(year, month, day) %>%
  summarise(delay = mean(dep_delay, na.rm = TRUE))
# (2) Missing values
# If the data contain NA, the computed result is NA as well, so the code needs na.rm = TRUE to remove missing values before computing.
# (Counts)
# Counts
# Example: find the plane with the longest average delay
# (grouping by tailnum gives the full-year data for each individual plane)
# Flights grouped by carrier.
carriers <- flights %>% group_by(carrier)

# Method 1: summarise() with n() on the grouped table.
s1 <- carriers %>% summarise(n())
s1

# Method 2: count() on the ungrouped table.
s2 <- flights %>% count(carrier)
s2

# Methods 3 and 4: attach the group size to every row with mutate(), then
# reduce to distinct rows.
mu <- carriers %>% mutate(n = n())
s3 <- mu %>% distinct(n)
s3
s4 <- mu %>% distinct(carrier, n)
s4
# All four methods give consistent counts; distinct() removes duplicate rows.