1.str_dup 复制字符串
函数定义:str_dup(string, times)
参数列表:
string:需要重复处理的字符串
times:指定重复的次数
# 1.str_dup
> fruit<-c('apple','pear','banana')
> str_dup(fruit,2)
[1] "appleapple" "pearpear" "bananabanana"
> str_dup(fruit,2:4)
[1] "appleapple" "pearpearpear"
[3] "bananabananabananabanana"
> #下面是对循环补齐的理解
> str_dup(fruit,2:5)
[1] "appleapple" "pearpearpear"
[3] "bananabananabananabanana" "appleappleappleappleapple"
Warning message:
In stri_dup(string, times) :
longer object length is not a multiple of shorter object length
> #结合刚学的str_c
> str_c('Fresh ',str_dup(fruit,0:3))
[1] "Fresh " "Fresh pear"
[3] "Fresh bananabanana" "Fresh appleappleapple"
Warning message:
In stri_dup(string, times) :
longer object length is not a multiple of shorter object length
2.str_pad 补充字符串的长度
函数定义:str_pad(string, width, side = c("left", "right", "both"), pad = " ")
参数列表:
string: 字符串,字符串向量。
width: 字符串填充后的长度
side: 填充方向,both两边都填充,left左边填充,right右边填充
pad: 用于填充的字符,不加pad参数时默认是空格
# 2.str_pad
> rbind(str_pad("hadley", 10, side = "left"),
+ str_pad("hadley", 10, side = "right"),
+ str_pad("hadley", 10, side = "both"))
     [,1]
[1,] "    hadley"
[2,] "hadley    "
[3,] "  hadley  "
> #不加side时默认是从左边left增加空格
> str_pad(c("A", "abc", "abcdef"), 8)
[1] "       A" "     abc" "  abcdef"
> str_pad("A", c(3, 6, 10))
[1] "  A"        "     A"     "         A"
> str_pad("A", 10, pad = c("!", "*", " "),side = 'right')
[1] "A!!!!!!!!!" "A*********" "A         "
3.str_trim 去掉字符串的空格和TAB(\t)
函数定义:str_trim(string, side = c("both", "left", "right"))
参数列表:
string: 字符串,字符串向量。
side: 过滤方式,both两边都过滤,left左边过滤,right右边过滤
去掉字符串首尾的空白字符(空格、TAB(\t)等),字符串中间的空格不受影响
# 3.str_trim
> string<- ' I love biotree '
> str_trim(string)
[1] "I love biotree"
> str_trim(string,side = 'left')
[1] "I love biotree "
> str_trim(string,side = 'right')
[1] " I love biotree"
> str_trim(string,side = 'both')
[1] "I love biotree"
4.str_count 字符串计数
函数定义:str_count(string, pattern = "")
参数列表:
- string: 字符串,字符串向量。
- pattern: 匹配的字符。
# 4.str_count
> fruit <- c("apple", "banana", "pear", "pineapple")
> str_count(fruit, "a")
[1] 1 3 1 1
> str_count(fruit, c("a", "b", "p", "p"))
[1] 1 1 1 3
> str_count(c("a.", "...", ".a.a"), ".")
[1] 2 3 4
> str_count(c("a.", "...", ".a.a"), fixed("."))
[1] 1 3 2
5.str_length 字符串长度计数
函数定义:str_length(string)
参数列表:
string: 字符串,字符串向量。
str_length(),字符长度函数,该函数类似于nchar()函数,但str_length()对缺失值始终返回NA,而nchar(NA)返回2(逻辑型NA被当作字符串"NA"处理);对字符型缺失值NA_character_,新版R(3.3.0及以上)的nchar()默认也返回NA
# 5.str_length
> str_length(letters)
[1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
> str_length(c("i", "like", "programming", NA))
[1] 1 4 11 NA
6.str_sort 字符串值排序
str_sort(x, decreasing = FALSE, na_last = TRUE, locale = "", ...)
str_order(x, decreasing = FALSE, na_last = TRUE, locale = "", ...)
str_order和str_sort的区别在于前者返回排序后的索引(下标),后者返回排序后的实际值
参数列表:
x: 字符串,字符串向量。
decreasing: 排序方向。
na_last:NA值的存放位置,一共3个值,TRUE放到最后,FALSE放到最前,NA过滤处理
locale:按哪种语言习惯排序
# 6.str_sort
> str_sort(letters)
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t"
[21] "u" "v" "w" "x" "y" "z"
> str_sort(c('wo','love','biotree','forever'),locale = "en")
[1] "biotree" "forever" "love" "wo"
> str_sort(c('wo','love','biotree','forever'),locale = "en",decreasing = T)
[1] "wo" "love" "forever" "biotree"
> str_sort(c('我','爱','生','信','技','能','树'),locale = "zh")
[1] "爱" "技" "能" "生" "树" "我" "信"
> str_sort(c('我','爱','生','信','技','能','树'),locale = "zh",decreasing = T)
[1] "信" "我" "树" "生" "能" "技" "爱"
> #str_order
> str_order(c('wo','love','biotree','forever'),locale = "en")
[1] 3 4 2 1
> str_order(c('wo','love','biotree','forever'),locale = "en",decreasing = T)
[1] 1 2 4 3
7.str_c 字符串连接
==相当于paste/paste0==
函数定义:
str_c(..., sep = "", collapse = NULL)
str_join(..., sep = "", collapse = NULL) #str_join是str_c的旧别名,已弃用,建议统一使用str_c
参数列表:
…: 多参数的输入
sep: 多个参数(向量)按元素对应拼接时,插入在各参数之间的分隔符。
collapse: 若不为NULL,则把拼接得到的结果向量的所有元素再合并成一个字符串,collapse即为各元素之间的分隔符。
# 7.str_c
#str_c与paste0类似
> str_c(letters)
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q"
[18] "r" "s" "t" "u" "v" "w" "x" "y" "z"
> str_c('a','b')
[1] "ab"
> str_c('a','b',sep = '-')
[1] "a-b"
> str_c('a','b',collapse = "-")
[1] "ab"
> str_c('a','b','c')
[1] "abc"
> str_c('a','b','c',sep = "-")
[1] "a-b-c"
> str_c('a','b','c',collapse = "-")
[1] "abc"
> str_c(c('a','a1'),c('b','b1'),sep='-')
[1] "a-b" "a1-b1"
> str_c(c('a','a1'),c('b','b1'),collapse='-')
[1] "ab-a1b1"
> #说明:sep是多个参数之间的分隔符,collapse是把结果向量的各元素合并成一个字符串时使用的分隔符。head(letters)只是一个参数(长度为6的向量),没有"参数之间"可言,所以sep不起作用,三个sep的结果都一样;而collapse会把这6个元素合并为一个字符串。str_c('a','b',collapse = "-")先按默认sep = ""拼成长度为1的"ab",没有多个元素可合并,所以结果仍是"ab"。
> str_c(head(letters), collapse = "")
[1] "abcdef"
> str_c(head(letters), collapse = " ")
[1] "a b c d e f"
> str_c(head(letters), collapse = '-')
[1] "a-b-c-d-e-f"
> str_c(head(letters), sep = "")
[1] "a" "b" "c" "d" "e" "f"
> str_c(head(letters), sep = " ")
[1] "a" "b" "c" "d" "e" "f"
> str_c(head(letters), sep = '-')
[1] "a" "b" "c" "d" "e" "f"
> #下面是对循环补齐的理解
> str_c("Letter: ", letters[1:10])#与paste0相同,默认分隔符是无
[1] "Letter: a" "Letter: b" "Letter: c" "Letter: d" "Letter: e"
[6] "Letter: f" "Letter: g" "Letter: h" "Letter: i" "Letter: j"
> str_c("Letter", letters[1:10], sep = ": ")
[1] "Letter: a" "Letter: b" "Letter: c" "Letter: d" "Letter: e"
[6] "Letter: f" "Letter: g" "Letter: h" "Letter: i" "Letter: j"
> #下面是三个字符串拼接,如果什么参数都没有,那么就是直接无缝连接,因为str_c同paste0相同,同时注意理解循环补齐
> str_c(letters[1:5], " is for", "...")
[1] "a is for..." "b is for..." "c is for..." "d is for..." "e is for..."
> #str_c和paste的不同之处之一
> str_c('a','b') #str_c把多个字符串拼接为一个大的字符串,默认无间隙。
[1] "ab"
> paste('a','b') #paste默认sep是' '
[1] "a b"
> #str_c和paste的不同之处之二
> #str_c要连接的向量如果含NA,则对应位置的连接结果为NA(NA会传递)
> #paste要连接的向量即使含有NA,也会把NA当作字符串"NA"参与连接
> str_c(c("a", NA, "b"), "-d")
[1] "a-d" NA "b-d"
> paste(c("a", NA, "b"), "-d")
[1] "a -d" "NA -d" "b -d"
> #加了str_replace_na函数,str_c也可以连接含有NA的向量
> str_c(str_replace_na(c("a", NA, "b")), "-d") #即使空,也可连接
[1] "a-d" "NA-d" "b-d"
> str_c('a','b','c',collapse = "-")
[1] "abc"
8.str_sub 截取字符串
==相当于substr/substring==
函数定义:str_sub(string, start = 1L, end = -1L)
参数列表:
- string: 字符串,字符串向量。
- start : 开始位置
- end : 结束位置
str_sub(string, start = 1L, end = -1L) 提取子字符串
str_sub(string, start = 1L, end = -1L) <- value 替换子字符串
# 8.str_sub
> #str_sub与substr()类似
> #例一 截取字符串
> jns <- 'sheng xin ji neng shu'
> str_sub(jns,1,5)
[1] "sheng"
> str_sub(jns, 6) #默认从坐标位置6开始截取
[1] " xin ji neng shu"
> str_sub(jns, end=6)
[1] "sheng "
> str_sub(jns, -3) # 通过负坐标截取字符串
[1] "shu"
> str_sub(jns, end = -3)
[1] "sheng xin ji neng s"
> str_sub(jns,c(1,4),c(2,6))
[1] "sh" "ng "
> str_sub(jns,c(1,4),c(2,6,8))
[1] "sh" "ng " "sheng xi"
Warning message:
In stri_sub(string, from = start, to = end) :
longer object length is not a multiple of shorter object length
> str_sub(jns,1,1)<-'S'
> jns
[1] "Sheng xin ji neng shu"
> #例二 截取字符串后更改
> x <- "AAABBBCCC" #对截取的字符串进行赋值。
> str_sub(x, 1, 1) <- 1; x ## 在字符串的1的位置赋值为1
[1] "1AABBBCCC"
> str_sub(x, 2, -2) <- "2345"; x ## 在字符串从2到-2的位置赋值为2345
[1] "12345C"
9.str_replace 字符串替换
==str_replace相当于sub(只替换首个匹配),str_replace_all相当于gsub(替换全部匹配)==
函数定义:str_replace(string, pattern, replacement)
参数列表:
string: 字符串,字符串向量。
pattern: 匹配字符。
replacement: 用于替换的字符。
# 9.str_replace
> #str_replace,字符串替换
> fruits<-c('one apple','two pears','three bananas')
> str_replace(fruits,'[aeiou]','-')
[1] "-ne apple" "tw- pears" "thr-e bananas"
> str_replace_all(fruits, "[aeiou]", "-")
[1] "-n- -ppl-" "tw- p--rs" "thr-- b-n-n-s"
> str_replace_all(fruits, "[aeiou]", toupper)
[1] "OnE ApplE" "twO pEArs" "thrEE bAnAnAs"
> str_replace_all(fruits, "b", NA_character_)
[1] "one apple" "two pears" NA
> str_replace(fruits, "([aeiou])", "")
[1] "ne apple" "tw pears" "thre bananas"
> str_replace(fruits, "[aeiou]", c("1", "2", "3"))
[1] "1ne apple" "tw2 pears" "thr3e bananas"
> str_replace(fruits, c("a", "e", "i"), "-")
[1] "one -pple" "two p-ars" "three bananas"
> # 管道符应用
> fruits %>%
+ str_c(collapse = "---") %>%
+ str_replace_all(c("one" = "1", "two" = "2", "three" = "3"))
[1] "1 apple---2 pears---3 bananas"
10.str_split 字符串分割
==相当于strsplit==
函数定义:
str_split(string, pattern, n = Inf)
str_split_fixed(string, pattern, n)
参数列表:
string: 字符串,字符串向量。
pattern: 匹配的字符。
n: 返回的最大片段数;达到n个片段后,剩余部分作为最后一段不再分割
对字符串进行分割。
# 10.str_split
> fruits <- c("apples and oranges and pears and bananas",
+ "pineapples and mangos and guavas")
> str_split(fruits, " and ")
[[1]]
[1] "apples" "oranges" "pears" "bananas"
[[2]]
[1] "pineapples" "mangos" "guavas"
> str_split(fruits, " and ", simplify = TRUE)
[,1] [,2] [,3] [,4]
[1,] "apples" "oranges" "pears" "bananas"
[2,] "pineapples" "mangos" "guavas" ""
> str_split(fruits, " and ", n = 3)
[[1]]
[1] "apples" "oranges" "pears and bananas"
[[2]]
[1] "pineapples" "mangos" "guavas"
> str_split(fruits, " and ", n = 2)
[[1]]
[1] "apples" "oranges and pears and bananas"
[[2]]
[1] "pineapples" "mangos and guavas"
> # 用str_split_fixed函数返回一个matrix
> str_split_fixed(fruits, " and ", 3)
[,1] [,2] [,3]
[1,] "apples" "oranges" "pears and bananas"
[2,] "pineapples" "mangos" "guavas"
> str_split_fixed(fruits, " and ", 4)
[,1] [,2] [,3] [,4]
[1,] "apples" "oranges" "pears" "bananas"
[2,] "pineapples" "mangos" "guavas" ""
11.str_subset 返回匹配的字符串
==str_subset相当于grep(value = TRUE),str_which相当于grep==
函数定义:
str_subset(string, pattern)
参数列表:
string: 字符串,字符串向量。
pattern: 匹配的字符。
# 11.str_subset 返回匹配的字符串
> #str_subset
> fruit <- c("apple", "banana", "pear", "pinapple")
> str_subset(fruit, "a")
[1] "apple" "banana" "pear" "pinapple"
> str_which(fruit, "a")
[1] 1 2 3 4
> str_subset(fruit, "^a")
[1] "apple"
> str_subset(fruit, "a$")
[1] "banana"
> str_subset(fruit, "[aeiou]")
[1] "apple" "banana" "pear" "pinapple"
> str_subset(fruit, "^p", negate = TRUE)
[1] "apple" "banana"
> # Missings never match
> str_subset(c("a", NA, "b"), ".")
[1] "a" "b"
> str_which(c("a", NA, "b"), ".")
[1] 1 3
12.str_detect 检查匹配字符串的字符
==相当于grepl==
函数定义:str_detect(string, pattern)
参数列表:
string: 字符串,字符串向量。
pattern: 匹配字符。
# 12.str_detect 检查匹配字符串的字符
> fruit <- c("apple", "banana", "pear", "pinapple")
> str_detect(fruit, "a")
[1] TRUE TRUE TRUE TRUE
> str_detect(fruit, "b")
[1] FALSE TRUE FALSE FALSE
> str_detect(fruit, "^a")
[1] TRUE FALSE FALSE FALSE
> str_detect(fruit, "a$")
[1] FALSE TRUE FALSE FALSE
> str_detect("aecfg", letters)
[1] TRUE FALSE TRUE FALSE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
[14] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
> str_detect(fruit, "^p", negate = TRUE)
[1] TRUE TRUE FALSE FALSE
13.str_extract 从字符串中提取匹配字符
函数定义:
str_extract(string, pattern)
str_extract_all(string, pattern, simplify = FALSE)
参数列表:
string: 字符串,字符串向量。
pattern: 匹配字符。
simplify: 返回值的形式,TRUE返回字符矩阵(matrix),FALSE(默认)返回由字符向量组成的列表(list)
# 13.str_extract 从字符串中提取匹配字符
> shopping_list <- c("apples 4x4", "bag of flour", "bag of sugar", "milk x2")
> str_extract(shopping_list, "\\d") # 提取数字 #提取匹配模式的第一个字符串
[1] "4" NA NA "2"
> str_extract(shopping_list, "[a-z]+") #提取字母
[1] "apples" "bag" "bag" "milk"
> #str_extract
> shopping_list <- c("apples 4x4", "bag of flour", "bag of sugar", "milk x2")
> str_extract(shopping_list, "\\d") # 提取数字 #提取匹配模式的第一个字符串
[1] "4" NA NA "2"
> str_extract(shopping_list, "[a-z]+") #提取字母
[1] "apples" "bag" "bag" "milk"
> str_extract_all(shopping_list, "[a-z]+") # 提取所有匹配模式的字母,结果返回一个列表
[[1]]
[1] "apples" "x"
[[2]]
[1] "bag" "of" "flour"
[[3]]
[1] "bag" "of" "sugar"
[[4]]
[1] "milk" "x"
> str_extract_all(shopping_list, "[a-z]+",simplify = T)
[,1] [,2] [,3]
[1,] "apples" "x" ""
[2,] "bag" "of" "flour"
[3,] "bag" "of" "sugar"
[4,] "milk" "x" ""
> str_extract_all(shopping_list, "\\d") # 提取所有匹配模式的数字
[[1]]
[1] "4" "4"
[[2]]
character(0)
[[3]]
character(0)
[[4]]
[1] "2"
> # 提取所有匹配模式的字符串,结果返回一个矩阵,通过simplify = TRUE设置
> str_extract_all(shopping_list, "\\b[a-z]+\\b", simplify = TRUE)
[,1] [,2] [,3]
[1,] "apples" "" ""
[2,] "bag" "of" "flour"
[3,] "bag" "of" "sugar"
[4,] "milk" "" ""
> str_extract_all(shopping_list, "\\d", simplify = TRUE)
[,1] [,2]
[1,] "4" "4"
[2,] "" ""
[3,] "" ""
[4,] "2" ""
14.str_locate 找到匹配的字符串的位置
str_locate()和str_locate_all()的区别在于前者只匹配首次,而后者可以匹配所有可能的值
# 14.str_locate 找到匹配的字符串的位置
> x <- c("abcdef", "ghifjk")
> str_locate(x, "cde")
start end
[1,] 3 5
[2,] NA NA
> str_locate_all(c("abcdefabc", "ghifjkabc"), "abc")
[[1]]
start end
[1,] 1 3
[2,] 7 9
[[2]]
start end
[1,] 7 9
15.str_to_upper/str_to_lower字符串大小写转换
# 15.str_to_upper/str_to_lower字符串大小写转换
> text <- "We love biotree forever"
> str_to_upper(text)
[1] "WE LOVE BIOTREE FOREVER"
> str_to_lower(text)
[1] "we love biotree forever"
> str_to_title(text)
[1] "We Love Biotree Forever"
> str_to_sentence("we love biotree forever")
[1] "We love biotree forever"