在R语言中使用Stringr进行字符串操作
今天来学习下R中字符串处理操作,主要是stringr包中的字符串处理函数的用法。
使用前需要先加载stringr包:可以用library(stringr)(推荐,包不存在时会直接报错)或require(stringr)加载;也可以不加载,直接用stringr::函数名的形式调用。这几种方式都行。
一、检测是否匹配
我们先定义一个字符向量(animal)和两个字符串(str1、str2),在此基础上演示各个函数的基本用法。
# Section 1: detecting matches.
# Lines starting with `#>` show the console output of the call above them.
library(stringr)

# Sample data reused by every later section.
animal <- c("cow", "dog", "sheep", "goat", "pig", "monkey", "cat", "cat")
str1 <- "I love cat, cat cat !"
str2 <- "lovelovelove"

# str_detect(): one logical per element, TRUE where the pattern is found.
str_detect(animal, "cow")
#> [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

str_detect(str1, "love")
#> [1] TRUE

# str_which(): indices of the ELEMENTS that contain the pattern.
str_which(animal, "dog")
#> [1] 2

str_which(animal, "cat")
#> [1] 7 8

# str2 is a length-one vector, so the only index that can come back is 1,
# no matter how many times "love" occurs inside that single string.
str_which(str2, "love")
#> [1] 1

# str_count(): number of matches inside each element.
str_count(animal, "cat")
#> [1] 0 0 0 0 0 0 1 1

str_count(str1, "cat")
#> [1] 3

# str_locate(): start/end position of the FIRST match in each element
# (NA where there is no match).
str_locate(animal, "cat")
#>      start end
#> [1,]    NA  NA
#> [2,]    NA  NA
#> [3,]    NA  NA
#> [4,]    NA  NA
#> [5,]    NA  NA
#> [6,]    NA  NA
#> [7,]     1   3
#> [8,]     1   3

str_locate(str1, "cat")
#>      start end
#> [1,]     8  10

# Even though "love" repeats, only the first occurrence is located.
str_locate(str2, "love")
#>      start end
#> [1,]     1   4
二、子串提取
# Section 2: extracting substrings.
# Uses str1 ("I love cat, cat cat !") and str2 ("lovelovelove") from section 1.
# Lines starting with `#>` show the console output of the call above them.

# str_sub(): extract by position; arguments are start and end (inclusive).
str_sub(str1, 1, 3)
#> [1] "I l"

# With only a start position, extraction runs to the end of the string.
str_sub(str1, 1)
#> [1] "I love cat, cat cat !"

str_sub(str1, 3)
#> [1] "love cat, cat cat !"

# Negative positions count backwards from the end of the string.
str_sub(str1, -5)
#> [1] "cat !"

str_sub(str1, -5, -1)
#> [1] "cat !"

# str_subset(): keep the whole elements that contain the pattern.
str_subset(str1, "a")
#> [1] "I love cat, cat cat !"

# No element matches, so an empty character vector is returned.
str_subset(str1, "x")
#> character(0)

# str_extract(): the first match in each element.
str_extract(str1, "cat")
#> [1] "cat"
str_extract(str1, "ca")
#> [1] "ca"

# str_extract_all(): every match, returned as a list (one entry per element).
str_extract_all(str1, "cat")
#> [[1]]
#> [1] "cat" "cat" "cat"

str_extract_all(str1, "[aoe]")
#> [[1]]
#> [1] "o" "e" "a" "a" "a"

# str_match(): the first match, returned as a character matrix.
str_match(str1, "cat")
#>      [,1]
#> [1,] "cat"

# str_match_all(): every match, as a list of matrices (one row per match).
str_match_all(str1, "cat")
#> [[1]]
#>      [,1]
#> [1,] "cat"
#> [2,] "cat"
#> [3,] "cat"

str_match_all(str2, "love")
#> [[1]]
#>      [,1]
#> [1,] "love"
#> [2,] "love"
#> [3,] "love"

str_match(str2, "love")
#>      [,1]
#> [1,] "love"

# Reading the result when the pattern has a capture group: column 1 is the
# full match, and each further column is one parenthesised group. Here the
# whole pattern is a single group "(I|cat)", so both columns are identical.
str_match_all(str1, "(I|cat)")
#> [[1]]
#>      [,1]  [,2]
#> [1,] "I"   "I"
#> [2,] "cat" "cat"
#> [3,] "cat" "cat"
#> [4,] "cat" "cat"
三、字符串长度处理
# Section 3: string length, truncation and trimming.
# Uses str2 ("lovelovelove") from section 1.
# Lines starting with `#>` show the console output of the call above them.

# str_length(): number of characters in each string.
str_length(str2)
#> [1] 12

# Spaces count as characters too.
str_length("good job !")
#> [1] 10

# str_trunc(): shorten a string to at most `width` characters. The removed
# part is replaced by `ellipsis` ("..." by default), and the ellipsis itself
# counts towards the width.
str_trunc(str2, 4)
#> [1] "l..."

# `ellipsis` sets the replacement marker.
str_trunc(str2, 4, ellipsis = "*")
#> [1] "lov*"

# `width` is the total length of the result, marker included.
str_trunc(str2, width = 8, ellipsis = "#")
#> [1] "lovelov#"

# `side` chooses where the text is cut: "right" (default), "left" or "center".
str_trunc(str2, width = 8, side = "left", ellipsis = "#")
#> [1] "#ovelove"

# str_trim(): strip leading/trailing whitespace (spaces, newlines, ...).
# Whitespace in the middle of the string is left untouched.
str_trim("sssss\n")
#> [1] "sssss"
str_trim(" sssss\n")
#> [1] "sssss"
四、字符串替换
# Section 4: replacing text and changing case.
# Uses str1, str2 and animal from section 1.
# Lines starting with `#>` show the console output of the call above them.

str1
#> [1] "I love cat, cat cat !"

# Extract a substring by position ...
str_sub(str1, 1, 6)
#> [1] "I love"

# ... or assign to the same range to replace it in place.
str_sub(str1, 1, 6) <- "she love"
str1
#> [1] "she love cat, cat cat !"

# The replacement form is vectorised: the first letter of every element is
# replaced. NOTE: this modifies `animal`, and the later sections rely on the
# modified values.
str_sub(animal, 1, 1) <- "F"
animal
#> [1] "Fow"    "Fog"    "Fheep"  "Foat"   "Fig"    "Fonkey" "Fat"
#> [8] "Fat"

# Restore str1 for the remaining examples.
str1 <- "I love cat, cat cat !"

# str_replace(): replace only the first match.
str_replace(str1, "cat", "dog")
#> [1] "I love dog, cat cat !"

# str_replace_all(): replace every match.
str_replace_all(str1, "cat", "dog")
#> [1] "I love dog, dog dog !"

# Case conversion: all lower case, all upper case, title case.
str_to_lower(str1)
#> [1] "i love cat, cat cat !"

str_to_upper(str1)
#> [1] "I LOVE CAT, CAT CAT !"

# Title case capitalises the first letter of each word.
str_to_title(str1)
#> [1] "I Love Cat, Cat Cat !"

# str2 contains no word boundaries, so it is treated as one word.
str_to_title(str2)
#> [1] "Lovelovelove"
五、字符串分割和连接
# Section 5: joining, duplicating, splitting and interpolating strings.
# Uses str1 and str2 from section 1 and `animal` as modified in section 4
# ("Fow", "Fog", ...). Lines starting with `#>` show console output.

# str_c(): join strings, with `sep` placed between the arguments.
str_c(str1, str2, sep = "+")
#> [1] "I love cat, cat cat !+lovelovelove"

# Vector arguments are joined element by element; str2 is recycled.
str_c(animal, str2, sep = "+")
#> [1] "Fow+lovelovelove"    "Fog+lovelovelove"    "Fheep+lovelovelove"
#> [4] "Foat+lovelovelove"   "Fig+lovelovelove"    "Fonkey+lovelovelove"
#> [7] "Fat+lovelovelove"    "Fat+lovelovelove"

# `collapse` joins the elements of one vector into a single string.
str_c(animal, sep = "", collapse = "+")
#> [1] "Fow+Fog+Fheep+Foat+Fig+Fonkey+Fat+Fat"

# str_dup(): repeat a string the given number of times.
str_dup(str1, 2)
#> [1] "I love cat, cat cat !I love cat, cat cat !"
str_dup(str2, 3)
#> [1] "lovelovelovelovelovelovelovelovelove"

# str_split_fixed(): split into exactly `n` pieces and return a matrix.
# An empty pattern splits between individual characters.
str_split_fixed(animal, "", n = 2)
#>      [,1] [,2]
#> [1,] "F"  "ow"
#> [2,] "F"  "og"
#> [3,] "F"  "heep"
#> [4,] "F"  "oat"
#> [5,] "F"  "ig"
#> [6,] "F"  "onkey"
#> [7,] "F"  "at"
#> [8,] "F"  "at"

str_split_fixed(str2, "", n = 4)
#>      [,1] [,2] [,3] [,4]
#> [1,] "l"  "o"  "v"  "elovelove"

# str_split(): same splitting rules, but returns a list; the third argument
# is `n`, the maximum number of pieces.
str_split(str2, "", 4)
#> [[1]]
#> [1] "l" "o" "v" "elovelove"

# str_glue(): string interpolation. Whatever is inside {} is evaluated as an
# R expression (a variable, a constant such as `pi`, a function call, ...)
# and the result is inserted into the string.
str_glue("pi is {str1}")
#> pi is I love cat, cat cat !

str_glue("pi is {pi}")
#> pi is 3.14159265358979

str_glue("log2(8) is {log2(8)}")
#> log2(8) is 3

# str_glue_data(): like str_glue(), but {} expressions are looked up in the
# supplied data frame or list first; one string is produced per row.
str_glue_data(mtcars, "{rownames(mtcars)} has {hp} hp")
#> Mazda RX4 has 110 hp
#> Mazda RX4 Wag has 110 hp
#> Datsun 710 has 93 hp
#> Hornet 4 Drive has 110 hp
#> Hornet Sportabout has 175 hp
#> Valiant has 105 hp
#> ... (output truncated; one line per row of mtcars)

# Expressions inside {} may also perform calculations.
str_glue_data(mtcars, "{rownames(mtcars)} has {hp*1000} hp")
#> Mazda RX4 has 110000 hp
#> Mazda RX4 Wag has 110000 hp
#> Datsun 710 has 93000 hp
#> Hornet 4 Drive has 110000 hp
#> ... (output truncated)

# ... or call other functions, e.g. take the first two characters of `wt`.
str_glue_data(mtcars, "{rownames(mtcars)} has {substr(wt,1,2)} wt")
#> Mazda RX4 has 2. wt
#> Mazda RX4 Wag has 2. wt
#> Datsun 710 has 2. wt
#> Hornet 4 Drive has 3. wt
#> ... (output truncated)
六、字符串排序
# Section 6: ordering and sorting.
# Uses str2 from section 1 and `animal` as modified in section 4.
# Lines starting with `#>` show the console output of the call above them.

str2
#> [1] "lovelovelove"

# str_order(): the permutation of indices that sorts the vector. str2 has a
# single element, so the result is just 1.
str_order(str2, decreasing = TRUE)
#> [1] 1

animal
#> [1] "Fow"    "Fog"    "Fheep"  "Foat"   "Fig"    "Fonkey" "Fat"
#> [8] "Fat"

# Index with the permutation to get the vector in descending order.
animal[str_order(animal, decreasing = TRUE)]
#> [1] "Fow"    "Fonkey" "Fog"    "Foat"   "Fig"    "Fheep"  "Fat"
#> [8] "Fat"

animal
#> [1] "Fow"    "Fog"    "Fheep"  "Foat"   "Fig"    "Fonkey" "Fat"
#> [8] "Fat"

# str_sort(): return the sorted vector directly (ascending by default).
str_sort(animal)
#> [1] "Fat"    "Fat"    "Fheep"  "Fig"    "Foat"   "Fog"    "Fonkey"
#> [8] "Fow"
作者:天使不设防
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文连接,否则保留追究法律责任的权利.