6.文本处理和正则表达式

6.1 常见文本处理工具

6.1.1 查看普通文本内容

cat 命令格式

cat [OPTION]... [FILE]...

# 支持标准输入
With no FILE, or when FILE is -, read standard input.

cat 常用选项

# -A, 显示所有不可见字符（-vET）
$ cat -A file.txt 

# -n, 显示行号
$ cat -n file.txt

# -b, 显示非空行号 
$ cat -b file.txt 

# -s, 合并连续的空行
$ cat -s file.txt

cat 读取标准输入

# With no FILE, or when FILE is -, read standard input.

# FILE is -, 读取多行标准输入，重定向到文本
$ cat - > file.txt 
line1 line1 line1 line1
line2 line2 line2 line2
line3 line3 line3 line3
^C

# no FILE, 读取多行标准输入，重定向到文本
$ cat > file.txt <<EOF
> line1 line1 line1 line1
> line2 line2 line2 line2
> line3 line3 line3 line3
> EOF

6.1.2 查看二进制文本内容

hexdump 十六进制查看文本

# 默认以2字节为1组、按主机字节序（x86为小端）显示十六进制，每个字节占2位，所以 "ab"(61 62) 显示为 6261
$ hexdump a.txt 
0000000 6261 6463 6665 0a67 4948 4b4a 4d4c 0a4e
0000010 3231 3433 3635 0a37                    
0000018

# -C, hex+ASCII 方式查看
$ hexdump -C a.txt 
00000000  61 62 63 64 65 66 67 0a  48 49 4a 4b 4c 4d 4e 0a  |abcdefg.HIJKLMN.|
00000010  31 32 33 34 35 36 37 0a                           |1234567.|
00000018

# -n <length>, 只查看前length个字节
$ hexdump -n 3 a.txt 
0000000 6261 0063                              
0000003

$ hexdump -Cn 3 a.txt 
00000000  61 62 63                                          |abc|
00000003

6.1.3 分页查看

less 分页查看

# 命令格式, 支持标准输入
$ less FILENAME
$ CMD STDOUT | less

# 常用快捷键，与 vim 部分快捷键类似
  j         *  Forward  one line   (or N lines).
  k         *  Backward one line   (or N lines).
  f  ^F     *  Forward  one window (or N lines).
  b  ^B     *  Backward one window (or N lines).
  d  ^D     *  Forward  one half-window (and set half-window to N).
  u  ^U     *  Backward one half-window (and set half-window to N).

  g         *  Go to first line in file (or line N).
  G         *  Go to last line in file (or line N).

  /pattern  *  Search forward for (N-th) matching line (n,下一个  N,上一个).

  q         *  Exit.

6.1.4 查看前后行

head 查看文件前多少行

# 命令格式
head [OPTION]... [FILE]...

# 常用选项
$ seq 20 > file.txt

# -n, --lines=[-]NUM  指定行显示
  # -n NUM, 查看前num行(1-5)
  $ head -n 5 file.txt

  # -n -NUM, 倒数num行结束(1-15)
  $ head -n -5 file.txt 

# -c, --bytes=[-]NUM  按字节数显示（-c NUM 前NUM个字节，-c -NUM 去掉最后NUM个字节）
$ head -c 5 file.txt
$ head -c -5 file.txt

# 取12位随机字符
$ cat /dev/urandom | tr -dc '[:alnum:]' | head -c 12

tail 查看文件后多少行

# 命令格式
tail [OPTION]... [FILE]...

# 常用选项
$ seq 20 > file.txt

# -n, --lines=[+]NUM  指定行显示
  # -n NUM, 查看后num行(16-20)
  $ tail -n 5 file.txt
  # -n +NUM, 正数num开始(5-20)
  $ tail -n +5 file.txt

# -c, --bytes=[+]NUM  按字节数显示（-c NUM 最后NUM个字节，-c +NUM 从第NUM个字节开始）
$ tail -c 5 file.txt
$ tail -c +5 file.txt

# -f, 实时显示
$ tail -f file.txt

6.1.5 按列截取

cut 按列截取

# 命令格式
cut OPTION... [FILE]...

# -d, --delimiter=DELIM   指定使用DELIM为分割符，默认是TAB
# -f, --fields=LIST    仅显示LIST中的列
# --output-delimiter=STRING  指定显示的分隔符
$ cut -d ':' -f '1-3,7' --output-delimiter="  " /etc/passwd
root  x  0  /bin/bash
daemon  x  1  /usr/sbin/nologin
bin  x  2  /usr/sbin/nologin

6.1.6 合并多个文件

paste 合并文件

# 命令格式
paste [OPTION]... [FILE]...

# 文件准备
snowing@ubuntus:~/tmp$ cat seq.txt 
1
2
3
4
snowing@ubuntus:~/tmp$ cat file.txt 
a
b

# 默认TAB分隔符
snowing@ubuntus:~/tmp$ paste seq.txt file.txt 
1 a
2 b
3 
4 

# -d, --delimiters=LIST  指定分隔符
snowing@ubuntus:~/tmp$ paste -d ':' seq.txt file.txt 
1:a
2:b
3:
4:

# -s, 一个文件合成一行显示
snowing@ubuntus:~/tmp$ paste -s seq.txt file.txt 
1 2 3 4
a b

# 合成一行，并指定分割符
snowing@ubuntus:~/tmp$ paste -s -d '+' seq.txt 
1+2+3+4

6.1.7 分析文本

wc 统计行数

# 命令格式
wc [OPTION]... [FILE]...
# 支持标准输入
With no FILE, or when FILE is -, read standard input.

# 默认显示行数，单词数，字节数
$ wc a.txt 
  27   42 1469 a.txt

# -l, --lines     print the newline counts
$ wc -l a.txt 
27 a.txt

# -w, --words     print the word counts
$ wc -w a.txt 
42 a.txt

# -c, --bytes     print the byte counts
$ wc -c a.txt 
1469 a.txt

# -m, --chars     print the character counts
$ wc -m a.txt 
1469 a.txt

sort 排序

# 命令格式
sort [OPTION]... [FILE]...

# 不加参数，默认以字符顺序升序排列
$ sort a.txt

# -n, --numeric-sort          以数值大小排序
$ sort -n a.txt

# -r, --reverse               降序排列
$ sort -r a.txt

# -h, --human-numeric-sort    按单位大小排列(e.g., 2K 1G)
$ du -sh /data/* | sort -h

# -k, --key=KEYDEF            指定要排序的列
$ sort -k 3 a.txt

# -t, --field-separator=SEP   指定分隔符SEP
$ sort -t ':' a.txt

# 组合使用，以':'为分隔符，对第3列按数值倒序
$ sort -nr -t ':' -k 3 a.txt

uniq 去重

# 命令格式
uniq [OPTION]... [FILE]...

# -c, --count      行首显示重复次数
$ uniq -c a.txt 

# -d, --repeated   只显示重复的行
$ uniq -d a.txt 

# -u, --unique     只显示不重复的行
$ uniq -u a.txt 


# uniq 只处理相邻的重复行，所以实际常和sort组合使用：先排序，再过滤/统计重复的行
$ sort -n a.txt | uniq -c

6.2 正则表达式

扩展正则表达式：ERE，格式可读性更强，推荐使用
基本正则表达式：BRE，? , + , | , () , {} 需要加转义字符

6.2.1 字符匹配

匹配规则（基本正则和扩展正则没有区别）

.   匹配任意一个字符，可以是汉字
[]  匹配括号内任意一个字符
[^] 匹配不在括号内任意一个字符

. [] [^] 均匹配一个字符

$ grep root /etc/passwd
root:x:0:0:root:/root:/bin/bash
operator:x:11:0:operator:/root:/sbin/nologin

#  . 匹配任意单个字符
$ grep r..t /etc/passwd
root:x:0:0:root:/root:/bin/bash
operator:x:11:0:operator:/root:/sbin/nologin
ftp:x:14:50:FTP User:/var/ftp:/sbin/nologin

# []  匹配括号内任意一个字符
[abc] abc中的任意一个字符，或的关系
[0-9] [a-z] [a-zA-Z] 数字 小写字母 大小写字母

# [^] 匹配不在括号内任意一个字符
[^abc]  除了abc的任意一个字符，非

对 [] 内外的点进行匹配
[.] 方框内的点，\.[] 方框外的点, .[] 任意单个字符

# [.] 方框内的点
$ ls /etc | grep "rc[.0-2]"
rc0.d  rc1.d  rc2.d  rc.d  

$ ls /etc | grep "rc[.l]"
rc.d  rcld

# \.[] 方框外的点
$ ls /etc | grep "rc[.0-2]\."
rc0.d  rc1.d  rc2.d

# .[] 任意字符
$ ls /etc | grep "rc.[.d]"
rc0.d  rc1.d  rc2.d  rc.d  rcld

不常用匹配规则

[:alnum:] 字母和数字
[:alpha:] 字母大小写(A-Z, a-z)
[:lower:] 小写字母,[a-z]
[:upper:] 大写字母
[:blank:] 空白字符（空格和制表符）
[:space:] 水平和垂直的空白字符（比[:blank:]包含的范围广）
[:cntrl:] 不可打印的控制字符（退格、删除、警铃...）
[:digit:] 十进制数字
[:xdigit:]十六进制数字
[:graph:] 可打印的非空白字符
[:print:] 可打印字符
[:punct:] 标点符号

6.2.2 次数匹配

扩展正则次数匹配

# 扩展正则中不需要加转义
*      0次或任意次
?      0或1次
+      至少一次
{m,n}  m次到n次          {n}:n次    {,n}:最多n次   {m,}:最少m次

范例
.* 任意次的任意字符

6.2.3 位置匹配

位置匹配规则

^ 行首
$ 行尾
\< 或 \b 词首
\> 或 \b 词尾

常用位置匹配组合

    ^$          空行
^PATTERN$       只有PATTERN行
\<PATTERN\>     所有PATTERN单词
^[[:space:]]*$  含任意个空白符的行

范例

# 过滤空白行和注释行
$ grep -v -e "^$" -e "^#" /etc/profile

6.2.4 分组匹配和其他匹配

分组匹配规则

# 扩展正则，基本正则中()和 | 需要加转义
()  分组
\1, \2, ... 引用第一组，第二组 ...
|  或者

分组常用组合

a|b     #a或b
C|cat   #C或cat
(C|c)at #Cat或cat

范例

# 过滤空白行和注释行，三种效果一样，第一种更好
grep -E -v "^($|#)" /etc/profile
# grep -E -v "(^$)|(^#)" /etc/profile
# grep -E -v "^$|^#" /etc/profile

# 第四种，^[^#] 表示行首必须有一个字符（排除空行），且这个字符不是#（排除注释行）
$ grep -E "^[^#]" /etc/profile

# 查找行首行尾单词相同的行
$ grep -E "^(.*)\>.*\<\1$" /etc/passwd
sync:x:5:0:sync:/sbin:/bin/sync
shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown
halt:x:7:0:halt:/sbin:/sbin/halt

6.3 行过滤grep

6.3.1 grep基本格式

基本格式

# PATTERN是正则表达式
grep [OPTION] PATTERN [FILE]

常见选项[OPTION]

-v 显示不匹配的行 
-i 忽略字符大小写  
-n 显示行号  
-c 统计匹配的行数  
-o 仅显示匹配到的字符串  
-w 匹配整个单词 
-e 连接多个匹配条件(或者关系)
-C NUM PATTERN, 显示前后各NUM行 
-m NUM 匹配NUM次就不再进行搜索
-E 使用扩展正则表达式,相当于egrep

不常用选项

-q 静默模式，不输出任何信息  
-f file 根据模式文件处理  
-A # after, 显示后#行  
-B # before, 显示前#行  
-r 递归目录(即对子目录文件也使用grep)，但不处理软链接  
-R 递归目录，但处理软链接  
--color=auto 对匹配到的文本着色显示

6.3.2 grep常见选项

-v：显示不匹配的行

# -v sbin 显示不包含sbin的行
grep -v sbin /etc/passwd

-i：忽略大小写

# grep -i Root /etc/passwd
root:x:0:0:root:/root:/bin/bash
operator:x:11:0:operator:/root:/sbin/nologin

-n：显示行号

# grep -n root /etc/passwd
1:root:x:0:0:root:/root:/bin/bash
10:operator:x:11:0:operator:/root:/sbin/nologin

-c：统计匹配的行数
```
# grep -c root /etc/passwd
2
```
-o：仅显示匹配到的字符串
```
# grep -o root /etc/passwd
root
root
```

-w：匹配整个单词

# grep -w operator /etc/passwd
operator:x:11:0:operator:/root:/sbin/nologin

-e：连接多个匹配条件(或者关系)

# grep -e root -e sync /etc/passwd
root:x:0:0:root:/root:/bin/bash
sync:x:5:0:sync:/sbin:/bin/sync
operator:x:11:0:operator:/root:/sbin/nologin

-C NUM PATTERN,显示前后各NUM行

# grep -C 1 root /etc/passwd
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
--
mail:x:8:12:mail:/var/spool/mail:/sbin/nologin
operator:x:11:0:operator:/root:/sbin/nologin
games:x:12:100:games:/usr/games:/sbin/nologin

-m NUM：匹配NUM次就不再进行搜索

# grep root /etc/passwd
root:x:0:0:root:/root:/bin/bash
operator:x:11:0:operator:/root:/sbin/nologin

# grep -m 1 root /etc/passwd
root:x:0:0:root:/root:/bin/bash

6.3.3 grep实际应用

实际应用样例

# 取分区利用率最大值
$ df | grep "/dev/sd" | tr -s ' ' % | cut -d "%" -f 5|sort -nr |head -1

# 查询连接次数最多的
$ ss -nt | grep "^ESTAB" | tr -s ' ' :|cut -d ':' -f 6|sort | uniq -c | sort -nr | head -1
      2 172.32.115.1

4. 文本处理三剑客

4.2 行操作sed

基本格式：

sed [OPTION] '[script1][script2]' [input-file]

其中[script1]用于指定范围，[script2]为操作

4.2.1 常见选项

[OPTION]

-n 不输出模式空间内容到屏幕，即不自动打印
-e 多点编辑
-f /PATH/SCRIPT_FILE 从指定文件中读取编辑脚本
-r, -E 使用扩展正则表达式
-i.bak 备份文件并原处编辑

范例：sed ''打印和 '[script1][p]'打印

# 可以认为sed中有两种打印的方法：sed '' 和 '[script1][p]'打印
# sed ''，不加任何参数，全文打印
[root@centos ~]# sed '' /etc/passwd
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
# '[script1][p]'打印，[script1]不指定，全文打印
[root@centos ~]# sed -n 'p' /etc/passwd
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
# sed '' 和 '[script1][p]' 都使用时会造成重复打印
[root@centos ~]# sed 'p' /etc/passwd
root:x:0:0:root:/root:/bin/bash
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin


# '[script1][p]'，仅打印[script1]指定的行，不关闭sed ''打印
[root@centos ~]# sed '1p' /etc/passwd
root:x:0:0:root:/root:/bin/bash
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
# '[script1][p]'，仅打印[script1]指定的行，关闭sed ''打印
[root@centos ~]# sed -n '1p' /etc/passwd
root:x:0:0:root:/root:/bin/bash

4.2.2 [script1]：地址范围

[][script2]：不指定地址，全文操作

# '[script1][p]'打印，[script1]不指定，全文打印
[root@centos ~]# sed -n 'p' /etc/passwd
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin

[N][script2]：单行指定，单行操作

[root@centos ~]# sed -n '1p' /etc/passwd
root:x:0:0:root:/root:/bin/bash

[root@centos ~]# sed -n '3p' /etc/passwd
daemon:x:2:2:daemon:/sbin:/sbin/nologin

# $表示最后一行
[root@centos ~]# sed -n '$p' /etc/passwd
demo:x:1001:501::/home/zk:/bin/bash

[/PATTERN/][script2]：正则匹配，操作匹配/PATTERN/的行

[root@centos ~]# sed -n '/root/p' /etc/passwd
root:x:0:0:root:/root:/bin/bash
operator:x:11:0:operator:/root:/sbin/nologin

[root@centos ~]# sed -n '/^root/p' /etc/passwd
root:x:0:0:root:/root:/bin/bash

[m,n][script2]：范围匹配，操作[m,n]范围的行

# [m,n]
[root@centos ~]# sed -n '1,3p' /etc/passwd
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin

# [m,+n]
[root@centos ~]# sed -n '1,+1p' /etc/passwd
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin

[/PATTERN1/,/PATTERN2/][script2]：正则范围匹配，操作[/PATTERN1/,/PATTERN2/]范围的行

注意：可用于查找指定时间段的日志，要避免因为/PATTERN2/不存在而导致全文操作

[root@centos ~]# sed -n '/^root/,/^daemon/p' /etc/passwd
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin

# 查找指定时间段的日志
[root@centos ~]# sed -n '/2020-06-20/,/2020-06-21/p' /data/access.log

# 造成全文操作的分析
# /^root/匹配时开始，/^aaa/匹配时停止
# /^root/匹配后，后续没有任何一行能匹配/^aaa/，因此不会停止，一直操作到文本结束
[root@centos ~]# sed -n '/^root/,/^aaa/p' /etc/passwd
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin

[m~n][script2]：步长匹配，m为起始行，n为指定步长

# [1~2] 操作奇数行
[root@centos ~]# sed -n '1~2p' /etc/passwd
root:x:0:0:root:/root:/bin/bash
daemon:x:2:2:daemon:/sbin:/sbin/nologin
lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin

# [2~2] 操作偶数行
[root@centos ~]# sed -n '2~2p' /etc/passwd
bin:x:1:1:bin:/bin:/sbin/nologin
adm:x:3:4:adm:/var/adm:/sbin/nologin

4.2.3 [script2]：操作命令

'[p]'：打印匹配的行

[root@centos ~]# sed  '1p' file.txt
num1=20
num1=20
num2=18
num3=33

'[d]'：删除指定的行(不指定参数sed -i时，只是内存删除，不操作源文件)

# 内存删除指定行,不指定-i
[root@centos ~]# sed '1d' file.txt
num2=18
num3=33
# 源文件不变
[root@centos ~]# cat file.txt
num1=20
num2=18
num3=33

'[a]' \STRING ：在下一行添加STRING，支持使用\n实现多行追加
'[i]' \STRING ：在上一行添加STRING
'[c]' \STRING ：将当前行替换为STRING

# a后面的\的规则
# \  表示光标移动到第一行，之后的\n表示光标移动到下一行
# \\  表示第一行为空行
# \\n 表示第一行为空行，第二行为空行
# \STRING         表示第一行为STRING
# \STRING\n       表示第一行为STRING，且光标移动到第二行，第二行为空行
# \STRING\nSTRING 表示第一行为STRING，且光标移动到第二行，第二行为STRING


# \\ 表示第一行为空行
[root@centos ~]# sed '/^root/a\\' /etc/passwd
root:x:0:0:root:/root:/bin/bash
空行空行空行空行空行空行空行空行空行
bin:x:1:1:bin:/bin:/sbin/nologin

# \STRING 表示第一行为STRING
[root@centos ~]# sed '/^root/a\STRING' /etc/passwd
root:x:0:0:root:/root:/bin/bash
STRING
bin:x:1:1:bin:/bin:/sbin/nologin

# \STRING\n 表示第一行为STRING，且光标移动到第二行
[root@centos ~]# sed '/^root/a\STRING\n' /etc/passwd
root:x:0:0:root:/root:/bin/bash
STRING
空行空行空行空行空行空行空行空行空行
bin:x:1:1:bin:/bin:/sbin/nologin

# \STRING\nSTRING 表示第一行为STRING，且光标移动到第二行，第二行为STRING
[root@centos ~]# sed '/^root/a\STRING\nSTRING' /etc/passwd
root:x:0:0:root:/root:/bin/bash
STRING
STRING
bin:x:1:1:bin:/bin:/sbin/nologin

[root@centos ~]# sed '/^root/a\\n' /etc/passwd
root:x:0:0:root:/root:/bin/bash
空行空行空行空行空行空行空行空行空行
空行空行空行空行空行空行空行空行空行
bin:x:1:1:bin:/bin:/sbin/nologin

[w] /PATH/FILENAME：匹配结果保存到指定文件FILENAME中

[root@centos ~]# sed -n '/^root/p' /etc/passwd
root:x:0:0:root:/root:/bin/bash

[root@centos ~]# sed -n '/^root/w demo.txt' /etc/passwd

[root@centos ~]# cat demo.txt
root:x:0:0:root:/root:/bin/bash

[r] /PATH/FILENAME：读取FILENAME中的文件内容到匹配结果的下一行

[root@centos ~]# sed  '/^root/r demo.txt' /etc/passwd
root:x:0:0:root:/root:/bin/bash
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin

[=]：显示行号

# 不实用，行号显示不在同一行
[root@centos ~]# sed '/^root/=' /etc/passwd
1
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin

[!script2]：不匹配的行

# 显示所有非#开头的行
[root@centos ~]# sed -n '/^#/!p' /root/.bash_profile

[s]/PATTERN/STRING/：搜索替代

# 每行首个匹配替换
[root@centos ~]# sed -n 's/root/admin/p' passwd
admin:x:0:0:root:/root:/bin/bash
operator:x:11:0:operator:/admin:/sbin/nologin

# 全局替换  '[s]/PATTERN/STRING/[g]'
[root@centos ~]# sed -n 's/root/admin/gp' passwd
admin:x:0:0:admin:/admin:/bin/bash
operator:x:11:0:operator:/admin:/sbin/nologin

# 匹配行替换  '[script1][s]/PATTERN/STRING/'
[root@centos ~]# sed -nr '/^r..t/s/r..t/SSSSS/gp' passwd
SSSSS:x:0:0:SSSSS:/SSSSS:/bin/bash
[root@centos ~]# grep -n 'r..t' passwd
1:root:x:0:0:root:/root:/bin/bash
10:operator:x:11:0:operator:/root:/sbin/nologin
12:ftp:x:14:50:FTP User:/var/ftp:/sbin/nologin


# 支持分组与后向引用， 注意使用扩展正则参数-r
[root@centos ~]# sed -nr 's/(r..t)/\1ER/gp' passwd
rootER:x:0:0:rootER:/rootER:/bin/bash
operator:x:11:0:operator:/rootER:/sbin/nologin
ftp:x:14:50:FTP User:/var/ftERp:/sbin/nologin

范例：取IP

[root@centos ~]# ifconfig enp0s8 | sed -nr '2s/.*inet (.*) netmask.*/\1/p'
172.32.115.11

范例：修改网卡名称

#查看配置
[root@centos ~]# sed -nr '/^GRUB_CMDLINE_LINUX=/p' /etc/default/grub
GRUB_CMDLINE_LINUX="rd.lvm.lv=centos/root rd.lvm.lv=centos/swap rhgb quiet"
#修改配置，预览无误后把 -nr 换为 -ri.bak（并去掉末尾的 p）即可备份并写入文件
[root@centos ~]# sed -nr '/^GRUB_CMDLINE_LINUX=/s/"$/ net.ifnames=0"/p' /etc/default/grub
GRUB_CMDLINE_LINUX="rd.lvm.lv=centos/root rd.lvm.lv=centos/swap rhgb quiet net.ifnames=0"
# 加载配置
[root@centos ~]# grub2-mkconfig -o /boot/grub2/grub.cfg
# 重启生效

范例：非#开头的行加#，删除所有#行的#

# 查看原文件
[root@centos ~]# cat /etc/fstab

#
# /etc/fstab
# Created by anaconda on Tue Aug  4 18:50:45 2020
#
# Accessible filesystems, by reference, are maintained under '/dev/disk'
# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info
#
/dev/mapper/centos-root /                       xfs     defaults        0 0
UUID=1966b747-5b79-47de-b12b-6102f16f7764 /boot                   xfs     defaults        0 0
/dev/mapper/centos-swap swap                    swap    defaults        0 0

# 加#方式一，会漏掉空行，即空行加不上#（注意 [^#] 要放进分组里，否则行首第一个字符会被替换掉）
[root@centos ~]# sed -rn 's/^([^#].*)/#\1/p' /etc/fstab
#/dev/mapper/centos-root /                       xfs     defaults        0 0
#UUID=1966b747-5b79-47de-b12b-6102f16f7764 /boot                   xfs     defaults        0 0
#/dev/mapper/centos-swap swap                    swap    defaults        0 0

# 加#方式二，空行也会加上#
[root@centos ~]# sed -rn '/^#/!s@^@#@p' /etc/fstab
#
#/dev/mapper/centos-root /                       xfs     defaults        0 0
#UUID=1966b747-5b79-47de-b12b-6102f16f7764 /boot                   xfs     defaults        0 0
#/dev/mapper/centos-swap swap                    swap    defaults        0 0





# 删除所有#行的#
[root@centos ~]# sed -rn '/^#/s/^#//p' /etc/fstab

 /etc/fstab
 Created by anaconda on Tue Aug  4 18:50:45 2020

 Accessible filesystems, by reference, are maintained under '/dev/disk'
 See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info

[root@centos ~]#

范例：取目录名与基名

# 取目录名
[root@centos ~]# echo /etc/sysconfig/ | sed -rn 's#(.*)/([^/]+)/?#\1#p'
/etc

# 取基名
[root@centos ~]# echo /etc/sysconfig/ | sed -rn 's#(.*)/([^/]+)/?#\2#p'
sysconfig

范例：引用变量生成随机文件名

# 注意使用双引号
[root@centos ~]# echo|sed "s/^/$RANDOM.log/"
24700.log
[root@centos ~]# echo|sed "s/^/$RANDOM.log/"
1994.log
# 或者使用三个单引号
[root@centos ~]# echo|sed 's/^/'''$RANDOM'''.log/'
10244.log

4.2.4 高级用法

常见参数

P 打印模式空间开端至\n内容，并追加到默认输出之前
h 把模式空间中的内容覆盖至保持空间中
H 把模式空间中的内容追加至保持空间中
g 从保持空间取出数据覆盖至模式空间
G 从保持空间取出内容追加至模式空间
x 把模式空间中的内容与保持空间中的内容进行互换
n 读取匹配到的行的下一行覆盖至模式空间
N 读取匹配到的行的下一行追加至模式空间
d 删除模式空间中的行
D 如果模式空间包含换行符，则删除直到第一个换行符的模式空间中的文本，并不会读取新的输入行，而使用合成的模式空间重新启动循环。如果模式空间不包含换行符，则会像发出d命令那样启动正常的新循环

范例

sed -n 'n;p' FILE
sed '1!G;h;$!d' FILE
sed 'N;D' FILE
seq 10 |sed '3h;9G;9!d'
sed '$!N;$!D' FILE
sed '$!d' FILE
sed 'G' FILE
sed 'g' FILE
sed '/^$/d;G' FILE
sed 'n;d' FILE
sed -n '1!G;h;$p' FILE

练习

1、删除centos7系统/etc/grub2.cfg文件中所有以空白开头的行行首的空白字符
2、删除/etc/fstab文件中所有以#开头，后面至少跟一个空白字符的行的行首的#和空白字符
3、在centos6系统/root/install.log每一行行首增加#号
4、在/etc/fstab文件中不以#开头的行的行首增加#号
5、处理/etc/fstab路径,使用sed命令取出其目录名和基名
6、利用sed 取出ifconfig命令中本机的IPv4地址
7、统计centos安装光盘中Package目录下的所有rpm文件的以.分隔倒数第二个字段的重复次数
8、统计/etc/init.d/functions文件中每个单词的出现次数，并排序（用grep和sed两种方法分别实现）
9、将文本文件的n和n+1行合并为一行，n为奇数行

posted @ 2022-08-31 00:27 人间丶迷走阅读(111) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

人间丶迷走

哈哈

6.文本处理和正则表达式

6.文本处理和正则表达式

6.1 常见文本处理工具

6.1.1 查看普通文本内容

6.1.2 查看二进制文本内容

6.1.3 分页查看

6.1.4 查看前后行

6.1.5 按列截取

6.1.6 合并多个文件

6.1.7 分析文本

6.2 正则表达式

6.2.1 字符匹配

6.2.2 次数匹配

6.2.3 位置匹配

6.2.4 分组匹配和其他匹配

6.3 行过滤grep

6.3.1 grep基本格式

6.3.2 grep常见选项

6.3.3 grep实际应用

4. 文本处理三剑客

4.2 行操作sed

4.2.1 常见选项

4.2.2 [script1]：地址范围

4.2.3 [script2]：操作命令

4.2.4 高级用法