详细grep、sed、awk
[root@VM_0_7_centos tmp]# cat 1.txt 1 2 3 4 5 6 [root@VM_0_7_centos tmp]# cat 2.txt 4 5 6 7 8 [root@VM_0_7_centos tmp]# grep -f 1.txt 2.txt 4 5 6 [root@VM_0_7_centos tmp]# grep -f -v 1.txt 2.txt grep: -v: No such file or directory [root@VM_0_7_centos tmp]# grep -v -f 1.txt 2.txt 7 8
-e匹配多个模式,-v反转输出,-f从文件中读取匹配模式(可用于找出两个文件中相同的行)
[root@VM_0_7_centos tmp]# echo "11 22 33 44 55" |xargs -n1 | grep -e "11" 11 [root@VM_0_7_centos tmp]# echo "11 22 33 44 55" |xargs -n1 | grep -e "11" -e "22" 11 22
-i忽略大小写
[root@VM_0_7_centos tmp]# echo "AA aa bb " | xargs -n1 | egrep -i "aa" AA aa
-o只输出匹配内容
[root@VM_0_7_centos tmp]# ifconfig eth0 Link encap:Ethernet HWaddr 52:54:00:BE:3B:97 inet addr:172.17.0.7 Bcast:172.17.15.255 Mask:255.255.240.0 UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 RX packets:9272287 errors:0 dropped:0 overruns:0 frame:0 TX packets:9005522 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:1000 RX bytes:1614323123 (1.5 GiB) TX bytes:2170651985 (2.0 GiB) lo Link encap:Local Loopback inet addr:127.0.0.1 Mask:255.0.0.0 UP LOOPBACK RUNNING MTU:65536 Metric:1 RX packets:194402 errors:0 dropped:0 overruns:0 frame:0 TX packets:194402 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:0 RX bytes:213430544 (203.5 MiB) TX bytes:213430544 (203.5 MiB) [root@VM_0_7_centos tmp]# ifconfig | egrep "172.17.0.7" inet addr:172.17.0.7 Bcast:172.17.15.255 Mask:255.255.240.0 [root@VM_0_7_centos tmp]# ifconfig | egrep -o "172.17.0.7" 172.17.0.7
sed
流编辑器,过滤和替换文本。
工作原理:sed命令将当前处理的行读入模式空间进行处理,处理完把结果输出,并清空模式空间。然后再将下一行读入模式空间进行处理输出,以此类推,直至最后一行。
用法:sed [OPTION]... {script-only-if-no-other-script} [input-file]...
sed [选项] '地址 命令' file
选项 | 描述 |
-n | 不自动打印模式空间 |
-e | 将脚本(表达式)添加到要执行的命令中 |
-f | 将脚本文件的内容添加到要执行的命令中 |
-i | 修改源文件 |
-r | 使用扩展正则表达式 |
命令 | 描述 |
s/regexp/replacement/ | 替换字符串 |
p | 打印当前模式空间 |
P | 打印模式空间的第一行 |
d | 删除模式空间,并且开始下一个循环 |
D | 删除模式空间的第一行,开始下一个循环 |
= | 打印当前行号 |
a \text | 在当前行后追加文本 |
i \text | 在当前行上面插入文本 |
c \text | 用文本替换所选行 |
q | 立即退出sed脚本 |
r | 追加来自文件的文本 |
地址 | 描述 |
first~step | 步长,从first行开始,每隔step行匹配一次 |
$ | 匹配最后一行 |
/regexp/ | 正则表达式匹配行 |
number | 只匹配指定行 |
addr1,addr2 | 从addr1行开始匹配,直到addr2行结束 |
addr1,+N | 从addr1行开始,向后的N行 |
addr1,~N | 从addr1行开始,到下一个行号为N的倍数的行结束 |
[root@VM_0_7_centos tmp]# tail /etc/services nimgtw 48003/udp # Nimbus Gateway 3gpp-cbsp 48049/tcp # 3GPP Cell Broadcast Service Protocol isnetserv 48128/tcp # Image Systems Network Services isnetserv 48128/udp # Image Systems Network Services blp5 48129/tcp # Bloomberg locator blp5 48129/udp # Bloomberg locator com-bardac-dw 48556/tcp # com-bardac-dw com-bardac-dw 48556/udp # com-bardac-dw iqobject 48619/tcp # iqobject iqobject 48619/udp # iqobject
[root@VM_0_7_centos tmp]# tail /etc/services | sed -n "/blp5/p" 打印匹配blp5的行 blp5 48129/tcp # Bloomberg locator blp5 48129/udp # Bloomberg locator
[root@VM_0_7_centos tmp]# tail /etc/services | sed -n "1p" 打印第一行 nimgtw 48003/udp # Nimbus Gateway [root@VM_0_7_centos tmp]# tail /etc/services | sed -n "2p" 打印第二行 3gpp-cbsp 48049/tcp # 3GPP Cell Broadcast Service Protocol
[root@VM_0_7_centos tmp]# tail /etc/services | sed -n "1,3p" 匹配1-3行 nimgtw 48003/udp # Nimbus Gateway 3gpp-cbsp 48049/tcp # 3GPP Cell Broadcast Service Protocol isnetserv 48128/tcp # Image Systems Network Services
[root@VM_0_7_centos tmp]# seq 10 | sed -n "1~2p" 从1开始,每次跳过一行 1 3 5 7 9
[root@VM_0_7_centos tmp]# tail /etc/services | sed -n "$p" 双引号中的$p会被shell当作变量展开为空,因此没有输出 [root@VM_0_7_centos tmp]# tail /etc/services | sed -n '$p' 打印最后一行 iqobject 48619/udp # iqobject
[root@VM_0_7_centos tmp]# tail /etc/services | sed -n '$!p' 不打印最后一行 nimgtw 48003/udp # Nimbus Gateway 3gpp-cbsp 48049/tcp # 3GPP Cell Broadcast Service Protocol isnetserv 48128/tcp # Image Systems Network Services isnetserv 48128/udp # Image Systems Network Services blp5 48129/tcp # Bloomberg locator blp5 48129/udp # Bloomberg locator com-bardac-dw 48556/tcp # com-bardac-dw com-bardac-dw 48556/udp # com-bardac-dw iqobject 48619/tcp # iqobject
[root@VM_0_7_centos tmp]# tail /etc/services | sed -n '/^blp5/,/^com/p' 匹配从blp5到com开头(第一个)的 blp5 48129/tcp # Bloomberg locator blp5 48129/udp # Bloomberg locator com-bardac-dw 48556/tcp # com-bardac-dw
[root@VM_0_7_centos tmp]# tail services | sed '/blp5/d' 删除blp5行,不需要加-n nimgtw 48003/udp # Nimbus Gateway 3gpp-cbsp 48049/tcp # 3GPP Cell Broadcast Service Protocol isnetserv 48128/tcp # Image Systems Network Services isnetserv 48128/udp # Image Systems Network Services com-bardac-dw 48556/tcp # com-bardac-dw com-bardac-dw 48556/udp # com-bardac-dw iqobject 48619/tcp # iqobject iqobject 48619/udp # iqobject
[root@VM_0_7_centos tmp]# tail services | sed 's/blp5/test/' 将blp5替换为test,(没有加全局就只修改第一个匹配到的字符串) nimgtw 48003/udp # Nimbus Gateway 3gpp-cbsp 48049/tcp # 3GPP Cell Broadcast Service Protocol isnetserv 48128/tcp # Image Systems Network Services isnetserv 48128/udp # Image Systems Network Services test 48129/tcp # Bloomberg locator test 48129/udp # Bloomberg locator com-bardac-dw 48556/tcp # com-bardac-dw com-bardac-dw 48556/udp # com-bardac-dw iqobject 48619/tcp # iqobject iqobject 48619/udp # iqobject [root@VM_0_7_centos tmp]# tail services | sed 's/blp5/test/g' 替换所有的blp5为test nimgtw 48003/udp # Nimbus Gateway 3gpp-cbsp 48049/tcp # 3GPP Cell Broadcast Service Protocol isnetserv 48128/tcp # Image Systems Network Services isnetserv 48128/udp # Image Systems Network Services test 48129/tcp # Bloomberg locator test 48129/udp # Bloomberg locator com-bardac-dw 48556/tcp # com-bardac-dw com-bardac-dw 48556/udp # com-bardac-dw iqobject 48619/tcp # iqobject iqobject 48619/udp # iqobject
[root@VM_0_7_centos tmp]# tail services |sed '1,5s/blp5/test/g' 替换1-5行的blp5为test
nimgtw 48003/udp # Nimbus Gateway 3gpp-cbsp 48049/tcp # 3GPP Cell Broadcast Service Protocol isnetserv 48128/tcp # Image Systems Network Services isnetserv 48128/udp # Image Systems Network Services test 48129/tcp # Bloomberg locator blp5 48129/udp # Bloomberg locator com-bardac-dw 48556/tcp # com-bardac-dw com-bardac-dw 48556/udp # com-bardac-dw iqobject 48619/tcp # iqobject iqobject 48619/udp # iqobject
[root@VM_0_7_centos tmp]# tail services | sed 's/nimgtw/test/;s/udp/ll/' 用分号分隔多个命令,对每一行依次执行两次替换 test 48003/ll # Nimbus Gateway 3gpp-cbsp 48049/tcp # 3GPP Cell Broadcast Service Protocol isnetserv 48128/tcp # Image Systems Network Services isnetserv 48128/ll # Image Systems Network Services blp5 48129/tcp # Bloomberg locator blp5 48129/ll # Bloomberg locator com-bardac-dw 48556/tcp # com-bardac-dw com-bardac-dw 48556/ll # com-bardac-dw iqobject 48619/tcp # iqobject iqobject 48619/ll # iqobject
[root@VM_0_7_centos tmp]# tail services | sed -r 's/(.*) ([0-9]+.*)(#.*)/\1/' nimgtw 3gpp-cbsp isnetserv isnetserv blp5 blp5 com-bardac-dw com-bardac-dw iqobject iqobject [root@VM_0_7_centos tmp]# tail services | sed -r 's/(.*) ([0-9]+.*)(#.*)/\1\2/' nimgtw 48003/udp 3gpp-cbsp 48049/tcp isnetserv 48128/tcp isnetserv 48128/udp blp5 48129/tcp blp5 48129/udp com-bardac-dw 48556/tcp com-bardac-dw 48556/udp iqobject 48619/tcp iqobject 48619/udp [root@VM_0_7_centos tmp]# tail services | sed -r 's/(.*) ([0-9]+.*)(#.*)/\1\2\3/' nimgtw 48003/udp # Nimbus Gateway 3gpp-cbsp 48049/tcp # 3GPP Cell Broadcast Service Protocol isnetserv 48128/tcp # Image Systems Network Services isnetserv 48128/udp # Image Systems Network Services blp5 48129/tcp # Bloomberg locator blp5 48129/udp # Bloomberg locator com-bardac-dw 48556/tcp # com-bardac-dw com-bardac-dw 48556/udp # com-bardac-dw iqobject 48619/tcp # iqobject iqobject 48619/udp # iqobject
[root@VM_0_7_centos tmp]# seq 10 |sed '/5/,+3s/^/#/' 加注释 1 2 3 4 #5 #6 #7 #8 9 10
[root@VM_0_7_centos tmp]# seq 10 | sed '5s/.*/&\ntest/' &的作用是取前面所匹配到的结果 1 2 3 4 5 test 6 7 8 9 10 [root@VM_0_7_centos tmp]# seq 10 | sed '5s/.*/test\n&/' 1 2 3 4 test 5 6 7 8 9 10
[root@VM_0_7_centos tmp]# seq 10 |sed 'n;n;s/^/ll/' 1 2 ll3 4 5 ll6 7 8 ll9 10
awk
awk是一个处理文本的编程语言工具,能用简短的程序处理输入文件、数据排序、计算以及生成报表等等。
在Linux系统下默认awk是gawk,它是awk的GNU版本。可以通过命令查看应用的版本:ll /bin/awk
基本的命令语法:awk option 'pattern {action}' file
其中pattern表示AWK在数据中查找的内容,而action是在找到匹配内容时所执行的一系列命令。花括号用于根据特定的模式对一系列指令进行分组。
pattern参数可以是egrep正则表达式的任意一个,使用/re/再加上一些样式匹配,匹配方式与sed类似,也可以使用“,”分开两样式以选择某个范围。
awk处理的工作方式与数据库类似,支持对记录和字段处理,这也是grep和sed不能实现的。
在awk中,缺省的情况下将文本文件中一行视为一个记录,而将一行中的某一部分作为记录中的一个字段,用1,2,3....数字的方式顺序的表示行(记录)中的不同字段。用$后跟数字,引用对应的字段,以逗号分割,$0表示整个行。
选项 | 描述 |
-f program-file | 从文件中读取awk程序源文件 |
-F fs | 指定fs为输入字段分隔符 |
-v var=value | 变量赋值 |
--posix | 只支持POSIX正则表达式 |
Pattern | Description |
BEGIN{ } | 给程序赋予初始状态,先执行的工作 |
END{ } | 程序结束之后执行的一些扫尾工作 |
/regular expression/ | 为每个输入记录匹配正则表达式 |
pattern && pattern | 逻辑and,满足两个模式 |
pattern || pattern | 逻辑或,满足其中一个模式 |
! pattern | 逻辑非,不满足模式 |
pattern1,pattern2 | 范围模式,从匹配模式1的记录开始,直到匹配到模式2的记录 |
[root@VM_0_7_centos tmp]# tail services nimgtw 48003/udp # Nimbus Gateway 3gpp-cbsp 48049/tcp # 3GPP Cell Broadcast Service Protocol isnetserv 48128/tcp # Image Systems Network Services isnetserv 48128/udp # Image Systems Network Services blp5 48129/tcp # Bloomberg locator blp5 48129/udp # Bloomberg locator com-bardac-dw 48556/tcp # com-bardac-dw com-bardac-dw 48556/udp # com-bardac-dw iqobject 48619/tcp # iqobject iqobject 48619/udp # iqobject [root@VM_0_7_centos tmp]# tail services | awk -F '/' '{print $1}' 指定/为分割符,打印出第一部分 nimgtw 48003 3gpp-cbsp 48049 isnetserv 48128 isnetserv 48128 blp5 48129 blp5 48129 com-bardac-dw 48556 com-bardac-dw 48556 iqobject 48619 iqobject 48619
[root@VM_0_7_centos ~]# awk -va="123" 'BEGIN{print a}' 123 [root@VM_0_7_centos ~]# a=456 [root@VM_0_7_centos ~]# awk -va=$a 'BEGIN{print a}' 456
[root@Gin scripts]# awk '{print $0}' /etc/passwd root:x:0:0:root:/root:/bin/bash bin:x:1:1:bin:/bin:/sbin/nologin ..................................................... [root@Gin scripts]# echo hhh|awk '{print "hello,world"}' hello,world [root@Gin scripts]# awk '{print "hiya"}' /etc/passwd hiya hiya hiya hiya ...............................................
调用awk时,指定/etc/passwd作为输入文件,执行awk时,它依次对/etc/passwd中的每一行执行print命令。
所有输出都发送到stdout,所得到的结果与执行cat /etc/passwd完全相同。
[root@VM_0_7_centos ~]# awk -F":" '{print $1}' /etc/passwd root bin daemon adm lp sync shutdown halt mail uucp operator games gopher ftp nobody vcsa abrt ntp saslauth postfix sshd dbus tcpdump cairui nginx mysql zabbix [root@VM_0_7_centos ~]# awk -F":" '{print $0}' /etc/passwd root:x:0:0:root:/root:/bin/bash bin:x:1:1:bin:/bin:/sbin/nologin daemon:x:2:2:daemon:/sbin:/sbin/nologin adm:x:3:4:adm:/var/adm:/sbin/nologin lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin sync:x:5:0:sync:/sbin:/bin/sync shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown halt:x:7:0:halt:/sbin:/sbin/halt mail:x:8:12:mail:/var/spool/mail:/sbin/nologin uucp:x:10:14:uucp:/var/spool/uucp:/sbin/nologin operator:x:11:0:operator:/root:/sbin/nologin games:x:12:100:games:/usr/games:/sbin/nologin gopher:x:13:30:gopher:/var/gopher:/sbin/nologin ftp:x:14:50:FTP User:/var/ftp:/sbin/nologin nobody:x:99:99:Nobody:/:/sbin/nologin vcsa:x:69:69:virtual console memory owner:/dev:/sbin/nologin abrt:x:173:173::/etc/abrt:/sbin/nologin ntp:x:38:38::/etc/ntp:/sbin/nologin saslauth:x:499:76:Saslauthd user:/var/empty/saslauth:/sbin/nologin postfix:x:89:89::/var/spool/postfix:/sbin/nologin sshd:x:74:74:Privilege-separated SSH:/var/empty/sshd:/sbin/nologin dbus:x:81:81:System message bus:/:/sbin/nologin tcpdump:x:72:72::/:/sbin/nologin cairui:x:500:500::/home/cairui:/bin/bash nginx:x:501:501::/home/nginx:/sbin/nologin mysql:x:502:502::/home/mysql:/sbin/nologin zabbix:x:503:503::/home/zabbix:/sbin/nologin
打印1个100行文本的第20到30行
[root@VM_0_7_centos ~]# seq 100 | awk '{if(NR>=20 && NR<=30) print $1}' 20 21 22 23 24 25 26 27 28 29 30
[root@VM_0_7_centos tmp]# awk -F"[ ,]+" '{print $3" "$6}' test cai is [root@VM_0_7_centos tmp]# cat test I am cai,my qq is
[root@VM_0_7_centos tmp]# awk '{count++;print $0;} END{print "user count is ",count}' /etc/passwd root:x:0:0:root:/root:/bin/bash bin:x:1:1:bin:/bin:/sbin/nologin daemon:x:2:2:daemon:/sbin:/sbin/nologin adm:x:3:4:adm:/var/adm:/sbin/nologin lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin sync:x:5:0:sync:/sbin:/bin/sync shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown halt:x:7:0:halt:/sbin:/sbin/halt mail:x:8:12:mail:/var/spool/mail:/sbin/nologin uucp:x:10:14:uucp:/var/spool/uucp:/sbin/nologin operator:x:11:0:operator:/root:/sbin/nologin games:x:12:100:games:/usr/games:/sbin/nologin gopher:x:13:30:gopher:/var/gopher:/sbin/nologin ftp:x:14:50:FTP User:/var/ftp:/sbin/nologin nobody:x:99:99:Nobody:/:/sbin/nologin vcsa:x:69:69:virtual console memory owner:/dev:/sbin/nologin abrt:x:173:173::/etc/abrt:/sbin/nologin ntp:x:38:38::/etc/ntp:/sbin/nologin saslauth:x:499:76:Saslauthd user:/var/empty/saslauth:/sbin/nologin postfix:x:89:89::/var/spool/postfix:/sbin/nologin sshd:x:74:74:Privilege-separated SSH:/var/empty/sshd:/sbin/nologin dbus:x:81:81:System message bus:/:/sbin/nologin tcpdump:x:72:72::/:/sbin/nologin cairui:x:500:500::/home/cairui:/bin/bash nginx:x:501:501::/home/nginx:/sbin/nologin mysql:x:502:502::/home/mysql:/sbin/nologin zabbix:x:503:503::/home/zabbix:/sbin/nologin user count is 27
[root@VM_0_7_centos tmp]# ll total 16 -rw-r--r-- 1 root root 12 Mar 21 11:08 1.txt -rw-r--r-- 1 root root 11 Mar 21 11:08 2.txt -rw-r--r-- 1 root root 616 Mar 21 13:44 services -rw-r--r-- 1 root root 18 Mar 22 14:30 test [root@VM_0_7_centos tmp]# ll |awk 'BEGIN{size=0;} {size=size+$5} END{print "[end] size is ",size}' 统计所占的字节数 [end] size is 657
运算符
[root@VM_0_7_centos tmp]# awk 'BEGIN{a=5;a+=5;print a}' 10
[root@VM_0_7_centos tmp]# awk 'BEGIN{a=1;b=2;print (a>2&&b>1,a=1||b>1)}' 0 1
[root@VM_0_7_centos tmp]# awk 'BEGIN{a=11;if(a>=9){print "ok"}}' ok
[root@VM_0_7_centos tmp]# awk -F ":" 'NF==8{print $0}' ll 打印字段数为8的行 zabbix:x:503:503::/home/zabbix:/sbin/nologin:ll [root@VM_0_7_centos tmp]# cat ll tcpdump:x:72:72::/:/sbin/nologin cairui:x:500:500::/home/cairui:/bin/bash nginx:x:501:501::/home/nginx:/sbin/nologin mysql:x:502:502::/home/mysql:/sbin/nologin zabbix:x:503:503::/home/zabbix:/sbin/nologin:ll
[root@VM_0_7_centos tmp]# ifconfig eth0 eth0 Link encap:Ethernet HWaddr 52:54:00:BE:3B:97 inet addr:172.17.0.7 Bcast:172.17.15.255 Mask:255.255.240.0 UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 RX packets:10059201 errors:0 dropped:0 overruns:0 frame:0 TX packets:9797173 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:1000 RX bytes:1673765442 (1.5 GiB) TX bytes:2246121551 (2.0 GiB) [root@VM_0_7_centos tmp]# ifconfig eth0 | awk -F [" ":]+ 'NR==2{print $4}' NR==2是取第二行 172.17.0.7
正则
[root@VM_0_7_centos tmp]# awk '/root/{print $0}' /etc/passwd 打印所有root行 root:x:0:0:root:/root:/bin/bash operator:x:11:0:operator:/root:/sbin/nologin
awk中的if
{ if ($1=="foo"){ if($2=="foo"){ print "uno" }else{ print "one" } }else if($1=="bar"){ print "two" }else{ print "three" } }
awk参考:https://www.cnblogs.com/ginvip/p/6352157.html
上一篇: 第一次WEB2.0创业日子回顾与总结
下一篇: 利用长尾词引爆淘宝免费流量的技巧