awk的helloworld
程序员文章站
2022-07-12 15:18:39
...
两次过滤
# Two-pass filter over icon.txt: pass 1 keeps the second field when each line
# is split on double quotes, pass 2 keeps the first comma-separated field of
# that result. The final values are written to add.txt.
awk 'BEGIN { FS = "\"" } { print $2 }' icon.txt |
  awk 'BEGIN { FS = "," } { print $1 }' > add.txt
# awk -f awktest.sh aaa bbb
ARGV[0]= awk
ARGV[1]= aaa
ARGV[2]= bbb
# ./awktest.sh haha haoning gege
ARGV[0]= awk
ARGV[1]= haha
ARGV[2]= haoning
ARGV[3]= gege
解析个log
时隔3年多,又整了一下
[code="bash"]
# cat my.sh
#!/bin/sh
# my.sh - generate exe_wget.sh from a crawl dump, then run it.
#
# Usage: ./my.sh CRAWL_FILE
#
# Lines of CRAWL_FILE that contain "BigPicImg", "SmallPicImg" or "Download"
# are split on double quotes; the second piece is read as a comma-separated
# list NAME,URL[,URL...]. For each such line the generated script creates
# directory NAME and fetches every URL into it with wget:
#   BigPicImg   -> NAME/NAME_big_<n>.jpg
#   SmallPicImg -> NAME/NAME_small_<n>.jpg
#   Download    -> NAME/NAME_apk.apk
# The generated script is executed and its stdout is saved in a.log.

if [ -z "$1" ]; then
  echo "please input crawl file name" >&2
  exit 1
fi

crawl_file=$1
wget_script=exe_wget.sh

if [ ! -r "$crawl_file" ]; then
  echo "cannot read crawl file: $crawl_file" >&2
  exit 1
fi

# emit_wget_cmds TAG STEM EXT NUMBERED
#   TAG       pattern selecting the crawl lines to process
#   STEM      text placed between NAME and the optional sequence number
#   EXT       file extension, including the dot
#   NUMBERED  1 to append the 1-based URL position to STEM, 0 to omit it
# Writes the mkdir/wget commands for the selected lines to stdout.
emit_wget_cmds() {
  grep -- "$1" "$crawl_file" | awk -v stem="$2" -v ext="$3" -v numbered="$4" '
    # Wrap s in single quotes for the shell, escaping embedded single quotes.
    function shq(s) {
      gsub(/\047/, "\047\\\047\047", s)
      return "\047" s "\047"
    }
    {
      split($0, quoted, "\"")
      count = split(quoted[2], item, ",")
      name = item[1]
      if (name == "") next    # no quoted list on this line, nothing to fetch
      # -p: the same NAME shows up in several sections, so the directory
      # may already exist.
      print "mkdir -p " shq(name)
      for (i = 2; i <= count; i++) {
        seq = numbered ? (i - 1) : ""
        print "wget -O " shq(name "/" name stem seq ext) " " shq(item[i])
      }
    }'
}

# Lines are streamed straight from grep into awk, so no temp files are left
# behind in the working directory.
{
  echo '#!/bin/sh'
  emit_wget_cmds "BigPicImg" "_big_" ".jpg" 1
  emit_wget_cmds "SmallPicImg" "_small_" ".jpg" 1
  emit_wget_cmds "Download" "_apk" ".apk" 0
} > "$wget_script"

chmod 700 "$wget_script"
./"$wget_script" > a.log
#
[/code]
[code="bash"]
# cat new.sh
#!/bin/sh
# new.sh - generate 7po_wget.sh from a crawl dump (the script is only
# written, not executed).
#
# Usage: ./new.sh CRAWL_FILE
#
# Lines of CRAWL_FILE that contain "IconImg", "PicImg" or "Download" are
# split on double quotes; the fourth piece is read as a comma-separated list
# NAME,VERSION,URL[,URL...]. For each such line the generated script creates
# /home/7po/NAME/VERSION and fetches every URL with wget:
#   IconImg  -> wget -P DIR URL
#   PicImg   -> wget -P DIR URL, with everything from the first "?" removed
#   Download -> wget -O DIR/NAME.apk URL

if [ -z "$1" ]; then
  echo "please input crawl file name" >&2
  exit 1
fi

crawl_file=$1
wget_script=7po_wget.sh
download_root=/home/7po

if [ ! -r "$crawl_file" ]; then
  echo "cannot read crawl file: $crawl_file" >&2
  exit 1
fi

# emit_wget_cmds TAG MODE
#   TAG   pattern selecting the crawl lines to process
#   MODE  "dir"     download into the target directory (wget -P)
#         "noquery" like "dir", after dropping the query string of the URL
#         "apk"     download to DIR/NAME.apk (wget -O)
# Writes the mkdir/wget commands for the selected lines to stdout.
emit_wget_cmds() {
  grep -- "$1" "$crawl_file" | awk -v mode="$2" -v root="$download_root" '
    # Wrap s in single quotes for the shell, escaping embedded single quotes.
    function shq(s) {
      gsub(/\047/, "\047\\\047\047", s)
      return "\047" s "\047"
    }
    {
      split($0, quoted, "\"")
      count = split(quoted[4], item, ",")
      name = item[1]
      version = item[2]
      if (name == "") next    # no quoted list on this line, nothing to fetch
      dir = root "/" name "/" version
      print "mkdir -p " shq(dir)
      for (i = 3; i <= count; i++) {
        url = item[i]
        if (mode == "apk") {
          print "wget -O " shq(dir "/" name ".apk") " " shq(url)
          continue
        }
        if (mode == "noquery") {
          cut = index(url, "?")
          if (cut > 0)
            url = substr(url, 1, cut - 1)
        }
        print "wget -P " shq(dir) " " shq(url)
      }
    }'
}

# Lines are streamed straight from grep into awk, so no temp files are left
# behind in the working directory.
{
  echo '#!/bin/sh'
  emit_wget_cmds "IconImg" "dir"
  emit_wget_cmds "PicImg" "noquery"
  emit_wget_cmds "Download" "apk"
} > "$wget_script"
#
[/code]
# Two-pass filter over icon.txt: pass 1 keeps the second field when each line
# is split on double quotes, pass 2 keeps the first comma-separated field of
# that result. The final values are written to add.txt.
awk 'BEGIN { FS = "\"" } { print $2 }' icon.txt |
  awk 'BEGIN { FS = "," } { print $1 }' > add.txt
#!/bin/awk -f
# Print every entry of ARGV, one per line as "ARGV[<index>]= <value>",
# then exit from BEGIN so that no input is read.
BEGIN {
    idx = 0
    while (idx < ARGC) {
        printf("ARGV[%d]= %s\n", idx, ARGV[idx])
        idx++
    }
    exit
}
# awk -f awktest.sh aaa bbb
ARGV[0]= awk
ARGV[1]= aaa
ARGV[2]= bbb
# ./awktest.sh haha haoning gege
ARGV[0]= awk
ARGV[1]= haha
ARGV[2]= haoning
ARGV[3]= gege
解析个log
#!/bin/sh
# Build /opt/log/local3/pms.xml from the "pms_z" and "pms_b" lines of
# /opt/log/local3/local3.log.
#
# For every matching log line the last two whitespace-separated fields are
# joined with a space and treated as one "|"-separated record:
#   mon[1] entry key; its second ":"-separated part is printed as PageID
#   mon[2] PageLink      mon[3] Title        mon[4] PublishTime
#   mon[5] QuoteUser     mon[6] PageMatrix   mon[7] image id / image path
#   mon[8] PageKeywords  mon[9] PageDescription
# Records are sorted so that records with the same entry key are adjacent;
# each distinct key yields one <ImageInfo> element.
#
# NOTE: asort() is a gawk extension, so "awk" must resolve to gawk.

log_file=/opt/log/local3/local3.log
xml_file=/opt/log/local3/pms.xml

{
  # Single-quoted so the double quotes of the XML declaration need no escapes.
  printf '%s\n' '<?xml version="1.0" encoding="GBK"?>'
  printf '%s\n' '<Root>'

  # ---- pms_z: pages with the ids of the images they quote ----
  grep "pms_z" "$log_file" | awk '
  BEGIN { indx = 0; arr[0] = 0 }
  {
    indx++
    arr[indx] = $(NF-1) " " $(NF)
  }
  END {
    # arr[0] is a numeric placeholder; it occupies tA[1] after sorting,
    # so the real records are tA[2..slen].
    slen = asort(arr, tA)
    tmpentry = ""
    if (slen > 1) {
      for (i = 2; i <= slen; i++) {
        split(tA[i], mon, "|")
        split(mon[1], monn, ":")
        if (tmpentry != mon[1]) {
          # New page: close the previous element (if any) and open a new one.
          if (i > 2) {
            print " </QuoteImages>"
            print " </Images>"
            print " </ImageInfo>"
          }
          print " <ImageInfo>\n <Page>"
          print " <PageID>" monn[2] "</PageID>"
          print " <PageLink>" mon[2] "</PageLink>"
          print " <Title>" mon[3] "</Title>"
          print " <PublishTime>" mon[4] "</PublishTime>"
          print " <QuoteUser>" mon[5] "</QuoteUser>"
          print " <PageMatrix>" mon[6] "</PageMatrix>"
          print " <PageKeywords>" mon[8] "</PageKeywords>"
          print " <PageDescription>" mon[9] "</PageDescription>"
          print " </Page>"
          print " <Images>"
          print " <QuoteImages>"
        }
        print " <QuoteImageID>" mon[7] "</QuoteImageID>"
        tmpentry = mon[1]
      }
      print " </QuoteImages>"
      print " </Images>"
      print " </ImageInfo>"
    }
  }'

  # ---- pms_b: pages with the image files they return ----
  grep "pms_b" "$log_file" | awk '
  BEGIN { indx = 0; arr[0] = 0 }
  {
    indx++
    arr[indx] = $(NF-1) " " $(NF)
  }
  END {
    slen = asort(arr, tA)
    tmpentry = ""
    # Guarded like the pms_z section so that no stray closing tags are
    # written when the log has no pms_b records.
    if (slen > 1) {
      for (i = 2; i <= slen; i++) {
        split(tA[i], mon, "|")
        split(mon[1], monn, ":")
        split(mon[7], monnn, "/")     # monnn[3]: third "/"-separated part
        split(mon[7], monnnn, ".")    # monnnn[2]: text after the first "."
        if (tmpentry != mon[1]) {
          if (i > 2) {
            print " </ReturnImages>"
            print " </Images>"
            print " </ImageInfo>"
          }
          print " <ImageInfo>\n <Page>"
          print " <PageID>" monn[2] "</PageID>"
          print " <PageLink>" mon[2] "</PageLink>"
          print " <Title>" mon[3] "</Title>"
          print " <PublishTime>" mon[4] "</PublishTime>"
          print " <QuoteUser>" mon[5] "</QuoteUser>"
          print " <PageMatrix>" mon[6] "</PageMatrix>"
          print " <PageKeywords>" mon[8] "</PageKeywords>"
          print " <PageDescription>" mon[9] "</PageDescription>"
          print " </Page>"
          print " <Images>"
          print " <ReturnImages>"
        }
        # Every image of a page is described the same way, first or not.
        print " <Image>"
        print " <ImageName>" monnn[3] "</ImageName>"
        print " <ImageFormat>" monnnn[2] "</ImageFormat>"
        print " <ImagePath>" mon[7] "</ImagePath>"
        print " <ImageSource></ImageSource>"
        print " </Image>"
        tmpentry = mon[1]
      }
      print " </ReturnImages>"
      print " </Images>"
      print " </ImageInfo>"
    }
  }'

  printf '%s\n' '</Root>'
} > "$xml_file"

# Optional upload step, disabled. NOTE(review): the user@host part of the
# rsync target below was replaced by "[email protected]" in the published
# copy and must be restored before enabling this line.
#/usr/bin/rsync -av --password-file=/etc/rsyncd.secrets.nieting /opt/log/local3/pms.xml [email protected]::testpmsxml
时隔3年多,又整了一下
[code="bash"]
# cat my.sh
#!/bin/sh
# my.sh - generate exe_wget.sh from a crawl dump, then run it.
#
# Usage: ./my.sh CRAWL_FILE
#
# Lines of CRAWL_FILE that contain "BigPicImg", "SmallPicImg" or "Download"
# are split on double quotes; the second piece is read as a comma-separated
# list NAME,URL[,URL...]. For each such line the generated script creates
# directory NAME and fetches every URL into it with wget:
#   BigPicImg   -> NAME/NAME_big_<n>.jpg
#   SmallPicImg -> NAME/NAME_small_<n>.jpg
#   Download    -> NAME/NAME_apk.apk
# The generated script is executed and its stdout is saved in a.log.

if [ -z "$1" ]; then
  echo "please input crawl file name" >&2
  exit 1
fi

crawl_file=$1
wget_script=exe_wget.sh

if [ ! -r "$crawl_file" ]; then
  echo "cannot read crawl file: $crawl_file" >&2
  exit 1
fi

# emit_wget_cmds TAG STEM EXT NUMBERED
#   TAG       pattern selecting the crawl lines to process
#   STEM      text placed between NAME and the optional sequence number
#   EXT       file extension, including the dot
#   NUMBERED  1 to append the 1-based URL position to STEM, 0 to omit it
# Writes the mkdir/wget commands for the selected lines to stdout.
emit_wget_cmds() {
  grep -- "$1" "$crawl_file" | awk -v stem="$2" -v ext="$3" -v numbered="$4" '
    # Wrap s in single quotes for the shell, escaping embedded single quotes.
    function shq(s) {
      gsub(/\047/, "\047\\\047\047", s)
      return "\047" s "\047"
    }
    {
      split($0, quoted, "\"")
      count = split(quoted[2], item, ",")
      name = item[1]
      if (name == "") next    # no quoted list on this line, nothing to fetch
      # -p: the same NAME shows up in several sections, so the directory
      # may already exist.
      print "mkdir -p " shq(name)
      for (i = 2; i <= count; i++) {
        seq = numbered ? (i - 1) : ""
        print "wget -O " shq(name "/" name stem seq ext) " " shq(item[i])
      }
    }'
}

# Lines are streamed straight from grep into awk, so no temp files are left
# behind in the working directory.
{
  echo '#!/bin/sh'
  emit_wget_cmds "BigPicImg" "_big_" ".jpg" 1
  emit_wget_cmds "SmallPicImg" "_small_" ".jpg" 1
  emit_wget_cmds "Download" "_apk" ".apk" 0
} > "$wget_script"

chmod 700 "$wget_script"
./"$wget_script" > a.log
#
[/code]
[code="bash"]
# cat new.sh
#!/bin/sh
# new.sh - generate 7po_wget.sh from a crawl dump (the script is only
# written, not executed).
#
# Usage: ./new.sh CRAWL_FILE
#
# Lines of CRAWL_FILE that contain "IconImg", "PicImg" or "Download" are
# split on double quotes; the fourth piece is read as a comma-separated list
# NAME,VERSION,URL[,URL...]. For each such line the generated script creates
# /home/7po/NAME/VERSION and fetches every URL with wget:
#   IconImg  -> wget -P DIR URL
#   PicImg   -> wget -P DIR URL, with everything from the first "?" removed
#   Download -> wget -O DIR/NAME.apk URL

if [ -z "$1" ]; then
  echo "please input crawl file name" >&2
  exit 1
fi

crawl_file=$1
wget_script=7po_wget.sh
download_root=/home/7po

if [ ! -r "$crawl_file" ]; then
  echo "cannot read crawl file: $crawl_file" >&2
  exit 1
fi

# emit_wget_cmds TAG MODE
#   TAG   pattern selecting the crawl lines to process
#   MODE  "dir"     download into the target directory (wget -P)
#         "noquery" like "dir", after dropping the query string of the URL
#         "apk"     download to DIR/NAME.apk (wget -O)
# Writes the mkdir/wget commands for the selected lines to stdout.
emit_wget_cmds() {
  grep -- "$1" "$crawl_file" | awk -v mode="$2" -v root="$download_root" '
    # Wrap s in single quotes for the shell, escaping embedded single quotes.
    function shq(s) {
      gsub(/\047/, "\047\\\047\047", s)
      return "\047" s "\047"
    }
    {
      split($0, quoted, "\"")
      count = split(quoted[4], item, ",")
      name = item[1]
      version = item[2]
      if (name == "") next    # no quoted list on this line, nothing to fetch
      dir = root "/" name "/" version
      print "mkdir -p " shq(dir)
      for (i = 3; i <= count; i++) {
        url = item[i]
        if (mode == "apk") {
          print "wget -O " shq(dir "/" name ".apk") " " shq(url)
          continue
        }
        if (mode == "noquery") {
          cut = index(url, "?")
          if (cut > 0)
            url = substr(url, 1, cut - 1)
        }
        print "wget -P " shq(dir) " " shq(url)
      }
    }'
}

# Lines are streamed straight from grep into awk, so no temp files are left
# behind in the working directory.
{
  echo '#!/bin/sh'
  emit_wget_cmds "IconImg" "dir"
  emit_wget_cmds "PicImg" "noquery"
  emit_wget_cmds "Download" "apk"
} > "$wget_script"
#
[/code]
上一篇: bbd 数据标注信息使用 AWK 抽取
推荐阅读
-
creo4.0同一曲面怎么添加两种不同的颜色?
-
快播还能继续使用吗?快播点播技术关闭后的解决办法
-
AE怎么排列图层? ae由上往下排列的教程
-
暴风影音盒子打不开的解决方法
-
重庆分数线最低的本科大学名单汇总(2021年参考)
-
使用Spring Security控制会话的方法
-
Spring Boot报错:No session repository could be auto-configured, check your configuration的解决方法
-
2021年陕西450分理科能上什么大学?陕西450分的二本学校名单
-
2021年河北450分理科能上什么大学?附河北450分的公办二本名单
-
爱思助手工具箱连接不上的原因及解决方法