欢迎您访问程序员文章站，本站旨在为大家提供和分享程序员计算机编程知识！
您现在的位置是: 首页

awk的helloworld

程序员文章站 2022-07-12 15:18:39
...
两次过滤
# Pass 1 prints the second double-quote-delimited field of every line of
# icon.txt; pass 2 keeps only what precedes the first comma of that field.
awk -F\" '{ print $2 }' icon.txt \
  | awk -F, '{ print $1 }' > add.txt



#!/bin/awk -f
# Print every entry of ARGV together with its index, one per line.
BEGIN {
    idx = 0
    while (idx < ARGC) {
        printf("ARGV[%d]= %s\n", idx, ARGV[idx])
        idx++
    }
    # Stop inside BEGIN so the arguments are never opened as input files.
    exit
}

# awk -f awktest.sh aaa bbb
ARGV[0]= awk
ARGV[1]= aaa
ARGV[2]= bbb

# ./awktest.sh haha haoning gege
ARGV[0]= awk
ARGV[1]= haha
ARGV[2]= haoning
ARGV[3]= gege


解析个log

#!/bin/sh
# Build pms.xml from the local3 log file.
#
# Log lines containing "pms_z" are turned into <QuoteImages> entries and lines
# containing "pms_b" into <ReturnImages> entries. Only the last two
# whitespace-separated fields of a log line are used; joined by one space they
# form a "|"-delimited record whose fields are emitted as:
#   1 "<prefix>:<PageID>"  2 PageLink    3 Title   4 PublishTime
#   5 QuoteUser            6 PageMatrix  7 image id (pms_z) / image path (pms_b)
#   8 PageKeywords         9 PageDescription
# Records are sorted so that all images of one page (equal field 1) are
# adjacent and can be grouped under a single <ImageInfo> element.
#
# Changes compared with the earlier version of this script:
#   * the yen signs that had replaced backslashes (in the XML declaration and
#     in the "\n" inside the awk strings) are repaired, so the declaration
#     keeps its double quotes and the newline is a real newline;
#   * no closing tags are printed when a pattern matches no log line;
#   * every <Image> gets ImageName/ImageFormat filled the same way
#     (NOTE(review): previously only the first image of a page did; later
#     images had an empty name and the file name in ImageFormat - confirm
#     that the consumer expects the consistent form);
#   * sorting is done by sort(1) in the C locale instead of gawk-only asort(),
#     so any POSIX awk can run the script.
#
# PMS_LOG_FILE / PMS_XML_FILE may be set in the environment to override the
# default input and output paths.

PMS_LOG_FILE=${PMS_LOG_FILE:-/opt/log/local3/local3.log}
PMS_XML_FILE=${PMS_XML_FILE:-/opt/log/local3/pms.xml}

# awk functions shared by both generators.
#   open_page(rec, images_tag)  prints the <ImageInfo>/<Page> header of the
#                               record held in array rec and opens images_tag
#   close_page(images_tag)      closes what open_page opened
PMS_AWK_COMMON='
function open_page(rec, images_tag,    key_part) {
  split(rec[1], key_part, ":")
  print " <ImageInfo>\n <Page>"
  print " <PageID>" key_part[2] "</PageID>"
  print " <PageLink>" rec[2] "</PageLink>"
  print " <Title>" rec[3] "</Title>"
  print " <PublishTime>" rec[4] "</PublishTime>"
  print " <QuoteUser>" rec[5] "</QuoteUser>"
  print " <PageMatrix>" rec[6] "</PageMatrix>"
  print " <PageKeywords>" rec[8] "</PageKeywords>"
  print " <PageDescription>" rec[9] "</PageDescription>"
  print " </Page>"
  print " <Images>"
  print " <" images_tag ">"
}
function close_page(images_tag) {
  print " </" images_tag ">"
  print " </Images>"
  print " </ImageInfo>"
}
'

# Reduce each non-empty log line on stdin to
# "<second-to-last field> <last field>" and sort the results byte-wise.
pms_sorted_records() {
  awk 'NF { print $(NF-1) " " $NF }' | LC_ALL=C sort
}

# stdin: "pms_z" log lines; stdout: one <ImageInfo> element per page, listing
# a <QuoteImageID> for every record of that page.
pms_quote_images_xml() {
  pms_sorted_records | awk "$PMS_AWK_COMMON"'
    {
      split($0, mon, "|")
      # A new page starts on the first record and whenever field 1 changes.
      if (NR == 1 || mon[1] != page_key) {
        if (NR > 1) close_page("QuoteImages")
        open_page(mon, "QuoteImages")
        page_key = mon[1]
      }
      print " <QuoteImageID>" mon[7] "</QuoteImageID>"
    }
    END { if (NR > 0) close_page("QuoteImages") }
  '
}

# stdin: "pms_b" log lines; stdout: one <ImageInfo> element per page, with an
# <Image> element for every record of that page.
pms_return_images_xml() {
  pms_sorted_records | awk "$PMS_AWK_COMMON"'
    # ImageName is the third "/"-separated part of the path and ImageFormat
    # the second "."-separated part.
    function print_image(path,    dir_part, dot_part) {
      split(path, dir_part, "/")
      split(path, dot_part, ".")
      print " <Image>"
      print " <ImageName>" dir_part[3] "</ImageName>"
      print " <ImageFormat>" dot_part[2] "</ImageFormat>"
      print " <ImagePath>" path "</ImagePath>"
      print " <ImageSource></ImageSource>"
      print " </Image>"
    }
    {
      split($0, mon, "|")
      if (NR == 1 || mon[1] != page_key) {
        if (NR > 1) close_page("ReturnImages")
        open_page(mon, "ReturnImages")
        page_key = mon[1]
      }
      print_image(mon[7])
    }
    END { if (NR > 0) close_page("ReturnImages") }
  '
}

{
  echo '<?xml version="1.0" encoding="GBK"?>'
  echo "<Root>"
  grep "pms_z" "$PMS_LOG_FILE" | pms_quote_images_xml
  grep "pms_b" "$PMS_LOG_FILE" | pms_return_images_xml
  echo "</Root>"
} >"$PMS_XML_FILE"

#/usr/bin/rsync -av --password-file=/etc/rsyncd.secrets.nieting /opt/log/local3/pms.xml [email protected]::testpmsxml





时隔3年多,又整了一下
[code="java"]
# cat my.sh
#!/bin/sh
# Usage: my.sh CRAWL_FILE
#
# Turns a crawl result file into a download script (exe_wget.sh) and runs it,
# writing the output of that run to a.log.
#
# Relevant lines of CRAWL_FILE contain "BigPicImg", "SmallPicImg" or
# "Download". On each of them the text between the first pair of double quotes
# is a comma-separated list: a name followed by one or more URLs. For every
# such line a directory called <name> is created and the URLs are fetched to
#   <name>/<name>_big_<n>.jpg     (BigPicImg,   n = 1, 2, ...)
#   <name>/<name>_small_<n>.jpg   (SmallPicImg, n = 1, 2, ...)
#   <name>/<name>_apk.apk         (Download)
#
# Changes compared with the earlier version of this script:
#   * names and URLs come from crawled data and end up in a script that is
#     executed, so every generated argument is now single-quote escaped;
#   * "mkdir -p" is generated, so a directory shared by several sections no
#     longer produces an error;
#   * lines without a quoted name are skipped;
#   * no temporary files are left in (or required from) the working directory;
#   * a missing argument is reported on stderr with a non-zero status.

# emit_wget_cmds TAG EXT NUMBERED
# Reads crawl lines on stdin and prints the mkdir/wget commands for them.
#   TAG       middle part of the target file name (big, small, apk)
#   EXT       target file extension
#   NUMBERED  1 to append "_<n>" (position of the URL in the list) to the
#             target name, 0 to use the same target for every URL
emit_wget_cmds() {
  awk -v tag="$1" -v ext="$2" -v numbered="$3" '
    # Wrap s in single quotes for sh, escaping embedded single quotes
    # (\047 is the octal escape for the single quote character).
    function shq(s) {
      gsub(/\047/, "\047\\\047\047", s)
      return "\047" s "\047"
    }
    {
      split($0, quoted, "\"")
      count = split(quoted[2], item, ",")
      name = item[1]
      if (name == "") next
      print "mkdir -p -- " shq(name)
      for (i = 2; i <= count; i++) {
        target = name "/" name "_" tag (numbered + 0 ? "_" (i - 1) : "") "." ext
        print "wget -O " shq(target) " " shq(item[i])
      }
    }
  '
}

main() {
  if [ -z "$1" ]; then
    echo "please input crawl file name" >&2
    return 1
  fi

  {
    grep -- "BigPicImg" "$1" | emit_wget_cmds big jpg 1
    grep -- "SmallPicImg" "$1" | emit_wget_cmds small jpg 1
    grep -- "Download" "$1" | emit_wget_cmds apk apk 0
  } >exe_wget.sh

  chmod 700 exe_wget.sh
  ./exe_wget.sh >a.log
}

main "$@"
#
[/code]


[code="java"]
# cat new.sh
#!/bin/sh
# Usage: new.sh CRAWL_FILE
#
# Turns a crawl result file into a download script (7po_wget.sh). The script
# is only generated, not executed.
#
# Relevant lines of CRAWL_FILE contain "IconImg", "PicImg" or "Download". On
# each of them the fourth double-quote-delimited field (the text between the
# second pair of double quotes) is a comma-separated list: name, version, then
# one or more URLs. For every such line the directory
# /home/7po/<name>/<version> is created and the URLs are fetched
#   IconImg   into that directory (wget -P)
#   PicImg    into that directory, after dropping everything from the first
#             "?" of the URL onwards
#   Download  to /home/7po/<name>/<version>/<name>.apk
#
# Changes compared with the earlier version of this script:
#   * names, versions and URLs come from crawled data and end up in a shell
#     script, so every generated argument is now single-quote escaped;
#   * lines without a quoted name are skipped;
#   * no temporary files are left in (or required from) the working directory;
#   * a missing argument is reported on stderr with a non-zero status.

# emit_7po_cmds MODE
# Reads crawl lines on stdin and prints the mkdir/wget commands for them.
# MODE is icon, pic or apk and selects the wget form described above.
emit_7po_cmds() {
  awk -v mode="$1" '
    # Wrap s in single quotes for sh, escaping embedded single quotes
    # (\047 is the octal escape for the single quote character).
    function shq(s) {
      gsub(/\047/, "\047\\\047\047", s)
      return "\047" s "\047"
    }
    {
      split($0, quoted, "\"")
      count = split(quoted[4], item, ",")
      name = item[1]
      version = item[2]
      if (name == "") next
      dir = "/home/7po/" name "/" version
      print "mkdir -p -- " shq(dir)
      for (i = 3; i <= count; i++) {
        url = item[i]
        if (mode == "pic") sub(/\?.*/, "", url)
        if (mode == "apk")
          print "wget -O " shq(dir "/" name ".apk") " " shq(url)
        else
          print "wget -P " shq(dir) " " shq(url)
      }
    }
  '
}

main() {
  if [ -z "$1" ]; then
    echo "please input crawl file name" >&2
    return 1
  fi

  {
    grep -- "IconImg" "$1" | emit_7po_cmds icon
    grep -- "PicImg" "$1" | emit_7po_cmds pic
    grep -- "Download" "$1" | emit_7po_cmds apk
  } >7po_wget.sh
}

main "$@"
#
[/code]
相关标签: awk