python 域名分析工具实现代码
程序员文章站
2022-05-25 17:39:51
代码如下: 复制代码 代码如下:import sys, urllib import datetime,time def getdate(): strday=datetime...
代码如下:
import sys, urllib
import datetime,time
def getdate():
    """Return today's local date as a ``YYYY-MM-DD`` string.

    ``geturl()`` uses the value as the file-name part of the daily list URL.
    """
    # isoformat() produces the same text as the date half of
    # str(datetime.now()), without building and re-splitting the timestamp.
    return datetime.date.today().isoformat()
#url = "http://www.kingnic.com/list/2009-06-16.txt"
def geturl(datestr=None):
    """Build the URL of a daily domain list on kingnic.com.

    Args:
        datestr: Date as it appears in the list file name, e.g.
            ``"2009-06-16"``. When omitted or empty, the value returned by
            ``getdate()`` is used instead.

    Returns:
        The list URL as a string, or ``None`` (after printing
        ``error date!``) when ``getdate()`` yields nothing usable.
    """
    baseurl = "http://www.kingnic.com/list/"
    if datestr:
        return baseurl + datestr + ".txt"
    thisdate = getdate()
    if not thisdate:
        print("error date!")
        return None
    return baseurl + thisdate + ".txt"
def getsource(url):
    """Download *url* and return the raw response body.

    The response object is closed even when reading fails. On Python 3 the
    body is returned as ``bytes``.
    """
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2
    # closing() is used because the Python 2 response object is not a
    # context manager by itself.
    with closing(urlopen(url)) as response:
        return response.read()
def save(source, filename="domains.txt"):
    """Write *source* to *filename*, replacing any existing content.

    Args:
        source: Text to store. A ``bytes`` payload is accepted as well and
            is written unchanged in binary mode.
        filename: Destination path; defaults to ``domains.txt``.

    Returns:
        ``True`` once the data has been written.
    """
    # On Python 2 ``bytes`` is ``str``, so this is false there and the file
    # is opened in text mode exactly as before.
    is_binary = isinstance(source, bytes) and not isinstance(source, str)
    # The with-block closes the file even if write() raises.
    with open(filename, "wb" if is_binary else "w") as fp:
        fp.write(source)
    return True
def loadlist(filename="domains.txt"):
    """Read *filename* and return its lines, line endings included.

    Args:
        filename: Path of the file to read; defaults to ``domains.txt``.

    Returns:
        A list with one string per line of the file.
    """
    # Open the requested file rather than a hard-coded "domains.txt".
    with open(filename, "r") as fp:
        return fp.readlines()
def getprefix(domain):
    """Return the part of *domain* before its first dot.

    Surrounding whitespace is stripped first, so a line that contains no dot
    does not keep its trailing newline in the result.
    """
    return domain.strip().split('.')[0]
def getpostfix(domain):
    """Return the suffix of *domain*: everything after its first dot.

    Surrounding whitespace (such as the newline kept by ``readlines()``) is
    stripped. Multi-label suffixes are returned whole (``"com.cn"``), and a
    name without any dot yields an empty string instead of raising
    ``IndexError``.
    """
    return domain.strip().partition('.')[2]
def hasmidline(domain):
    """Return ``True`` when *domain* contains a hyphen (``-``)."""
    return '-' in domain
def parser(domains):
    """Filter *domains* by prefix length and hyphen use, and print a summary.

    A domain is kept when the part before its first dot is between 0 and 4
    characters long and contains no ``-``.

    Args:
        domains: Sequence of domain strings, e.g. the lines returned by
            ``loadlist()``. Entries are passed through unchanged, so any
            trailing newline is preserved in the result.

    Returns:
        The list of domains that passed both checks, in input order.
    """
    # Named max_len/min_len so the builtins max() and min() are not shadowed.
    max_len = 4
    min_len = 0
    result = []
    len_num = 0
    mid_line_num = 0
    for domain in domains:
        if not domain.strip():
            # Blank lines are not domains; skip them without counting them.
            continue
        prefix = getprefix(domain)
        if not min_len <= len(prefix) <= max_len:
            len_num += 1
            continue
        if hasmidline(prefix):
            mid_line_num += 1
            continue
        result.append(domain)
    # Single-argument print() calls produce the same text on Python 2 and 3.
    print(" log : \n")
    print("all: \t%s" % len(domains))
    # The bounds are printed as (max, min) to keep the log format unchanged.
    print("len not in [%s,%s] \t: %s" % (max_len, min_len, len_num))
    print("contain '-' :\t%s" % mid_line_num)
    print("remain:\t%s" % len(result))
    return result
if __name__ == "__main__":
    # Fetch today's list and keep a local copy in domains.txt.
    url = geturl()
    if url is None:
        # geturl() has already printed the reason; there is nothing to fetch.
        sys.exit(1)
    source = getsource(url)
    save(source)
    # Re-read the saved copy line by line and filter it.
    domains = loadlist()
    result = parser(domains)
    # The kept lines still carry their newlines, so a plain join is enough.
    save("".join(result), "result.txt")
    print("\n\n\nfinished!!")
输出文件:
domains.txt : 从 kingnic.com 获取的当天释放的域名;
result.txt : 符合过滤条件的域名;
log输出:
all: 55500
len not in [4,0] : 55019
contain '-' : 32
remain: 449
finished!!
按后缀、长度以及是否含有“-”对域名进行过滤;目前过滤条件还比较少,其它条件以后有需要再加。
复制代码 代码如下:
import sys, urllib
import datetime,time
def getdate():
    """Return today's local date as a ``YYYY-MM-DD`` string.

    ``geturl()`` uses the value as the file-name part of the daily list URL.
    """
    # isoformat() produces the same text as the date half of
    # str(datetime.now()), without building and re-splitting the timestamp.
    return datetime.date.today().isoformat()
#url = "http://www.kingnic.com/list/2009-06-16.txt"
def geturl(datestr=None):
    """Build the URL of a daily domain list on kingnic.com.

    Args:
        datestr: Date as it appears in the list file name, e.g.
            ``"2009-06-16"``. When omitted or empty, the value returned by
            ``getdate()`` is used instead.

    Returns:
        The list URL as a string, or ``None`` (after printing
        ``error date!``) when ``getdate()`` yields nothing usable.
    """
    baseurl = "http://www.kingnic.com/list/"
    if datestr:
        return baseurl + datestr + ".txt"
    thisdate = getdate()
    if not thisdate:
        print("error date!")
        return None
    return baseurl + thisdate + ".txt"
def getsource(url):
    """Download *url* and return the raw response body.

    The response object is closed even when reading fails. On Python 3 the
    body is returned as ``bytes``.
    """
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2
    # closing() is used because the Python 2 response object is not a
    # context manager by itself.
    with closing(urlopen(url)) as response:
        return response.read()
def save(source, filename="domains.txt"):
    """Write *source* to *filename*, replacing any existing content.

    Args:
        source: Text to store. A ``bytes`` payload is accepted as well and
            is written unchanged in binary mode.
        filename: Destination path; defaults to ``domains.txt``.

    Returns:
        ``True`` once the data has been written.
    """
    # On Python 2 ``bytes`` is ``str``, so this is false there and the file
    # is opened in text mode exactly as before.
    is_binary = isinstance(source, bytes) and not isinstance(source, str)
    # The with-block closes the file even if write() raises.
    with open(filename, "wb" if is_binary else "w") as fp:
        fp.write(source)
    return True
def loadlist(filename="domains.txt"):
    """Read *filename* and return its lines, line endings included.

    Args:
        filename: Path of the file to read; defaults to ``domains.txt``.

    Returns:
        A list with one string per line of the file.
    """
    # Open the requested file rather than a hard-coded "domains.txt".
    with open(filename, "r") as fp:
        return fp.readlines()
def getprefix(domain):
    """Return the part of *domain* before its first dot.

    Surrounding whitespace is stripped first, so a line that contains no dot
    does not keep its trailing newline in the result.
    """
    return domain.strip().split('.')[0]
def getpostfix(domain):
    """Return the suffix of *domain*: everything after its first dot.

    Surrounding whitespace (such as the newline kept by ``readlines()``) is
    stripped. Multi-label suffixes are returned whole (``"com.cn"``), and a
    name without any dot yields an empty string instead of raising
    ``IndexError``.
    """
    return domain.strip().partition('.')[2]
def hasmidline(domain):
    """Return ``True`` when *domain* contains a hyphen (``-``)."""
    return '-' in domain
def parser(domains):
    """Filter *domains* by prefix length and hyphen use, and print a summary.

    A domain is kept when the part before its first dot is between 0 and 4
    characters long and contains no ``-``.

    Args:
        domains: Sequence of domain strings, e.g. the lines returned by
            ``loadlist()``. Entries are passed through unchanged, so any
            trailing newline is preserved in the result.

    Returns:
        The list of domains that passed both checks, in input order.
    """
    # Named max_len/min_len so the builtins max() and min() are not shadowed.
    max_len = 4
    min_len = 0
    result = []
    len_num = 0
    mid_line_num = 0
    for domain in domains:
        if not domain.strip():
            # Blank lines are not domains; skip them without counting them.
            continue
        prefix = getprefix(domain)
        if not min_len <= len(prefix) <= max_len:
            len_num += 1
            continue
        if hasmidline(prefix):
            mid_line_num += 1
            continue
        result.append(domain)
    # Single-argument print() calls produce the same text on Python 2 and 3.
    print(" log : \n")
    print("all: \t%s" % len(domains))
    # The bounds are printed as (max, min) to keep the log format unchanged.
    print("len not in [%s,%s] \t: %s" % (max_len, min_len, len_num))
    print("contain '-' :\t%s" % mid_line_num)
    print("remain:\t%s" % len(result))
    return result
if __name__ == "__main__":
    # Fetch today's list and keep a local copy in domains.txt.
    url = geturl()
    if url is None:
        # geturl() has already printed the reason; there is nothing to fetch.
        sys.exit(1)
    source = getsource(url)
    save(source)
    # Re-read the saved copy line by line and filter it.
    domains = loadlist()
    result = parser(domains)
    # The kept lines still carry their newlines, so a plain join is enough.
    save("".join(result), "result.txt")
    print("\n\n\nfinished!!")
输出文件:
domains.txt : 从 kingnic.com 获取的当天释放的域名;
result.txt : 符合过滤条件的域名;
log输出:
复制代码 代码如下:
all: 55500
len not in [4,0] : 55019
contain '-' : 32
remain: 449
finished!!
按后缀、长度以及是否含有“-”对域名进行过滤;目前过滤条件还比较少,其它条件以后有需要再加。
上一篇: 设计模式之观察者模式(三)
下一篇: JavaScript基础部分经典案例