欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页  >  数据库

贴一段 nginx日志对应放入mysql的python工具_MySQL

程序员文章站 2022-04-12 21:05:46
...
Nginxpython
#!/usr/bin/python# -*- coding: utf-8 -*-"""用于切分 nginx日志nginx日志 格式:log_format  access  '$remote_addr - $remote_user [$time_local] "$request" '              '$status $body_bytes_sent "$http_referer" '              '"$http_user_agent" $http_x_forwarded_for';INSERT INTO `nginxlog` (`status`, `remote_user`, `http_referer`, `remote_addr`, `http_x_forwarded_for`, `hostname`, `request`, `request_type`, `http_user_agent`, `time_local`) VALUES ('2', '2', '2', '2', '2', '2', '2', '2', '2', '2')table sql:CREATE TABLE `nginxlog` (  `status` int(4) DEFAULT NULL,  `remote_user` varchar(20) DEFAULT NULL,  `http_referer` text,  `remote_addr` varchar(20) DEFAULT NULL,  `http_x_forwarded_for` varchar(20) DEFAULT NULL,  `hostname` varchar(50) DEFAULT NULL,  `request` varchar(200) DEFAULT NULL,  `request_type` varchar(10) DEFAULT NULL,  `http_user_agent` varchar(200) DEFAULT NULL,  `time_local` int(10) unsigned DEFAULT NULL,  `server_id` int(5) unsigned DEFAULT NULL) ENGINE=MyISAM DEFAULT CHARSET=utf8"""#--------------------------config----------------------------------------------------------------------#configmysql = {}mysql['mhost'] = 'localhost'mysql['muser'] = 'root'mysql['mpwd'] = ''mysql['mport'] = '3306'mysql['mdbname'] = 'nginxlog' #获取当前执行目录  工作目录ROOT = '/home/nginxweblog/app/'#ftp 目录  用于存放生产 原始 日志Sourcedatadir = '/home/nginxweblog/sourcedata/'#log runLogfile = ROOT+'nginxlog.log'#--------------------------------import--------------------------------------------------------import re,time,osimport MySQLdb#--------------------------------Sublog--------------------------------------------------------#分析 每行的 nginxlog 数据class Sublog:    def __init__(self, logline=None):        if logline:            self.logline = logline.strip()        self.logdate = {}        #self.run()     def run(self):        if self.logline:            self.splitlog()            self.getlog()     def splitlog(self):        # 按照空格切分日志        self.splitlogd = re.split(' ', self.logline)        #print self.splitlogd        return self.splitlogd     def getlog(self):        if len(self.splitlogd) >= 20:            self.logdate['remote_addr'] = self.splitlogd[0]            self.logdate['remote_user'] = self.splitlogd[2]            self.logdate['time_local'] = time.mktime(time.strptime(self.splitlogd[3], '[%d/%b/%Y:%H:%M:%S'))            self.logdate['request_type'] = self.splitlogd[5].replace('/"', '')            self.logdate['request'] = self.splitlogd[6]            self.logdate['status'] = self.splitlogd[8]            self.logdate['http_referer'] = MySQLdb.escape_string(self.splitlogd[10].replace('/"', ''))            self.logdate['http_x_forwarded_for'] = self.splitlogd[len(self.splitlogd)-1]            self.get_http_user_agent()            self.get_hostname()     def get_http_user_agent(self):        agent = re.compile(r'/"(.*?)/"')        self.logdate['http_user_agent'] = MySQLdb.escape_string(agent.findall(self.logline)[2])     def get_hostname(self):        hostname_r = re.compile(r'http://(.*?)/')        ishostname = hostname_r.findall(self.splitlogd[10])        #print ishostname        if len(ishostname) >= 1:            self.logdate['hostname'] = ishostname[0]        else:            self.logdate['hostname'] = '-'         #--------------------------------insert_log---------------------------------------------#   生成 csv 数据  |||  分割 数据  , 主要用于 mysql快速导入class insert_log(Sublog):    def __init__(self, logpath, serverid):        Sublog.__init__(self)        self.nowtime = time.strftime("%Y_%m_%d",time.localtime(time.time()))        self.sunlogfile = "nginxlog_"+self.nowtime+'.csv'        self.logpath = logpath        self.serverid = serverid        self.numbres = 0         #self.Mysql_db_instance = Mysql_db()     def createsubfile(self):        self.opencsvfile()        self.subfile = open(ROOT+'CSV/'+self.sunlogfile, 'a')                     olog = open(self.logpath)        for i in olog:                 #self.logdate = {}            self.logline = i.strip()            self.run()            if self.logdate:                #print self.logdate                self.createsql()                #self.insertdb()                #time.sleep(3)                self.numbres = self.numbres + 1        self.subfile.close()        return [ROOT+'CSV/'+self.sunlogfile, self.sunlogfile, self.numbres]     def insertdb(self):        sql = "INSERT INTO `nginxlog` (`status`, `remote_user`, `http_referer`, `remote_addr`, `http_x_forwarded_for`, `hostname`, `request`, `request_type`, `http_user_agent`, `time_local`) VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');" % (self.logdate['status'], self.logdate['remote_user'], self.logdate['http_referer'], self.logdate['remote_addr'], self.logdate['http_x_forwarded_for'], self.logdate['hostname'], self.logdate['request'], self.logdate['request_type'], self.logdate['http_user_agent'], self.logdate['time_local'])        #print sql        self.subfile.write(sql + '/n')        #self.Mysql_db_instance.insertsql(sql)        #self.Mysql_db_instance.commit()    def createsql(self):        c = "%s|||%s|||%s|||%s|||%s|||%s|||%s|||%s|||%s|||%s|||%s" % (self.logdate['status'], self.logdate['remote_user'], self.logdate['http_referer'], self.logdate['remote_addr'], self.logdate['http_x_forwarded_for'], self.logdate['hostname'], self.logdate['request'], self.logdate['request_type'], self.logdate['http_user_agent'], self.logdate['time_local'],self.serverid)        self.subfile.write(c + '/n')    def opencsvfile(self):        if os.path.isdir(ROOT+'CSV'):            pass        else:            os.mkdir(ROOT+'CSV')#--------------------------------function ---------------------------------------------def load_mysql(csvfile):    if mysql['mpwd']:        os.system("""mysql -h%s -u%s -p%s -e  "LOAD DATA INFILE '%s' INTO TABLE nginxlog.nginxlog FIELDS TERMINATED BY '|||';" && rm -rf %s """ % (mysql['mhost'], mysql['muser'], mysql['mpwd'], csvfile, csvfile))    else:        os.system("""mysql -h%s -u%s -e  "LOAD DATA INFILE '%s' INTO TABLE nginxlog.nginxlog FIELDS TERMINATED BY '|||';" && rm -rf %s """ % (mysql['mhost'], mysql['muser'], csvfile, csvfile)) #索引处理 type = 0  删除索引, 1创建索引def mysqlindex(type):    if mysql['mpwd']:        ism = "mysql -h%s -u%s -p%s -e " % (mysql['mhost'], mysql['muser'], mysql['mpwd'])    else:        ism = "mysql -h%s -u%s -e  " % (mysql['mhost'], mysql['muser'])    if type == 0:        os.system(" %s 'alter table nginxlog.nginxlog drop index time_local' " % ism)        os.system(" %s 'alter table nginxlog.nginxlog drop index hostname' " % ism)        os.system(" %s 'alter table nginxlog.nginxlog drop index remote_addr' " % ism)    elif type == 1:        os.system(" %s 'alter table nginxlog.nginxlog add index time_local(time_local)' " % ism)        os.system(" %s 'alter table nginxlog.nginxlog add index hostname(hostname)' " % ism)        os.system(" %s 'alter table nginxlog.nginxlog add index remote_addr(remote_addr)' " % ism)""" 日志记录函数 """def write_logs(logconten):    logfile_path = Logfile    if logfile_path and logconten:        log_write = open(logfile_path, 'a')        log_write.write(logconten+'/n')        log_write.close()##--------------------------------运行---------------------------------------------def run():    list = os.listdir(Sourcedatadir)    print 'start.......'    write_logs('-----------%s start.....----%s-----------------' % ( time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time())), str(list)) )     for line in list:        if os.path.isfile(Sourcedatadir+line):                         s = re.split('_', line)            if re.match(r'^/d.*$', s[0]):                write_logs('time(%s) logfile(%s) start ....' % (time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time())), line))                insert_log_instance = insert_log(Sourcedatadir+line, s[0])                gfiledata = insert_log_instance.createsubfile()                load_mysql(gfiledata[0])                 os.remove(Sourcedatadir+line)                write_logs('time(%s) logfile(%s) log numbers(%s) stop ....' % (time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time())), line, str(gfiledata[2])) )                print Sourcedatadir+line+ ' OK ....... ' #--------------------------------  exece ---------------------------------------------if __name__ == "__main__":    #run_inotify(Sourcedatadir, run)    run()