nginx+flume+hdfs搭建实时日志收集系统
程序员文章站
2022-04-14 10:29:34
1、配置nginx.conf,添加以下配置
http {
#配置日志格式
log_format lf '$remote_addr^A$msec^A$http_...
1、配置nginx.conf,添加以下配置
http {
    # Define the log format "lf": client address, request timestamp ($msec),
    # Host header and request URI, separated by ^A.
    log_format lf '$remote_addr^A$msec^A$http_host^A$request_uri';

    server {
        listen 80;
        server_name localhost;

        location / {
            # Write access-log entries for this location using the "lf" format.
            access_log /home/bxp/Documents/install/tengine-2.2.0/log/nginx/access.log lf;
            root html;
        }
    }
}
2、重启nginx
# Restart nginx so the new log_format / access_log settings take effect.
# NOTE(review): assumes nginx is managed as a systemd unit named "nginx" — confirm for a source-built tengine install.
systemctl restart nginx
3、创建flume agent文件,内容如下
# Define the agent's components: one source, one channel, one sink.
agent.sources = r2
agent.channels = c2
agent.sinks = k2

# Source: run a shell command and turn each output line into an event.
agent.sources.r2.type = exec
# Use "tail -F" (not "-f") so the source keeps following the file by name
# after the access log is rotated or recreated.
agent.sources.r2.command = tail -F /home/bxp/Documents/install/tengine-2.2.0/log/nginx/access.log
agent.sources.r2.shell = /bin/bash -c

# Channel: buffer events in memory between the source and the sink.
agent.channels.c2.type = memory
# Maximum number of events the channel can hold.
agent.channels.c2.capacity = 1000
# Maximum number of events per transaction (one source put or one sink take).
agent.channels.c2.transactionCapacity = 100

# Sink: write events to HDFS, into one directory per day.
agent.sinks.k2.type = hdfs
agent.sinks.k2.hdfs.path = hdfs://hadoop-series.bxp.com:8020/user/bxp/flume/tracker/%Y/%m/%d
agent.sinks.k2.hdfs.fileType = DataStream
agent.sinks.k2.hdfs.writeFormat = Text
# Number of events written to the file before it is flushed to HDFS.
agent.sinks.k2.hdfs.batchSize = 10
# Resolve %Y/%m/%d in hdfs.path from the agent's local time rather than
# from a timestamp header on the event.
agent.sinks.k2.hdfs.useLocalTimeStamp = true

# Bind the source and the sink to the channel.
agent.sources.r2.channels = c2
agent.sinks.k2.channel = c2
4、将hdfs和flume集成所需的jar拷贝到flume的lib目录下,需要的jar如下
${HADOOP_HOME}/share/hadoop/common/lib/commons-configuration-1.6.jar
${HADOOP_HOME}/share/hadoop/common/lib/hadoop-auth-2.6.0-cdh5.10.0.jar
${HADOOP_HOME}/share/hadoop/common/hadoop-common-2.6.0-cdh5.10.0.jar
${HADOOP_HOME}/share/hadoop/hdfs/hadoop-hdfs-2.6.0-cdh5.10.0.jar
5、启动HDFS
6、启动flume
# Start the Flume agent named "agent" (must match the property prefix in the agent file),
# using conf/ as the configuration directory and logging at DEBUG level to the console.
bin/flume-ng agent --conf conf/ --name agent --conf-file conf/agent-commad.conf -Dflume.root.logger=DEBUG,console