Formatting nginx logs as JSON and shipping them to Kafka with Filebeat
The key directive for JSON-formatting nginx access logs is log_format, which is defined inside the http block.
For example:
log_format json_log '{"a":"b","c":"d"}';
access_log logs/access.log json_log;
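One caveat with hand-built JSON formats: by default nginx does not escape variable values, so a double quote in $request_uri or a header value can break the JSON. On nginx 1.11.8 and later you can add escape=json to the format definition. A minimal sketch (the field names here are just placeholders, not part of the configuration below):

log_format json_log escape=json '{"uri": "$request_uri", "status": "$status"}';
access_log logs/access.log json_log;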
nginx configuration:
http {
    log_format json_log '{"commonAnnotations": {'
                            '"log_province": "8310000",'
                            '"logTime": "$datetime",'
                            '"logID": "zqhw-ue-$time_str",'
                            '"Log_type": "008",'
                            '"dev_type": "Linux 2.6.32-358.el6.x86_64",'
                            '"equ_manuf": "042",'
                            '"dev_name": "yhtyapp051151",'
                            '"dev_mod": "Red Hat 4.4.7-3",'
                            '"devIPv4": "$server_addr",'
                            '"devIPv6": "fe80::250:56ff:febc:6d94/64",'
                            '"devMAC": "00:50:56:BC:6D:94",'
                            '"device_id": "SHWEBZQHWUE"},'
                        '"busiAnnotations": {'
                            '"collect_ipv4": "$server_addr",'
                            '"collect_ipv6": "fe80::250:56ff:febc:6d94/64",'
                            '"create_time": "$datetime",'
                            '"priority": "6",'
                            '"severity": "4",'
                            '"src_ipv4": "$x_forwarded_ip",'
                            '"src_ipv6": "",'
                            '"src_port": "$remote_port",'
                            '"protocol": "$server_protocol",'
                            '"dst_ipv4": "$server_addr",'
                            '"dst_ipv6": "",'
                            '"dst_port": "$server_port",'
                            '"http_method": "$request_method",'
                            '"user_name": "hgx",'
                            '"url": "$request_uri",'
                            '"response_code": "$status",'
                            '"bytes": "$body_bytes_sent"}}';

    server {
        listen 80;
        server_name kt.jiafeimao.com;
        root /app/htdocs/jiafeimao;

        add_header X-Frame-Options SAMEORIGIN;
        add_header Strict-Transport-Security "max-age=63072000; includeSubdomains; preload";
        add_header Content-Security-Policy none;
        add_header X-Content-Type-Options nosniff;
        add_header X-XSS-Protection 1;

        error_page 502 503 504 /404.html;
        default_type 'text/html';
        charset utf-8;

        location ~ .*\.(gif|jpg|png|jpeg|css|js|flv|ico|swf)(.*) {
            expires 1d;
        }

        # Capture the current time into variables:
        # $date = date, $datetime = date and time, $time_str = compact timestamp string
        if ($time_iso8601 ~ "^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})") {
            set $date "$1-$2-$3";
            set $datetime "$1-$2-$3 $4:$5:$6";
            set $time_str "$1$2$3$4$5$6";
        }

        # If X-Forwarded-For is set, take the real client IP. Behind a reverse
        # proxy the first address is the client; the following ones are the
        # proxy servers the request passed through.
        if ($http_x_forwarded_for ~ "^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})") {
            set $x_forwarded_ip "$1";
        }

        # One access log file per day ($date), written in the JSON format defined above
        access_log logs/access-$date.log json_log;

        # Deny access to uploaded PHP files
        location ~* ^/Uploads/.*\.(php|php5)$ { deny all; }

        location ~ /.*\.php/ {
            rewrite ^(.*?/?)(.*\.php)(.*)$ /$2?s=$3 last;
            break;
        }
    }
}
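With this format, every request becomes one JSON object per line in logs/access-YYYY-MM-DD.log. An abbreviated, made-up entry (the values are illustrative only) looks roughly like:

{"commonAnnotations": {"log_province": "8310000", "logTime": "2019-06-01 10:15:30", "logID": "zqhw-ue-20190601101530", "device_id": "SHWEBZQHWUE", ...}, "busiAnnotations": {"src_ipv4": "10.0.0.21", "src_port": "52344", "http_method": "GET", "url": "/index.html", "response_code": "200", "bytes": "512", ...}}

Note that nginx does not escape the variable values in this format, so a request URI containing a double quote would produce invalid JSON (see the escape=json note earlier).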
Filebeat configuration:
###################### Filebeat Configuration Example #########################
# This file highlights only the most common options. The filebeat.full.yml file
# in the same directory contains all supported options with more comments.
# Full reference: https://www.elastic.co/guide/en/beats/filebeat/index.html

#=========================== Filebeat prospectors =============================

filebeat.prospectors:

# Each - is a prospector. Most options can be set at the prospector level, so
# you can use different prospectors for various configurations.
- input_type: log

  # Paths that should be crawled and fetched. Glob based paths.
  paths:
    - /app/nginx/logs/access-*.log
    #- c:\programdata\elasticsearch\logs\*

  # Parse each line as JSON and place the decoded keys at the root of the event
  json.keys_under_root: true
  # Let the decoded JSON fields overwrite fields Filebeat would otherwise add itself
  json.overwrite_keys: true
  # json.message_key: the JSON key to apply line filtering and multiline to;
  # the value of that key must be a string

  # Exclude lines. A list of regular expressions; matching lines are dropped.
  #exclude_lines: ["^DBG"]

  # Include lines. A list of regular expressions; only matching lines are exported.
  #include_lines: ["^ERR", "^WARN"]

  # Exclude files. A list of regular expressions. By default, no files are dropped.
  #exclude_files: [".gz$"]

  # Optional additional fields to attach to each event for filtering
  #fields:
  #  level: debug
  #  review: 1

  ### Multiline options (for log messages spanning multiple lines,
  ### e.g. Java stack traces or C line continuations)
  #multiline.pattern: ^\[
  #multiline.negate: false
  #multiline.match: after

#================================ General =====================================

# The name of the shipper that publishes the network data.
#name:

# Tags included in their own field with each transaction published.
#tags: ["service-X", "web-tier"]

# Optional fields to add additional information to the output.
#fields:
#  env: staging

#================================ Outputs =====================================

#-------------------------- Elasticsearch output ------------------------------
#output.elasticsearch:
  # Array of hosts to connect to.
  #hosts: ["localhost:9200"]

  # Optional protocol and basic auth credentials.
  #protocol: "https"
  #username: "elastic"
  #password: "changeme"

#----------------------------- Logstash output --------------------------------
#output.logstash:
  # The Logstash hosts
  #hosts: ["localhost:5044"]

  # Optional SSL. By default it is off.
  # List of root certificates for HTTPS server verifications
  #ssl.certificate_authorities: ["/etc/pki/root/ca.pem"]

  # Certificate for SSL client authentication
  #ssl.certificate: "/etc/pki/client/cert.pem"

  # Client Certificate Key
  #ssl.key: "/etc/pki/client/cert.key"

#----------------------------- Kafka output -----------------------------------
# Kafka broker addresses; the topic consumed on the Kafka side must match the topic set here
output.kafka:
  enabled: true
  hosts: ["localhost:8184", "10.128.54.63:9092"]
  topic: "test"

#================================ Logging =====================================

# Sets log level. The default log level is info.
# Available log levels are: critical, error, warning, info, debug
#logging.level: debug

# At debug level, you can selectively enable logging only for some components.
# To enable all selectors use ["*"]. Examples of other selectors are "beat",
# "publish", "service".
#logging.selectors: ["*"]

# Drop fields we do not want in the output. @timestamp and type cannot be
# removed in older Filebeat versions; for newer versions check the official docs.
processors:
  - drop_fields:
      fields: ["@timestamp", "sort", "beat", "input_type", "offset", "source", "type"]
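Most of the file above is the stock example with everything commented out. Reduced to just the settings that are actually in effect, it is equivalent to roughly this (same values as above):

filebeat.prospectors:
- input_type: log
  paths:
    - /app/nginx/logs/access-*.log
  json.keys_under_root: true
  json.overwrite_keys: true

output.kafka:
  enabled: true
  hosts: ["localhost:8184", "10.128.54.63:9092"]
  topic: "test"

processors:
  - drop_fields:
      fields: ["@timestamp", "sort", "beat", "input_type", "offset", "source", "type"]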
Notes:
Filebeat startup commands:
cd into the Filebeat root directory.
Start with log output to the console (useful for debugging):
./filebeat -e -c ./filebeat.yml -d "publish"
Start in the background, discarding output (redirected to /dev/null):
nohup ./filebeat -e -c filebeat.yml >/dev/null 2>&1 &
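Whichever way you start it, it can help to validate the configuration first. On the 5.x releases this config targets (filebeat.prospectors / input_type) there is a -configtest flag; newer versions use ./filebeat test config instead:
./filebeat -configtest -c ./filebeat.yml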
Kafka startup commands:
Start ZooKeeper:
./bin/zookeeper-server-start.sh ./config/zookeeper.properties &
Start Kafka:
./bin/kafka-server-start.sh ./config/server.properties &
Create a topic:
./bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test
List topics:
./bin/kafka-topics.sh --list --zookeeper localhost:2181
Start a console producer and send a few messages:
./bin/kafka-console-producer.sh --broker-list localhost:9092 --topic test
In another terminal, start a console consumer to receive the messages:
./bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic test --from-beginning
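To check the whole pipeline end to end, send a request to the nginx vhost configured above and watch the topic from another terminal (hostname and topic are taken from the configs above; adjust them to your environment):

curl -H "Host: kt.jiafeimao.com" http://127.0.0.1/
./bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic test

Each request should show up shortly afterwards as one JSON message on the test topic.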