[root@ ~]# tar xf jdk-8u71-linux-x64.tar.gz
[root@ ~]# mv jdk1.8.0_71 /usr/local/java_1.8.0
[root@ ~]# ln -s /usr/local/java_1.8.0/bin/java /usr/bin/java
[root@ ~]# vim /etc/profile
# append:
export JAVA_HOME=/usr/local/java_1.8.0
export CLASSPATH=$CLASSPATH:$JAVA_HOME/lib:$JAVA_HOME/jre/lib
export PATH=$PATH:$JAVA_HOME/bin
[root@ ~]# source /etc/profile
Install directly from the RPM package: logstash-5.1.1.rpm
[root@ ~]# rpm -ivh logstash-5.1.1.rpm
After installation, Logstash lives in /usr/share/logstash/. For easier, unified management, set up a standard configuration layout.
# Unify the config directory. /etc/logstash already contains the config files after the
# RPM install, yet at startup Logstash also looks in /usr/share/logstash/config — why?
[root@ ~]# ln -s /etc/logstash /usr/share/logstash/config
[root@ ~]# ln -s /usr/share/logstash/bin/* /usr/local/bin/

# Adjust the JVM memory usage
[root@ ~]# vim /etc/logstash/jvm.options
-Xms128m
-Xmx256m

# Adjust the basic Logstash settings
[root@ ~]# vim /etc/logstash/logstash.yml
pipeline:
  workers: 4
  batch:
    size: 125
    delay: 5
path.config: /etc/logstash/conf.d
path.logs: /data/logs/logstash
http.port: 9600
http.host: "192.168.31.140"
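With the RPM install, the service can also be driven through the init system; a sketch assuming a systemd-based host where the package registered a unit (upstart-based hosts use initctl instead):

# start now and on boot (assumes a systemd host)
[root@ ~]# systemctl start logstash
[root@ ~]# systemctl enable logstash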
A quick test:
[root@ ~]# logstash -e 'input{stdin{}}output{stdout{codec=>rubydebug}}'
Sending Logstash's logs to /data/logs/logstash which is now configured via log4j2.properties
The stdin plugin is now waiting for input:
hello world
{
    "@timestamp" => 2017-06-21T06:55:00.471Z,
    "@version" => "1",
    "host" => "baseos-1",
    "message" => "hello world",
    "tags" => []
}
The example above takes data from standard input and emits it to standard output.

In the output, "@timestamp" marks the point in time at which the event occurred, "host" marks the host on which it occurred, "tags" marks attributes of some aspect of the event (it is an array and can hold several values), and "type" marks the event's unique type.

Elasticsearch stores all timestamps in UTC as long integers, eight hours behind Beijing time.
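If a human-readable local-time field is wanted on the event itself, a ruby filter can derive one from @timestamp. A minimal sketch, assuming a fixed +8h offset; the field name local_timestamp is illustrative, not part of the original pipeline:

filter {
    ruby {
        # Derive a Beijing-time string from the UTC @timestamp;
        # "local_timestamp" is an illustrative field name.
        code => "event.set('local_timestamp', (event.get('@timestamp').time + 8*60*60).strftime('%Y-%m-%d %H:%M:%S'))"
    }
}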
An array can hold one or more string values.
path => [ "/var/log/messages", "/var/log/*.log" ]
path => "/data/mysql/mysql.log"
If an option is given multiple times, the values are appended to the array; in this example, the path array contains three string elements.

A boolean must be true or false, written without quotes.
ssl_enable => true
A bytes field takes a unit. Supported units are SI (k M G T P E Z Y) and binary (Ki Mi Gi Ti Pi Ei Zi Yi); binary units are base-1024 and SI units are base-1000. Units are case-insensitive, and whitespace between the value and the unit is ignored. Without a unit, the value defaults to bytes.
my_bytes => "1113"   # 1113 bytes
my_bytes => "10MiB"  # 10485760 bytes
my_bytes => "100kib" # 102400 bytes
my_bytes => "180 mb" # 180000000 bytes
A codec name tells Logstash how data is encoded, and can be used in both the input and output stages. If the input and output use the right codecs, no separate filter is needed to massage the data.
codec => "json"
A hash is a set of key-value pairs; note that multiple pairs are separated by spaces, not commas.
match => {
    "field1" => "value1"
    "field2" => "value2"
    ...
}
A number must be a valid numeric value, either a float or an integer.
port => 33
A password is a single string value.
my_password => "password"
A path is a string representing a valid operating-system path.
my_path => "/tmp/logstash"
A string is a single character sequence, quoted with either double or single quotes:

name => "Hello world"
name => 'It\'s a beautiful day'
A Logstash pipeline must have an input and an output.

Only the stdin, file, and beats input plugins are introduced here.

An example stdin configuration:
input {
    stdin {
        add_field => {"key" => "value"}
        codec => "plain"
        tags => ["add"]
        type => "stdin"
    }
}
Output:
The stdin plugin is now waiting for input:
hello
{
    "@timestamp" => 2017-06-21T07:34:57.899Z,
    "@version" => "1",
    "host" => "baseos-1",
    "message" => "hello",
    "type" => "stdin",
    "key" => "value",
    "tags" => [
        [0] "add"
    ]
}
Parameters:

The file input plugin watches files for changes, wrapping each change as an event to process or pass along.
Logstash uses a Ruby gem called FileWatch to watch for file changes. The library supports glob expansion of file paths (absolute paths only; directories are not recursed automatically) and keeps a hidden data file (named .sincedb by default) to track the current read position in each watched log file. That data file records the inode, major number, minor number, and pos of every watched file.
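For illustration, each line of the .sincedb file covers one watched file and holds those four fields separated by spaces; the values below are made up:

# inode  major  minor  pos   (illustrative values)
2065602 0 64768 13471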
An example file input configuration:
input {
    file {
        path => ["/data/logs/nginx/*.log"]
        exclude => ["/data/logs/nginx/nginx.pid","/data/logs/nginx/error.log"]
        discover_interval => 5
        sincedb_path => "/data/database/logstash/.sincedb_nginx_log"
        sincedb_write_interval => 10
        type => "nginx_log"
        start_position => "beginning"
    }
}
Parameters:

Full reference: https://www.elastic.co/guide/en/logstash/5.0/plugins-inputs-file.html
[root@ ~]# vim /etc/logstash/conf.d/nginx_access.conf
input {
    file {
        path => ["/data/logs/nginx/*.log"]
        exclude => ["/data/logs/nginx/nginx.pid","/data/logs/nginx/error.log"]
        discover_interval => 5
        sincedb_path => "/data/database/logstash/.sincedb_nginx_log"
        sincedb_write_interval => 10
        type => "nginx_log"
        start_position => "beginning"
    }
}

output {
    stdout {
        codec => rubydebug
    }
}
Test output:
[root@baseos-1_192.168.31.140 ~]# logstash -f /etc/logstash/conf.d/nginx_access.conf
Sending Logstash's logs to /data/logs/logstash which is now configured via log4j2.properties
{
    "path" => "/data/logs/nginx/logstash.wangshenjin.com_access.log",
    "@timestamp" => 2017-06-21T07:10:36.270Z,
    "@version" => "1",
    "host" => "baseos-1",
    "message" => "192.168.31.140 - - [21/Jun/2017:15:10:36 +0800] \"GET / HTTP/1.1\" 200 33 \"-\" \"curl/7.19.7 (x86_64-redhat-linux-gnu) libcurl/7.19.7 NSS/3.27.1 zlib/1.2.3 libidn/1.18 libssh2/1.4.2\" \"-\"",
    "type" => "nginx_log",
    "tags" => []
}
The beats input plugin sets up a listening service that receives events sent by Filebeat or other Beats.

Filebeat is a rework of the original logstash-forwarder source code. In other words, Filebeat is the new logstash-forwarder, and the first choice on the shipper side of the Elastic Stack.

Filebeat configuration:
filebeat:
  prospectors:
    - paths:
        - /usr/local/nginx/logs/*.com.log
      input_type: log
      document_type: nginx-access
      tail_files: true
output:
  logstash:
    hosts: ["192.168.31.140:5044"]
shipper:
  tags: ["nginx_log"]
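To check the shipper side, Filebeat can be run in the foreground with logging to stderr; the config path here is an assumption:

filebeat -e -c /etc/filebeat/filebeat.yml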
Logstash configuration:
input {
    beats {
        port => 5044
    }
}

output {
    stdout {
        codec => rubydebug
    }
}
Result:
[root@ conf.d]# logstash -f filebeat.conf
Sending Logstash's logs to /data/logs/logstash which is now configured via log4j2.properties
{
    "@timestamp" => 2017-06-21T10:49:12.596Z,
    "offset" => 605,
    "@version" => "1",
    "input_type" => "log",
    "beat" => {
        "hostname" => "salt-master",
        "name" => "salt-master",
        "version" => "5.1.1"
    },
    "host" => "salt-master",
    "source" => "/usr/local/nginx/logs/access.log",
    "message" => "192.168.31.1 - - [24/Feb/2017:17:00:59 +0800] \"GET / HTTP/1.1\" 301 184 \"-\" \"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36\"",
    "type" => "nginx-access",
    "tags" => [
        [0] "beats_input_codec_plain_applied"
    ]
}
Parameters:

Full reference: https://www.elastic.co/guide/en/logstash/5.0/plugins-inputs-beats.html

Here easyrsa is used to generate the Logstash certificate.

If Logstash is reached by IP address, the certificate's subjectAltName field must include the Logstash IP; when signing with easyrsa, it can be added with --subject-alt-name:
./easyrsa gen-req logstash_server nopass                                      # generate the certificate signing request
./easyrsa --subject-alt-name="IP:192.168.31.140" sign server logstash_server  # sign the certificate
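A sketch of wiring the certificate into both ends, with illustrative paths under /etc/logstash/ssl/ and /etc/filebeat/ssl/ (note the beats input expects the private key in PKCS#8 format):

# Logstash side (paths are illustrative)
input {
    beats {
        port            => 5044
        ssl             => true
        ssl_certificate => "/etc/logstash/ssl/logstash_server.crt"
        ssl_key         => "/etc/logstash/ssl/logstash_server.key"
    }
}

# Filebeat side (filebeat.yml, paths are illustrative)
output:
  logstash:
    hosts: ["192.168.31.140:5044"]
    ssl:
      certificate_authorities: ["/etc/filebeat/ssl/ca.crt"]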
Logstash uses the grok filter to parse arbitrary text into structured output; it ships with roughly 120 matching patterns by default.

The grok syntax is %{SYNTAX:SEMANTIC}. SYNTAX is a variable defined in grok-patterns, and SEMANTIC is a name of your own choosing for the capture. Double quotes (") and square brackets ([]) must be escaped with a backslash (\).

SYNTAX is the pattern to match: for example, 3.14 matches the NUMBER pattern and 127.0.0.1 matches the IP pattern.

SEMANTIC is the identifier for the matched text fragment: for example, "3.14" could be the duration of an event, so it can simply be called "duration"; the string "55.3.244.1" could be identified as "client".

So a grok filter expression can be written as: %{NUMBER:duration} %{IP:client}

By default, every SEMANTIC value is stored as a string. To convert a captured value to another data type, for example string to integer, write it as: %{NUMBER:num:int}

An example; the sample log line is:
55.3.244.1 GET /index.html 15824 0.043
The filter configuration:
filter {
    grok {
        match => { "message" => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}" }
    }
}
The result:
55.3.244.1 GET /index.html 15824 0.043
{
    "duration" => "0.043",
    "request" => "/index.html",
    "@timestamp" => 2017-06-22T01:49:27.773Z,
    "method" => "GET",
    "bytes" => "15824",
    "@version" => "1",
    "host" => "baseos-1",
    "client" => "55.3.244.1",
    "message" => "55.3.244.1 GET /index.html 15824 0.043",
    "tags" => []
}
Nginx log format:
'$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
The filter configuration:
filter {
    if [type] == "nginx-access-log" {
        grok {
            match => { "message" => "%{IPORHOST:remote_addr} - %{USERNAME:remote_user} \[%{HTTPDATE:[@metadata][timestamp]}\] \"%{DATA:request}\" %{INT:status} %{INT:body_bytes_sent} \"%{DATA:http_referer}\" \"%{DATA:http_user_agent}\" \"%{USERNAME:http_x_forwarded_for}\"" }
        }
    }
}
The result:
Sending Logstash's logs to /data/logs/logstash which is now configured via log4j2.properties
{
    "remote_addr" => "192.168.31.130",
    "request" => "GET / HTTP/1.1",
    "body_bytes_sent" => "33",
    "message" => "192.168.31.130 - - [22/Jun/2017:13:50:33 +0800] \"GET / HTTP/1.1\" 200 33 \"-\" \"curl/7.19.7 (x86_64-redhat-linux-gnu) libcurl/7.19.7 NSS/3.14.0.0 zlib/1.2.3 libidn/1.18 libssh2/1.4.2\" \"-\"",
    "type" => "nginx-access-log",
    "tags" => [],
    "http_user_agent" => "curl/7.19.7 (x86_64-redhat-linux-gnu) libcurl/7.19.7 NSS/3.14.0.0 zlib/1.2.3 libidn/1.18 libssh2/1.4.2",
    "remote_user" => "-",
    "path" => "/data/logs/nginx/logstash.wangshenjin.com_access.log",
    "@timestamp" => 2017-06-22T05:50:34.860Z,
    "http_referer" => "-",
    "@version" => "1",
    "host" => "baseos-1",
    "http_x_forwarded_for" => "-",
    "status" => "200"
}
message is each log line as it is read in; IPORHOST, USERNAME, HTTPDATE, and so on are regex pattern names defined in patterns/grok-patterns. Write the expression against the actual log format.

Logstash ships with roughly 120 regex patterns by default; see: https://github.com/logstash-plugins/logstash-patterns-core/tree/master/patterns

In practice, log formats are often custom, and then we need to define our own regexes for the situation at hand.

Taking remote_addr and request as examples, define three patterns:
IPADDR [0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}
REQUESTPATH (?:/[\\A-Za-z0-9$.+!*'(){},~:;=@#% \[\]_<>^\-&?]*)+
REQUESTPRO ([^"]*)
Using the custom patterns above:
IPADDR [0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}
REQUESTPRO ([^"]*)
REQUESTPATH (?:/[\\A-Za-z0-9$.+!*'(){},~:;=@#% \[\]_<>^\-&?]*)+
NGXACCESSLOG %{IPADDR:client_ip} - (%{USERNAME:user}|-) \[%{HTTPDATE:log_timestamp}\] \"%{WORD:request_method} %{REQUESTPATH:request_path} %{REQUESTPRO:request_protocol}\" %{NUMBER:http_status} %{NUMBER:body_bytes_sent} (%{GREEDYDATA:http_referer}|-) \"%{DATA:http_user_agent}\" \"%{USERNAME:http_x_forwarded_for}\"
Logstash configuration:
filter {
    grok {
        patterns_dir => "/etc/logstash/conf.d/patterns/mypattern"
        match => { "message" => "%{NGXACCESSLOG}" }
    }
}

output {
    stdout {
        codec => rubydebug
    }
}
The result:
{ "log_timestamp" => "11/Oct/2017:19:32:22 +0800", "body_bytes_sent" => "13", "message" => "192.168.31.1 - - [11/Oct/2017:19:32:22 +0800] \"GET /test/t/ HTTP/1.1\" 200 13 \"-\" \"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36\" \"-\"", "type" => "logstash", "request_mathod" => "GET", "http_user_agent" => "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36", "path" => "/data/logs/nginx/logstash.wangshenjin.com_access.log", "@timestamp" => 2017-10-30T04:08:08.412Z, "http_referer" => "\"-\"", "@version" => "1", "host" => "baseos-1", "http_x_forwarded_for" => "-", "request_path" => "/test/t/", "client_ip" => "192.168.31.1", "http_status" => "200", "user" => "-", "request_protocol" => "HTTP/1.1" }
Grok debugging tool: https://grokdebug.herokuapp.com
# Create a directory to keep all custom grok patterns in one place
[root@ conf.d]# mkdir patterns
[root@ conf.d]# vim patterns/nginx_access
NGINXACCESS %{IPORHOST:remote_addr} - %{USERNAME:remote_user} \[%{HTTPDATE:log_timestamp}\] \"%{DATA:request}\" %{INT:status} %{INT:body_bytes_sent} \"%{DATA:http_referer}\" \"%{DATA:http_user_agent}\" \"%{USERNAME:http_x_forwarded_for}\"

[root@ conf.d]# vim nginx_access.conf
****
filter {
    if [type] == "nginx-access-log" {
        grok {
            patterns_dir => "/etc/logstash/conf.d/patterns"   # path to the custom patterns
            match => { "message" => "%{NGINXACCESS}" }
        }
    }
}
****
Format the date:
date {
    match => [ "log_timestamp", "dd/MMM/yyyy:HH:mm:ss Z" ]
}
The geoip filter uses the GeoIP database to show where a request came from: given an IP address, it supplies the matching regional information, including country, province/city, latitude and longitude, and so on.

Download the GeoIP database:
wget http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz
gzip -d GeoLite2-City.mmdb.gz
Configuration:
filter {
    ***
    geoip {
        source => "client_ip"
        fields => ["city_name", "country_name", "continent_code", "continent_name"]
        database => "/etc/logstash/GeoLite2-City.mmdb"
    }
}
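With this in place, an event whose client_ip resolves in the database gains a geoip sub-object, roughly as below (the values are illustrative). Note that private addresses such as 192.168.x.x do not resolve and get a _geoip_lookup_failure tag instead:

"geoip" => {
    "city_name" => "Hangzhou",
    "country_name" => "China",
    "continent_code" => "AS",
    "continent_name" => "Asia"
}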
The redis output configuration:
output {
    redis {
        host => "127.0.0.1"
        port => 6000
        password => "8a6715"
        data_type => "channel"
        key => "logstash-%{+yyyy.MM.dd}"
    }
}
The result in Redis:
127.0.0.1:6000> subscribe logstash-2017.06.22
Reading messages... (press Ctrl-C to quit)
1) "subscribe"
2) "logstash-2017.06.22"
3) (integer) 1
1) "message"
2) "logstash-2017.06.22"
3) "{\"remote_addr\":\"192.168.31.130\",\"request\":\"GET / HTTP/1.1\",\"body_bytes_sent\":\"33\",\"message\":\"192.168.31.130 - - [22/Jun/2017:15:51:22 +0800] \\\"GET / HTTP/1.1\\\" 200 33 \\\"-\\\" \\\"curl/7.19.7 (x86_64-redhat-linux-gnu) libcurl/7.19.7 NSS/3.14.0.0 zlib/1.2.3 libidn/1.18 libssh2/1.4.2\\\" \\\"-\\\"\",\"type\":\"nginx-access-log\",\"tags\":[],\"http_user_agent\":\"curl/7.19.7 (x86_64-redhat-linux-gnu) libcurl/7.19.7 NSS/3.14.0.0 zlib/1.2.3 libidn/1.18 libssh2/1.4.2\",\"remote_user\":\"-\",\"path\":\"/data/logs/nginx/logstash.wangshenjin.com_access.log\",\"@timestamp\":\"2017-06-22T07:51:23.484Z\",\"http_referer\":\"-\",\"@version\":\"1\",\"host\":\"baseos-1\",\"http_x_forwarded_for\":\"-\",\"status\":\"200\"}"
Parameters:

The elasticsearch output configuration:
output {
    elasticsearch {
        hosts => ["192.168.1.147:9200","192.168.1.151:9200"]
        index => "nginx-access-log-%{+YYYY.MM.dd}"   # define the index pattern
    }
}
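A quick way to confirm the index is being created, using one of the hosts configured above:

curl 'http://192.168.1.147:9200/_cat/indices?v'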
Parameters:

timeout: sets the timeout for network operations and for requests sent to Elasticsearch. If a timeout occurs, the request is retried.

Full reference: https://www.elastic.co/guide/en/logstash/5.0/plugins-outputs-elasticsearch.html