Monday, December 26, 2016

AWS Athena - ELB Log Analaysis



Thoughts


Able to get working Athena on our production logs.

No more ELK Stack, Hadoop , Map reduce, Big Query.


Create database apilogs2

CREATE EXTERNAL TABLE IF NOT EXISTS apilogs2.elb_logs_raw_native (
  request_timestamp string,
  elb_name string,
  request_ip string,
  request_port int,
  backend_ip string,
  backend_port int,
  request_processing_time double,
  backend_processing_time double,
  client_response_time double,
  elb_response_code string,
  backend_response_code string,
  received_bytes bigint,
  sent_bytes bigint,
  request_verb string,
  url string,
  protocol string,
  user_agent string,
  ssl_cipher string,
  ssl_protocol string )
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'

WITH SERDEPROPERTIES (
         'serialization.format' = '1','input.regex' = '([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*):([0-9]*) ([.0-9]*) ([.0-9]*) ([.0-9]*) (-|[0-9]*) (-|[0-9]*) ([-0-9]*) ([-0-9]*) \\\"([^ ]*) ([^ ]*) (- |[^ ]*)\\\" (\"[^\"]*\") ([A-Z0-9-]+) ([A-Za-z0-9.-]*)$' )

LOCATION 's3://....';


SELECT * FROM apilogs2.elb_logs_raw_native WHERE elb_response_code = '200' LIMIT 100;


SELECT * FROM apilogs2.elb_logs_raw_native WHERE elb_response_code = '200' and url like '%serialNumber%' LIMIT 100;


SELECT * FROM apilogs2.elb_logs_raw_native WHERE elb_response_code = '200'
and url like '%serialNumber%'
and user_agent like '%ok%' LIMIT 100; --18