Thoughts
Able to get working Athena on our production logs.
No more ELK Stack, Hadoop , Map reduce, Big Query.
Create database apilogs2
CREATE EXTERNAL TABLE IF NOT EXISTS apilogs2.elb_logs_raw_native (
request_timestamp string,
elb_name string,
request_ip string,
request_port int,
backend_ip string,
backend_port int,
request_processing_time double,
backend_processing_time double,
client_response_time double,
elb_response_code string,
backend_response_code string,
received_bytes bigint,
sent_bytes bigint,
request_verb string,
url string,
protocol string,
user_agent string,
ssl_cipher string,
ssl_protocol string )
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = '1','input.regex' = '([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*):([0-9]*) ([.0-9]*) ([.0-9]*) ([.0-9]*) (-|[0-9]*) (-|[0-9]*) ([-0-9]*) ([-0-9]*) \\\"([^ ]*) ([^ ]*) (- |[^ ]*)\\\" (\"[^\"]*\") ([A-Z0-9-]+) ([A-Za-z0-9.-]*)$' )
LOCATION 's3://....';
SELECT * FROM apilogs2.elb_logs_raw_native WHERE elb_response_code = '200' LIMIT 100;
SELECT * FROM apilogs2.elb_logs_raw_native WHERE elb_response_code = '200' and url like '%serialNumber%' LIMIT 100;
SELECT * FROM apilogs2.elb_logs_raw_native WHERE elb_response_code = '200'
and url like '%serialNumber%'
and user_agent like '%ok%' LIMIT 100; --18