in athena_glue_service_logs/elb_classic.py [0:0]
def _build_storage_descriptor(self, partition_values=None):
    """Return the storage descriptor dict for the table or one of its partitions.

    The descriptor lists the log columns, the data location, the text
    input/output formats and the GrokSerDe settings used to parse each line.

    Args:
        partition_values: Values handed to
            ``self.partitioner.build_partitioned_path`` to compute
            ``Location``. ``None`` (the default) is treated as an empty list.

    Returns:
        dict: A new dictionary with the keys ``Columns``, ``Location``,
        ``InputFormat``, ``OutputFormat``, ``SerdeInfo``, ``BucketColumns``
        and ``Parameters``.
    """
    # (column name, column type) pairs, in the order the fields are captured
    # by the grok pattern below.
    column_specs = (
        ("time", "string"),
        ("elb", "string"),
        ("client_ip_port", "string"),
        ("target_ip_port", "string"),
        ("request_processing_time", "double"),
        ("target_processing_time", "double"),
        ("response_processing_time", "double"),
        ("elb_status_code", "string"),
        ("target_status_code", "string"),
        ("received_bytes", "bigint"),
        ("sent_bytes", "bigint"),
        ("request_verb", "string"),
        ("request_url", "string"),
        ("request_proto", "string"),
        ("user_agent", "string"),
        ("ssl_cipher", "string"),
        ("ssl_protocol", "string"),
    )

    # NOTE(review): the pattern casts received_bytes/sent_bytes with ":int"
    # while the columns above are declared "bigint" - confirm this is intended.
    serde_parameters = {
        "input.format": "%{NOTSPACE:time} %{NOTSPACE:elb} %{NOTSPACE:client_ip_port} %{NOTSPACE:target_ip_port} %{BASE10NUM:request_processing_time:double} %{BASE10NUM:target_processing_time:double} %{BASE10NUM:response_processing_time:double} %{NOTSPACE:elb_status_code} %{NOTSPACE:target_status_code} %{NOTSPACE:received_bytes:int} %{NOTSPACE:sent_bytes:int} \"%{NOTSPACE:request_verb} %{NOTSPACE:request_url} %{INSIDE_QS:request_proto}\" %{QS:user_agent} %{NOTSPACE:ssl_cipher} %{NOTSPACE:ssl_protocol}",  # noqa pylint: disable=C0301
        # Custom pattern: any run of characters up to the next double quote.
        "input.grokCustomPatterns": "INSIDE_QS ([^\\\"]*)",
    }

    # No partition values means the unpartitioned (table-level) path.
    path_values = [] if partition_values is None else partition_values
    location = self.partitioner.build_partitioned_path(path_values)

    return {
        "Columns": [
            {"Name": column_name, "Type": column_type}
            for column_name, column_type in column_specs
        ],
        "Location": location,
        "InputFormat": "org.apache.hadoop.mapred.TextInputFormat",
        "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
        "SerdeInfo": {
            "SerializationLibrary": "com.amazonaws.glue.serde.GrokSerDe",
            "Parameters": serde_parameters,
        },
        "BucketColumns": [],  # Required or SHOW CREATE TABLE fails
        "Parameters": {},  # Required or create_dynamic_frame.from_catalog fails for partitions
    }