| Current File : /home/jvzmxxx/wiki1/extensions/EventLogging/server/bin/eventlogging-processor |
#!/usr/bin/env python -OO
# -*- coding: utf-8 -*-
"""
eventlogging-processor
-----------------------------
Transform raw log stream to JSON event stream
  usage: eventlogging-processor [-h] [--sid SID]
                                [--output-invalid [OUTPUT_INVALID]]
                                [--etcd-uri ETCD_URI]
                                format input output [output ...]
positional arguments:
format Format string
input URI of raw input stream
output URIs of output streams
optional arguments:
-h, --help show this help message and exit
--sid SID set input socket identity
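    --output-invalid [OUTPUT_INVALID]
                          URI of stream to which invalid events are sent;
                          if given without a value, the first output URI
                          is used
    --etcd-uri ETCD_URI   etcd URI used to obtain a shared rotating token
                          for hashing client IPs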
formatters:
%h Client IP
%j JSON object
%q Query-string-encoded JSON
%t Timestamp in NCSA format.
:copyright: (c) 2012 by Ori Livneh <ori@wikimedia.org>
:license: GNU General Public Licence 2.0 or later
"""
from __future__ import unicode_literals
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import argparse
import logging
from eventlogging import (capsule_uuid, create_event_error, LogParser,
get_reader, get_writer, validate, setup_logging,
uri_force_raw, uri_append_query_items, keyhasher,
SharedRotatingToken, parse_etcd_uri)
from jsonschema import ValidationError
# Project-provided logging configuration (eventlogging.setup_logging).
setup_logging()

# Help texts for the two long optional arguments, kept separate so the
# argument definitions below stay readable.
_OUTPUT_INVALID_HELP = (
    'URI of output stream for invalid events. '
    'If this is given without a value, the first of the output URIs will be '
    'used to write invalid events. Invalid events are written using the '
    'EventError schema.'
)
_ETCD_URI_HELP = (
    'If set, etcd will be used to get a rotating shared hash token '
    'to use for anonymizing IP addresses during processing of raw data. '
    'This is useful if you are running multiple eventlogging-processors '
    'and want them all to consistently hash client IPs. '
    'ETCD_URI is of the form: http(s)://hostA:1234,hostB:2345?cert=/my/cert'
)

# Command-line interface. Because of fromfile_prefix_chars, an argument
# of the form '@path' makes argparse read further arguments from that file.
ap = argparse.ArgumentParser(
    fromfile_prefix_chars='@',
    description='Raw log -> JSON stream',
)

# Positional arguments.
ap.add_argument('format', help='Format string')
# The input URI is passed through uri_force_raw so that events are read
# in raw form. This keeps the reader from attempting to parse the input
# as JSON.
ap.add_argument('input', type=uri_force_raw, help='URI of raw input stream')
ap.add_argument('output', help='URIs of output streams', nargs='+')

# Optional arguments.
ap.add_argument(
    '--sid',
    help=(
        'Set ZeroMQ/Kafka identity. '
        'Only use this if your input URI starts with tcp:// or kafka://'
    ),
)
# nargs='?' combined with const/default gives three states:
#   flag absent         -> False
#   flag without value  -> True
#   flag with a value   -> that value (a URI string)
ap.add_argument(
    '--output-invalid',
    action='store',
    nargs='?',
    const=True,
    default=False,
    help=_OUTPUT_INVALID_HELP,
)
ap.add_argument(
    '--etcd-uri',
    action='store',
    default=False,
    help=_ETCD_URI_HELP,
)

args = ap.parse_args()
# Build the LogParser for this processor. When --etcd-uri is supplied,
# the URI is parsed into keyword arguments for a SharedRotatingToken
# named 'ip_hash'; the token is wrapped by keyhasher and handed to the
# LogParser as its 'ip_hasher', which (per the option's help text) is
# used to hash client IPs consistently across processors.
parser_kwargs = {}
if args.etcd_uri:
    shared_token = SharedRotatingToken(
        'ip_hash', **parse_etcd_uri(args.etcd_uri)
    )
    parser_kwargs['ip_hasher'] = keyhasher(shared_token)
parser = LogParser(args.format, **parser_kwargs)

# One writer per output URI; every valid event is sent to all of them.
writers_list = []
for output_uri in args.output:
    valid_writer = get_writer(output_uri)
    writers_list.append(valid_writer)
    logging.info('Publishing valid JSON events to %s.', output_uri)

# Pick the writer for invalid events, if any was requested.
if not args.output_invalid:
    # --output-invalid not given: invalid events are only logged.
    writer_invalid = None
elif args.output_invalid is True:
    # --output-invalid given without a value: reuse the first output
    # writer, and record its URI so the log line below names it.
    args.output_invalid = args.output[0]
    writer_invalid = writers_list[0]
    logging.info('Publishing invalid raw events to %s.', args.output_invalid)
else:
    # --output-invalid given with an explicit URI: dedicated writer.
    writer_invalid = get_writer(args.output_invalid)
    logging.info('Publishing invalid raw events to %s.', args.output_invalid)

# If a socket identity was requested, pass it along to the reader as
# an 'identity' query item on the input URI.
if args.sid:
    args.input = uri_append_query_items(args.input, {'identity': args.sid})
def write_event_error(
    writer,
    raw_event,
    error_message,
    error_code,
    parsed_event=None
):
    """
    Construct an EventError object and send it to writer.

    This is best-effort: any exception raised while building the
    EventError or while sending it is logged and swallowed, so a
    failure here never propagates to the caller.

    :param writer: object whose ``send`` method receives the EventError
    :param raw_event: the raw event, forwarded to create_event_error
    :param error_message: error description, forwarded to
        create_event_error
    :param error_code: error category, forwarded to create_event_error
        (callers in this file pass 'validation' or 'processor')
    :param parsed_event: the parsed event if parsing got that far,
        otherwise None; forwarded to create_event_error
    """
    try:
        event_error = create_event_error(
            raw_event,
            error_message,
            error_code,
            parsed_event
        )
        writer.send(event_error)
    except Exception as e:
        # Log the exception itself instead of e.message: that attribute
        # is deprecated (PEP 352), is empty for exceptions built with
        # anything other than exactly one argument, and does not exist
        # at all on Python 3, where reading it here would raise from
        # inside this handler. Passing e as a lazy logging argument
        # also avoids eager string formatting.
        logging.error('Unable to create EventError object: %s', e)
# Main processing loop: read raw events from the input stream, parse
# and validate each one, then publish it to every output writer.
# Events that fail are logged and, if an invalid-event writer was
# configured, reported through it as EventError objects.
for raw_event in get_reader(args.input):
    # Reset per iteration so the error handlers can tell whether
    # parsing got far enough to produce an event.
    event = None
    try:
        event = parser.parse(raw_event)
        # Drop these two capsule fields before validation.
        event.pop('clientValidated', None)
        event.pop('isTruncated', None)
        validate(event)
        event['uuid'] = capsule_uuid(event)

    except ValidationError as e:
        # jsonschema's ValidationError defines .message explicitly,
        # so it is safe to use here.
        logging.error('Unable to validate: %s (%s)', raw_event, e.message)
        if writer_invalid:
            write_event_error(
                writer_invalid, raw_event, e.message, 'validation', event
            )

    except Exception as e:
        # Format the exception itself rather than reading e.message:
        # on arbitrary exceptions that attribute is deprecated
        # (PEP 352), is empty when the exception was built with
        # anything other than exactly one argument, and is missing on
        # Python 3. '%s' formatting yields a text string on both.
        error_message = '%s' % (e,)
        logging.error('Unable to process: %s (%s)', raw_event, error_message)
        if writer_invalid:
            write_event_error(
                writer_invalid, raw_event, error_message, 'processor', event
            )

    else:
        # Only reached when parsing and validation both succeeded.
        for w in writers_list:
            w.send(event)