in tools/scraper/parser.py [0:0]
def __init__(self, _log_index, _instance, _lineno, _line, _comn, _router, opaque=None):
    """
    Process a naked qpid-dispatch log line
    A log line looks like this:
      2018-07-20 10:58:40.179187 -0400 SERVER (trace) [2]:0 -> @begin(17) [next-outgoing-id=0, incoming-window=2147483647, outgoing-window=2147483647] (/home/chug/git/qpid-dispatch/src/server.c:106)
    The process is:
     1. If the line ends with a filename:fileline then strip that away
     2. Peel off the leading time of day and put that into data.datetime.
        Lines with no datetime are presumed start-of-epoch.
     3. Find (SERVER) or (POLICY). If absent then raise to reject message.
     4. If connection number in square brackets '[2]' is missing then raise.
     5. Extract connection number; save in data.conn_num
     6. Create decorated data.conn_id "A0_2"
     7. Extract data.channel if present. Raise if malformed.
     8. Create a web_show_str for lines that may not parse any further. Like policy lines.
     9. Extract the direction arrows
    The log line is now reduced to a described type:
      @describedtypename(num) [key=val [, key=val ...]]
    except for transfers that have the funky transfer data at end.
    :param _log_index: The router prefix index 0 for A, 1 for B, ...
    :param _instance: The router instance
    :param _lineno: Line number within the log file
    :param _line: The raw log line text
    :param _comn: Common state shared by the scraper run (args, shorteners, ...)
    :param _router: The router object this line belongs to
    :param opaque: Caller-defined payload carried unmodified on this object
    :raises ValueError: when the line is not a candidate for parsing, falls
        outside the configured time-of-day window, or is malformed
    """
    # A "verbatim" module is one the user asked to display literally,
    # bypassing the AMQP-oriented parsing below.
    verbatim_module = None
    if len(_comn.verbatim_include_list) > 0:
        for modx in _comn.verbatim_include_list:
            if _comn.module_key_in_line(modx, _line):
                verbatim_module = modx
                break
    # Reject lines not emitted by any module this parser understands.
    if not (_comn.module_key_in_line(self.server_trace_key, _line) or
            _comn.module_key_in_line(self.protocol_trace_key, _line) or
            (_comn.module_key_in_line(self.policy_trace_key, _line) and "lookup_user:" in _line) or  # open (not begin, attach)
            _comn.module_key_in_line(self.server_info_key, _line) or
            _comn.module_key_in_line(self.router_ls_key, _line) or
            verbatim_module is not None):
        raise ValueError("Line is not a candidate for parsing")
    self.index = _log_index  # router prefix 0 for A, 1 for B
    self.instance = _instance  # router instance in log file
    self.lineno = _lineno  # log line number
    self.comn = _comn
    self.router = _router
    self.opaque = opaque
    self.prefixi = common.log_letter_of(self.index) + str(self.instance)  # prefix+instance A0
    self.fid = "f_" + self.prefixi + "_" + str(self.lineno)  # frame id A0_100
    self.shorteners = _comn.shorteners  # name shorteners
    self.line = _line  # working line chopped, trimmed
    self.data = LogLineData()  # parsed line fact store
    # strip optional trailing file:line field, e.g. "(.../server.c:106)"
    self.line = self.line.rstrip()
    hasFileLine = False
    if self.line.endswith(')'):
        idxOP = self.line.rfind('(')
        idxColon = self.line.rfind(':')
        if idxOP != -1 and idxColon != -1 and idxColon > idxOP:
            lNumStr = self.line[(idxColon + 1):(-1)]
            try:
                # Value is unused: int() succeeding is what proves this
                # trailing parenthesized field is a file:line suffix.
                int(lNumStr)
                hasFileLine = True
            except ValueError:
                pass
    if hasFileLine:
        self.line = self.line[:self.line.rfind('(')].rstrip()
    # Handle optional timestamp
    # This whole project is brain dead without a timestamp. Just sayin'.
    self.datetime = None
    try:
        self.datetime = datetime.strptime(self.line[:26], '%Y-%m-%d %H:%M:%S.%f')
    except ValueError:
        # old routers flub the timestamp and don't print leading zero in uS time
        # 2018-11-18 11:31:08.269 should be 2018-11-18 11:31:08.000269
        td = self.line[:26]
        parts = td.split('.')
        # Guard: a line with no '.' in its first 26 chars has no fractional
        # seconds to repair; let the retry below fail into the epoch default
        # instead of raising IndexError here.
        if len(parts) >= 2:
            parts_us = parts[1].split(' ')
            if len(parts_us[0]) < 6:
                parts_us[0] = parts_us[0].rjust(6, '0')  # left-pad uS to 6 digits
            parts[1] = ' '.join(parts_us)
            td = '.'.join(parts)
        try:
            self.datetime = datetime.strptime(td[:26], '%Y-%m-%d %H:%M:%S.%f')
        except ValueError:
            # No parsable timestamp at all: presume start-of-epoch.
            self.datetime = datetime(1970, 1, 1)
    # Apply time-of-day filters; lines outside the window are rejected.
    if self.datetime is not None:
        if self.comn.args.time_start is not None:
            if self.datetime < self.comn.args.time_start:
                raise ValueError("Line too early outside time-of-day limits")
        if self.comn.args.time_end is not None:
            if self.datetime > self.comn.args.time_end:
                raise ValueError("Line too late outside time-of-day limits")
    # Pull out scraper literal logs: show them verbatim and stop parsing.
    if verbatim_module is not None:
        sti = self.line.find(verbatim_module)
        if sti > 0:
            # strip datetime and show literal string
            sti += len(verbatim_module)
            self.data.is_scraper = True
            self.data.web_show_str = ("<strong>%s</strong> %s" % (verbatim_module, common.html_escape(self.line[sti:])))
            stcp = self.line[sti:].find(')')  # close paren after log level
            if stcp < 0:
                stcp = 0
            self.data.sdorg_str = self.line[sti + stcp + 1:]
            return
        else:
            assert False  # verbatim module was found only moments ago...
    # Locate which module keyword produced this line; each branch consumes
    # the line up through its keyword and tags self.data accordingly.
    sti = self.line.find(self.server_trace_key)
    if sti < 0:
        sti = self.line.find(self.protocol_trace_key)
        if sti < 0:
            sti = self.line.find(self.policy_trace_key)
            if sti < 0:
                sti = self.line.find(self.server_info_key)
                if sti < 0:
                    sti = self.line.find(self.router_ls_key)
                    if sti < 0:
                        raise ValueError("Log keyword/level not found in line %s" % (self.line))
                    else:
                        self.line = self.line[sti + len(self.router_ls_key):]
                        self.data.is_router_ls = True
                        # this has no relationship to AMQP log lines
                        return
                else:
                    self.line = self.line[sti + len(self.server_info_key):]
                    self.data.is_server_info = True
            else:
                self.line = self.line[sti + len(self.policy_trace_key):]
                self.data.is_policy_trace = True
        else:
            self.line = self.line[sti + len(self.protocol_trace_key):]
    else:
        self.line = self.line[sti + len(self.server_trace_key):]
    # extract connection number, e.g. the "2" in "[2]"
    ste = self.line.find(']')
    if ste < 0:
        print("Failed to parse line ", _lineno, " : ", _line)
        traceback.print_exc()
        raise ValueError("'%s' not found in line %s" % ("]", self.line))
    self.data.conn_num = self.line[:ste]
    if self.data.conn_num.startswith("C"):
        self.data.conn_num = self.data.conn_num[1:]
    self.line = self.line[ste + 1:]
    # create decorated connection id, e.g. "A0_2"
    self.data.conn_id = self.prefixi + "_" + self.data.conn_num
    # get the session (channel) number
    if self.line.startswith(':'):
        self.line = self.line[1:]
    if self.line.startswith(self.proton_frame_key):
        self.line = self.line[len(self.proton_frame_key):]
    sti = self.line.find(' ')
    if sti < 0:
        raise ValueError("space not found after channel number at head of line %s" % (self.line))
    if sti > 0:
        self.data.channel = self.line[:sti]
    self.line = self.line[sti + 1:]
    self.line = self.line.lstrip()
    # cover for traces that don't get any better
    self.data.web_show_str = ("<strong>%s</strong>" % self.line)
    # policy lines have no direction and described type fields
    if self.data.is_policy_trace or self.data.is_server_info:
        return
    # direction arrows: "<-" or "->"
    if self.line.startswith('<') or self.line.startswith('-'):
        self.data.direction = self.line[:2]
        self.line = self.line[3:]
        self.data.web_show_str = ("<strong>%s</strong>" % self.line)
    # The log line is now reduced to a described type:
    #  @describedtypename(num) [key=val [, key=val ...]]
    # extract descriptor name
    dname = self.line.split()[0]
    self.line = self.line[(len(dname) + 1):]
    # Dispose of the transfer data
    if dname == self.transfer_key:
        # Look for the '] (NNN) "' that separates the described type fields
        # from the '(size) "data"'. Stick the required '(size) data' into
        # data.transfer_data and delete it from the line.
        rz = re.compile(r'\] \(\d+\) \"').search(self.line)
        # aborted transfers may or may not have size/data in the log line
        if rz is not None and len(rz.regs) > 0:
            splitSt, splitTo = rz.regs[0]
            self.data.transfer_size = self.line[splitSt + 3: splitTo - 3]
            self.data.transfer_data = self.line[splitTo - 1:]  # discard (NNN) size field
            self.line = self.line[: splitSt + 1]
            # try to isolate the bare message: header/annotations precede
            # the AMQP message-properties performative marker "\x00Ss"
            sti = self.data.transfer_data.find(r"\x00Ss")
            if sti > 0:
                self.data.transfer_hdr_annos = self.data.transfer_data[:sti]
                self.data.transfer_bare = self.data.transfer_data[sti:]
            else:
                self.data.transfer_hdr_annos = ''
                self.data.transfer_bare = self.data.transfer_data
        else:
            self.data.transfer_size = "0"
            self.data.transfer_data = "(none)"
    if DescribedType.is_dtype_name(dname):
        self.data.described_type.parse_dtype_line(dname, self.line)
    # data from incoming line is now parsed out into facts in .data
    # Now cook the data to get useful displays
    self.extract_facts()