in automation/tinc/main/ext/qautils/gppylib/logfilter.py [0:0]
def FilterLogEntries(iterable,
                     msgfile=sys.stderr,
                     verbose=False,
                     beginstamp=None,
                     endstamp=None,
                     include=None,
                     exclude=None,
                     filters=(),
                     ibegin=0,
                     jend=None):
    r"""
    Generator to consume the lines of a GPDB log file from iterable,
    yield the lines which satisfy the given criteria, and skip the rest.

    iterable should be a sequence of strings, an already-open input file,
    or some object which supports iteration and yields strings.

    verbose, if True, causes status messages to be written to msgfile,
    which should be an already-open output file.  The messages are written
    only after the filtered input has been consumed to the end, i.e. once
    this generator has been run to completion.

    For our purposes, a log entry consists of a line which starts with a
    timestamp in YYYY-MM-DD HH:MM:SS[.fraction] format, followed by zero
    or more lines having the same timestamp or no timestamp.

    beginstamp should be a datetime.datetime or datetime.date object, or
    None.  Log entries are skipped if their timestamp is less than the
    specified date and time.  Fractional seconds and timezones are ignored.
    If a date object is given, it is converted to a datetime with time
    00:00:00.

    endstamp is like beginstamp, except that it causes log entries to be
    skipped if their timestamp is greater than or equal to the specified
    date and time.

    include should be a regular expression object returned by the
    re.compile() method; or a string specifying a regular expression
    according to the rules of the re package in the Python standard
    library; or a list of such objects and/or strings; or None.  A log
    entry is skipped if there is an include regex which - in every line
    of the entry - fails to match.

    exclude is like include, except that it causes a log entry to be
    skipped if there is an exclude regex which matches in some line of
    the entry.

    filters is a sequence of callables.  Each callable will be called just
    once, with one argument: an input stream, which will be an iterator
    yielding groups.  (Here a 'group' is a sequence of strings: the lines
    of a log entry.)  The callable should return an iterator yielding
    filtered groups.  The filters are applied in the order given,
    downstream of the begin/end/include/exclude filters.  For example,
    this filter selects log entries with 'DEBUGn:' in the first line...
        lambda iterable: MatchInFirstLine(iterable, r'DEBUG\d:')
    The filterize() function, defined later in this module, is useful for
    building the list of filters.

    ibegin and jend should be integers or None.  They can be specified like
    the bounds of a Python slice, to select a subrange of the log entries
    which satisfy all the preceding criteria.  Values >= 0 are counted from
    the beginning of the stream; values < 0 are counted from the end of the
    stream.  0 is before the first qualifying log entry; 1 is after the
    first and before the second; -1 is before the last.  Entries coming
    before the ibegin point or after the jend point are skipped.  For
    example, jend=3 to select only the first 3 qualifying log entries; or
    ibegin=-3 to extract the last 3 entries.  A jend equal to sys.maxint
    is handled the same as None.

    Regular expression syntax is at
    https://docs.python.org/2/library/re.html#regular-expression-syntax

    At the beginning of a log file before the first timestamped line there
    could be some lines with no timestamp.  If beginstamp or endstamp
    is not None, any such lines are skipped.  Otherwise they are grouped
    together and treated as one log entry.
    """
    iterable = iter(iterable)
    spyIn = countIn = spyMid = spyMatch = countOut = spyOut = None

    # Normalize a jend of sys.maxint to None, i.e. no end bound.
    if jend is not None and jend == sys.maxint:
        jend = None

    # Collect unfiltered input statistics
    if verbose:
        iterable = spyIn = TimestampSpy(iterable)

    # Build filter pipeline
    if include or exclude or filters or ibegin or (jend is not None):
        # We want patterns to be tested entry-by-entry rather than
        # line-by-line, so group together the lines of each entry.
        iterable = GroupByTimestamp(iterable)

        # Count the unfiltered log entries
        if verbose:
            iterable = countIn = Count(iterable)

        # Select log entries such that beginstamp <= timestamp < endstamp
        if beginstamp or endstamp:
            iterable = TimestampInBounds(iterable, beginstamp, endstamp)
            if verbose:
                iterable = spyMid = TimestampSpy(iterable)

        # Keep only the log entries which match every include regex.
        include = _as_regex_list(include)
        if include:
            for regex in include:
                iterable = MatchRegex(iterable, regex)

        # Drop the log entries which match any exclude regex.
        exclude = _as_regex_list(exclude)
        if exclude:
            for regex in exclude:
                iterable = NoMatchRegex(iterable, regex)

        # Append caller's filters to the pipeline.
        for func in filters:
            iterable = func(iterable)

        # Collect match/filter statistics, but only if some stage was
        # actually appended since the last spy/counter; otherwise the
        # figures would merely repeat the previous stage.
        if verbose and iterable is not (spyMid or countIn):
            iterable = spyMatch = TimestampSpy(iterable)

        # After all other filtering, extract slice of qualifying log entries.
        if ibegin or jend is not None:
            iterable = Slice(iterable, ibegin, jend)

        # Count final output log entries
        if verbose:
            iterable = countOut = Count(iterable)

        # Break the groups back down into lines for output.
        iterable = Ungroup(iterable)

        # Collect final statistics
        if verbose:
            iterable = spyOut = TimestampSpy(iterable)

    elif beginstamp or endstamp:
        # Only time bounds were requested, so there is no need to group
        # the lines into entries.
        # Select log entries such that beginstamp <= timestamp < endstamp
        iterable = TimestampInBounds(iterable, beginstamp, endstamp)

        # Collect final statistics; here the time-filtered stream is also
        # the output stream, so one spy serves both purposes.
        if verbose:
            iterable = spyOut = spyMid = TimestampSpy(iterable)

    else:
        # Caller didn't request any filtering.
        spyOut = spyIn

    # Pull filtered lines out of the pipeline and yield them to caller
    for line in iterable:
        yield line

    # Display statistics if requested
    if verbose:
        # Did we even try to read any input?
        if spyIn.items == 0 and spyOut.items == 0 and not spyIn.eod:
            print >>msgfile, ('%7d lines processed; an unsatisfiable condition '
                              'was specified' % 0)
            return

        # Unfiltered input statistics
        srange = spyIn.str_range()
        msg = ' in: %7d lines' % spyIn.lines
        if countIn:
            msg += ', %7d log entries' % countIn.count()
        if srange:
            msg += '; timestamps from %s to %s' % srange
        else:
            msg += '; no timestamps found'
        if not spyIn.eod:
            msg += '; stopped before end of input'
        print >>msgfile, msg

        # Entries where begin <= timestamp < end
        if spyMid:
            print >>msgfile, _stage_summary(' time ok: %7d lines', spyMid)

        # After applying include/exclude/filters
        if spyMatch:
            print >>msgfile, _stage_summary(' match: %7d lines', spyMatch)

        # Final output statistics
        srange = spyOut.str_range()
        msg = ' out: %7d lines' % spyOut.lines
        if countOut:
            msg += ', %7d log entries' % countOut.count()
        if srange:
            msg += '; timestamps from %s to %s' % srange
        print >>msgfile, msg


def _as_regex_list(patterns):
    """
    Return [patterns] if patterns is a single regex given as a string or
    as an object having a 'search' attribute (e.g. a compiled regex);
    otherwise return patterns unchanged (a list of regexes, or None).
    """
    if isinstance(patterns, basestring) or hasattr(patterns, 'search'):
        return [patterns]
    return patterns


def _stage_summary(linesfmt, spy):
    """
    Build the verbose status line for an intermediate pipeline stage.

    linesfmt is the leading '%d'-style format for the line count; spy is
    the TimestampSpy which observed that stage.  The entry count and the
    timestamp range are appended only when they are nonempty.
    """
    srange = spy.str_range()
    msg = linesfmt % spy.lines
    if spy.groups:
        msg += ', %7d log entries' % spy.groups
    if srange:
        msg += '; timestamps from %s to %s' % srange
    return msg