def FilterLogEntries()

in automation/tinc/main/ext/qautils/gppylib/logfilter.py [0:0]


def FilterLogEntries(iterable,
                     msgfile=sys.stderr,
                     verbose=False,
                     beginstamp=None,
                     endstamp=None,
                     include=None,
                     exclude=None,
                     filters=None,
                     ibegin=0,
                     jend=None):
    r"""
    Generator to consume the lines of a GPDB log file from iterable,
    yield the lines which satisfy the given criteria, and skip the rest.

    iterable should be a sequence of strings, an already-open input file,
    or some object which supports iteration and yields strings.

    verbose, if True, causes status messages to be written to msgfile,
    which should be an already-open output file.  The messages are written
    only after the last line has been yielded, so a caller which stops
    iterating early will not see them.

    For our purposes, a log entry consists of a line which starts with a
    timestamp in YYYY-MM-DD HH:MM:SS[.fraction] format, followed by zero
    or more lines having the same timestamp or no timestamp.

    beginstamp should be a datetime.datetime or datetime.date object, or None.
    Log entries are skipped if their timestamp is less than the specified
    date and time.  Fractional seconds and timezones are ignored.  If a
    date object is given, it is converted to a datetime with time 00:00:00.

    endstamp is like beginstamp, except that it causes log entries to be
    skipped if their timestamp is greater than or equal to the specified
    date and time.

    include should be a regular expression object returned by the re.compile()
    method; or a string specifying a regular expression according to the rules
    of the re package in the Python standard library; or a list of such objects
    and/or strings; or None.  A log entry is skipped if there is an include
    regex which - in every line of the entry - fails to match.

    exclude is like include, except that it causes a log entry to be skipped
    if there is an exclude regex which matches in some line of the entry.

    filters is a sequence of callables, or None (the default) for no extra
    filters.  Each callable will be called just once, with one argument: an
    input stream, which will be an iterator yielding groups.  (Here a 'group'
    is a sequence of strings: the lines of a log entry.)  The callable should
    return an iterator yielding filtered groups.  The filters are applied in
    the order given, downstream of the begin/end/include/exclude filters.
    For example, this filter selects log entries with 'DEBUGn:' in the first
    line...
        lambda iterable: MatchInFirstLine(iterable, r'DEBUG\d:')
    The filterize() function, defined later in this module, is useful for
    building the list of filters.

    ibegin and jend should be integers or None.  They can be specified like
    the bounds of a Python slice, to select a subrange of the log entries
    which satisfy all the preceding criteria.  Values >= 0 are counted from
    the beginning of the stream; values < 0 are counted from the end of the
    stream.  0 is before the first qualifying log entry; 1 is after the first
    and before the second; -1 is before the last.  Entries coming before the
    ibegin point or after the jend point are skipped.  For example, jend=3
    to select only the first 3 qualifying log entries; or ibegin=-3 to
    extract the last 3 entries.  A jend equal to sys.maxint is treated the
    same as None (no upper bound).

    Regular expression syntax is at http://docs.python.org/lib/re-syntax.html

    At the beginning of a log file before the first timestamped line there
    could be some lines with no timestamp.  If beginstamp or endstamp
    is not None, any such lines are skipped.  Otherwise they are grouped
    together and treated as one log entry.
    """
    iterable = iter(iterable)

    # Statistics collectors; each stays None unless verbose mode installs it.
    spyIn = countIn = spyMid = spyMatch = countOut = spyOut = None

    # A fresh list per call instead of a shared mutable default argument.
    if filters is None:
        filters = []

    # sys.maxint means "no upper bound", the same as None.  The None test
    # comes first so sys.maxint is only looked up when jend was given.
    if jend is not None and jend == sys.maxint:
        jend = None

    # Collect unfiltered input statistics
    if verbose:
        iterable = spyIn = TimestampSpy(iterable)

    # Build filter pipeline
    if include or exclude or filters or ibegin or (jend is not None):
        # We want patterns to be tested entry-by-entry rather than line-by-line,
        # so group together the lines of each entry.
        iterable = GroupByTimestamp(iterable)

        # Count the unfiltered log entries
        if verbose:
            iterable = countIn = Count(iterable)

        # Select log entries such that beginstamp <= timestamp < endstamp
        if beginstamp or endstamp:
            iterable = TimestampInBounds(iterable, beginstamp, endstamp)
            if verbose:
                iterable = spyMid = TimestampSpy(iterable)

        # Include matching log entries.  A lone string or compiled regex is
        # wrapped in a list so that both forms share the loop below.
        if (isinstance(include, basestring) or   # one string
            hasattr(include, 'search')):         # or compiled regex
            include = [include]
        if include:
            for regex in include:
                iterable = MatchRegex(iterable, regex)

        # Exclude matching log entries.
        if (isinstance(exclude, basestring) or   # one string
            hasattr(exclude, 'search')):         # or compiled regex
            exclude = [exclude]
        if exclude:
            for regex in exclude:
                iterable = NoMatchRegex(iterable, regex)

        # Append caller's filters to the pipeline.
        for func in filters:
            iterable = func(iterable)

        # Collect match/filter statistics, but only if some stage was added
        # after the most recent spy/counter (otherwise the numbers would
        # just repeat the previous report line).
        if verbose and iterable is not (spyMid or countIn):
            iterable = spyMatch = TimestampSpy(iterable)

        # After all other filtering, extract slice of qualifying log entries.
        if ibegin or jend is not None:
            iterable = Slice(iterable, ibegin, jend)

        # Count final output log entries
        if verbose:
            iterable = countOut = Count(iterable)

        # Break the groups back down into lines for output.
        iterable = Ungroup(iterable)

        # Collect final statistics
        if verbose:
            iterable = spyOut = TimestampSpy(iterable)

    elif beginstamp or endstamp:
        # Only a time window was requested, so no grouping stage is built:
        # select log entries such that beginstamp <= timestamp < endstamp
        iterable = TimestampInBounds(iterable, beginstamp, endstamp)

        # Collect final statistics; the same spy serves as both the
        # "time ok" and the "out" report.
        if verbose:
            iterable = spyOut = spyMid = TimestampSpy(iterable)

    else:
        # Caller didn't request any filtering.
        spyOut = spyIn

    # Pull filtered lines out of the pipeline and yield them to caller
    for line in iterable:
        yield line

    # Display statistics if requested
    if not verbose:
        return

    # Did we even try to read any input?
    if spyIn.items == 0 and spyOut.items == 0 and not spyIn.eod:
        print >>msgfile, ('%7d lines processed; an unsatisfiable condition '
                          'was specified' % 0)
        return

    # Unfiltered input statistics
    srange = spyIn.str_range()
    entries = None
    if countIn:
        entries = countIn.count()
    msg = _FormatFilterStats('       in: %7d lines', spyIn.lines,
                             entries, srange)
    if not srange:
        msg += '; no timestamps found'
    if not spyIn.eod:
        msg += '; stopped before end of input'
    print >>msgfile, msg

    # Entries where begin <= timestamp < end
    if spyMid:
        srange = spyMid.str_range()
        # A zero group count is omitted from the report line.
        print >>msgfile, _FormatFilterStats('  time ok: %7d lines',
                                            spyMid.lines,
                                            spyMid.groups or None,
                                            srange)

    # After applying include/exclude/filters
    if spyMatch:
        srange = spyMatch.str_range()
        print >>msgfile, _FormatFilterStats('    match: %7d lines',
                                            spyMatch.lines,
                                            spyMatch.groups or None,
                                            srange)

    # Final output statistics
    srange = spyOut.str_range()
    entries = None
    if countOut:
        entries = countOut.count()
    print >>msgfile, _FormatFilterStats('      out: %7d lines', spyOut.lines,
                                        entries, srange)


def _FormatFilterStats(fmt, lines, entries, srange):
    """
    Build one line of the verbose statistics report of FilterLogEntries().

    fmt is the leading format string with a single %d slot for the line
    count.  entries is the number of log entries, or None to leave that
    part out.  srange is a (first, last) pair of timestamp strings, or a
    false value to leave the timestamp range out.  Returns the message
    string without a trailing newline.
    """
    msg = fmt % lines
    if entries is not None:
        msg += ', %7d log entries' % entries
    if srange:
        msg += '; timestamps from %s to %s' % srange
    return msg