def histgrep()

in eden/scm/edenscm/mercurial/commands/__init__.py [0:0]
208 lines of code
62 McCabe index (conditional complexity)

def histgrep(ui, repo, pattern, *pats, **opts):
    """search backwards through history for a pattern in the specified files

    Search revision history for a regular expression in the specified
    files or the entire project.

    By default, grep prints the most recent revision number for each
    file in which it finds a match. To get it to print every revision
    that contains a change in match status ("-" for a match that becomes
    a non-match, or "+" for a non-match that becomes a match), use the
    --all flag.

    PATTERN can be any Python (roughly Perl-compatible) regular
    expression.

    If no FILEs are specified (and -f/--follow isn't set), all files in
    the repository are searched, including those that don't exist in the
    current branch or have been deleted in a prior changeset.

    .. container:: verbose

      ``histgrep.allowfullrepogrep`` controls whether the entire repo can be
      queried without any patterns, which can be expensive in big repositories.

    Returns 0 if a match is found, 1 otherwise.
    """
    # NOTE: the docstring above is left untouched on purpose; it reads as
    # user-facing command help rather than developer documentation.
    if not util.istest():
        ui.deprecate(
            "hg-histgrep",
            "histgrep is deprecated because it does not scale - use diffgrep instead",
        )
    # Refuse to scan the whole repository unless explicitly allowed by config.
    if not pats and not ui.configbool("histgrep", "allowfullrepogrep"):
        m = _("can't run histgrep on the whole repo, please provide filenames")
        h = _("this is disabled to avoid very slow greps over the whole repo")
        raise error.Abort(m, hint=h)

    reflags = re.M
    if opts.get("ignore_case"):
        reflags |= re.I
    try:
        regexp = re.compile(pattern, reflags)
    except re.error as inst:
        ui.warn(_("grep: invalid match pattern: %s\n") % inst)
        return 1
    # Field separator and record terminator; both become NUL with --print0.
    sep, eol = ":", "\n"
    if opts.get("print0"):
        sep = eol = "\0"

    getfile = util.lrucachefunc(repo.file)

    def matchlines(body):
        """Yield (linenum, colstart, colend, line) for each matching line.

        Only the first match on a line is reported here; searching resumes
        after the newline that follows the match.
        """
        body = pycompat.decodeutf8(body, errors="replace")
        begin = 0
        linenum = 0
        while begin < len(body):
            match = regexp.search(body, begin)
            if not match:
                break
            mstart, mend = match.span()
            linenum += body.count("\n", begin, mstart) + 1
            # rfind()/find() return -1 on failure, so "+ 1 or <fallback>"
            # selects the fallback when no newline is found.
            lstart = body.rfind("\n", begin, mstart) + 1 or begin
            begin = body.find("\n", mend) + 1 or len(body) + 1
            lend = begin - 1
            yield linenum, mstart - lstart, mend - lstart, body[lstart:lend]

    class linestate(object):
        """A matching line plus the span of its first match."""

        def __init__(self, line, linenum, colstart, colend):
            self.line = line
            self.linenum = linenum
            self.colstart = colstart
            self.colend = colend

        # NOTE(review): __hash__ includes linenum while __eq__ compares only
        # the text, so the two are not consistent. Left as-is because
        # changing either could alter the --all diff output; confirm intent.
        def __hash__(self):
            return hash((self.linenum, self.line))

        def __eq__(self, other):
            return self.line == other.line

        def findpos(self):
            """Iterate all (start, end) indices of matches"""
            yield self.colstart, self.colend
            p = self.colend
            while p < len(self.line):
                m = regexp.search(self.line, p)
                if not m:
                    break
                if m.end() == p:
                    # Zero-width match at the current position (e.g. "a*"):
                    # step forward one character, otherwise the search would
                    # find the same empty match forever.
                    p += 1
                else:
                    yield m.span()
                    p = m.end()

    # matches: rev -> {filename -> [linestate, ...]}
    matches = {}
    # copies: rev -> {filename -> copy source filename}
    copies = {}

    def grepbody(fn, rev, body):
        """Record every matching line of body under matches[rev][fn]."""
        matches[rev].setdefault(fn, [])
        m = matches[rev][fn]
        for lnum, cstart, cend, line in matchlines(body):
            s = linestate(line, lnum, cstart, cend)
            m.append(s)

    def difflinestates(a, b):
        """Yield ("+"/"-", linestate) for lines that differ between a and b."""
        sm = difflib.SequenceMatcher(None, a, b)
        for tag, alo, ahi, blo, bhi in sm.get_opcodes():
            if tag == "insert":
                for i in range(blo, bhi):
                    yield ("+", b[i])
            elif tag == "delete":
                for i in range(alo, ahi):
                    yield ("-", a[i])
            elif tag == "replace":
                for i in range(alo, ahi):
                    yield ("-", a[i])
                for i in range(blo, bhi):
                    yield ("+", b[i])

    def display(fm, fn, ctx, pstates, states):
        """Write the results for file fn at ctx; return True if any was written."""
        rev = ctx.rev()
        node = ctx.node()
        if fm.isplain():
            formatuser = ui.shortuser
        else:
            formatuser = str
        if ui.quiet:
            datefmt = "%Y-%m-%d"
        else:
            datefmt = "%a %b %d %H:%M:%S %Y %1%2"
        found = False

        # Wrapped in util.cachefunc so the file content is read at most once
        # per display() call, and only if a line is actually shown.
        @util.cachefunc
        def binary():
            flog = getfile(fn)
            return util.binary(flog.read(ctx.filenode(fn)))

        # Internal column name -> formatter field name, where they differ.
        fieldnamemap = {"filename": "file", "linenumber": "line_number"}
        if opts.get("all"):
            changes = difflinestates(pstates, states)
        else:
            changes = [("", l) for l in states]
        for change, l in changes:
            fm.startitem()
            fm.data(node=fm.hexfunc(ctx.node()))
            # (name, value, whether the column is written)
            cols = [
                ("filename", fn, True),
                ("rev", rev, False),
                ("node", fm.hexfunc(node), True),
                ("linenumber", l.linenum, opts.get("line_number")),
            ]
            if opts.get("all"):
                cols.append(("change", change, True))
            cols.extend(
                [
                    ("user", formatuser(ctx.user()), opts.get("user")),
                    ("date", fm.formatdate(ctx.date(), datefmt), opts.get("date")),
                ]
            )
            # The last enabled column gets no trailing separator from the
            # loop below; "filename" is always enabled so next() cannot fail.
            lastcol = next(name for name, data, cond in reversed(cols) if cond)
            for name, data, cond in cols:
                field = fieldnamemap.get(name, name)
                fm.condwrite(cond, field, "%s", data, label="grep.%s" % name)
                if cond and name != lastcol:
                    fm.plain(sep, label="grep.sep")
            if not opts.get("files_with_matches"):
                fm.plain(sep, label="grep.sep")
                if not opts.get("text") and binary():
                    fm.plain(_(" Binary file matches"))
                else:
                    displaymatches(fm.nested("texts"), l)
            fm.plain(eol)
            found = True
            # With --files-with-matches one line per file is enough.
            if opts.get("files_with_matches"):
                break
        return found

    def displaymatches(fm, l):
        """Write line l as alternating unmatched and matched text items."""
        p = 0
        for s, e in l.findpos():
            if p < s:
                fm.startitem()
                fm.write("text", "%s", l.line[p:s])
                fm.data(matched=False)
            fm.startitem()
            fm.write("text", "%s", l.line[s:e], label="grep.match")
            fm.data(matched=True)
            p = e
        if p < len(l.line):
            fm.startitem()
            fm.write("text", "%s", l.line[p:])
            fm.data(matched=False)
        fm.end()

    # skip: filenames already reported, to be ignored in other revisions
    # (only populated when --all is not given).
    skip = {}
    # revfiles: rev -> [filenames to consider for that rev]
    revfiles = {}
    match = scmutil.match(repo[None], pats, opts)
    found = False
    follow = opts.get("follow")

    def prep(ctx, fns):
        """Grep files fns at ctx and at its first parent, filling the tables."""
        rev = ctx.rev()
        pctx = ctx.p1()
        parent = pctx.rev()
        matches.setdefault(rev, {})
        matches.setdefault(parent, {})
        files = revfiles.setdefault(rev, [])
        for fn in fns:
            flog = getfile(fn)
            try:
                fnode = ctx.filenode(fn)
            except error.LookupError:
                continue

            copied = flog.renamed(fnode)
            copy = follow and copied and copied[0]
            if copy:
                copies.setdefault(rev, {})[fn] = copy
            if fn in skip:
                # Propagate the skip to the copy source as well.
                if copy:
                    skip[copy] = True
                continue
            files.append(fn)

            if fn not in matches[rev]:
                grepbody(fn, rev, flog.read(fnode))

            pfn = copy or fn
            if pfn not in matches[parent]:
                try:
                    fnode = pctx.filenode(pfn)
                    # pfn may be the copy source, so read it through its own
                    # filelog rather than fn's (same object when pfn == fn).
                    grepbody(pfn, parent, getfile(pfn).read(fnode))
                except error.LookupError:
                    # The file does not exist in the parent; nothing to grep.
                    pass

    ui.pager("grep")
    fm = ui.formatter("grep", opts)
    for ctx in cmdutil.walkchangerevs(repo, match, opts, prep):
        rev = ctx.rev()
        parent = ctx.p1().rev()
        for fn in sorted(revfiles.get(rev, [])):
            states = matches[rev][fn]
            copy = copies.get(rev, {}).get(fn)
            if fn in skip:
                if copy:
                    skip[copy] = True
                continue
            pstates = matches.get(parent, {}).get(copy or fn, [])
            if pstates or states:
                r = display(fm, fn, ctx, pstates, states)
                found = found or r
                # Without --all, a file is reported once and skipped afterwards.
                if r and not opts.get("all"):
                    skip[fn] = True
                    if copy:
                        skip[copy] = True
        # Drop per-revision data once the revision has been displayed.
        del matches[rev]
        del revfiles[rev]
    fm.end()

    # Exit status: 0 (False) if something matched, 1 (True) otherwise.
    return not found