in eden/scm/edenscm/mercurial/commands/__init__.py [0:0]
def grep(ui, repo, pattern, *pats, **opts):
    """search for a pattern in tracked files in the working directory

    The default regexp style is POSIX basic regexps. If no FILE parameters are
    passed in, the current directory and its subdirectories will be searched.

    For the old 'hg grep', which searches through history, see 'histgrep'."""
    # NOTE(review): presumably the docstring above doubles as the command's
    # user-facing help text, so developer documentation lives in comments
    # here -- confirm before rewording the docstring.
    #
    # Parameters:
    #   ui      -- source of the `grep.*` config values and sink for output.
    #   repo    -- repository whose working directory is searched.
    #   pattern -- the expression handed to the external grep / biggrep.
    #   pats    -- optional file patterns; when empty, "." (the cwd) is used.
    #   opts    -- command options. Keys read here: after_context,
    #              before_context, context, ignore_case, files_with_matches,
    #              line_number, invert_match, word_regexp, extended_regexp,
    #              fixed_strings, perl_regexp, include, exclude.
    #
    # Returns:
    #   The result of `_rungrep` when a local grep is run; 0 when biggrep
    #   results were printed and no locally changed file needs re-searching;
    #   None when the biggrep corpus revision is still unknown locally after
    #   a pull (the possibly stale results are printed with a warning).
    #
    # Raises:
    #   error.Abort -- if invert_match is combined with biggrep, or if no
    #                  corpus revision can be parsed from biggrep's output.
    #
    # XXX: The current implementation heavily depends on external programs like
    # grep, xargs and biggrep. Command-line flag support is a bit messy. It
    # does not look like a source control command (ex. no --rev support).
    # Ideally, `grep` is just `histgrep -r 'wdir()'` and they can be merged.
    # Possible future work are:
    # - For "searching many files in a single revision" use-case, utilize
    #   ripgrep's logic. That "single revision" includes "wdir()". Get rid
    #   of shelling out to grep or xargs.
    # - For "searching few files in a range of revisions" use-case, maybe
    #   try using fastannotate logic.
    # - Find a cleaner way to integrate with FB-only biggrep and fallback
    #   gracefully.
    grepcommandstr = ui.config("grep", "command")
    # Use shlex.split() to split up grepcommandstr into multiple arguments.
    # this allows users to specify a command plus arguments (e.g., "grep -i").
    # We don't use a real shell to execute this, which ensures we won't do
    # bad stuff if their command includes redirects, semicolons, or other
    # special characters etc.
    cmd = shlex.split(grepcommandstr) + [
        "--no-messages",
        "--binary-files=without-match",
        "--with-filename",
        "--regexp=" + pattern,
    ]

    biggrepclient = ui.config(
        "grep",
        "biggrepclient",
        "/usr/local/fbprojects/packages/biggrep.client/stable/biggrep_client",
    )
    biggreptier = ui.config("grep", "biggreptier", "biggrep.master")
    biggrepcorpus = ui.config("grep", "biggrepcorpus")

    # If true, we'll use biggrepclient to perform the grep against some
    # externally maintained index. We don't provide an implementation
    # of that tool with this repo, just the optional client interface.
    biggrep = ui.configbool("grep", "usebiggrep", None)
    if (
        biggrep is None
        and "eden" in repo.requirements
        and biggrepcorpus
        and os.path.exists(biggrepclient)
    ):
        # Not configured explicitly: enable biggrep when a corpus is
        # configured and the client binary is present.
        biggrep = True

    # Ask big grep to strip out the corpus dir (stripdir) and to include
    # the corpus revision on the first line.
    # NOTE: indices 0 (executable) and 4 (expression) are overwritten below
    # by the fixed_strings / word_regexp options; keep the layout in sync.
    biggrepcmd = [
        biggrepclient,
        "--stripdir",
        "-r",
        "--expression",
        pattern,
        biggreptier,
        biggrepcorpus,
        "re2",
    ]

    # Flags collected in `args` are understood by both grep and biggrep and
    # are appended to both command lines; the rest are grep-only.
    args = []
    for flag, optname in (
        ("-A", "after_context"),
        ("-B", "before_context"),
        ("-C", "context"),
    ):
        value = opts.get(optname)
        if value:
            args += [flag, value]
    if opts.get("ignore_case"):
        args.append("-i")
    if opts.get("files_with_matches"):
        args.append("-l")
    if opts.get("line_number"):
        cmd.append("-n")
    if opts.get("invert_match"):
        if biggrep:
            raise error.Abort("Cannot use invert_match option with big grep")
        cmd.append("-v")
    if opts.get("word_regexp"):
        cmd.append("-w")
        # Emulate -w for biggrep by wrapping the expression in word boundaries.
        biggrepcmd[4] = "\\b%s\\b" % pattern
    if opts.get("extended_regexp"):
        cmd.append("-E")
        # re2 is already mostly compatible by default, so there are no options
        # to apply for this.
    if opts.get("fixed_strings"):
        cmd.append("-F")
        # using bgs rather than bgr switches the engine to fixed string matches
        biggrepcmd[0] = "bgs"
    if opts.get("perl_regexp"):
        cmd.append("-P")
        # re2 is already mostly pcre compatible, so there are no options
        # to apply for this.

    biggrepcmd += args
    cmd += args

    # color support, using the color extension
    if getattr(ui, "_colormode", "") == "ansi":
        cmd.append("--color=always")
        biggrepcmd.append("--color=on")

    # Copy match specific options
    match_opts = {k: opts.get(k) for k in ("include", "exclude") if k in opts}

    reporoot = os.path.dirname(repo.path)
    wctx = repo[None]
    if not pats:
        # Search everything in the current directory
        m = scmutil.match(wctx, ["."], match_opts)
        # Scope biggrep to the cwd equivalent path, relative to the root
        # of its corpus.
        cwd = repo.getcwd()
        if cwd:
            biggrepcmd += ["-f", cwd]
    else:
        # Search using the specified patterns
        m = scmutil.match(wctx, pats, match_opts)
        # Scope biggrep to the same set of patterns. Ideally we'd have
        # a way to translate the matcher object to a regex, but we don't
        # so we cross fingers and hope that the patterns are simple filenames.
        scoped = [os.path.normpath(os.path.join(repo.getcwd(), f)) for f in pats]
        biggrepcmd += ["-f", "(%s)" % "|".join(scoped)]

    # Add '--' to make sure grep recognizes all remaining arguments
    # (passed in by xargs) as filenames.
    cmd.append("--")

    if biggrep:
        p = subprocess.Popen(
            biggrepcmd,
            bufsize=-1,
            close_fds=util.closefds,
            stdout=subprocess.PIPE,
            cwd=reporoot,
        )
        # stderr is not piped, so the second element is always None. It must
        # not be bound to `_`, which is the translation function used below.
        out, _err = p.communicate()
        lines = pycompat.decodeutf8(out.rstrip()).split("\n")
        # The first line describes the corpus revision; drop its leading "#".
        revisionline = lines[0][1:]

        # Biggrep has two output formats. If the query only hit one shard, it
        # returns a "#HASH:timestamp" format indicating the revision and time of
        # the shards snapshot. If it hits multiple shards, it returns a
        # "#name1=HASH:timestamp,name2=HASH:timestamp,name3=..." format.
        if "=" in revisionline:
            corpusrevs = []
            for shard in revisionline.split(","):
                # biggrep doesn't have a consistent format: the timestamp may
                # be missing. partition() tolerates that, and also extra
                # separators, without raising.
                info = shard.partition("=")[2]
                shardrev = info.partition(":")[0]
                if shardrev:
                    corpusrevs.append(shardrev)
            # Pick the smallest so our choice of revision is deterministic
            corpusrev = min(corpusrevs) if corpusrevs else ""
        else:
            corpusrev = revisionline.partition(":")[0]

        if not corpusrev:
            # Covers empty biggrep output as well as malformed revision lines.
            raise error.Abort(
                _("unable to resolve biggrep revision: %s") % revisionline,
                hint=_("pass `--config grep.usebiggrep=False` to bypass biggrep"),
            )

        lines = lines[1:]

        # Maps repo-relative filename (ANSI escapes stripped) to the list of
        # output lines to print for that file.
        resultsbyfile = {}
        includelineno = opts.get("line_number")
        fileswithmatches = opts.get("files_with_matches")

        for line in lines:
            try:
                filename, lineno, colno, context = line.split(":", 3)
            except ValueError:
                # Fewer than four ":"-separated fields.
                binaryfile = re.match(r"Binary file (.*) matches", line)
                if binaryfile:
                    filename = binaryfile.group(1)
                    # Sentinel values recognised when re-assembling below.
                    lineno = 0
                    colno = 0
                    context = None
                elif fileswithmatches:
                    # With -l each line is just a filename.
                    filename = line
                else:
                    # If we couldn't parse the line, just pass it thru
                    ui.write(line)
                    ui.write("\n")
                    continue

            unescapedfilename = util.stripansiescapes(filename)

            # filter to just the files that match the list supplied
            # by the caller
            if not m(unescapedfilename):
                continue

            # relativize the path to the CWD. Note that `filename` will
            # often have escape sequences, so we do a substring replacement
            filename = filename.replace(
                unescapedfilename, m.rel(unescapedfilename)
            )

            # re-assemble the output
            if fileswithmatches:
                result = "%s\n" % filename
            elif lineno == 0 and colno == 0 and context is None:
                # Take care with binary file matches!
                result = "Binary file %s matches\n" % filename
            elif includelineno:
                result = "%s:%s:%s\n" % (filename, lineno, context)
            else:
                result = "%s:%s\n" % (filename, context)
            resultsbyfile.setdefault(unescapedfilename, []).append(result)

        # Now check to see what has changed since the corpusrev
        # we're going to need to grep those and stitch the results together
        corpusbin = bin(corpusrev)
        try:
            changes = repo.status(corpusbin, None, m)
        except error.RepoLookupError:
            # We don't have the rev locally, so go get it.
            if not ui.quiet:
                ui.write_err(
                    _("pulling biggrep corpus commit %s\n") % (hex(corpusbin))
                )

            # Redirect the pull output to stderr so that we don't break folks
            # that are parsing the `hg grep` output.
            # The swap happens before the `try` so that `finally` only runs
            # (and `fout` is only read) once the swap has succeeded.
            fout = ui.fout.swap(ui.ferr)
            try:
                pull = cmdutil.findcmd("pull", table)[1][0]
                pull(ui, repo)
            finally:
                ui.fout.swap(fout)

            # Try to resolve that rev again now
            try:
                changes = repo.status(corpusbin, None, m)
            except error.RepoLookupError:
                # print the results we've gathered so far. We're not sure
                # how things differ, so we'll follow up with a warning.
                ui.pager("grep")
                for filelines in resultsbyfile.values():
                    for fileline in filelines:
                        ui.write(fileline)

                ui.warn(
                    _(
                        "The results above are based on revision %s\n"
                        "which is not available locally and thus may be inaccurate.\n"
                        "To get accurate results, run `hg pull` and re-run "
                        "your grep.\n"
                    )
                    % corpusrev
                )
                return

        # which files we're going to search locally
        filestogrep = set()

        # files that have been changed or added need to be searched again
        for changed in (changes.modified, changes.added):
            for f in changed:
                resultsbyfile.pop(f, None)
                filestogrep.add(f)

        # files that have been removed since the corpus rev cannot match
        for gone in (changes.removed, changes.deleted):
            for f in gone:
                resultsbyfile.pop(f, None)

        # Having filtered out the changed files from the big grep results,
        # we can now print those that remain.
        ui.pager("grep")
        for filelines in resultsbyfile.values():
            for fileline in filelines:
                ui.write(fileline)

        # pass on any changed files to the local grep
        if filestogrep:
            # Ensure that the biggrep results are flushed before we
            # start to intermingle with the local grep process output
            ui.flush()
            return _rungrep(ui, cmd, sorted(filestogrep), m)

        return 0

    # Local-only search: grep every clean, modified or added file selected by
    # the matcher, skipping symlinks.
    islink = repo.wvfs.islink
    status = repo.dirstate.status(m, False, True, False)
    files = [
        f
        for f in sorted(status.clean + status.modified + status.added)
        if not islink(f)
    ]

    ui.pager("grep")
    return _rungrep(ui, cmd, files, m)