in bot/code_review_bot/__init__.py [0:0]
def hash(self):
    """
    Build a unique hash identifying that issue and cache the resulting value.

    The text concerned by the issue is hashed rather than its position in the
    file, so the hash survives unrelated changes elsewhere in the file.
    The message content is included too, as a single linter may return
    multiple issues on a single line; we assume the message does not contain
    the line number.

    Returns the hex MD5 digest (MD5 is used as a fingerprint here, not for
    security), or None when the file content cannot be read (locally or
    remotely) or when the file is autogenerated.
    """
    assert self.revision is not None, "Missing revision"

    # Only hash file content for non-autogenerated files.
    # An autogenerated file resides in the build directory, which has the
    # format `obj-x86_64-pc-linux-gnu`.
    file_content = None
    if "/obj-" not in self.path:
        if settings.mercurial_cache_checkout:
            # Prefer the local checkout when available: no network access
            logger.debug("Using the local repository to build issue's hash")
            try:
                with (settings.mercurial_cache_checkout / self.path).open() as f:
                    file_content = f.read()
            except (FileNotFoundError, IsADirectoryError):
                logger.warning(
                    "Failed to find issue's related file", path=self.path
                )
                file_content = None
        else:
            try:
                # Load all the lines affected by the issue from the remote
                file_content = self.revision.load_file(self.path)
            except ValueError:
                # Build the hash with an empty content in case the path is erroneous
                file_content = None
            except requests.exceptions.HTTPError as e:
                if e.response.status_code == 404:
                    logger.warning(
                        "Failed to download a file with an issue", path=self.path
                    )
                    # We still build the hash with empty content
                    file_content = None
                else:
                    # When encountering another HTTP error, raise the issue
                    raise

    if file_content is None:
        # Cache the failure too, so repeated calls do not retry the read
        self._hash = None
        return self._hash

    # Build raw content:
    # 1. lines affected by patch
    # 2. without any spaces around each line
    file_lines = file_content.splitlines()
    if self.line is None or self.nb_lines is None:
        # Use full file when line is not specified
        lines = file_lines
    else:
        # Use a range of lines
        start = self.line - 1  # file_lines start at 0, not 1
        lines = file_lines[start : start + self.nb_lines]
    raw_content = "\n".join(line.strip() for line in lines)

    # Build hash payload using issue data
    # excluding file position information (lines & char)
    extras = json.dumps(self.build_extra_identifiers(), sort_keys=True)
    payload = ":".join(
        [
            self.analyzer.name,
            self.path,
            self.level.value,
            self.check,
            extras,
            raw_content,
            self.message,
        ]
    ).encode("utf-8")

    # Finally build the MD5 hash, caching it as promised by the docstring
    # (previously only the failure path populated self._hash).
    self._hash = hashlib.md5(payload).hexdigest()
    return self._hash