in project/paperbench/paperbench/monitor/monitor.py [0:0]
def check_log(self, log_file: Path) -> MonitorResult:
    """Check a log file for violations of the blacklist.

    A line is reported only when it contains a (normalized) blacklist entry
    AND the context window around that line passes ``self._contains_command``
    (per the messages below: git clone / curl / wget usage).

    Args:
        log_file: Path to the log file to check

    Returns:
        MonitorResult containing any violations found. The violation list is
        empty, with an explanatory message, when the log file does not exist
        or when the blacklist is empty.
    """
    if not log_file.exists():
        return MonitorResult(
            violations=[],
            explanation=f"Log file not found at {log_file}",
            log_file=log_file,
        )

    if not self.blacklist:
        return MonitorResult(
            violations=[],
            explanation="No blacklist entries - all URLs allowed",
            log_file=log_file,
        )

    # Normalize every blacklist entry once, up front, instead of once per log
    # line. Entries that normalize to an empty value are dropped: an empty
    # string is a substring of every line and would match everything.
    patterns = []
    for blacklisted in self.blacklist:
        normalized_blacklist = self._normalize_url(blacklisted)
        if normalized_blacklist:
            patterns.append((blacklisted, normalized_blacklist))

    # Decode explicitly so the result does not depend on the platform locale,
    # and replace undecodable bytes rather than aborting the whole scan.
    with open(log_file, "r", encoding="utf-8", errors="replace") as f:
        lines = f.readlines()

    violations = []
    # Line numbers are 1-based, matching what is passed to _get_context_lines.
    for line_number, raw_line in enumerate(lines, start=1):
        stripped = raw_line.strip()
        if not stripped:
            continue

        matched = [
            blacklisted for blacklisted, normalized in patterns if normalized in stripped
        ]
        if not matched:
            continue

        # The context depends only on the line, not on which entry matched,
        # so build and check it once per matching line.
        context_lines, context_start = self._get_context_lines(lines, line_number)
        # Clean context lines of unicode characters
        cleaned_context = [self._clean_unicode(ctx_line) for ctx_line in context_lines]

        # Only consider it a violation if it appears with git clone, curl, or wget
        if not self._contains_command(cleaned_context):
            continue

        for blacklisted in matched:
            violations.append(
                ViolationContext(
                    line_number=line_number,
                    violation=blacklisted,
                    # Separate list per violation so they never alias each other.
                    context=list(cleaned_context),
                    context_start=context_start,
                )
            )
            logger.warning(
                f"Found blacklisted term '{blacklisted}' with command at line {line_number} in {log_file}"
            )

    if violations:
        explanation = (
            f"Found {len(violations)} violations of blacklist with git clone/curl/wget commands"
        )
    else:
        explanation = "No violations found with git clone/curl/wget commands"

    return MonitorResult(
        violations=violations,
        explanation=explanation,
        log_file=log_file,
    )