in aardvark.py [0:0]
def __init__(self, config_file: str = "aardvark.yaml"):
    """Set defaults, then override them from the YAML config file.

    Args:
        config_file: Path to a YAML configuration file. Pass a falsy value
            (e.g. ``""``) to skip loading and keep the built-in defaults.

    Recognized config keys: ``debug``, ``proxy_url``, ``max_request_size``,
    ``port``, ``ipheader``, ``savedata``, ``persistence``,
    ``suppress_repeats``, ``spam_response``, ``enable_naive_scan``,
    ``naive_spam_threshold``, ``postmatches``, ``spamurls``, ``ignoreurls``
    and ``multimatch`` (with ``required`` / ``auxiliary`` sub-lists).

    Raises:
        OSError: If ``config_file`` is set but cannot be opened.
        re.error: If a configured pattern is not a valid regular expression.
    """
    # Type checking hints for mypy
    self.scan_times: typing.List[float]
    self.last_batches: typing.List[float]
    self.processing_times: typing.List[float]
    self.offenders: typing.Set[str]
    self.spamurls: typing.Set[re.Pattern]
    self.postmatches: typing.Set[re.Pattern]
    self.multispam_auxiliary: typing.Set[re.Pattern]
    self.multispam_required: typing.Set[re.Pattern]

    # Init vars with defaults
    self.config = {}  # Our config, unless otherwise specified in init
    self.myuid = str(uuid.uuid4())
    self.debug = False  # Debug prints, spammy!
    self.persistence = False  # Persistent block list
    self.block_msg = DEFAULT_BLOCK_MSG
    self.proxy_url = DEFAULT_BACKEND  # Backend URL to proxy to
    self.max_request_size = DEFAULT_MAX_REQUEST_SIZE
    self.port = DEFAULT_PORT  # Port we listen on
    self.ipheader = DEFAULT_IPHEADER  # Standard IP forward header
    self.savepath = DEFAULT_SAVE_PATH  # File path for saving offender data
    self.suppress_repeats = DEBUG_SUPPRESS  # Whether to suppress logging of repeat offenders
    self.asyncwrite = False  # Only works on later Linux (>=4.18)
    self.last_batches = []  # Last batches of requests for stats
    self.scan_times = []  # Scan times for stats
    self.processing_times = []  # Request proxy processing times for stats
    self.postmatches = set()  # SPAM POST data simple matches
    self.spamurls = set()  # Honey pot URLs
    self.ignoreurls = set()  # URLs we should not scan
    self.multispam_required = set()  # Multi-Match required matches
    self.multispam_auxiliary = set()  # Auxiliary Multi-Match strings
    self.offenders = set()  # List of already known offenders (block right out!)
    self.naive_threshold = DEFAULT_SPAM_NAIVE_THRESHOLD
    self.enable_naive = DEFAULT_NAIVE
    self.lock = asyncio.Lock()

    if platform.system() == "Linux":
        # Compare the kernel version numerically: comparing the string parts
        # would rank "9" above "18" (4.9 wrongly accepted) and "10" below "4".
        # Only the leading "major.minor" is parsed, so any suffix or a release
        # string with fewer than three dotted parts is tolerated.
        kernel = re.match(r"(\d+)\.(\d+)", platform.release())
        if kernel and (int(kernel.group(1)), int(kernel.group(2))) >= (4, 18):
            self.asyncwrite = True
    if self.asyncwrite:
        print("Utilizing kernel support for asynchronous writing of files")
    else:
        print("Kernel does not support asynchronous writing of files, falling back to synced writing")

    # If config file, load that into the vars
    if config_file:
        # Context manager so the file handle is closed after parsing.
        with open(config_file, "r") as cfg:
            # safe_load() yields None for an empty document; fall back to {}
            # so the .get() lookups below keep working.
            self.config = yaml.safe_load(cfg) or {}
        self.debug = self.config.get("debug", self.debug)
        self.proxy_url = self.config.get("proxy_url", self.proxy_url)
        self.max_request_size = self.config.get("max_request_size", self.max_request_size)
        self.port = int(self.config.get("port", self.port))
        self.ipheader = self.config.get("ipheader", self.ipheader)
        self.savepath = self.config.get("savedata", self.savepath)
        self.persistence = self.config.get("persistence", self.persistence)
        self.suppress_repeats = self.config.get("suppress_repeats", self.suppress_repeats)
        self.block_msg = self.config.get("spam_response", self.block_msg)
        self.enable_naive = self.config.get("enable_naive_scan", self.enable_naive)
        self.naive_threshold = self.config.get("naive_spam_threshold", self.naive_threshold)
        # POST-data and multi-match patterns are compiled as bytes patterns;
        # spam URL patterns are compiled as str patterns.
        for pm in self.config.get("postmatches", []):
            self.postmatches.add(re.compile(bytes(pm, encoding="utf-8"), flags=re.IGNORECASE))
        for su in self.config.get("spamurls", []):
            self.spamurls.add(re.compile(su, flags=re.IGNORECASE))
        # NOTE(review): this replaces the default set() with whatever the
        # config holds (a list by default) - confirm consumers only iterate
        # or test membership.
        self.ignoreurls = self.config.get("ignoreurls", [])
        multimatch = self.config.get("multimatch", {})
        if multimatch:
            for req in multimatch.get("required", []):
                self.multispam_required.add(re.compile(bytes(req, encoding="utf-8"), flags=re.IGNORECASE))
            for aux in multimatch.get("auxiliary", []):
                self.multispam_auxiliary.add(re.compile(bytes(aux, encoding="utf-8"), flags=re.IGNORECASE))

    # Restore previously saved offenders when persistence is enabled.
    if self.persistence and os.path.exists(BLOCKFILE):
        offenders = 0  # Counts loaded lines, duplicates included
        with open(BLOCKFILE, "r") as bl:
            for line in bl:
                entry = line.strip()
                # Skip blanks and comments; test the stripped text so an
                # indented "# ..." line is not loaded as an offender.
                if entry and not entry.startswith("#"):
                    offenders += 1
                    self.offenders.add(entry)
        print(f"Loaded {offenders} offenders from persistent storage.")

    if self.enable_naive:
        print("Loading Naïve Bayesian spam filter...")
        self.spamfilter = spamfilter.BayesScanner()