in aristotle/aristotle.py [0:0]
def filter_ruleset(self, metadata_filter=None):
    """Applies boolean filter against the ruleset and returns list of matching SIDs.

    Every double-quoted token in the filter string is normalized, replaced
    by a single-word placeholder (recorded in ``self.metadata_map``), and the
    resulting expression is parsed with boolean.py and handed to
    ``self.evaluate()``.

    :param metadata_filter: A string that defines the desired outcome based on
        Boolean logic, and uses the metadata key-value pairs as values in the
        Boolean algebra. Defaults to ``self.metadata_filter`` which must be set
        if this parameter is not set.
    :type metadata_filter: string, optional
    :returns: list of matching SIDs (whatever ``self.evaluate()`` returns for
        the parsed expression tree)
    :rtype: list
    :raises: `AristotleException` (presumably raised by
        ``print_error(..., fatal=True)``, which is defined outside this block)
    """
    if not metadata_filter:
        metadata_filter = self.metadata_filter
    if metadata_filter is None:
        print_error("No metadata_filter set or passed to filter_ruleset()", fatal=True)
    # keep the untouched string around for debug output and error messages
    metadata_filter_original = metadata_filter

    # the boolean.py library uses tokenize which isn't designed to
    # handle multi-word tokens (and doesn't support quoting). So
    # just replace and map to single word. This way we can still
    # leverage boolean.py to do simplifying and building of the tree.
    mytokens = re.findall(r'\x22[a-zA-Z0-9_]+[^\x22]+\x22', metadata_filter, re.DOTALL)
    if not mytokens:
        # nothing to filter on so exit
        print_error("metadata_filter string contains no tokens", fatal=True)

    for t in mytokens:
        # key-value pairs are case insensitive; make everything lower case
        # unless key is "msg_regex" or "rule_regex" (their value keeps its case)
        tsplit = [e.strip() for e in t.strip('"').strip().split(' ', 1)]
        tsplit[0] = tsplit[0].lower()
        if len(tsplit) == 2:
            if tsplit[0] not in ("msg_regex", "rule_regex"):
                tsplit[1] = tsplit[1].lower()
            tstrip = ' '.join(tsplit)
        else:
            # if just key provided (no value), match on all values.
            # Build this from the key itself: ``tstrip`` is not yet assigned
            # for this token (it would be unbound on the first token, or
            # still hold the previous token's value on later ones).
            tstrip = "{} <all>".format(tsplit[0])
        print_debug(tstrip)
        # if token begins with digit, the tokenizer doesn't like it, so
        # prefix the hex digest with a letter
        hashstr = "D" + hashlib.md5(tstrip.encode()).hexdigest()
        # add to map dict so the placeholder can be resolved back later
        self.metadata_map[hashstr] = tstrip
        # replace in filter str (the quoted token, quotes included)
        metadata_filter = metadata_filter.replace(t, hashstr)

    print_debug("{}".format(metadata_filter_original))
    print_debug("\t{}".format(metadata_filter))
    try:
        algebra = boolean.BooleanAlgebra()
        mytree = algebra.parse(metadata_filter).literalize().simplify()
        return self.evaluate(mytree)
    except Exception as e:
        # report any parse/evaluation failure against the user's original string
        print_error("Problem processing metadata_filter string:\n\n{}\n\nError:\n{}".format(metadata_filter_original, e), fatal=True)