def filter_ruleset()

in aristotle/aristotle.py [0:0]


    def filter_ruleset(self, metadata_filter=None):
        """Applies boolean filter against the ruleset and returns list of matching SIDs.

        Every double-quoted token in the filter string is normalized and
        replaced with a single-word placeholder (recorded in
        ``self.metadata_map``) so that the boolean.py parser can handle it.
        The rewritten expression is then parsed, literalized and simplified,
        and the resulting tree is passed to ``self.evaluate()``.

        :param metadata_filter: A string that defines the desired outcome based on
            Boolean logic, and uses the metadata key-value pairs as values in the
            Boolean algebra. Defaults to ``self.metadata_filter`` which must be set
            if this parameter is not set.
        :type metadata_filter: string, optional
        :returns: list of matching SIDs
        :rtype: list
        :raises: `AristotleException`
        """
        if not metadata_filter:
            metadata_filter = self.metadata_filter
        if metadata_filter is None:
            print_error("No metadata_filter set or passed to filter_ruleset()", fatal=True)
        # keep the untouched string around for debug and error messages
        metadata_filter_original = metadata_filter
        # the boolean.py library uses tokenize which isn't designed to
        # handle multi-word tokens (and doesn't support quoting). So
        # just replace and map to single word. This way we can still
        # leverage boolean.py to do simplifying and building of the tree.
        mytokens = re.findall(r'\x22[a-zA-Z0-9_]+[^\x22]+\x22', metadata_filter, re.DOTALL)
        if not mytokens:
            # nothing to filter on so exit
            print_error("metadata_filter string contains no tokens", fatal=True)
        for t in mytokens:
            # Drop the surrounding quotes, then split into key and (optional)
            # value on the first space, trimming extra whitespace from each.
            tsplit = [e.strip() for e in t.strip('"').strip().split(' ', 1)]
            # key-value pairs are case insensitive; make everything lower case
            # unless key is "msg_regex" or "rule_regex" (the value is then
            # left as written).
            tsplit[0] = tsplit[0].lower()
            if len(tsplit) == 2:
                if tsplit[0] not in ("msg_regex", "rule_regex"):
                    tsplit[1] = tsplit[1].lower()
                tstrip = ' '.join(tsplit)
            else:
                # if just key provided (no value), match on all values.
                # Build this from the key itself; ``tstrip`` is not yet
                # assigned for this token at this point.
                tstrip = "{} <all>".format(tsplit[0])
            print_debug(tstrip)
            # if token begins with digit, the tokenizer doesn't like it,
            # so prefix the hex digest with a letter
            hashstr = "D" + hashlib.md5(tstrip.encode()).hexdigest()
            # add to map dict so the placeholder can be resolved back to the
            # normalized "key value" string
            self.metadata_map[hashstr] = tstrip
            # replace in filter str (all occurrences of this exact token)
            metadata_filter = metadata_filter.replace(t, hashstr)

        print_debug("{}".format(metadata_filter_original))
        print_debug("\t{}".format(metadata_filter))
        try:
            algebra = boolean.BooleanAlgebra()
            mytree = algebra.parse(metadata_filter).literalize().simplify()
            return self.evaluate(mytree)
        except Exception as e:
            # NOTE: this also wraps any error raised inside self.evaluate(),
            # reporting it against the original (un-rewritten) filter string.
            print_error("Problem processing metadata_filter string:\n\n{}\n\nError:\n{}".format(metadata_filter_original, e), fatal=True)