in aristotle/aristotle.py [0:0]
def parse_rules(self, rules, filename=None):
    """Parses the given rules and builds/updates necessary data structures.

    Every non-blank line is treated as one of: an enabled rule, a disabled
    (commented out) rule as recognized by ``disabled_rule_re``, or a plain
    comment, which is skipped. For each rule the sid, classtype, metadata and
    msg are extracted and an entry is stored in ``self.metadata_dict``, keyed
    by the integer sid. When a sid occurs more than once, the first enabled
    rule is kept; if every occurrence is disabled, the first one is kept.

    Line numbers in messages are 1-based. Any exception raised while parsing
    is printed with its traceback and then reported through
    ``print_error(..., fatal=True)``.

    :param rules: rules (one per line) to parse and build/update the necessary data structures
    :type rules: string, required
    :param filename: if the passed in rules came from a file, the filename of that file
    :type filename: string, optional
    """
    try:
        # start=1 so that reported line numbers match what an editor shows
        for lineno, line in enumerate(rules.splitlines(), start=1):
            # ignore comments and blank lines
            is_disabled_rule = False
            if not line.strip():
                continue
            if line.lstrip().startswith('#'):
                if disabled_rule_re.match(line.strip()):
                    is_disabled_rule = True
                    # drop the leading '#' so the rest parses like an enabled rule
                    line = line.lstrip()[1:].strip()
                else:
                    # valid comment (not disabled rule)
                    print_debug("Skipping comment: {}".format(line))
                    continue

            # extract sid
            matchobj = sid_re.search(line)
            if not matchobj:
                print_error("Invalid rule on line {}:\n{}".format(lineno, line), fatal=True)
            sid = int(matchobj.group("SID"))

            # extract classtype. This only grabs the first one; some engines support multiple
            # 'classtype' keywords in rules but in practice it is rarely, if ever, done.
            classtype = None
            matchobj = classtype_keyword_re.search(line)
            if matchobj:
                classtype = matchobj.group("CLASSTYPE")
            else:
                print_debug("No 'classtype' keyword found in sid {}".format(sid))

            # extract metadata keyword value
            metadata_str = ""
            matchobj = metadata_keyword_re.search(line)
            if matchobj:
                metadata_str = matchobj.group("METADATA")
            else:
                print_warning("No 'metadata' keyword found in sid {}".format(sid))
            # debug sample: the first line and every 1000th line after it
            if (lineno - 1) % 1000 == 0:
                print_debug("metadata_str for sid {}:\n{}".format(sid, metadata_str))

            # extract 'msg' field
            matchobj = rule_msg_re.search(line)
            if not matchobj:
                print_warning("Unable to extract rule msg from SID '{}'.".format(sid))
                msg = ""
            else:
                msg = matchobj.group("MSG")

            # build dict
            if sid in self.metadata_dict:
                # include the first encountered enabled rule; if they are all disabled, include the first encountered.
                print_warning("Duplicate sid '{}' found{}".format(sid, "!" if not filename else " in file '{}'!".format(filename)))
                if is_disabled_rule:
                    print_warning("Ignoring disabled rule with duplicate sid: {}".format(line))
                    continue
                if self.metadata_dict[sid]['disabled']:
                    # the stored rule is disabled and this one is enabled:
                    # fall through so this rule replaces the stored one
                    print_warning("Ignoring disabled rule with duplicate sid: {}".format(self.metadata_dict[sid]['raw_rule']))
                else:
                    print_warning("Ignoring rule with duplicate sid: {}".format(line))
                    continue

            self.metadata_dict[sid] = {'metadata': {},
                                       'msg': msg,
                                       'disabled': False if self.enable_all_rules else is_disabled_rule,
                                       'originally_disabled': is_disabled_rule,
                                       'raw_rule': line
                                       }

            metadata_pairs = []
            if metadata_str:
                metadata_pairs.extend(metadata_str.split(','))
            if classtype and not self.ignore_classtype_keyword:
                # add classtype from keyword as pseudo metadata key
                metadata_pairs.append("classtype {}".format(classtype))
            if filename and not self.ignore_filename:
                metadata_pairs.append("filename {}".format(filename))

            for kvpair in metadata_pairs:
                # key-value pairs are case insensitive; make everything lower case
                # also remove extra spaces before, after, and between key and value
                kvsplit = [e.strip() for e in kvpair.lower().strip().split(' ', 1)]
                if len(kvsplit) < 2:
                    # just a single word in metadata. warn and skip
                    print_warning("Single word metadata value found, ignoring '{}' in sid {}".format(kvpair, sid))
                    continue
                k, v = kvsplit
                if k == "sid" and int(v) != sid:
                    # this is in violation of the BETTER schema, throw warning
                    print_warning("line {}: 'sid' metadata key value '{}' does not match rule sid '{}'. This may lead to unexpected results".format(lineno, v, sid))
                # normalize_better() returns a list b/c in rare cases it will produce more than one key/value pair.
                # Because of that, make everything a(nother) list, even though most of the time it will be
                # a one element list
                if self.normalize:
                    kvs = self.normalize_better(k, v, sid)
                else:
                    kvs = [kvsplit]
                for current_kvp in kvs:
                    k, v = current_kvp
                    self.add_metadata(sid, k, v)

            for k in self.metadata_dict[sid]['metadata']:
                # remove duplicate values for the same key
                self.metadata_dict[sid]['metadata'][k] = list(set(self.metadata_dict[sid]['metadata'][k]))

            # add sid as pseudo metadata key unless it already exists
            if 'sid' not in self.metadata_dict[sid]['metadata']:
                # keys and values are strings; variable "sid" is int so must
                # be cast as str when used the same way other keys and values are used.
                self.metadata_dict[sid]['metadata']['sid'] = [str(sid)]
                self.keys_dict['sid'][str(sid)] = [sid]

            # add 'originally_disabled' as pseudo metadata key so it can be filtered on
            if 'originally_disabled' in self.metadata_dict[sid]['metadata']:
                print_warning("Metadata key 'originally_disabled' found in SID {}. "
                              "This is an internal metadata key used by Aristotle. "
                              "The value '{}' found in the rule will be ignored.".format(sid, self.metadata_dict[sid]['metadata']['originally_disabled']))
                self.delete_metadata(sid, 'originally_disabled')
            self.add_metadata(sid, 'originally_disabled', str(self.metadata_dict[sid]['originally_disabled']))
    except Exception as e:
        # print_exc() takes no exception argument (its first parameter is
        # 'limit'); it prints the exception currently being handled.
        traceback.print_exc()
        print_error("Problem loading rules: {}".format(e), fatal=True)