analyzer.py (118 lines of code) (raw):

import time import pickle import os import sys import math score_threshold = 5.0 bmp = [0x42, 0x4D] doc = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1] gif = [0x47, 0x49, 0x46, 0x38] jpg = [0xFF, 0xD8, 0xFF] mz = [0x4D, 0x5A] pdf = [0x25, 0x50, 0x44, 0x46] pk = [0x50, 0x4B] png = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A] zip = [0x1F, 0x8B] known_headers = {} known_headers['bmp'] = ''.join(map(chr,bmp)).encode() known_headers['dll'] = ''.join(map(chr,mz)).encode() known_headers['doc'] = ''.join(map(chr,doc)).encode() known_headers['docx'] = ''.join(map(chr,pk)).encode() known_headers['exe'] = ''.join(map(chr,mz)).encode() known_headers['gif'] = ''.join(map(chr,gif)).encode() known_headers['jpg'] = ''.join(map(chr,jpg)).encode() known_headers['pdf'] = ''.join(map(chr,pdf)).encode() known_headers['png'] = ''.join(map(chr,png)).encode() known_headers['pptx'] = ''.join(map(chr,pk)).encode() known_headers['xlsx'] = ''.join(map(chr,pk)).encode() known_headers['zip'] = ''.join(map(chr,zip)).encode() entropy_max = {} entropy_max['bmp'] = 7.5 entropy_max['c'] = 7.0 entropy_max['cpp'] = 7.0 entropy_max['dll'] = 7.5 entropy_max['doc'] = 7.5 entropy_max['docx'] = 7.5 entropy_max['exe'] = 7.5 entropy_max['gif'] = 7.5 entropy_max['h'] = 7.0 entropy_max['jpg'] = 7.5 entropy_max['pdf'] = 7.5 entropy_max['png'] = 7.5 entropy_max['pptx'] = 7.5 entropy_max['rtf'] = 7.0 entropy_max['txt'] = 7.0 entropy_max['xlsx'] = 7.5 entropy_max['zip'] = 7.5 def analyze(log_path): f = open(log_path, "rb") total_files = 0 system_alert_score = 0.0 original_data = None while True: try: original_data = pickle.load(f) except: break total_files += 1 # individual event analysis try: original_data['path'] = original_data['path'].decode('utf-8') original_data['operation'] = original_data['operation'].decode('utf-8') original_data['pid'] = original_data['pid'].decode('utf-8') except: break pid = original_data['pid'] file_name = os.path.basename(original_data['path']) file_extension = os.path.splitext(original_data['path'])[1][1:] print('=' * 20) print('pid: ', pid) print('file_name: ', file_name) print('operation: ', original_data['operation']) print('original_data contents length: ', len(original_data['contents'])) if original_data['operation'] == 'RENAME': prev_path = original_data['prev_path'].decode('utf-8') prev_file_extension = os.path.splitext(prev_path)[1][1:] print('previous extension: ', prev_file_extension) # 1) header mismatch if original_data['operation'] == 'RENAME': if prev_file_extension in known_headers: if len(original_data['contents']) >= len(known_headers[prev_file_extension][1:]): if not original_data['contents'].startswith(known_headers[prev_file_extension][1:]): print('*** renamed file header mismatch ***') system_alert_score += 4.0 elif file_extension in known_headers: if len(original_data['contents']) >= len(known_headers[file_extension][1:]): if not original_data['contents'].startswith(known_headers[file_extension][1:]): print('*** header mismatch ***') system_alert_score += 2.0 # 2) entropy analysis entropy = calculate_entropy(original_data['contents']) print('entropy: ', entropy) if original_data['operation'] == 'RENAME': if prev_file_extension in entropy_max: print('*** renamed file exceeds expected entropy max ***') system_alert_score += 4.0 elif file_extension in entropy_max: if entropy > entropy_max[file_extension]: print('*** file exceeds expected entropy max ***') system_alert_score += 2.0 print('') print('-' * 20) print('Total Files Analyzed: ', total_files) print('Total Alert Score: ', system_alert_score) if score_threshold < system_alert_score: print('***** Alert Score Exceeded Threshold *****') def calculate_entropy(data): if len(data) == 0: return 0.0 entropy = 0.0 for x in range(256): p_x = float(data.count(x))/len(data) if p_x > 0: entropy += - p_x*math.log(p_x, 2) return entropy if __name__ == "__main__": if len(sys.argv) == 1: log_path = "C:\\python_log\\python_log.dcart" else: log_path = sys.argv[1] if os.path.exists(log_path): analyze(log_path)