def analyze()

in analyzer.py [0:0]


def analyze(log_path):
    """Score every file event stored in a pickled log and print a report.

    The log at ``log_path`` is read as a sequence of back-to-back pickled
    records. Each record is indexed like a dict and must provide ``path``,
    ``operation`` and ``pid`` as UTF-8 encoded bytes, plus ``contents``;
    records whose operation is ``'RENAME'`` must also provide ``prev_path``
    (UTF-8 encoded bytes).

    Two checks are run per record, keyed on the file extension (for a
    ``RENAME`` the extension of the *previous* path is used instead):

    1. header mismatch - ``contents`` is long enough but does not start with
       the expected header taken from ``known_headers``;
    2. entropy - ``calculate_entropy(contents)`` is greater than the limit
       stored in ``entropy_max``.

    Each failed check adds 2.0 to the running alert score, or 4.0 when the
    record is a ``RENAME``. A summary is printed at the end, together with a
    warning when the score is greater than ``score_threshold``.

    Reading stops at the end of the log, or at the first record that cannot
    be unpickled or decoded; the records processed up to that point are still
    reported.

    Args:
        log_path: Path of the pickled event log to analyze.

    Returns:
        None. All results are written to standard output.
    """
    total_files = 0
    system_alert_score = 0.0

    # SECURITY: pickle.load() can execute arbitrary code embedded in the
    # stream. Only run this on logs produced by a trusted source.
    with open(log_path, "rb") as f:
        while True:
            try:
                original_data = pickle.load(f)
            except EOFError:
                # Normal termination: no more records in the log.
                break
            except Exception as exc:
                # Corrupt or truncated stream. Stop reading but keep the
                # results gathered so far instead of failing silently.
                print('*** stopped reading log, unreadable record: ', repr(exc))
                break

            total_files += 1

            # individual event analysis

            try:
                path = original_data['path'].decode('utf-8')
                operation = original_data['operation'].decode('utf-8')
                pid = original_data['pid'].decode('utf-8')
            except (AttributeError, KeyError, TypeError, UnicodeDecodeError) as exc:
                print('*** stopped reading log, malformed record: ', repr(exc))
                break

            contents = original_data['contents']
            file_name = os.path.basename(path)
            file_extension = os.path.splitext(path)[1][1:]
            is_rename = operation == 'RENAME'

            print('=' * 20)
            print('pid: ', pid)
            print('file_name: ', file_name)
            print('operation: ', operation)
            print('original_data contents length: ', len(contents))

            if is_rename:
                # A renamed file is judged against the extension it had
                # before the rename, and violations weigh twice as much.
                prev_path = original_data['prev_path'].decode('utf-8')
                checked_extension = os.path.splitext(prev_path)[1][1:]
                print('previous extension: ', checked_extension)
                weight = 4.0
                header_alert = '*** renamed file header mismatch ***'
                entropy_alert = '*** renamed file exceeds expected entropy max ***'
            else:
                checked_extension = file_extension
                weight = 2.0
                header_alert = '*** header mismatch ***'
                entropy_alert = '*** file exceeds expected entropy max ***'

            # 1) header mismatch

            if checked_extension in known_headers:
                # NOTE(review): the first element of each known_headers entry
                # is skipped; its meaning is not visible here - confirm
                # against the definition of known_headers.
                expected_header = known_headers[checked_extension][1:]
                # Contents shorter than the header cannot be judged.
                if (len(contents) >= len(expected_header)
                        and not contents.startswith(expected_header)):
                    print(header_alert)
                    system_alert_score += weight

            # 2) entropy analysis
            entropy = calculate_entropy(contents)
            print('entropy: ', entropy)

            # The limit must actually be exceeded; the mere presence of the
            # extension in entropy_max is not an alert condition.
            if (checked_extension in entropy_max
                    and entropy > entropy_max[checked_extension]):
                print(entropy_alert)
                system_alert_score += weight

    print('')
    print('-' * 20)
    print('Total Files Analyzed: ', total_files)
    print('Total Alert Score: ', system_alert_score)
    if score_threshold < system_alert_score:
        print('***** Alert Score Exceeded Threshold *****')