def get_parsed_data_from_file()

in src/read_log_file.py [0:0]


def get_parsed_data_from_file(logfile, ignore_crashes=False):
    """Parse ``logfile`` into a pandas DataFrame, one column per unique name.

    The capture pattern, column names and data types come from
    ``get_parsing_groups()``. Several capture groups may share a column
    name; their group indices and data types are collected together and
    handed to ``__create_column`` to build that column's values.

    Args:
        logfile: Forwarded unchanged to ``__manyMatch_LineSearch``
            (presumably a path to the log file -- confirm against caller).
        ignore_crashes: When truthy, the frame is checked with
            ``assert_no_timing_errors`` and, if that check returns a falsy
            value, replaced by the result of ``fix_timing_errors``.

    Returns:
        The resulting DataFrame, with ``np.nan`` entries replaced by ``None``.

    Raises:
        KeyError: If the "EventType", "SafepointName" or
            "TimeToStopApplication_seconds" column is absent.
    """
    pattern, names, dtypes = get_parsing_groups()
    matches = __manyMatch_LineSearch(pattern, logfile)

    # Map each column name to (group indices, data types). Entries with an
    # empty name are dropped *before* numbering, so they do not consume a
    # group index.
    layout = {}
    named_groups = ((name, dtype) for name, dtype in zip(names, dtypes) if name)
    for group_index, (name, dtype) in enumerate(named_groups):
        indices, kinds = layout.setdefault(name, ([], []))
        indices.append(group_index)
        kinds.append(dtype)

    # Build one value list per column from the matched table; the helper
    # receives the data, then the data types, then the group indices.
    columns = {
        name: __create_column(matches, kinds, indices)
        for name, (indices, kinds) in layout.items()
    }

    # Special case: let set_safepoints_eventype rewrite the EventType column
    # (per the original note, so safepoint rows read "Safepoint" rather
    # than None).
    columns["EventType"] = set_safepoints_eventype(
        columns["EventType"],
        columns["SafepointName"],
        columns["TimeToStopApplication_seconds"],
    )

    df = pd.DataFrame(columns)

    # Clean data, apply restrictions as needed.
    df.replace({np.nan: None}, inplace=True)
    if ignore_crashes and not assert_no_timing_errors(df):
        df = fix_timing_errors(df)

    return df