def process_raw_data()

in app/python/process.py [0:0]


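The listing below relies on module-level imports and constants that are not shown in this section. A minimal sketch of that setup, assuming FACETS and SEGMENTS are lists of CSV column names and RAW_DATA_FILE names the original source file (the specific values here are hypothetical):

import csv
import logging

# Hypothetical configuration; the real module defines its own values.
FACETS = ["region", "category"]        # columns used to build the aggregate key
SEGMENTS = ["opted_in", "converted"]   # boolean columns counted per key
RAW_DATA_FILE = "raw_data.csv"         # name of the original raw data source
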
def process_raw_data(temp_datafile):
    """
    Process the local CSV file, producing aggregate data keyed by facet values
    """
    logging.info("  process_raw_data: start processing data")

    aggregate = {}
    ignored_records = 0
    counted_records = 0

    with open(temp_datafile) as f:
        csv_data = csv.DictReader(f)
        data = [row for row in csv_data]

        logging.info(f"Processing {len(data)} records from {RAW_DATA_FILE}")

    # Process each row.
    for row in data:

        # Ignore any records with incomplete data
        process = True
        for facet in FACETS:
            if row[facet] in ("", "?"):
                ignored_records += 1
                process = False
                break  # count each incomplete row once, not once per facet

        if process:
            # Build aggregate identifier
            row_key = "/".join([row[f] for f in FACETS])

            # Build the base data structure on first encounter
            if row_key not in aggregate:
                aggregate[row_key] = {"_counter": 0}
                for segment in SEGMENTS:
                    aggregate[row_key][segment] = 0

            # Record the relevant data
            for segment in SEGMENTS:
                if row[segment] == "true":
                    aggregate[row_key][segment] += 1

            # Increment counters
            aggregate[row_key]["_counter"] += 1
            counted_records += 1

    logging.info(
        f"  process_raw_data: processed {counted_records} records,"
        f" removed {ignored_records}."
    )
    return aggregate
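
A hedged usage sketch, assuming an upstream step has written the raw CSV to a temporary path (the path and the result values shown are illustrative only):

# Hypothetical call site; "downloads/raw.csv" stands in for whatever
# temporary file an earlier download step produced.
aggregate = process_raw_data("downloads/raw.csv")

# Example shape of the return value, using the hypothetical FACETS and
# SEGMENTS above (the numbers depend entirely on the input data):
# {
#     "emea/widgets": {"_counter": 42, "opted_in": 17, "converted": 5},
#     "apac/widgets": {"_counter": 13, "opted_in": 4, "converted": 1},
# }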