in app/python/process.py [0:0]
def process_raw_data(temp_datafile):
    """
    Process local file, producing aggregate data.

    Reads `temp_datafile` as CSV (header row required), drops any row with
    a missing facet value ("" or "?"), and aggregates the remaining rows
    keyed by their "/"-joined facet values. For each key it counts rows
    ("_counter") and, per segment, how many rows have that segment column
    equal to the string "true".

    Args:
        temp_datafile: Path to a CSV file whose header contains every name
            in the module-level FACETS and SEGMENTS constants.

    Returns:
        dict mapping "facet1/facet2/..." keys to
        {"_counter": int, <segment>: int, ...} count dicts.
    """
    logging.info(" process_raw_data: start processing data")
    aggregate = {}
    ignored_records = 0
    counted_records = 0
    with open(temp_datafile) as f:
        data = list(csv.DictReader(f))
    # BUG FIX: previously this logged RAW_DATA_FILE even though the file
    # actually read is temp_datafile.
    logging.info(f"Processing {len(data)} records from {temp_datafile}")
    for row in data:
        # Ignore any record with incomplete data.
        # BUG FIX: the old code incremented ignored_records once per bad
        # facet, over-counting rows missing more than one facet value.
        if any(row[facet] in ("", "?") for facet in FACETS):
            ignored_records += 1
            continue
        # Build aggregate identifier
        row_key = "/".join(row[f] for f in FACETS)
        # Build the base data structure on first encounter; the dict is
        # fresh here, so the segment keys can be set unconditionally.
        if row_key not in aggregate:
            aggregate[row_key] = {"_counter": 0}
            for segment in SEGMENTS:
                aggregate[row_key][segment] = 0
        # Record the relevant data
        for segment in SEGMENTS:
            if row[segment] == "true":
                aggregate[row_key][segment] += 1
        # Increment counters
        aggregate[row_key]["_counter"] += 1
        counted_records += 1
    logging.info(
        f" process_raw_data: processed {counted_records} records,"
        f" removed {ignored_records}."
    )
    return aggregate