# nyc_taxis/_tools/parse.py
def to_json(f):
    """Convert CSV records read from ``f`` into JSON documents on stdout.

    The first line of ``f`` is treated as the CSV header. Each header name is
    passed through ``to_underscore``; a leading ``tpep_`` / ``lpep_`` prefix
    is dropped and ``ratecode_id`` is renamed to ``rate_code_id``.

    Every following line is split on commas and turned into a dict keyed by
    the normalized header names (empty values are omitted). The dict is then
    handed to ``to_geo_point`` for ``'pickup'`` and ``'dropoff'``, values are
    converted according to the module-level ``types`` mapping (``'integer'``
    -> ``int``, ``'float'`` -> ``float``, anything else is left untouched),
    and the result is printed as one JSON document per line.

    Rows that fail inside the conversion step (unknown field, unparsable
    number, or any error from ``to_geo_point``) are reported on stderr and
    skipped. A ``KeyboardInterrupt`` stops processing quietly.

    Args:
        f: A text file-like object supporting ``readline()`` and line
            iteration, positioned at the CSV header.

    Raises:
        Exception: If a data row has fewer columns than the header. This
            check runs outside the per-row error handling, so it aborts the
            whole run instead of skipping the row.
    """
    fields = []
    for field in f.readline().strip().split(','):
        field = to_underscore(field)
        if field.startswith(('tpep_', 'lpep_')):
            # Drop the 5-character 'tpep_' / 'lpep_' prefix.
            field = field[5:]
        elif field == 'ratecode_id':
            field = 'rate_code_id'
        fields.append(field)

    # Iterate the file object directly so rows are streamed one at a time
    # instead of materializing the entire input in memory via readlines().
    for line in f:
        cols = line.strip().split(',')
        if len(cols) < len(fields):
            raise Exception("Cannot parse '%s': number of fields does not match '%s'" %(line, ",".join(fields)))

        try:
            # zip() stops at the end of `fields`, so surplus trailing columns
            # are ignored. An empty string is the way csv says the field
            # does not exist, so such fields are left out of the document.
            d = {field: value for field, value in zip(fields, cols) if value != ''}

            # NOTE(review): to_geo_point is defined elsewhere; presumably it
            # rewrites the pickup/dropoff coordinate fields of `d` in place.
            to_geo_point(d, 'pickup')
            to_geo_point(d, 'dropoff')

            # Only values of existing keys are reassigned here, so the dict
            # does not change size while it is being iterated.
            for (k, v) in d.items():
                if k not in types:
                    raise Exception("Unknown field '%s'" %k)
                t = types[k]
                try:
                    if t == 'integer':
                        d[k] = int(v)
                    elif t == 'float':
                        d[k] = float(v)
                except Exception as cause:
                    raise Exception("Cannot parse (%s,%s)" %(k, v)) from cause

            print(json.dumps(d))
        except KeyboardInterrupt:
            break
        except Exception as e:
            print("Skipping malformed entry '%s' because of %s" %(line, str(e)), file=sys.stderr)