in nyc_taxis/_tools/parse.py [0:0]
def to_json(f):
    """Convert CSV rows read from ``f`` to JSON documents printed on stdout.

    The first line of ``f`` is treated as the header. Each header name is
    passed through ``to_underscore``; a leading ``tpep_`` / ``lpep_`` prefix
    is then removed and ``ratecode_id`` is renamed to ``rate_code_id``.

    Every following line is split on ``","`` and turned into a dict keyed by
    the normalized header names (empty values are omitted). The dict is
    handed to ``to_geo_point`` for ``"pickup"`` and ``"dropoff"``, values
    whose entry in the module-level ``types`` mapping is ``"integer"`` or
    ``"float"`` are converted with ``int`` / ``float``, and the result is
    printed as one JSON document per line.

    Rows are consumed lazily, so memory use does not grow with input size.

    Args:
        f: A text file-like object supporting ``readline()`` and iteration
            over lines.

    Raises:
        Exception: If a data row has fewer columns than the header. This
            check runs outside the per-row error handling, so it aborts the
            whole conversion instead of skipping the row.

    Any other per-row failure (unknown field, unparsable number, errors
    raised by ``to_geo_point``) is reported on stderr and the row is skipped.
    A ``KeyboardInterrupt`` raised while a row is being processed stops the
    loop quietly.
    """
    fields = []
    for field in f.readline().strip().split(","):
        field = to_underscore(field)
        if field.startswith(("tpep_", "lpep_")):
            # Both prefixes are exactly five characters long.
            field = field[5:]
        elif field == "ratecode_id":
            field = "rate_code_id"
        fields.append(field)

    # Iterate the file object directly instead of f.readlines(): rows are
    # streamed one at a time rather than materialized as one big list.
    for line in f:
        cols = line.strip().split(",")
        if len(cols) < len(fields):
            raise Exception("Cannot parse '%s': number of fields does not match '%s'" % (line, ",".join(fields)))

        try:
            # zip() stops at len(fields) (cols is at least that long here),
            # so surplus trailing columns are ignored.
            d = {}
            for field, value in zip(fields, cols):
                if value != "":  # the way csv says the field does not exist
                    d[field] = value

            # NOTE(review): to_geo_point is defined elsewhere; presumably it
            # rewrites the pickup/dropoff coordinates in place - confirm.
            to_geo_point(d, "pickup")
            to_geo_point(d, "dropoff")

            # Only existing keys are reassigned below, so the dict's size is
            # stable while it is being iterated.
            for k, v in d.items():
                if k not in types:
                    raise Exception("Unknown field '%s'" % k)
                t = types[k]
                try:
                    if t == "integer":
                        d[k] = int(v)
                    elif t == "float":
                        d[k] = float(v)
                except Exception as cause:
                    raise Exception("Cannot parse (%s,%s)" % (k, v)) from cause

            print(json.dumps(d))
        except KeyboardInterrupt:
            break
        except Exception as e:
            print(
                "Skipping malformed entry '%s' because of %s" % (line, str(e)),
                file=sys.stderr,
            )