def to_json()

in nyc_taxis/_tools/parse.py [0:0]


def to_json(f):
  """Convert comma-separated rows read from `f` into JSON documents on stdout.

  The first line of `f` is treated as the header. Each header name is passed
  through `to_underscore`, a leading 'tpep_' / 'lpep_' prefix is dropped and
  'ratecode_id' is renamed to 'rate_code_id'. Every following line becomes one
  dict (empty columns are omitted) that is printed as a single JSON line.

  Values whose field is declared 'integer' or 'float' in the module-level
  `types` mapping are converted accordingly; all other values stay strings.

  Args:
    f: a text file-like object supporting `readline()` and line iteration.

  Raises:
    Exception: if a data line has fewer columns than the header. This aborts
      processing. Any other per-line problem (unknown field, unparsable
      number, ...) is reported on stderr and only that line is skipped.

  A KeyboardInterrupt raised while a line is being converted stops the loop
  without a traceback.
  """
  fields = []
  for field in f.readline().strip().split(','):
    field = to_underscore(field)
    # both prefixes are five characters long, hence the fixed slice
    if field.startswith(('tpep_', 'lpep_')):
      field = field[5:]
    elif field == 'ratecode_id':
      field = 'rate_code_id'
    fields.append(field)

  # Iterate the file lazily rather than via f.readlines(): the input can be
  # very large and there is no need to hold all of it in memory at once.
  for line in f:
    cols = line.strip().split(',')
    if len(cols) < len(fields):
      raise Exception("Cannot parse '%s': number of fields does not match '%s'" %(line, ",".join(fields)))

    try:
      # zip() stops after len(fields) items, so surplus trailing columns are
      # ignored. An empty value is the way csv says the field does not exist.
      d = {field: value for field, value in zip(fields, cols) if value != ''}

      # NOTE(review): to_geo_point is defined elsewhere; presumably it folds
      # the pickup / dropoff coordinate fields of d into a single entry each.
      to_geo_point(d, 'pickup')
      to_geo_point(d, 'dropoff')

      # Only values of existing keys are replaced below, so the dict does not
      # change size while it is being iterated.
      for (k, v) in d.items():
        if k not in types:
          raise Exception("Unknown field '%s'" %k)
        t = types[k]
        try:
          if t == 'integer':
            d[k] = int(v)
          elif t == 'float':
            d[k] = float(v)
        except Exception as cause:
          raise Exception("Cannot parse (%s,%s)" %(k, v)) from cause

      print(json.dumps(d))
    except KeyboardInterrupt:
      break
    except Exception as e:
      print("Skipping malformed entry '%s' because of %s" %(line, str(e)), file=sys.stderr)