def write_csv()

in mozetl/utils.py


import csv
import logging

import six
from six import text_type

logger = logging.getLogger(__name__)


def write_csv(dataframe, path, header=True):
    """Write a dataframe to local disk.

    Disclaimer: Do not write CSV files larger than driver memory; this
    is ~15 GB for an EC2 c3.xlarge (due to caching overhead).
    """

    # NOTE: Before Spark 2.1, toLocalIterator can time out on some dataframes
    # because RDD materialization may take a long time. Instead of iterating
    # over partitions, collect everything into driver memory.
    # count() is an action, so this runs a full Spark job before the collect below.
    logger.info("Writing {} rows to {}".format(dataframe.count(), path))

    # Python 2's csv module expects a binary file; Python 3's expects text
    # opened with newline="" so the writer controls line endings.
    with open(path, "wb") if six.PY2 else open(path, "w", newline="") as fout:
        writer = csv.writer(fout)

        if header:
            writer.writerow(dataframe.columns)

        for row in dataframe.collect():
            # Under Python 2 the csv module wants bytes, so encode each cell as UTF-8.
            row = [text_type(s).encode("utf-8") for s in row] if six.PY2 else row
            writer.writerow(row)
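
Usage, for reference (a minimal sketch: the SparkSession, the toy
dataframe, and the output path below are illustrative, not part of
mozetl):

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Any dataframe that fits in driver memory works.
df = spark.createDataFrame(
    [("Firefox", 57), ("Fennec", 58)],
    ["app_name", "version"],
)

write_csv(df, "/tmp/apps.csv")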
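
As the NOTE above implies, on Spark 2.1+ toLocalIterator is an
alternative that keeps only one partition in driver memory at a time.
A hedged sketch of that variant (Python 3 only; write_csv_streaming is
a hypothetical name, not a mozetl function):

def write_csv_streaming(dataframe, path, header=True):
    """Like write_csv, but iterates over partitions instead of collecting.

    Only one partition is materialized on the driver at a time, so this
    can handle dataframes larger than the collect()-based version.
    """
    with open(path, "w", newline="") as fout:
        writer = csv.writer(fout)
        if header:
            writer.writerow(dataframe.columns)
        # Pulls partitions to the driver one at a time (Spark 2.1+).
        for row in dataframe.toLocalIterator():
            writer.writerow(row)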
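
For outputs larger than driver memory (the case the docstring warns
against), the usual escape hatch is to let Spark write the CSV itself,
distributed across executors. This produces a directory of part files
rather than a single file (sketch only; the path is illustrative):

# Writes /tmp/apps_csv/part-*.csv; coalesce(1) would yield a single part
# file but funnels all data through one task.
dataframe.write.csv("/tmp/apps_csv", header=True, mode="overwrite")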