in example_zoo/tensorflow/models/ncf_main/official/datasets/movielens.py [0:0]
def _transform_csv(input_path, output_path, names, skip_first, separator=","):
"""Transform csv to a regularized format.
Args:
input_path: The path of the raw csv.
output_path: The path of the cleaned csv.
names: The csv column names.
skip_first: Boolean of whether to skip the first line of the raw csv.
separator: Character used to separate fields in the raw csv.
"""
if six.PY2:
names = [n.decode("utf-8") for n in names]
with tf.gfile.Open(output_path, "wb") as f_out, \
tf.gfile.Open(input_path, "rb") as f_in:
# Write column names to the csv.
f_out.write(",".join(names).encode("utf-8"))
f_out.write(b"\n")
for i, line in enumerate(f_in):
if i == 0 and skip_first:
continue # ignore existing labels in the csv
line = line.decode("utf-8", errors="ignore")
fields = line.split(separator)
if separator != ",":
fields = ['"{}"'.format(field) if "," in field else field
for field in fields]
f_out.write(",".join(fields).encode("utf-8"))