in recommended-item-search/data_preparation.py [0:0]
def main(_):
    """Build the recommendation dataset and export it to ``FLAGS.export_dir``.

    Steps, in order:
      1. Load the movies and ratings tables via ``load_movielens_data``.
      2. Add a ``movie_id2`` column to both tables holding each movie's row
         position in the movies table.
      3. Keep ratings strictly greater than ``FLAGS.rating_threshold`` and
         collect the kept ``movie_id2`` values into one list per user.
      4. Recreate ``FLAGS.export_dir`` from scratch (an existing directory
         is deleted recursively).
      5. Split the per-user table with ``split_dataframe`` and pass the
         parts to ``make_tfrecord_files`` (8 'train' files, 1 'eval' file).
      6. Export ``metadata.pickle`` and ``projector_index.tsv``.

    Args:
      _: Unused positional argument.
    """

    def publish(filename, write_local):
        # Stage the file in a throwaway local directory, then copy it into
        # the export directory through tf.gfile.
        # NOTE(review): presumably this indirection exists so export_dir can
        # live on a non-local filesystem -- confirm.
        with tempfile.TemporaryDirectory() as staging_dir:
            staged_path = os.path.join(staging_dir, filename)
            target_path = os.path.join(FLAGS.export_dir, filename)
            write_local(staged_path)
            # The copy must happen before the staging directory is removed.
            tf.gfile.Copy(staged_path, target_path, overwrite=True)

    tf.logging.info('Download {} ...'.format(FLAGS.filename))
    movies, ratings = load_movielens_data()

    # Map every original movie id to its row position in `movies`.
    original_ids = movies['movie_id'].values
    movieid_to_index = dict(zip(original_ids, np.arange(len(movies))))

    def to_position(movie_id):
        # Raises KeyError for an id that is absent from the movies table.
        return movieid_to_index[movie_id]

    movies['movie_id2'] = movies['movie_id'].apply(to_position)
    ratings['movie_id2'] = ratings['movie_id'].apply(to_position)

    tf.logging.info('Converting dataset ...')
    above_threshold = ratings['rating'] > FLAGS.rating_threshold
    ratings = ratings[above_threshold]
    # One row per user; the movie_id2 cell holds the list of kept indices.
    grouped_by_user = ratings[['user_id', 'movie_id2']].groupby(
        'user_id', as_index=False)
    rawdata = grouped_by_user.aggregate(lambda values: list(values))

    # Always start from an empty export directory.
    if tf.gfile.Exists(FLAGS.export_dir):
        tf.logging.info('Remove {} ...'.format(FLAGS.export_dir))
        tf.gfile.DeleteRecursively(FLAGS.export_dir)
    tf.gfile.MakeDirs(FLAGS.export_dir)

    tf.logging.info('Exporting TFRecord to {}'.format(FLAGS.export_dir))
    train_inputs, eval_inputs = split_dataframe(rawdata)
    make_tfrecord_files(dataframe=train_inputs, file_type='train', num_files=8)
    make_tfrecord_files(dataframe=eval_inputs, file_type='eval', num_files=1)

    tf.logging.info('Exporting metadata to {}'.format(FLAGS.export_dir))
    metadata = {'N': len(movies), 'movies': movies, 'rawdata': rawdata}

    def write_metadata(path):
        # The file is closed (and therefore flushed) before publish() copies it.
        with open(path, 'wb') as stream:
            pickle.dump(metadata, stream)

    publish('metadata.pickle', write_metadata)

    tf.logging.info('Exporting an index file for TensorBoard projector')

    def write_projector_index(path):
        # Tab-separated, with a header row and without the DataFrame index.
        movies[['movie_id2', 'title']].to_csv(
            path, header=True, index=False, sep='\t')

    publish('projector_index.tsv', write_projector_index)