def main()

in recommended-item-search/data_preparation.py [0:0]


def main(_):
  """Load MovieLens data, convert it, and export everything to FLAGS.export_dir.

  Steps, in order:
    1. Load the movies and ratings tables via `load_movielens_data()`.
    2. Add a `movie_id2` column to both tables holding each movie's row
       position (0 .. len(movies) - 1) in the movies table.
    3. Keep only ratings strictly greater than `FLAGS.rating_threshold` and
       collect, per `user_id`, the list of `movie_id2` values.
    4. Recreate `FLAGS.export_dir` from scratch (any existing content is
       deleted recursively).
    5. Split the per-user table and write it as TFRecord files
       (8 'train' files, 1 'eval' file).
    6. Write `metadata.pickle` and `projector_index.tsv` into the export
       directory.

  Args:
    _: Ignored positional argument (presumably the argv list handed over by
      the TensorFlow app runner -- confirm against the caller).
  """
  tf.logging.info('Download {} ...'.format(FLAGS.filename))
  movies, ratings = load_movielens_data()

  # Map every original movie id to its position in the movies table, then
  # attach that position to both tables under the column `movie_id2`.
  raw_movie_ids = movies['movie_id'].values
  positions = np.arange(len(movies))
  movieid_to_index = dict(zip(raw_movie_ids, positions))
  for table in (movies, ratings):
    table['movie_id2'] = table['movie_id'].apply(lambda x: movieid_to_index[x])

  tf.logging.info('Converting dataset ...')
  # Strictly-greater comparison: ratings equal to the threshold are dropped.
  above_threshold = ratings['rating'] > FLAGS.rating_threshold
  ratings = ratings[above_threshold]
  # One row per user; the `movie_id2` cell becomes a Python list of indices.
  grouped_by_user = ratings[['user_id', 'movie_id2']].groupby(
      'user_id', as_index=False)
  rawdata = grouped_by_user.aggregate(lambda x: list(x))

  # Start from an empty export directory so stale files never survive a rerun.
  if tf.gfile.Exists(FLAGS.export_dir):
    tf.logging.info('Remove {} ...'.format(FLAGS.export_dir))
    tf.gfile.DeleteRecursively(FLAGS.export_dir)
  tf.gfile.MakeDirs(FLAGS.export_dir)

  tf.logging.info('Exporting TFRecord to {}'.format(FLAGS.export_dir))
  train_inputs, eval_inputs = split_dataframe(rawdata)
  export_plan = (
      (train_inputs, 'train', 8),
      (eval_inputs, 'eval', 1),
  )
  for frame, file_type, num_files in export_plan:
    make_tfrecord_files(
        dataframe=frame, file_type=file_type, num_files=num_files)

  # Both auxiliary files are first written to a local temporary directory and
  # then copied with tf.gfile (presumably so that export_dir may live on a
  # non-local filesystem -- confirm).
  tf.logging.info('Exporting metadata to {}'.format(FLAGS.export_dir))
  with tempfile.TemporaryDirectory() as scratch_dir:
    staged_pickle = os.path.join(scratch_dir, 'metadata.pickle')
    with open(staged_pickle, 'wb') as sink:
      pickle.dump(
          {'N': len(movies), 'movies': movies, 'rawdata': rawdata}, sink)
    tf.gfile.Copy(
        staged_pickle,
        os.path.join(FLAGS.export_dir, 'metadata.pickle'),
        overwrite=True)

  tf.logging.info('Exporting an index file for TensorBoard projector')
  with tempfile.TemporaryDirectory() as scratch_dir:
    staged_tsv = os.path.join(scratch_dir, 'projector_index.tsv')
    # Tab-separated, with a header row and without the DataFrame index.
    projector_rows = movies[['movie_id2', 'title']]
    projector_rows.to_csv(staged_tsv, header=True, index=False, sep='\t')
    tf.gfile.Copy(
        staged_tsv,
        os.path.join(FLAGS.export_dir, 'projector_index.tsv'),
        overwrite=True)