def _generate_fake_data()

in tensorflow_examples/lite/model_maker/core/data_util/recommendation_testutil.py [0:0]


def _generate_fake_data(data_dir):
  """Writes a small synthetic movielens-style dataset into `data_dir`.

  Three files are produced, each holding one `::`-separated record per line:
  - movies.dat: MovieID::Title::Genres
  - users.dat: UserID::Gender::Age::Occupation::Zip-code
  - ratings.dat: UserID::MovieID::Rating::Timestamp

  The layout aligns with the movielens dataset. IDs start from 1, and 0 is
  reserved for OOV.

  Args:
    data_dir: str, directory that receives the generated files. It is created
      when it does not exist yet.
  """
  if not tf.io.gfile.exists(data_dir):
    tf.io.gfile.makedirs(data_dir)

  # ID ranges shared by the per-entity files and the ratings cross product.
  movie_ids = range(1, 101)
  user_ids = range(1, 51)

  # movies.dat -- MovieID::Title::Genres
  movie_lines = []
  for movie_id in movie_ids:
    movie_lines.append('{i}::title{i}::genre1|genre2'.format(i=movie_id))
  # One extra movie whose id lies well outside the contiguous 1..100 range.
  movie_lines.append('999::title999::genere10')
  _write_file_by_lines(movie_lines, os.path.join(data_dir, 'movies.dat'))

  # users.dat -- UserID::Gender::Age::Occupation::Zip-code
  user_lines = []
  for user_id in user_ids:
    user_lines.append('{user}::F::0::0::00000'.format(user=user_id))
  _write_file_by_lines(user_lines, os.path.join(data_dir, 'users.dat'))

  # ratings.dat -- UserID::MovieID::Rating::Timestamp
  # Every user rates every movie with 5; records are grouped by user, then
  # ordered by movie id. All records carry the same timestamp value.
  rating_timestamp = 978000000 + 1
  rating_lines = [
      '{user}::{movie}::5::{timestamp}'.format(
          user=user_id, movie=movie_id, timestamp=rating_timestamp)
      for user_id in user_ids
      for movie_id in movie_ids
  ]
  _write_file_by_lines(rating_lines, os.path.join(data_dir, 'ratings.dat'))