in tensorflow_examples/lite/model_maker/core/data_util/recommendation_testutil.py [0:0]
def _generate_fake_data(data_dir):
  """Writes a small, deterministic movielens-style dataset into `data_dir`.

  Three '::'-delimited text files are produced, one record per line:
    - movies.dat:  MovieID::Title::Genres
        Movies 1..100, plus one extra movie with id 999.
    - users.dat:   UserID::Gender::Age::Occupation::Zip-code
        Users 1..50.
    - ratings.dat: UserID::MovieID::Rating::Timestamp
        Every user 1..50 rates every movie 1..100 with a rating of 5.
        Movie 999 receives no ratings.

  The layout aligns with the movielens dataset. IDs start from 1, and 0 is
  reserved for OOV.

  Args:
    data_dir: str, directory to write the dataset files into. It is created
      if it does not exist yet.
  """
  dir_present = tf.io.gfile.exists(data_dir)
  if not dir_present:
    tf.io.gfile.makedirs(data_dir)

  # --- movies.dat: MovieID::Title::Genres ---
  movie_lines = []
  for movie_id in range(1, 101):
    movie_lines.append('{i}::title{i}::genre1|genre2'.format(i=movie_id))
  # One extra movie whose id is far larger than the contiguous 1..100 range.
  movie_lines.append('999::title999::genere10')
  _write_file_by_lines(movie_lines, os.path.join(data_dir, 'movies.dat'))

  # --- users.dat: UserID::Gender::Age::Occupation::Zip-code ---
  user_lines = [
      '{user}::F::0::0::00000'.format(user=user_id)
      for user_id in range(1, 51)
  ]
  _write_file_by_lines(user_lines, os.path.join(data_dir, 'users.dat'))

  # --- ratings.dat: UserID::MovieID::Rating::Timestamp ---
  # NOTE(review): every rating row carries the same timestamp value
  # (978000000 + 1); confirm whether a per-movie offset was intended.
  rating_timestamp = 978000000 + 1
  rating_lines = [
      '{user}::{movie}::5::{timestamp}'.format(
          user=user_id, movie=movie_id, timestamp=rating_timestamp)
      for user_id in range(1, 51)
      for movie_id in range(1, 101)
  ]
  _write_file_by_lines(rating_lines, os.path.join(data_dir, 'ratings.dat'))