in tensorflow_datasets/structured/movielens.py [0:0]
def _info(self) -> tfds.core.DatasetInfo:
    """Builds the DatasetInfo that matches the active builder config.

    The feature set is chosen from two fields of ``self.builder_config``:

    * ``table_option == 'movies'``: movie features only.
    * any other ``table_option`` (expected to be ``'ratings'``): movie and
      rating features, plus demographic features when ``format_version`` is
      ``'1m'`` or ``'100k'``, plus ``raw_user_age`` when it is ``'100k'``.

    Returns:
      A ``tfds.core.DatasetInfo`` whose ``features`` is a ``FeaturesDict``
      built from the selected feature specs.
    """
    config = self.builder_config

    # Label vocabularies, kept in this exact order because ClassLabel maps
    # each name to its position in the list.
    genre_names = [
        'Action',
        'Adventure',
        'Animation',
        'Children',
        'Comedy',
        'Crime',
        'Documentary',
        'Drama',
        'Fantasy',
        'Film-Noir',
        'Horror',
        'IMAX',
        'Musical',
        'Mystery',
        'Romance',
        'Sci-Fi',
        'Thriller',
        'Unknown',
        'War',
        'Western',
        '(no genres listed)',
    ]
    occupation_names = [
        'academic/educator',
        'artist',
        'clerical/admin',
        'customer service',
        'doctor/health care',
        'entertainment',
        'executive/managerial',
        'farmer',
        'homemaker',
        'lawyer',
        'librarian',
        'other/not specified',
        'programmer',
        'retired',
        'sales/marketing',
        'scientist',
        'self-employed',
        'student',
        'technician/engineer',
        'tradesman/craftsman',
        'unemployed',
        'writer',
    ]

    movie_specs = {
        'movie_id': tf.string,
        'movie_title': tf.string,
        'movie_genres': tfds.features.Sequence(
            tfds.features.ClassLabel(names=genre_names)),
    }
    rating_specs = {
        'user_id': tf.string,
        'user_rating': tf.float32,
        # Stored as int64 because tfds does not currently support float64.
        'timestamp': tf.int64,
    }
    demographic_specs = {
        'user_gender': tf.bool,
        'bucketized_user_age': tf.float32,
        'user_occupation_label': tfds.features.ClassLabel(
            names=occupation_names),
        'user_occupation_text': tf.string,
        'user_zip_code': tf.string,
    }

    # Every configuration includes the movie features; the rest is layered on
    # top in a fixed order (rating -> demographic -> raw age).
    selected_specs = dict(movie_specs)
    if config.table_option != 'movies':
        # Any non-'movies' table is treated as the 'ratings' table.
        selected_specs.update(rating_specs)
        version = config.format_version
        # Older versions of MovieLens (1m, 100k) have demographic features.
        if version == '1m' or version == '100k':
            selected_specs.update(demographic_specs)
        # Only the 100k dataset contains exact user ages; the 1m dataset
        # contains only bucketized age values.
        if version == '100k':
            selected_specs['raw_user_age'] = tf.float32

    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict(selected_specs),
        supervised_keys=None,
        homepage='https://grouplens.org/datasets/movielens/',
        citation=_CITATION,
    )