in src/graph_notebook/notebooks/03-Neptune-ML/neptune_ml_utils.py [0:0]
def __process_ratings_users(self):
    """Convert the MovieLens ``u.data`` ratings into vertex and edge CSVs.

    Reads the tab-separated file ``ml-100k/u.data`` found under
    ``self.raw_directory`` (columns, in order: user, movie, score,
    timestamp) and writes three files into ``self.formatted_directory``:

    * ``ratings_vertices.csv`` -- one ``rating`` vertex per input row with
      columns ``score:Int``, ``timestamp``, ``~id`` and ``~label``; the id
      is ``user_<u>:movie_<m>``.
    * ``ratings_vertex_edges.csv`` -- two edges per rating vertex, a
      ``wrote`` edge (user -> rating) immediately followed by an ``about``
      edge (rating -> movie).
    * ``rated_edges.csv`` -- one direct ``rated`` edge (user -> movie) per
      input row carrying the numeric score and a textual ``scale``.

    Returns:
        None. The results are the three CSV files written to disk.
    """
    print('Processing Ratings', end='\r')
    ratings = pd.read_csv(
        os.path.join(self.raw_directory, 'ml-100k/u.data'),
        sep='\t', encoding='ISO-8859-1',
        names=['~from', '~to', 'score:Int', 'timestamp'])

    # Prefix the raw numeric ids so user and movie ids cannot collide.
    user_ids = 'user_' + ratings['~from'].astype(str)
    movie_ids = 'movie_' + ratings['~to'].astype(str)
    rating_ids = user_ids.str.cat(movie_ids, sep=":")
    scores = ratings['score:Int']

    # Rating vertices: the endpoints live on the edges, so only the
    # properties, id and label are kept here.
    ratings_vertices = pd.DataFrame({
        'score:Int': scores,
        'timestamp': ratings['timestamp'],
        '~id': rating_ids,
        '~label': "rating",
    })
    ratings_vertices.to_csv(os.path.join(self.formatted_directory,
                                         'ratings_vertices.csv'), index=False)

    # Edges attaching each rating vertex to its author and its subject.
    wrote_edges = pd.DataFrame({
        '~id': user_ids + '-wrote-' + rating_ids,
        '~label': 'wrote',
        '~from': user_ids,
        '~to': rating_ids,
    })
    about_edges = pd.DataFrame({
        '~id': rating_ids + '-about-' + movie_ids,
        '~label': 'about',
        '~from': rating_ids,
        '~to': movie_ids,
    })
    # Both frames share the input's row index; a stable sort on it
    # interleaves them so every "wrote" row is directly followed by the
    # "about" row of the same rating.
    rating_edges_df = pd.concat([wrote_edges, about_edges]).sort_index(
        kind='mergesort')
    rating_edges_df.to_csv(os.path.join(self.formatted_directory,
                                        'ratings_vertex_edges.csv'), index=False)

    # Direct user -> movie edges. Scores outside 1-5 get an empty scale.
    scale_by_score = {1: 'Hate', 2: 'Dislike', 3: 'Neutral',
                      4: 'Like', 5: 'Love'}
    rated_edges_df = pd.DataFrame({
        '~id': user_ids + '-rated-' + movie_ids,
        '~label': 'rated',
        '~from': user_ids,
        '~to': movie_ids,
        'score:Int': scores,
        'scale': [scale_by_score.get(score, '') for score in scores.tolist()],
    })
    rated_edges_df.to_csv(os.path.join(self.formatted_directory,
                                       'rated_edges.csv'), index=False)