def __process_ratings_users()

in src/graph_notebook/notebooks/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/neptune_ml_utils.py [0:0]


    def __process_ratings_users(self):
        """Convert the raw ratings file into vertex and edge CSV files.

        Reads the tab-separated ``ml-100k/u.data`` file found under
        ``self.raw_directory`` (columns: user, movie, score, timestamp) and
        writes three files into ``self.formatted_directory``:

        * ``ratings_vertices.csv`` - one ``rating`` vertex per input row,
          with id ``user_<u>:movie_<m>`` plus the score and timestamp.
        * ``ratings_vertex_edges.csv`` - for every rating vertex, a
          ``wrote`` edge (user -> rating) and an ``about`` edge
          (rating -> movie).
        * ``rated_edges.csv`` - a direct ``rated`` edge (user -> movie)
          carrying the numeric score and its textual ``scale``.

        Returns:
            None. The results are the files written to disk.
        """
        # Textual label for each score; any other value maps to ''.
        scale_by_score = {
            1: 'Hate',
            2: 'Dislike',
            3: 'Neutral',
            4: 'Like',
            5: 'Love',
        }

        # Create ratings vertices and add edges on both sides
        print('Processing Ratings', end='\r')
        ratings_vertices = pd.read_csv(
            os.path.join(self.raw_directory, 'ml-100k/u.data'),
            sep='\t', encoding='ISO-8859-1',
            names=['~from', '~to', 'score:Int', 'timestamp'])
        ratings_vertices['~from'] = ratings_vertices['~from'].apply(
            lambda x: f'user_{x}')
        ratings_vertices['~to'] = ratings_vertices['~to'].apply(
            lambda x: f'movie_{x}')

        ratings_vertices['~id'] = ratings_vertices['~from'].str.cat(
            ratings_vertices['~to'], sep=":")
        ratings_vertices['~label'] = "rating"

        # Both mappings are keyed by row position so that
        # DataFrame.from_dict(..., "index") emits rows in input order.
        vertex_edges = {}
        rated_edges = {}
        # Iterate the needed columns in lockstep instead of iterrows(),
        # which would build a full Series object for every row.
        rows = zip(ratings_vertices.index,
                   ratings_vertices['~from'],
                   ratings_vertices['~to'],
                   ratings_vertices['~id'],
                   ratings_vertices['score:Int'])
        for index, id_from, id_to, id_id, score in rows:
            vertex_edges[index * 2] = {
                '~id': f"{id_from}-wrote-{id_id}", '~label': 'wrote',
                '~from': id_from, '~to': id_id}
            vertex_edges[index * 2 + 1] = {
                '~id': f"{id_id}-about-{id_to}", '~label': 'about',
                '~from': id_id, '~to': id_to}
            rated_edges[index] = {
                '~id': f"{id_from}-rated-{id_to}", '~label': 'rated',
                '~from': id_from, '~to': id_to, 'score:Int': score,
                'scale': scale_by_score.get(score, '')}
        rating_edges_df = pd.DataFrame.from_dict(vertex_edges, "index")

        # Remove the from and to columns and write this out as a vertex now
        ratings_vertices = ratings_vertices.drop(columns=['~from', '~to'])
        ratings_vertices.to_csv(
            os.path.join(self.formatted_directory, 'ratings_vertices.csv'),
            index=False)
        # Write out the rating vertex edges for wrote and about
        rating_edges_df.to_csv(
            os.path.join(self.formatted_directory,
                         'ratings_vertex_edges.csv'),
            index=False)
        # Write out the rated edges
        rated_edges_df = pd.DataFrame.from_dict(rated_edges, "index")
        rated_edges_df.to_csv(
            os.path.join(self.formatted_directory, 'rated_edges.csv'),
            index=False)