def __process_ratings_users_rdf()

in src/graph_notebook/notebooks/03-Neptune-ML/02-SPARQL/neptune_ml_sparql_utils.py [0:0]


    def __process_ratings_users_rdf(self):
        """Convert the raw ``u.data`` ratings file into two N-Quads files.

        Reads ``ml-100k/u.data`` under ``self.raw_directory`` as tab-separated
        columns (user id, movie id, score, timestamp) and writes two files
        under ``self.formatted_directory``:

        * ``user_movie_ratings.nq`` -- one ``Rating`` resource per input row
          carrying ``score``, ``timestamp``, ``forMovie`` and ``byUser``, plus
          ``recommended`` / ``wasRecommendedBy`` links between the user and
          the movie when the score is greater than 3.
        * ``critic_movie_scores.nq`` -- one ``criticScore`` per movie: the
          mean of that movie's scores, rounded to an integer.

        Every quad uses ``self.ns_ontology.Rating`` as its fourth (context)
        element.
        """
        print('Processing Ratings to RDF')
        ratings = pd.read_csv(
            os.path.join(self.raw_directory, 'ml-100k/u.data'),
            sep='\t',
            encoding='ISO-8859-1',
            names=['~from', '~to', 'score:Int', 'timestamp']
        )
        ratings['~from'] = ratings['~from'].apply(lambda x: f'user_{x}')
        ratings['~to'] = ratings['~to'].apply(lambda x: f'movie_{x}')
        ratings['~id'] = ratings['~from'].str.cat(ratings['~to'], sep="_")

        # Loop-invariant lookups, resolved once instead of once per quad.
        resource = self.ns_resource
        ontology = self.ns_ontology
        context = ontology.Rating

        # Per-movie average score; only the score column needs aggregating.
        averages_graph = ConjunctiveGraph()
        mean_scores = ratings.groupby('~to')['score:Int'].mean()
        for movie_id, mean_score in mean_scores.items():
            averages_graph.add((
                resource[movie_id], ontology.criticScore,
                Literal(int(round(mean_score)), datatype=XSD.integer),
                context
            ))

        # Iterate plain column values rather than DataFrame.iterrows(), which
        # builds a Series object for every row. tolist() yields built-in
        # Python scalars, which matters for the timestamp literal: it is
        # created without an explicit datatype, so rdflib derives the datatype
        # from the Python type of the value.
        ratings_graph = ConjunctiveGraph()
        rows = zip(
            ratings['~id'].tolist(),
            ratings['~from'].tolist(),
            ratings['~to'].tolist(),
            ratings['score:Int'].tolist(),
            ratings['timestamp'].tolist(),
        )
        for rating_id, user_id, movie_id, score, timestamp in rows:
            rating = resource[urllib.parse.quote_plus(rating_id)]
            user = resource[user_id]
            movie = resource[movie_id]

            ratings_graph.add((rating, RDF.type, ontology.Rating, context))
            ratings_graph.add((
                rating, ontology.score, Literal(score, datatype=XSD.integer),
                context
            ))
            ratings_graph.add((
                rating, ontology.timestamp, Literal(timestamp), context
            ))
            ratings_graph.add((rating, ontology.forMovie, movie, context))
            ratings_graph.add((rating, ontology.byUser, user, context))

            # A score above 3 is also recorded as a direct link between the
            # user and the movie, in both directions.
            if score > 3:
                ratings_graph.add((user, ontology.recommended, movie, context))
                ratings_graph.add((
                    movie, ontology.wasRecommendedBy, user, context
                ))

        ratings_rdf_file = os.path.join(self.formatted_directory, 'user_movie_ratings.nq')
        averages_graph_file = os.path.join(self.formatted_directory, 'critic_movie_scores.nq')

        ratings_graph.serialize(format='nquads', destination=ratings_rdf_file)
        averages_graph.serialize(format='nquads', destination=averages_graph_file)