flink-ml-python/pyflink/ml/recommendation/swing.py (138 lines of code) (raw):

################################################################################
#  Licensed to the Apache Software Foundation (ASF) under one
#  or more contributor license agreements.  See the NOTICE file
#  distributed with this work for additional information
#  regarding copyright ownership.  The ASF licenses this file
#  to you under the Apache License, Version 2.0 (the
#  "License"); you may not use this file except in compliance
#  with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
import typing

from pyflink.ml.common.param import HasOutputCol, HasSeed
from pyflink.ml.param import Param, StringParam, IntParam, FloatParam, ParamValidators
from pyflink.ml.recommendation.common import JavaRecommendationAlgoOperator
from pyflink.ml.wrapper import JavaWithParams


class _SwingParams(
    JavaWithParams,
    HasOutputCol,
    HasSeed
):
    """
    Params for :class:`Swing`.

    Each parameter is declared as a class-level :class:`Param` (name, description,
    default value, validator) and is exposed through a fluent ``set_*`` method, a
    ``get_*`` method and a read-only property of the same name.
    """

    # Names of the input columns holding the user and item identifiers.
    USER_COL: Param[str] = StringParam(
        "user_col",
        "User column name.",
        "user",
        ParamValidators.not_null())

    ITEM_COL: Param[str] = StringParam(
        "item_col",
        "Item column name.",
        "item",
        ParamValidators.not_null())

    K: Param[int] = IntParam(
        "k",
        "The max number of similar items to output for each item.",
        100,
        ParamValidators.gt(0))

    MAX_USER_NUM_PER_ITEM: Param[int] = IntParam(
        "max_user_num_per_item",
        "The max number of users(purchasers) for each item. If the number of users "
        + "is greater than this value, then only maxUserNumPerItem users will "
        + "be sampled and used in the computation of similarity between two items.",
        1000,
        ParamValidators.gt(0))

    MIN_USER_BEHAVIOR: Param[int] = IntParam(
        "min_user_behavior",
        "The min number of items that a user purchases. If the items purchased by a user is "
        + "smaller than this value, then this user is filtered out and will not be used in the "
        + "computation.",
        10,
        ParamValidators.gt(0))

    MAX_USER_BEHAVIOR: Param[int] = IntParam(
        "max_user_behavior",
        "The max number of items for a user purchases. If the items purchased by a user is "
        + "greater than this value, then this user is filtered out and will not be used in the "
        + "computation.",
        1000,
        ParamValidators.gt(0))

    # alpha1 and alpha2 are integer smoothing terms; both are validated with gt_eq(0).
    ALPHA1: Param[int] = IntParam(
        "alpha1",
        "Smooth factor for number of users that have purchased one item. The higher alpha1 is,"
        + " the less purchasing behavior contributes to the similarity score.",
        15,
        ParamValidators.gt_eq(0))

    ALPHA2: Param[int] = IntParam(
        "alpha2",
        "Smooth factor for number of users that have purchased the two target items. The higher "
        + "alpha2 is, the less purchasing behavior contributes to the similarity score.",
        0,
        ParamValidators.gt_eq(0))

    # beta is the only floating-point parameter of the algorithm.
    BETA: Param[float] = FloatParam(
        "beta",
        "Decay factor for number of users that have purchased one item. The higher beta is, the "
        + "less purchasing behavior contributes to the similarity score.",
        0.3,
        ParamValidators.gt_eq(0))

    def __init__(self, java_params):
        super(_SwingParams, self).__init__(java_params)

    # ------------------------------------------------------------------
    # Fluent setters / getters. Every setter returns the result of
    # ``self.set(...)`` cast to ``_SwingParams`` so calls can be chained.
    # ------------------------------------------------------------------

    def set_user_col(self, value: str) -> '_SwingParams':
        return typing.cast(_SwingParams, self.set(self.USER_COL, value))

    def get_user_col(self) -> str:
        return self.get(self.USER_COL)

    def set_item_col(self, value: str) -> '_SwingParams':
        return typing.cast(_SwingParams, self.set(self.ITEM_COL, value))

    def get_item_col(self) -> str:
        return self.get(self.ITEM_COL)

    def set_k(self, value: int) -> '_SwingParams':
        return typing.cast(_SwingParams, self.set(self.K, value))

    def get_k(self) -> int:
        return self.get(self.K)

    def set_max_user_num_per_item(self, value: int) -> '_SwingParams':
        return typing.cast(_SwingParams, self.set(self.MAX_USER_NUM_PER_ITEM, value))

    def get_max_user_num_per_item(self) -> int:
        return self.get(self.MAX_USER_NUM_PER_ITEM)

    def set_min_user_behavior(self, value: int) -> '_SwingParams':
        return typing.cast(_SwingParams, self.set(self.MIN_USER_BEHAVIOR, value))

    def get_min_user_behavior(self) -> int:
        return self.get(self.MIN_USER_BEHAVIOR)

    def set_max_user_behavior(self, value: int) -> '_SwingParams':
        return typing.cast(_SwingParams, self.set(self.MAX_USER_BEHAVIOR, value))

    def get_max_user_behavior(self) -> int:
        return self.get(self.MAX_USER_BEHAVIOR)

    def set_alpha1(self, value: int) -> '_SwingParams':
        return typing.cast(_SwingParams, self.set(self.ALPHA1, value))

    def get_alpha1(self) -> int:
        return self.get(self.ALPHA1)

    def set_alpha2(self, value: int) -> '_SwingParams':
        return typing.cast(_SwingParams, self.set(self.ALPHA2, value))

    def get_alpha2(self) -> int:
        return self.get(self.ALPHA2)

    def set_beta(self, value: float) -> '_SwingParams':
        return typing.cast(_SwingParams, self.set(self.BETA, value))

    def get_beta(self) -> float:
        return self.get(self.BETA)

    # ------------------------------------------------------------------
    # Read-only property aliases; each one delegates to its getter.
    # ------------------------------------------------------------------

    @property
    def user_col(self) -> str:
        return self.get_user_col()

    @property
    def item_col(self) -> str:
        return self.get_item_col()

    @property
    def k(self) -> int:
        return self.get_k()

    @property
    def max_user_num_per_item(self) -> int:
        return self.get_max_user_num_per_item()

    @property
    def min_user_behavior(self) -> int:
        return self.get_min_user_behavior()

    @property
    def max_user_behavior(self) -> int:
        return self.get_max_user_behavior()

    @property
    def alpha1(self) -> int:
        return self.get_alpha1()

    @property
    def alpha2(self) -> int:
        # ALPHA2 is an IntParam and get_alpha2() returns int, so this property is
        # annotated int as well (it was previously annotated float).
        return self.get_alpha2()

    @property
    def beta(self) -> float:
        return self.get_beta()


class Swing(JavaRecommendationAlgoOperator, _SwingParams):
    """
    An AlgoOperator which implements the Swing algorithm.

    Swing is an item recall algorithm. The topology of user-item graph usually can be described as
    user-item-user or item-user-item, which are like 'swing'. For example, if both user <em>u</em>
    and user <em>v</em> have purchased the same commodity <em>i</em>, they will form a relationship
    diagram similar to a swing. If <em>u</em> and <em>v</em> have purchased commodity <em>j</em> in
    addition to <em>i</em>, it is supposed <em>i</em> and <em>j</em> are similar. The similarity
    between items in Swing is defined as

    $$
    w_{(i,j)}=\\sum_{u\\in U_i\\cap U_j}\\sum_{v\\in U_i\\cap U_j}
    {\\frac{1}{{(|I_u|+\\alpha_1)}^\\beta}}*{\\frac{1}{{(|I_v|+\\alpha_1)}^\\beta}}
    *{\\frac{1}{\\alpha_2+|I_u\\cap I_v|}}
    $$

    Note that alpha1 and alpha2 could be zero here. If one of $$|I_u|, |I_v| and |I_u\\cap I_v|$$
    is zero, then the similarity of <em>i</em> and <em>j</em> is zero.

    See "<a href="https://arxiv.org/pdf/2010.05525.pdf">Large Scale Product Graph Construction for
    Recommendation in E-commerce</a>" by Xiaoyong Yang, Yadong Zhu and Yi Zhang.
    """

    def __init__(self, java_algo_operator=None):
        super(Swing, self).__init__(java_algo_operator)

    @classmethod
    def _java_algo_operator_package_name(cls) -> str:
        # Package-name component identifying the wrapped Java operator.
        return "swing"

    @classmethod
    def _java_algo_operator_class_name(cls) -> str:
        # Class-name component identifying the wrapped Java operator.
        return "Swing"