in econml/grf/_criterion.pyx [0:0]
def __cinit__(self, SIZE_t n_outputs, SIZE_t n_relevant_outputs, SIZE_t n_features, SIZE_t n_y,
              SIZE_t n_samples, SIZE_t max_node_samples, UINT32_t random_state):
    """ Set up the extra per-child buffers. See `LinearMomentGRFCriterion.__cinit__`.

    Always allocates the zero-filled `J_left` / `J_right` buffers. Only when
    `self.n_outputs <= 2` does it also allocate `invJ_left` / `invJ_right` and the
    `parameter_pre_*` / `parameter_*` buffers for the left and right child.

    Raises
    ------
    MemoryError
        If any of the requested buffers could not be allocated.
    """
    # The __cinit__ of RegressionCriterion is always called by cython and handles most of the
    # initialization; only the extra fields are set up here.

    # With few outputs the child jacobian matrix can be inverted quickly, which gives exact
    # children impurities and exact impurity improvements. With more outputs, matrix inversion
    # is avoided while evaluating a split: the heterogeneity based calculations are used for
    # min impurity decrease and a jacobian re-weighted heterogeneity score serves as a proxy
    # for the impurity improvement when searching for the best split.
    cdef bint exact_children = self.n_outputs <= 2
    cdef SIZE_t n_matrix = n_outputs * n_outputs

    self.proxy_children_impurity = not exact_children

    # Reset every accumulator to NULL before allocating, so that none is left uninitialized
    # if an exception is raised (which triggers __dealloc__).
    # NOTE(review): the previous comment at this point talked about allocating `rho`; nothing
    # named `rho` is allocated in this method, only the left/right buffers below.
    self.J_left = NULL
    self.J_right = NULL
    if exact_children:
        self.invJ_left = NULL
        self.invJ_right = NULL
        self.parameter_pre_left = NULL
        self.parameter_pre_right = NULL
        self.parameter_left = NULL
        self.parameter_right = NULL

    # n_outputs * n_outputs doubles for each matrix buffer, n_outputs doubles for each vector.
    self.J_left = <double *> calloc(n_matrix, sizeof(double))
    self.J_right = <double *> calloc(n_matrix, sizeof(double))
    if exact_children:
        self.invJ_left = <double *> calloc(n_matrix, sizeof(double))
        self.invJ_right = <double *> calloc(n_matrix, sizeof(double))
        self.parameter_pre_left = <double *> calloc(n_outputs, sizeof(double))
        self.parameter_pre_right = <double *> calloc(n_outputs, sizeof(double))
        self.parameter_left = <double *> calloc(n_outputs, sizeof(double))
        self.parameter_right = <double *> calloc(n_outputs, sizeof(double))

    # Checks run only after all allocations were attempted, so every pointer is either a
    # valid buffer or NULL by the time MemoryError propagates.
    # NOTE(review): calloc may return NULL for a zero-size request, so n_outputs == 0 could
    # raise here as well -- confirm callers never pass 0.
    if self.J_left == NULL or self.J_right == NULL:
        raise MemoryError()
    if exact_children:
        if (self.invJ_left == NULL or
                self.invJ_right == NULL or
                self.parameter_pre_left == NULL or
                self.parameter_pre_right == NULL or
                self.parameter_left == NULL or
                self.parameter_right == NULL):
            raise MemoryError()