in python-package/lightgbm/basic.py [0:0]
def _lazy_init(self, data, label=None, reference=None,
               weight=None, group=None, init_score=None, predictor=None,
               feature_name='auto', categorical_feature='auto', params=None):
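    # Lazily build the underlying Dataset handle: unwrap pandas/datatable input,
    # resolve categorical features into column indices, create the C Dataset from
    # a file path, numpy array, scipy sparse matrix, list of arrays, or Sequence,
    # and finally attach label, weight, group and init_score.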
    if data is None:
self.handle = None
return self
if reference is not None:
self.pandas_categorical = reference.pandas_categorical
categorical_feature = reference.categorical_feature
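    # unwrap pandas objects into raw arrays, remembering the category encodings
    # (pandas_categorical) so the same mapping can be applied consistently later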
    data, feature_name, categorical_feature, self.pandas_categorical = _data_from_pandas(
        data, feature_name, categorical_feature, self.pandas_categorical
    )
label = _label_from_pandas(label)
    # process args: warn about parameters that duplicate constructor arguments
    params = {} if params is None else params
    lazy_init_code = self._lazy_init.__code__
    args_names = lazy_init_code.co_varnames[:lazy_init_code.co_argcount]
for key in params.keys():
if key in args_names:
            _log_warning(f'{key} keyword has been found in `params` and will be ignored.\n'
                         f'Please use the {key} argument of the Dataset constructor to pass this parameter.')
# get categorical features
if categorical_feature is not None:
categorical_indices = set()
feature_dict = {}
if feature_name is not None:
feature_dict = {name: i for i, name in enumerate(feature_name)}
for name in categorical_feature:
if isinstance(name, str) and name in feature_dict:
categorical_indices.add(feature_dict[name])
elif isinstance(name, int):
categorical_indices.add(name)
else:
raise TypeError(f"Wrong type({type(name).__name__}) or unknown name({name}) in categorical_feature")
if categorical_indices:
for cat_alias in _ConfigAliases.get("categorical_feature"):
if cat_alias in params:
                    # do not warn when params[cat_alias] already matches categorical_indices
                    if not (isinstance(params[cat_alias], list) and set(params[cat_alias]) == categorical_indices):
_log_warning(f'{cat_alias} in param dict is overridden.')
params.pop(cat_alias, None)
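            # hand the resolved column indices to the backend via the `categorical_column` parameter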
params['categorical_column'] = sorted(categorical_indices)
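    # serialize params for the C API, which takes a single space-separated "key=value" string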
params_str = param_dict_to_str(params)
self.params = params
# process for reference dataset
ref_dataset = None
if isinstance(reference, Dataset):
ref_dataset = reference.construct().handle
elif reference is not None:
        raise TypeError('Reference dataset should be None or a Dataset instance')
# start construct data
if isinstance(data, (str, Path)):
self.handle = ctypes.c_void_p()
_safe_call(_LIB.LGBM_DatasetCreateFromFile(
c_str(str(data)),
c_str(params_str),
ref_dataset,
ctypes.byref(self.handle)))
elif isinstance(data, scipy.sparse.csr_matrix):
self.__init_from_csr(data, params_str, ref_dataset)
elif isinstance(data, scipy.sparse.csc_matrix):
self.__init_from_csc(data, params_str, ref_dataset)
elif isinstance(data, np.ndarray):
self.__init_from_np2d(data, params_str, ref_dataset)
elif isinstance(data, list) and len(data) > 0:
if all(isinstance(x, np.ndarray) for x in data):
self.__init_from_list_np2d(data, params_str, ref_dataset)
elif all(isinstance(x, Sequence) for x in data):
self.__init_from_seqs(data, ref_dataset)
else:
            raise TypeError('Data list can only contain numpy arrays or Sequence objects')
elif isinstance(data, Sequence):
self.__init_from_seqs([data], ref_dataset)
elif isinstance(data, dt_DataTable):
self.__init_from_np2d(data.to_numpy(), params_str, ref_dataset)
else:
        # fall back to scipy's CSR conversion for any other array-like input
        try:
            csr = scipy.sparse.csr_matrix(data)
            self.__init_from_csr(csr, params_str, ref_dataset)
        except BaseException as err:
            raise TypeError(f'Cannot initialize Dataset from {type(data).__name__}') from err
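    # attach metadata (label, weight, query group) now that the raw data has been constructed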
if label is not None:
self.set_label(label)
if self.get_label() is None:
raise ValueError("Label should not be None")
if weight is not None:
self.set_weight(weight)
if group is not None:
self.set_group(group)
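    # an init_model predictor takes precedence: its predictions replace any user-supplied init_score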
if isinstance(predictor, _InnerPredictor):
if self._predictor is None and init_score is not None:
_log_warning("The init_score will be overridden by the prediction of init_model.")
self._set_init_score_by_predictor(predictor, data)
elif init_score is not None:
self.set_init_score(init_score)
elif predictor is not None:
raise TypeError(f'Wrong predictor type {type(predictor).__name__}')
# set feature names
return self.set_feature_name(feature_name)
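# Illustrative usage (a sketch, not part of this module): _lazy_init is not meant
# to be called directly; it runs when a Dataset is constructed lazily, e.g.
#
#   import numpy as np
#   import lightgbm as lgb
#
#   X = np.random.rand(100, 5)
#   X[:, 0] = np.random.randint(0, 4, size=100)  # integer-coded categorical column
#   y = np.random.randint(0, 2, size=100)
#   ds = lgb.Dataset(X, label=y, categorical_feature=[0], params={'max_bin': 255})
#   ds.construct()  # triggers _lazy_init; lgb.train() would do this implicitly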