# mlforecast
f
Hello, I am getting this error when trying to perform cross-validation, but the dataframe I am providing isn't empty:
```
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[112], line 2
      1 # Run the cross-validation for the ML models with the same parameters
----> 2 crossvalidation_ml_colombia = ml2.cross_validation(
      3     df=colombia_df,
      4     h=6,
      5     step_size = 12,
      6     n_windows = 3)
      7 # Set unique_id as the dataframe index
      8 mlcrossvalidation_df.set_index('unique_id', inplace=True)

File ~\.conda\envs\Merck\lib\site-packages\mlforecast\utils.py:164, in old_kw_to_pos.<locals>.decorator.<locals>.inner(*args, **kwargs)
    162                 new_args.append(kwargs.pop(arg_names[i]))
    163             new_args.append(kwargs.pop(old_name))
--> 164 return f(*new_args, **kwargs)

File ~\.conda\envs\Merck\lib\site-packages\mlforecast\forecast.py:741, in MLForecast.cross_validation(self, df, n_windows, h, id_col, time_col, target_col, step_size, static_features, dropna, keep_last_n, refit, max_horizon, before_predict_callback, after_predict_callback, prediction_intervals, level, input_size, fitted, data, window_size)
    739 should_fit = i_window == 0 or (refit > 0 and i_window % refit == 0)
    740 if should_fit:
--> 741     self.fit(
    742         train,
    743         id_col=id_col,
    744         time_col=time_col,
    745         target_col=target_col,
    746         static_features=static_features,
    747         dropna=dropna,
    748         keep_last_n=keep_last_n,
    749         max_horizon=max_horizon,
    750         prediction_intervals=prediction_intervals,
    751         fitted=fitted,
    752     )
    753     self.cv_models_.append(self.models_)
    754     if fitted:

File ~\.conda\envs\Merck\lib\site-packages\mlforecast\utils.py:164, in old_kw_to_pos.<locals>.decorator.<locals>.inner(*args, **kwargs)
    162                 new_args.append(kwargs.pop(arg_names[i]))
    163             new_args.append(kwargs.pop(old_name))
--> 164 return f(*new_args, **kwargs)

File ~\.conda\envs\Merck\lib\site-packages\mlforecast\forecast.py:462, in MLForecast.fit(self, df, id_col, time_col, target_col, static_features, dropna, keep_last_n, max_horizon, prediction_intervals, fitted, data)
    450 X_with_info, y = self.preprocess(
    451     df=df,
    452     id_col=id_col,
   (...)
    459     return_X_y=True,
    460 )
    461 X = X_with_info[self.ts.features_order_]
--> 462 self.fit_models(X, y)
    463 if fitted:
    464     fitted_values = self._compute_fitted_values(
    465         X_with_info=X_with_info,
    466         y=y,
   (...)
    470         max_horizon=max_horizon,
    471     )

File ~\.conda\envs\Merck\lib\site-packages\mlforecast\forecast.py:278, in MLForecast.fit_models(self, X, y)
    274             self.models_[name].append(
    275                 clone(model).fit(X.loc[keep], y[keep, col])
    276             )
    277     else:
--> 278         self.models_[name] = clone(model).fit(X, y)
    279 return self

File ~\.conda\envs\Merck\lib\site-packages\lightgbm\sklearn.py:895, in LGBMRegressor.fit(self, X, y, sample_weight, init_score, eval_set, eval_names, eval_sample_weight, eval_init_score, eval_metric, early_stopping_rounds, verbose, feature_name, categorical_feature, callbacks, init_model)
    888 def fit(self, X, y,
    889         sample_weight=None, init_score=None,
    890         eval_set=None, eval_names=None, eval_sample_weight=None,
    891         eval_init_score=None, eval_metric=None, early_stopping_rounds=None,
    892         verbose='warn', feature_name='auto', categorical_feature='auto',
    893         callbacks=None, init_model=None):
    894     """Docstring is inherited from the LGBMModel."""
--> 895     super().fit(X, y, sample_weight=sample_weight, init_score=init_score,
    896                 eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight,
    897                 eval_init_score=eval_init_score, eval_metric=eval_metric,
    898                 early_stopping_rounds=early_stopping_rounds, verbose=verbose, feature_name=feature_name,
    899                 categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model)
    900     return self

File ~\.conda\envs\Merck\lib\site-packages\lightgbm\sklearn.py:748, in LGBMModel.fit(self, X, y, sample_weight, init_score, group, eval_set, eval_names, eval_sample_weight, eval_class_weight, eval_init_score, eval_group, eval_metric, early_stopping_rounds, verbose, feature_name, categorical_feature, callbacks, init_model)
    745 evals_result = {}
    746 callbacks.append(record_evaluation(evals_result))
--> 748 self._Booster = train(
    749     params=params,
    750     train_set=train_set,
    751     num_boost_round=self.n_estimators,
    752     valid_sets=valid_sets,
    753     valid_names=eval_names,
    754     fobj=self._fobj,
    755     feval=eval_metrics_callable,
    756     init_model=init_model,
    757     feature_name=feature_name,
    758     callbacks=callbacks
    759 )
    761 if evals_result:
    762     self._evals_result = evals_result

File ~\.conda\envs\Merck\lib\site-packages\lightgbm\engine.py:271, in train(params, train_set, num_boost_round, valid_sets, valid_names, fobj, feval, init_model, feature_name, categorical_feature, early_stopping_rounds, evals_result, verbose_eval, learning_rates, keep_training_booster, callbacks)
    269 # construct booster
    270 try:
--> 271     booster = Booster(params=params, train_set=train_set)
    272     if is_valid_contain_train:
    273         booster.set_train_data_name(train_data_name)

File ~\.conda\envs\Merck\lib\site-packages\lightgbm\basic.py:2605, in Booster.__init__(self, params, train_set, model_file, model_str, silent)
   2598     self.set_network(
   2599         machines=machines,
   2600         local_listen_port=params["local_listen_port"],
   2601         listen_time_out=params.get("time_out", 120),
   2602         num_machines=params["num_machines"]
   2603     )
   2604 # construct booster object
-> 2605 train_set.construct()
   2606 # copy the parameters from train_set
   2607 params.update(train_set.get_params())

File ~\.conda\envs\Merck\lib\site-packages\lightgbm\basic.py:1815, in Dataset.construct(self)
   1812             self._set_init_score_by_predictor(self._predictor, self.data, used_indices)
   1813 else:
   1814     # create train
-> 1815     self._lazy_init(self.data, label=self.label,
   1816                     weight=self.weight, group=self.group,
   1817                     init_score=self.init_score, predictor=self._predictor,
   1818                     silent=self.silent, feature_name=self.feature_name,
   1819                     categorical_feature=self.categorical_feature, params=self.params)
   1820 if self.free_raw_data:
   1821     self.data = None

File ~\.conda\envs\Merck\lib\site-packages\lightgbm\basic.py:1474, in Dataset._lazy_init(self, data, label, reference, weight, group, init_score, predictor, silent, feature_name, categorical_feature, params)
   1472     self.pandas_categorical = reference.pandas_categorical
   1473     categorical_feature = reference.categorical_feature
-> 1474 data, feature_name, categorical_feature, self.pandas_categorical = _data_from_pandas(data,
   1475                                                                                      feature_name,
   1476                                                                                      categorical_feature,
   1477                                                                                      self.pandas_categorical)
   1478 label = _label_from_pandas(label)
   1480 # process for args

File ~\.conda\envs\Merck\lib\site-packages\lightgbm\basic.py:566, in _data_from_pandas(data, feature_name, categorical_feature, pandas_categorical)
    564 if isinstance(data, pd_DataFrame):
    565     if len(data.shape) != 2 or data.shape[0] < 1:
--> 566         raise ValueError('Input data must be 2 dimensional and non empty.')
    567     if feature_name == 'auto' or feature_name is None:
    568         data = data.rename(columns=str)

ValueError: Input data must be 2 dimensional and non empty.
```
j
It probably becomes empty after dropping the null values produced by the transformations. Can you try with `dropna=False`?
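
For context: lag and rolling-window features leave NaNs in the first rows of each training window, and with the default `dropna=True` those rows are removed before fitting. If a window is shorter than the largest lag, every row gets dropped and LightGBM receives an empty frame, which is exactly the error above. Here is a minimal sketch of the suggested call; the synthetic data and the `lags` used are only assumptions, since the original `ml2` configuration isn't shown in the thread:

```python
# Minimal sketch (not the original "ml2" setup): synthetic daily series plus
# a couple of lag features to show where the NaN rows come from and how
# dropna=False avoids the empty-frame error.
from lightgbm import LGBMRegressor
from mlforecast import MLForecast
from mlforecast.utils import generate_daily_series

series = generate_daily_series(n_series=2, min_length=60, max_length=80)

ml2 = MLForecast(
    models=[LGBMRegressor()],
    freq='D',
    lags=[1, 12],  # assumed lags; they produce NaNs in the first rows of each training window
)

cv_df = ml2.cross_validation(
    df=series,
    h=6,
    step_size=12,
    n_windows=3,
    dropna=False,  # keep rows whose lag features are NaN instead of dropping them
)
print(cv_df.head())
```

LightGBM handles NaN feature values natively, so keeping those rows with `dropna=False` is usually safe; alternatively, check that every series is long enough to survive the largest lag or rolling window in each training split.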