Hello, I have another question
# mlforecast
Hello, I have another question.
Here is the full code:

```python
#Defino modelos de machine learning
models = [
    lgb.LGBMRegressor(verbosity=-1), #verbosity=-1 suprime los mensajes de consola
    xgb.XGBRegressor(),
    RandomForestRegressor(random_state=0), #Random_state=0 hace que el modelo sea deterministico y cada vez de el mismo resultado
]

#definir lags y lag transforms
#Expanding mean es la media movil de todas las observaciones acumuladas, rolling mean_28 es la media movil de las ultimas 28
lags = [1,2,3,6,12]
lag_transforms = {
    1: [expanding_mean, rolling_mean_12],
    2: [expanding_mean, rolling_mean_12],
    3: [expanding_mean, rolling_mean_12],
    6: [expanding_mean, rolling_mean_12],
    12: [expanding_mean, rolling_mean_12]
}

# Crear pipeline de pronosticos
ml = MLForecast(
    models=models, #3 modelos de ML
    freq='MS', #Month start
    lags=lags,
    lag_transforms=lag_transforms, #Lag transforms definidas anteriormente
    target_transforms=[GlobalSklearnTransformer(sk_log1p), LocalStandardScaler(), Differences([1])], #El orden de las
    #transformaciones importa (log, estandarizo y primera diferencia)
    num_threads=-1,
    date_features=['month']
)

mlcrossvalidation_df = ml.cross_validation( df=train_agg, h=6, step_size = 12, n_windows = 2)
```
And here is the output (a series of warnings followed by the error):
Found null values in lag1, lag2, lag3, lag6, lag12, expanding_mean_lag1, rolling_mean_12_lag1, expanding_mean_lag2, rolling_mean_12_lag2, expanding_mean_lag3, rolling_mean_12_lag3, expanding_mean_lag6, rolling_mean_12_lag6, expanding_mean_lag12, rolling_mean_12_lag12.

C:\Users\M330618\.conda\envs\Merck\lib\site-packages\mlforecast\core.py:475: UserWarning:

Found null values in lag2, lag3, lag6, lag12, rolling_mean_12_lag1, expanding_mean_lag2, rolling_mean_12_lag2, expanding_mean_lag3, rolling_mean_12_lag3, expanding_mean_lag6, rolling_mean_12_lag6, expanding_mean_lag12, rolling_mean_12_lag12.

C:\Users\M330618\.conda\envs\Merck\lib\site-packages\mlforecast\core.py:475: UserWarning:

Found null values in lag3, lag6, lag12, rolling_mean_12_lag1, rolling_mean_12_lag2, expanding_mean_lag3, rolling_mean_12_lag3, expanding_mean_lag6, rolling_mean_12_lag6, expanding_mean_lag12, rolling_mean_12_lag12.

C:\Users\M330618\.conda\envs\Merck\lib\site-packages\mlforecast\core.py:475: UserWarning:

Found null values in lag6, lag12, rolling_mean_12_lag1, rolling_mean_12_lag2, rolling_mean_12_lag3, expanding_mean_lag6, rolling_mean_12_lag6, expanding_mean_lag12, rolling_mean_12_lag12.

C:\Users\M330618\.conda\envs\Merck\lib\site-packages\mlforecast\core.py:475: UserWarning:

Found null values in lag6, lag12, rolling_mean_12_lag1, rolling_mean_12_lag2, rolling_mean_12_lag3, expanding_mean_lag6, rolling_mean_12_lag6, expanding_mean_lag12, rolling_mean_12_lag12.

C:\Users\M330618\.conda\envs\Merck\lib\site-packages\mlforecast\core.py:475: UserWarning:

Found null values in lag6, lag12, rolling_mean_12_lag1, rolling_mean_12_lag2, rolling_mean_12_lag3, expanding_mean_lag6, rolling_mean_12_lag6, expanding_mean_lag12, rolling_mean_12_lag12.

C:\Users\M330618\.conda\envs\Merck\lib\site-packages\mlforecast\core.py:475: UserWarning:

Found null values in lag1, lag2, lag3, lag6, lag12, expanding_mean_lag1, rolling_mean_12_lag1, expanding_mean_lag2, rolling_mean_12_lag2, expanding_mean_lag3, rolling_mean_12_lag3, expanding_mean_lag6, rolling_mean_12_lag6, expanding_mean_lag12, rolling_mean_12_lag12.

C:\Users\M330618\.conda\envs\Merck\lib\site-packages\mlforecast\core.py:475: UserWarning:

Found null values in lag2, lag3, lag6, lag12, rolling_mean_12_lag1, expanding_mean_lag2, rolling_mean_12_lag2, expanding_mean_lag3, rolling_mean_12_lag3, expanding_mean_lag6, rolling_mean_12_lag6, expanding_mean_lag12, rolling_mean_12_lag12.

C:\Users\M330618\.conda\envs\Merck\lib\site-packages\mlforecast\core.py:475: UserWarning:

Found null values in lag3, lag6, lag12, rolling_mean_12_lag1, rolling_mean_12_lag2, expanding_mean_lag3, rolling_mean_12_lag3, expanding_mean_lag6, rolling_mean_12_lag6, expanding_mean_lag12, rolling_mean_12_lag12.

C:\Users\M330618\.conda\envs\Merck\lib\site-packages\mlforecast\core.py:475: UserWarning:

Found null values in lag6, lag12, rolling_mean_12_lag1, rolling_mean_12_lag2, rolling_mean_12_lag3, expanding_mean_lag6, rolling_mean_12_lag6, expanding_mean_lag12, rolling_mean_12_lag12.

C:\Users\M330618\.conda\envs\Merck\lib\site-packages\mlforecast\core.py:475: UserWarning:

Found null values in lag6, lag12, rolling_mean_12_lag1, rolling_mean_12_lag2, rolling_mean_12_lag3, expanding_mean_lag6, rolling_mean_12_lag6, expanding_mean_lag12, rolling_mean_12_lag12.

C:\Users\M330618\.conda\envs\Merck\lib\site-packages\mlforecast\core.py:475: UserWarning:

Found null values in lag6, lag12, rolling_mean_12_lag1, rolling_mean_12_lag2, rolling_mean_12_lag3, expanding_mean_lag6, rolling_mean_12_lag6, expanding_mean_lag12, rolling_mean_12_lag12.

C:\Users\M330618\.conda\envs\Merck\lib\site-packages\mlforecast\core.py:475: UserWarning:

Found null values in lag1, lag2, lag3, lag6, lag12, expanding_mean_lag1, rolling_mean_12_lag1, expanding_mean_lag2, rolling_mean_12_lag2, expanding_mean_lag3, rolling_mean_12_lag3, expanding_mean_lag6, rolling_mean_12_lag6, expanding_mean_lag12, rolling_mean_12_lag12.

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[149], line 2
      1 # Hago el crossvalidation para los modelos de ML con los mismos parametros
----> 2 mlcrossvalidation_df = ml.cross_validation(
      3     df=train_agg,
      4     h=6,
      5     step_size = 12,
      6     n_windows = 2)
      7 #Asigno unique_id como el indice del dataframe
      8 mlcrossvalidation_df.set_index('unique_id', inplace=True)

File ~\.conda\envs\Merck\lib\site-packages\mlforecast\utils.py:164, in old_kw_to_pos.<locals>.decorator.<locals>.inner(*args, **kwargs)
    162                 new_args.append(kwargs.pop(arg_names[i]))
    163             new_args.append(kwargs.pop(old_name))
--> 164 return f(*new_args, **kwargs)

File ~\.conda\envs\Merck\lib\site-packages\mlforecast\forecast.py:790, in MLForecast.cross_validation(self, df, n_windows, h, id_col, time_col, target_col, step_size, static_features, dropna, keep_last_n, refit, max_horizon, before_predict_callback, after_predict_callback, prediction_intervals, level, input_size, fitted, data, window_size)
    788 else:
    789     X_df = None
--> 790 y_pred = self.predict(
    791     h,
    792     before_predict_callback=before_predict_callback,
    793     after_predict_callback=after_predict_callback,
    794     new_df=train if not should_fit else None,
    795     level=level,
    796     X_df=X_df,
    797 )
    798 y_pred = y_pred.merge(cutoffs, on=id_col, how="left")
    799 result = valid[[id_col, time_col, target_col]].merge(
    800     y_pred, on=[id_col, time_col]
    801 )

File ~\.conda\envs\Merck\lib\site-packages\mlforecast\utils.py:164, in old_kw_to_pos.<locals>.decorator.<locals>.inner(*args, **kwargs)
    162                 new_args.append(kwargs.pop(arg_names[i]))
    163             new_args.append(kwargs.pop(old_name))
--> 164 return f(*new_args, **kwargs)

File ~\.conda\envs\Merck\lib\site-packages\mlforecast\forecast.py:586, in MLForecast.predict(self, h, dynamic_dfs, before_predict_callback, after_predict_callback, new_df, level, X_df, ids, horizon, new_data)
    583 else:
    584     ts = self.ts
--> 586 forecasts = ts.predict(
    587     models=self.models_,
    588     horizon=h,
    589     dynamic_dfs=dynamic_dfs,
    590     before_predict_callback=before_predict_callback,
    591     after_predict_callback=after_predict_callback,
    592     X_df=X_df,
    593     ids=ids,
    594 )
    595 if level is not None:
    596     if self._cs_df is None:

File ~\.conda\envs\Merck\lib\site-packages\mlforecast\core.py:604, in TimeSeries.predict(self, models, horizon, dynamic_dfs, before_predict_callback, after_predict_callback, X_df, ids)
    600     X_df = X_df.sort_values([self.id_col, self.time_col]).drop(
    601         columns=[self.id_col, self.time_col, "_start", "_end"]
    602     )
    603 if getattr(self, "max_horizon", None) is None:
--> 604     preds = self._predict_recursive(
    605         models,
    606         horizon,
    607         dynamic_dfs,
    608         before_predict_callback,
    609         after_predict_callback,
    610         X_df,
    611     )
    612 else:
    613     preds = self._predict_multi(
    614         models,
    615         horizon,
   (...)
    618         X_df,
    619     )

File ~\.conda\envs\Merck\lib\site-packages\mlforecast\core.py:497, in TimeSeries._predict_recursive(self, models, horizon, dynamic_dfs, before_predict_callback, after_predict_callback, X_df)
    495 if before_predict_callback is not None:
    496     new_x = before_predict_callback(new_x)
--> 497 predictions = model.predict(new_x)
    498 if after_predict_callback is not None:
    499     predictions_serie = pd.Series(predictions, index=self._uids)

File ~\.conda\envs\Merck\lib\site-packages\sklearn\ensemble\_forest.py:984, in ForestRegressor.predict(self, X)
    982 check_is_fitted(self)
    983 # Check data
--> 984 X = self._validate_X_predict(X)
    986 # Assign chunk of trees to jobs
    987 n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

File ~\.conda\envs\Merck\lib\site-packages\sklearn\ensemble\_forest.py:599, in BaseForest._validate_X_predict(self, X)
    596 """
    597 Validate X whenever one tries to predict, apply, predict_proba."""
    598 check_is_fitted(self)
--> 599 X = self._validate_data(X, dtype=DTYPE, accept_sparse="csr", reset=False)
    600 if issparse(X) and (X.indices.dtype != np.intc or X.indptr.dtype != np.intc):
    601     raise ValueError("No support for np.int64 index based sparse matrices")

File ~\.conda\envs\Merck\lib\site-packages\sklearn\base.py:604, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, cast_to_ndarray, **check_params)
    602         out = X, y
    603 elif not no_val_X and no_val_y:
--> 604     out = check_array(X, input_name="X", **check_params)
    605 elif no_val_X and not no_val_y:
    606     out = _check_y(y, **check_params)

File ~\.conda\envs\Merck\lib\site-packages\sklearn\utils\validation.py:959, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
    953         raise ValueError(
    954             "Found array with dim %d. %s expected <= 2."
    955             % (array.ndim, estimator_name)
    956         )
    958     if force_all_finite:
--> 959         _assert_all_finite(
    960             array,
    961             input_name=input_name,
    962             estimator_name=estimator_name,
    963             allow_nan=force_all_finite == "allow-nan",
    964         )
    966 if ensure_min_samples > 0:
    967     n_samples = _num_samples(array)

File ~\.conda\envs\Merck\lib\site-packages\sklearn\utils\validation.py:124, in _assert_all_finite(X, allow_nan, msg_dtype, estimator_name, input_name)
    121 if first_pass_isfinite:
    122     return
--> 124 _assert_all_finite_element_wise(
    125     X,
    126     xp=xp,
    127     allow_nan=allow_nan,
    128     msg_dtype=msg_dtype,
    129     estimator_name=estimator_name,
    130     input_name=input_name,
    131 )

File ~\.conda\envs\Merck\lib\site-packages\sklearn\utils\validation.py:173, in _assert_all_finite_element_wise(X, xp, allow_nan, msg_dtype, estimator_name, input_name)
    156 if estimator_name and input_name == "X" and has_nan_error:
    157     # Improve the error message on how to handle missing values in
    158     # scikit-learn.
    159     msg_err += (
    160         f"\n{estimator_name} does not accept missing values"
    161         " encoded as NaN natively. For supervised learning, you might want"
   (...)
    171         "#estimators-that-handle-nan-values"
    172     )
--> 173 raise ValueError(msg_err)

ValueError: Input X contains NaN.
RandomForestRegressor does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See <https://scikit-learn.org/stable/modules/impute.html> You can find a list of all estimators that handle NaN values at the following page: <https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values>