jan rathfelder
04/19/2024, 8:56 PM
jan rathfelder
04/19/2024, 8:59 PM
José Morales
04/19/2024, 9:04 PM
jan rathfelder
04/19/2024, 9:15 PM"/usr/local/lib/python3.8/site-packages/mlforecast/core.py", line 762, in predict
preds = self._predict_recursive(
File "/usr/local/lib/python3.8/site-packages/mlforecast/core.py", line 613, in _predict_recursive
predictions = model.predict(new_x)
File "/usr/local/lib/python3.8/site-packages/xgboost/sklearn.py", line 897, in predict
test = DMatrix(
File "/usr/local/lib/python3.8/site-packages/xgboost/core.py", line 506, in inner_f
return f(**kwargs)
File "/usr/local/lib/python3.8/site-packages/xgboost/core.py", line 616, in __init__
handle, feature_names, feature_types = dispatch_data_backend(
File "/usr/local/lib/python3.8/site-packages/xgboost/data.py", line 763, in dispatch_data_backend
return _from_numpy_array(data, missing, threads, feature_names,
File "/usr/local/lib/python3.8/site-packages/xgboost/data.py", line 178, in _from_numpy_array
_check_call(
File "/usr/local/lib/python3.8/site-packages/xgboost/core.py", line 218, in _check_call
raise XGBoostError(py_str(_LIB.XGBGetLastError()))
José Morales
04/19/2024, 9:16 PM
José Morales
04/19/2024, 9:16 PM
raise XGBoostError(py_str(_LIB.XGBGetLastError()))
should show something like:
XGBoostError("hello")
jan rathfelder
04/19/2024, 9:18 PM
Stack trace:
[bt] (0) /usr/local/lib/python3.8/site-packages/xgboost/lib/libxgboost.so(+0x1135b9) [0x7f0849a4e5b9]
[bt] (1) /usr/local/lib/python3.8/site-packages/xgboost/lib/libxgboost.so(+0x1340dd) [0x7f0849a6f0dd]
[bt] (2) /usr/local/lib/python3.8/site-packages/xgboost/lib/libxgboost.so(+0x155489) [0x7f0849a90489]
[bt] (3) /usr/local/lib/python3.8/site-packages/xgboost/lib/libxgboost.so(+0x125875) [0x7f0849a60875]
[bt] (4) /usr/local/lib/python3.8/site-packages/xgboost/lib/libxgboost.so(XGDMatrixCreateFromDense+0x24f) [0x7f08499dd0ef]
[bt] (5) /usr/lib/x86_64-linux-gnu/libffi.so.6(ffi_call_unix64+0x4c) [0x7f09c12458ee]
[bt] (6) /usr/lib/x86_64-linux-gnu/libffi.so.6(ffi_call+0x22f) [0x7f09c12452bf]
[bt] (7) /usr/local/lib/python3.8/lib-dynload/_ctypes.cpython-38-x86_64-linux-gnu.so(+0xd702) [0x7f09be77d702]
[bt] (8) /usr/local/lib/python3.8/lib-dynload/_ctypes.cpython-38-x86_64-linux-gnu.so(+0x127d5) [0x7f09be7827d5]
jan rathfelder
04/19/2024, 9:19 PM
`inf` or `nan`
José Morales
04/19/2024, 9:19 PM
jan rathfelder
04/19/2024, 9:22 PM
José Morales
04/19/2024, 9:23 PM
`booster='gblinear'` doesn't support Inf in the input array and raises that exact error
José Morales
04/19/2024, 9:24 PM
jan rathfelder
04/19/2024, 9:26 PM
José Morales
04/19/2024, 9:27 PM
jan rathfelder
04/19/2024, 9:27 PM
José Morales
04/19/2024, 9:30 PM
import xgboost as xgb
from sklearn.datasets import make_regression

X, y = make_regression(1_000, n_features=4)
bst = xgb.XGBRegressor(booster='gblinear').fit(X, y)
X2 = X[:5].copy()
X2[0, 0] = float('inf')
try:
    bst.predict(X2)
except Exception as e:
    print('in except')
    print(e)
which is what is happening within mlforecast, but the try/except catches it correctly
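(A guard one could put in front of predict to avoid this with gblinear; a minimal sketch assuming the features sit in a pandas DataFrame, with the helper name replace_nonfinite and the 0.0 fill value purely illustrative:)

import numpy as np
import pandas as pd

def replace_nonfinite(df: pd.DataFrame, fill_value: float = 0.0) -> pd.DataFrame:
    # swap +/-inf for NaN first, then fill, so gblinear never sees Inf
    num_cols = df.select_dtypes(include=[np.number]).columns
    out = df.copy()
    out[num_cols] = out[num_cols].replace([np.inf, -np.inf], np.nan).fillna(fill_value)
    return out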
jan rathfelder
04/19/2024, 9:32 PM
# (excerpt from a class; assumes numpy as np, optuna, timedelta, tqdm, XGBRegressor,
# MLForecast, Differences and the mlforecast lag transforms are imported in the module)
def objective(self, trial: optuna.trial.Trial) -> float:
    """
    Objective function for hyperparameter tuning with Optuna.

    Parameters:
    - trial (Trial): A single trial of the hyperparameter tuning process.

    Returns:
    - float: The mean squared error of the validation set, which Optuna will attempt to minimize.
    """
    # validation_length = self.validation_fc_horizon * self.validation_steps
    all_time_index = self.df_train.ds.unique()
    max_number_of_validation_steps = np.floor(
        len(all_time_index) / self.validation_fc_horizon
    )
    validation_steps_final = min(
        self.validation_steps, int(max_number_of_validation_steps) + 1
    )
    print("max number of validation steps:", max_number_of_validation_steps)
    # create the regressor object
    lags = trial.suggest_int("lags", 2, 15, step=1)
    seasonal_rolling = trial.suggest_int("seasonal_rolling", 2, 50, step=1)
    seasonal_rolling_month = trial.suggest_int("seasonal_rolling_month", 1, 12, step=1)
    rolling_std = trial.suggest_int("rolling_std", 7, 112, step=7)
    rolling_mean_short_term_window = trial.suggest_int(
        "rolling_mean_short_term_window", 7, 28, step=7
    )
    rolling_mean_mid_term_window = trial.suggest_int(
        "rolling_mean_mid_term_window", 28, 70, step=7
    )
    rolling_mean_long_term_window = trial.suggest_int(
        "rolling_mean_long_term_window", 70, 112, step=7
    )
    differencing_order = trial.suggest_int("differencing_order", 0, 1, step=1)
    alpha_weighted_mean = trial.suggest_uniform("alpha_weighted_mean", 0.1, 0.9)
    # apply_boxcox = trial.suggest_categorical(
    #     "apply_boxcox", [True, False]
    # )  # Suggest whether to apply Box-Cox transformation
    params = {
        "verbosity": 0,
        "objective": "reg:squarederror",
        "booster": trial.suggest_categorical(
            "booster", ["gbtree", "gblinear"]
        ),  # 'dart' is an additional option
        "lambda": trial.suggest_loguniform("lambda", 1e-3, 10.0),
        "alpha": trial.suggest_loguniform("alpha", 1e-3, 10.0),
        "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
        "learning_rate": trial.suggest_uniform("learning_rate", 0.001, 0.3),
        "n_estimators": trial.suggest_int("n_estimators", 100, 5000),
        "max_depth": trial.suggest_int("max_depth", 3, 50),
        "min_child_weight": trial.suggest_int("min_child_weight", 2, 30),
        "gamma": trial.suggest_uniform("gamma", 0, 0.8),
        "grow_policy": trial.suggest_categorical(
            "grow_policy", ["depthwise", "lossguide"]
        ),
    }
    # Determine transformations based on trial suggestion
    # if apply_boxcox:
    #     # Box-Cox requires strictly positive data, ensure this or shift data accordingly
    #     target_transforms = [boxcox_global, Differences([differencing_order])]
    # else:
    #     # Use the existing setup or no transformation
    #     target_transforms = [Differences([differencing_order])]
    target_transforms = [Differences([differencing_order])]
    regressor = XGBRegressor(**params)
    scores = []
    executed_validation_steps = 0
    # adapt validation fc horizon for short series:
    train_max_date = self.df_train.ds.max() - timedelta(
        days=self.validation_fc_horizon
    )
    delta_days = (train_max_date - self.df_train.ds.min()).days
    if delta_days < 250:
        day_correction = 250 - delta_days
        print("change validation fc horizon")
        self.validation_fc_horizon = (
            self.validation_fc_horizon - day_correction
        ) - 1
        print(self.validation_fc_horizon)
    for t in tqdm(
        range(0, validation_steps_final), desc="Validation time-windows loop"
    ):
        try:
            df_validation = self.df_train[
                (
                    self.df_train.ds
                    <= self.df_train.ds.max()
                    - timedelta(days=self.validation_fc_horizon) * t
                )
                & (
                    self.df_train.ds
                    > self.df_train.ds.max()
                    - timedelta(days=self.validation_fc_horizon) * (t + 1)
                )
            ].copy()
            X_validation = df_validation[self.features_for_validation].copy()
            # Define the training set to include data up to the start of the validation window
            df_train_temp = self.df_train[
                self.df_train.ds <= df_validation.ds.min() - timedelta(days=1)
            ].copy()
            # Check if the training period is at least as long as the validation period
            if len(df_train_temp.ds.unique()) < self.validation_fc_horizon:
                print(f"Skipping validation step {t+1} due to short training data.")
                continue  # Skip this validation step
            executed_validation_steps += 1  # Increment counter for executed steps
            model = MLForecast(
                models=regressor,
                freq="D",
                lags=[7 * (i + 1) for i in range(lags)],  # + [363, 364, 365]
                date_features=[
                    "year",
                    "month",
                    "dayofweek",
                    "quarter",
                    "week",
                    "dayofyear",
                    "is_leap_year",
                    "is_year_end",
                    "is_month_end",
                    "is_month_start",
                ],
                # one list per lag: the original duplicate dict keys (silenced with
                # `# noqa: F601`) would have kept only the last transform per lag
                lag_transforms={
                    1: [
                        ExponentiallyWeightedMean(alpha=alpha_weighted_mean),
                        ExpandingStd(),
                        ExpandingMin(),
                        ExpandingMax(),
                    ],
                    7: [
                        RollingStd(window_size=rolling_std),
                        SeasonalRollingMean(7, seasonal_rolling),
                        SeasonalRollingStd(7, seasonal_rolling),
                        RollingMean(rolling_mean_short_term_window),
                    ],
                    28: [RollingMean(rolling_mean_mid_term_window)],
                    30: [
                        SeasonalRollingMean(30, seasonal_rolling_month),
                        SeasonalRollingStd(30, seasonal_rolling_month),
                    ],
                    84: [RollingMean(rolling_mean_long_term_window)],
                },
                target_transforms=target_transforms,
            )
            # fit model
            model.fit(
                df_train_temp, static_features=self.static_features, as_numpy=True
            )
            # print(model.ts.features_order_)
            # predict model
            p = model.predict(h=self.validation_fc_horizon, X_df=X_validation)
            p = p.merge(df_validation, on=["unique_id", "ds"], how="left")
            p = p.fillna(0)
            score = mean_squared_error(p.y, p["XGBRegressor"])
            scores.append(score)
        except Exception as e:
            print(f"An error occurred in validation step {t+1}: {e}")
            continue
    # Compute the average score over all time periods (nan if every step was skipped)
    average_score = np.mean(scores)
    return average_score  # Optuna aims to minimize this value
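(For context, an objective like this is driven by Optuna's standard loop; a minimal sketch, where Tuner is an illustrative name for whatever class holds df_train and the horizon settings:)

import optuna

tuner = Tuner(...)  # hypothetical wrapper exposing the objective above
study = optuna.create_study(direction="minimize")  # the objective returns MSE
study.optimize(tuner.objective, n_trials=50)
print(study.best_params)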
jan rathfelder
04/19/2024, 9:32 PM
jan rathfelder
04/19/2024, 9:33 PM
jan rathfelder
04/19/2024, 9:38 PM
José Morales
04/19/2024, 9:38 PM
jan rathfelder
04/19/2024, 9:38 PM
jan rathfelder
04/19/2024, 9:39 PM
José Morales
04/19/2024, 9:39 PM
jan rathfelder
04/19/2024, 9:40 PM
José Morales
04/19/2024, 9:41 PM
jan rathfelder
04/19/2024, 9:41 PM
jan rathfelder
04/19/2024, 9:41 PM
José Morales
04/19/2024, 9:43 PM
jan rathfelder
04/19/2024, 9:45 PM
José Morales
04/19/2024, 9:46 PM
jan rathfelder
04/19/2024, 9:55 PM
jan rathfelder
04/19/2024, 9:56 PM
José Morales
04/19/2024, 9:57 PM
jan rathfelder
04/19/2024, 9:59 PM
jan rathfelder
04/20/2024, 4:35 PM
jan rathfelder
04/20/2024, 4:41 PM
jan rathfelder
04/20/2024, 4:50 PM
jan rathfelder
04/20/2024, 10:49 PM
30: [
    SeasonalRollingMean(30, seasonal_rolling_month),
    SeasonalRollingStd(30, seasonal_rolling_month),
],
José Morales
04/22/2024, 3:07 PM
`min_samples=1`, which would make it require only 31 samples (because of the lag 30)
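(Applied to the lines jan quoted, José's suggestion would look like this; min_samples is an actual parameter of the mlforecast lag transforms, and with min_samples=1 a series needs only lag 30 plus a single observation, i.e. 31 rows:)

30: [
    SeasonalRollingMean(30, seasonal_rolling_month, min_samples=1),
    SeasonalRollingStd(30, seasonal_rolling_month, min_samples=1),
],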
jan rathfelder
04/23/2024, 11:38 AM