Nasreddine D
08/25/2023, 7:58 AMMairon Cesar Simoes Chaves
08/28/2023, 8:25 PMtrain = df_completo_encoded.query("ds < '2023-07-07'")
test = df_completo_encoded.query("ds >= '2023-07-07'")
from mlforecast.utils import PredictionIntervals
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from mlforecast.target_transforms import Differences
from mlforecast.utils import PredictionIntervals
from window_ops.ewm import ewm_mean
from window_ops.rolling import rolling_mean, seasonal_rolling_mean,rolling_min, rolling_max, rolling_std
mlf = MLForecast(
freq = 'D',
models=[ XGBRegressor(n_jobs = -1),LGBMRegressor(n_jobs = -1)],
target_transforms=[Differences([1,7])],
lag_transforms={
1: [(rolling_mean, 2),(rolling_mean, 3),(rolling_mean, 4),(rolling_mean, 5),(rolling_mean, 6),(rolling_mean, 7),
(rolling_mean, 7), (rolling_mean, 14), (rolling_mean, 28),(ewm_mean, 0.9), expanding_mean,
(rolling_min,7), (rolling_min,14),(rolling_min,28),
(rolling_max,7), (rolling_max,14),(rolling_max,28),
(rolling_std,2),(rolling_std,3),(rolling_std,4),(rolling_std,5),(rolling_std,6),(rolling_std,7), (rolling_std,14),(rolling_std,28),
},
lags=[1,7,14,21,28],
date_features=['month', 'year', 'day_of_week', 'day_of_year','is_month_start','quarter','days_in_month'],
num_threads=4
)
%%time
mlf.fit( train,
id_col='unique_id',
#max_horizon = 47,
prediction_intervals=PredictionIntervals(n_windows=10, window_size=47),
time_col='ds',
target_col='y',
static_features= ['gtin','ADI','CV2','cluster_0','cluster_1','cluster_2','cluster_3','cluster_4'],)
Up to here, everything works fine. But the following call raises an error:
levels = [50, 80, 95]
forecasts = mlf.predict(47, level = levels )
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
Cell In[81], line 3
1 levels = [50, 80, 95]
----> 3 forecasts = mlf.predict(47, level = levels )
4 forecasts.head()
File /usr/local/lib/python3.10/site-packages/mlforecast/utils.py:186, in old_kw_to_pos.<locals>.decorator.<locals>.inner(*args, **kwargs)
184 new_args.append(kwargs.pop(arg_names[i]))
185 new_args.append(kwargs.pop(old_name))
--> 186 return f(*new_args, **kwargs)
File /usr/local/lib/python3.10/site-packages/mlforecast/forecast.py:532, in MLForecast.predict(self, h, dynamic_dfs, before_predict_callback, after_predict_callback, new_df, level, X_df, ids, horizon, new_data)
528 model_names = self.models.keys()
529 conformal_method = _get_conformal_method(
530 self.prediction_intervals.method
531 )
--> 532 forecasts = conformal_method(
533 forecasts,
534 self._cs_df,
535 model_names=list(model_names),
536 level=level_,
537 cs_h=self.prediction_intervals.h,
538 cs_n_windows=self.prediction_intervals.n_windows,
539 n_series=self.ts.ga.ngroups,
540 horizon=h,
541 )
542 return forecasts
File /usr/local/lib/python3.10/site-packages/mlforecast/forecast.py:55, in _add_conformal_distribution_intervals(fcst_df, cs_df, model_names, level, cs_n_windows, cs_h, n_series, horizon)
53 scores = scores[:, :, :horizon]
54 mean = fcst_df[model].values.reshape(1, n_series, -1)
---> 55 scores = np.vstack([mean - scores, mean + scores])
56 quantiles = np.quantile(
57 scores,
58 cuts,
59 axis=0,
60 )
61 quantiles = quantiles.reshape(len(cuts), -1)
File /usr/local/lib/python3.10/site-packages/pandas/core/arrays/masked.py:528, in BaseMaskedArray.__array_ufunc__(self, ufunc, method, *inputs, **kwargs)
525 return NotImplemented
527 # for binary ops, use our custom dunder methods
--> 528 result = ops.maybe_dispatch_ufunc_to_dunder_op(
529 self, ufunc, method, *inputs, **kwargs
530 )
531 if result is not NotImplemented:
532 return result
File /usr/local/lib/python3.10/site-packages/pandas/_libs/ops_dispatch.pyx:113, in pandas._libs.ops_dispatch.maybe_dispatch_ufunc_to_dunder_op()
File /usr/local/lib/python3.10/site-packages/pandas/core/ops/common.py:81, in _unpack_zerodim_and_defer.<locals>.new_method(self, other)
77 return NotImplemented
79 other = item_from_zerodim(other)
---> 81 return method(self, other)
File /usr/local/lib/python3.10/site-packages/pandas/core/arraylike.py:198, in OpsMixin.__rsub__(self, other)
196 @unpack_zerodim_and_defer("__rsub__")
197 def __rsub__(self, other):
--> 198 return self._arith_method(other, roperator.rsub)
File /usr/local/lib/python3.10/site-packages/pandas/core/arrays/masked.py:659, in BaseMaskedArray._arith_method(self, other, op)
657 other = np.asarray(other)
658 if other.ndim > 1:
--> 659 raise NotImplementedError("can only perform ops with 1-d structures")
661 # We wrap the non-masked arithmetic logic used for numpy dtypes
662 # in Series/Index arithmetic ops.
663 other = ops.maybe_prepare_scalar_for_op(other, (len(self),))
NotImplementedError: can only perform ops with 1-d structures
John Lashlee
08/28/2023, 11:07 PMWen Yao
08/28/2023, 11:09 PMtarget_transformer
is not updated to latest. I’m running into this error when importing GlobalSklearnTransformer
and have to manually update this file.
ImportError: cannot import name 'GlobalSklearnTransformer' from 'mlforecast.target_transforms' (/home/ec2-user/anaconda3/envs/plaid_python38/lib/python3.8/site-packages/mlforecast/target_transforms.py)
Mehmet Can Yıldırım
08/29/2023, 6:43 AMRaul Gherman
08/29/2023, 4:31 PMEvan Miller
08/30/2023, 6:05 PMEvan Miller
08/30/2023, 6:12 PMDiego Menezes
08/31/2023, 3:26 PMfrom statsforecast.utils import ConformalIntervals
gives the error:
ImportError: cannot import name 'ConformalIntervals' from 'statsforecast.utils'
Has anyone else experienced this? Python = 3.10.9 & statsforecast = 1.5.0
Thanks a bunch.Evan Miller
08/31/2023, 8:08 PMimport pandas as pd
import numpy as np
from mlforecast import MLForecast
import lightgbm as lgb
cols = 4 # col 0 is target, col 1 is weights, cols 2 and 3 are features
rows = 20000
data=np.random.rand(rows,cols)
columns = ['y','weight','feat_1','feat_2']
data_df = pd.DataFrame(data[:,0:4], columns=columns)
data_df['unique_id'] = 1
data_df['ds']= [i for i in range(0, rows)]
model_1 = MLForecast(
models={
'LGBM': lgb.LGBMRegressor()
},
freq=1,
lags = [1, 2],
)
prep = model_1.preprocess(data_df, static_features = None, dropna = False)
train_weight = prep['weight']
train_x = prep.iloc[:,[2,3,6,7]]
train_y = prep['y']
model_1.models['LGBM'].fit(train_x,train_y,train_weight)
data_future = pd.DataFrame({'unique_id': [1,1,1],'ds':[rows,rows+1,rows+2], 'feat_1':[1,2,3], 'feat_2':[1,2,3]})
model_1.predict(3,X_df=data_future)
Yaping Lang
09/04/2023, 7:37 AMTraceback (most recent call last):
File "/Users/lilypad/Documents/Code/timegpt/test1.py", line 19, in <module>
sf.plot(df, fcst_df, level=[80, 90], max_insample_length=24 * 5)
File "/opt/homebrew/lib/python3.11/site-packages/statsforecast/core.py", line 1789, in plot
axes[idx, idy].fill_between(
File "/opt/homebrew/lib/python3.11/site-packages/matplotlib/__init__.py", line 1446, in inner
return func(ax, *map(sanitize_sequence, args), **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/matplotlib/axes/_axes.py", line 5425, in fill_between
return self._fill_between_x_or_y(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/matplotlib/axes/_axes.py", line 5330, in _fill_between_x_or_y
ind, dep1, dep2 = map(
^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/numpy/ma/core.py", line 2360, in masked_invalid
return masked_where(~(np.isfinite(getdata(a))), a, copy=copy)
^^^^^^^^^^^^^^^^^^^^^^^
TypeError: ufunc 'isfinite' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
The Python script is:
import pandas as pd
import os
from nixtlats import TimeGPT
from statsforecast import StatsForecast as sf
print(os.environ)
df = pd.read_csv('<https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/electricity-short.csv>')
timegpt = TimeGPT(token=os.environ['TIMEGPT_TOKEN'])
fcst_df = timegpt.forecast(df, h=24, level=[80, 90])
print("NaN count in df:")
print(df.isna().sum())
print("NaN count in fcst_df:")
print(fcst_df.isna().sum())
sf.plot(df, fcst_df, level=[80, 90], max_insample_length=24 * 5)
J.
09/06/2023, 8:39 AM# Find timeseries with least data
min_ts_count = big_df_for_stats.groupby(by="unique_id").count()["y"].min()
# This is the minimum amount of training data wanted for the rolling window
initial_training_size = 365
step_size = 4
h = 1
test_size_parameter = ((min_ts_count - initial_training_size)//step_size)
# This is the workaround to prevent the exception which happens when "(test_size-h)%step_size != 0"
test_size_parameter = test_size_parameter - (test_size_parameter%step_size) + h
res_df = sf.cross_validation(h=h, step_size=step_size, fitted=True, test_size=test_size_parameter, n_windows=None)
Evan Miller
09/06/2023, 10:31 PMEvan Miller
09/06/2023, 10:45 PMfrom utilsforecast import preprocessing as pr
import pandas as pd
import numpy as np
df = pd.DataFrame(
{
'unique_id': [0, 0, 0, 1, 1],
'ds': pd.to_datetime(['2018-11-11', '2018-11-18', '2018-12-02', '2018-11-11', '2018-12-02']),
'y': np.arange(5),
}
)
df1 = pr.fill_gaps(
df,
freq='W',
)
print(df)
print(df1)
##### OUTPUT ######
unique_id ds y
0 0 2018-11-11 0
1 0 2018-11-18 1
2 0 2018-12-02 2
3 1 2018-11-11 3
4 1 2018-12-02 4
unique_id ds y
0 0 2018-11-08 NaN
1 0 2018-11-15 NaN
2 0 2018-11-22 NaN
3 0 2018-11-29 NaN
4 1 2018-11-08 NaN
5 1 2018-11-15 NaN
6 1 2018-11-22 NaN
7 1 2018-11-29 NaN
This is happening because the numpy definition for week is bucketed starting on Thursdays:
np.datetime64(pd.to_datetime("2018-11-11"),'W')
##### OUTPUT ######
numpy.datetime64('2018-11-08')
If I comment out the last two lines of _determine_bound (the helper that fill_gaps calls), the problem is resolved:
def fill_gaps(
    df: pd.DataFrame,
    freq: str = 'W',
    start: str = "per_serie",
    end: str = "global",
    id_col: str = "unique_id",
    time_col: str = "ds",
) -> pd.DataFrame:
    """Reindex every series onto a regular time grid, inserting rows for missing timestamps.

    Args:
        df: Long-format frame holding at least ``id_col`` and ``time_col``.
        freq: A numpy timedelta unit string (e.g. ``'W'``), or a non-string
            step that is used directly as the spacing between timestamps.
        start: ``"per_serie"`` (each series' own minimum time), ``"global"``
            (the minimum over all series), or an explicit value.
        end: Same options as ``start``, applied to the maximum time.
        id_col: Name of the series-identifier column.
        time_col: Name of the time column.

    Returns:
        A frame indexed by every ``(id, time)`` pair of the generated grid and
        then reset to columns; pairs that were absent from ``df`` come out of
        the ``reindex`` as missing values.
    """
    # One step of the grid: a timedelta for string frequencies, otherwise the raw step.
    delta = np.timedelta64(1, freq) if isinstance(freq, str) else freq
    times_by_id = df.groupby(id_col)[time_col].agg(["min", "max"])
    starts = _determine_bound(start, freq, times_by_id, "min")
    # np.arange excludes its stop value, so push the end one step further to keep it.
    ends = _determine_bound(end, freq, times_by_id, "max") + delta
    # Number of grid points per series.
    # NOTE(review): the cast truncates, so when a series' span is not a whole
    # multiple of `delta` this count presumably disagrees with the length of the
    # matching np.arange below — confirm against off-grid input.
    sizes = ((ends - starts) / delta).astype(np.int64)
    times = np.concatenate(
        [np.arange(lo, hi, delta) for lo, hi in zip(starts, ends)]
    )
    if isinstance(freq, str):
        times = times.astype("datetime64[ns]", copy=False)
    # Repeat each id once per grid point so it lines up with `times`.
    uids = np.repeat(times_by_id.index, sizes)
    idx = pd.MultiIndex.from_arrays([uids, times], names=[id_col, time_col])
    return df.set_index([id_col, time_col]).reindex(idx).reset_index()
def _determine_bound(bound, freq, times_by_id, agg) -> np.ndarray:
if bound == "per_serie":
out = times_by_id[agg].values
else:
# the following return a scalar
if bound == "global":
val = getattr(times_by_id[agg].values, agg)()
if isinstance(freq, str):
val = np.datetime64(val)
else:
if isinstance(freq, str):
# this raises a nice error message if it isn't a valid datetime
val = np.datetime64(bound)
else:
val = bound
out = np.full(times_by_id.shape[0], val)
#if isinstance(freq, str):
#out = out.astype(f"datetime64[{freq}]")
return out
# Toy input: two series with weekly timestamps and some weeks missing.
df = pd.DataFrame(
    {
        'unique_id': [0, 0, 0, 1, 1],
        'ds': pd.to_datetime(
            ['2018-11-11', '2018-11-18', '2018-12-02', '2018-11-11', '2018-12-02']
        ),
        'y': np.arange(5),
    }
)
# Fill the missing weeks with the locally defined fill_gaps.
df1 = fill_gaps(df, freq='W')
# Show the frame before and after gap filling.
print(df)
print(df1)
##### OUTPUT #####
unique_id ds y
0 0 2018-11-11 0
1 0 2018-11-18 1
2 0 2018-12-02 2
3 1 2018-11-11 3
4 1 2018-12-02 4
unique_id ds y
0 0 2018-11-11 0.0
1 0 2018-11-18 1.0
2 0 2018-11-25 NaN
3 0 2018-12-02 2.0
4 1 2018-11-11 3.0
5 1 2018-11-18 NaN
6 1 2018-11-25 NaN
7 1 2018-12-02 4.0
However, with this change, problems occur when the weekly bucketing of the data is not precise (e.g., if I accidentally put '2018-12-01' in, it would raise an error).J.
09/07/2023, 2:07 PMManuel
09/07/2023, 8:11 PMLinenBot
09/08/2023, 2:25 AMBWBarber
joined #general.steve tawk
09/08/2023, 9:07 AMMairon Cesar Simoes Chaves
09/08/2023, 3:27 PMApiError: status_code: 429, body: {'data': None, 'message': 'Too many requests', 'details': 'You have reached your request limit, email <mailto:ops@nixtla.io|ops@nixtla.io> to continue using the API', 'code': 'A20', 'requestID': 'CJA2TUG4YJ', 'support': 'If you have questions or need support, please email <mailto:ops@nixtla.io|ops@nixtla.io>'}
Brian Head
09/08/2023, 4:11 PMhanchen su
09/13/2023, 7:30 AMBrian Head
09/15/2023, 3:58 PMnixtla
package, which I can't find. I haven't had luck with google/stackoverflow searches. I did find one ticket/issue on the GitHub page about FFORMA (which would be great), but right now I'm just looking to do combo models of simple averages--with statsforecast and mlforecast models. Is this a feature I've just not been able to find or something perhaps in the works?
https://medium.com/@gosshhh9/nixtla-using-ensembling-method-for-time-series-e1d23964ac30Hernando Gaitán
09/18/2023, 3:34 AMArun Rajagopal
09/18/2023, 3:18 PMPatrick Baron
09/19/2023, 2:47 AMJ.
09/19/2023, 1:12 PMAkmal Soliev
09/19/2023, 7:57 PMLinenBot
09/20/2023, 11:54 AMGoostValley
joined #general.Hernando Gaitán
09/20/2023, 1:40 PMConor Curran
09/20/2023, 11:22 PM