DeepAR with NormalDistributionLoss error on 0 values in target #1550

Open
safersephy opened this issue Mar 22, 2024 · 0 comments
  • PyTorch-Forecasting version: 2.2.0.post0
  • PyTorch version: 2.2.1+cu121
  • Python version: 3.10
  • Operating System: linux

Expected behavior

I executed the following code

# imports inferred from the traceback and the code below
import pandas as pd
import lightning.pytorch as pl
from lightning.pytorch.tuner import Tuner
from pytorch_forecasting import DeepAR, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer, NaNLabelEncoder
from pytorch_forecasting.metrics import NormalDistributionLoss

min_prediction_length = 1
max_prediction_length = 7
max_encoder_length = 45
min_encoder_length = max_encoder_length // 2


df['Date'] = pd.to_datetime(df['Date'])
training_cutoff = df["dayindex"].max() - max_prediction_length


static_categoricals = ['groupId', 'staticCat1']
time_varying_known_reals = ['dayindex', "day", "day_of_week", "month"]
categorical_encoders = {"staticCat1": NaNLabelEncoder(add_nan=True)}


# Define the TimeSeriesDataSet
train_dataset = TimeSeriesDataSet(
    df[lambda x: x.dayindex <= training_cutoff],
    time_idx='dayindex',
    target='target',
    group_ids=['groupId'],

    min_encoder_length=max_encoder_length,
    min_prediction_length=max_prediction_length,
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=static_categoricals,
    time_varying_known_categoricals=[],
    time_varying_known_reals=time_varying_known_reals,
    time_varying_unknown_reals=['target'],
    categorical_encoders=categorical_encoders,
    target_normalizer=GroupNormalizer(
        groups=['groupId'], transformation='softplus'),
    add_relative_time_idx=False,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True,
)

val_dataset = TimeSeriesDataSet.from_dataset(
    train_dataset, df, predict=True, stop_randomization=True
)


pl.seed_everything(42)

trainer = pl.Trainer(
        accelerator='gpu', 
        devices=1,
        gradient_clip_val=0.1,
    )

model = DeepAR.from_dataset(
        train_dataset,
        loss=NormalDistributionLoss()
    )

train_dataloader = train_dataset.to_dataloader(train=True, batch_size=128)
val_dataloader = val_dataset.to_dataloader(train=False, batch_size=128)

res = Tuner(trainer).lr_find(
    model,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
    min_lr=1e-5,
    max_lr=1e0,
    early_stop_threshold=100,
)

in order to do a simple run of DeepAR before moving to the actual implementation, and I expected the learning rate finder to return a result.

Actual behavior

However, the result was an error:

ValueError: Expected value argument (Tensor of shape (50, 7)) to be within the support (GreaterThan(lower_bound=0.0)) of the distribution TransformedDistribution(), but found invalid values:
tensor([[1.1970e+03, 1.1405e+03, 1.1565e+03, 1.1440e+03, 1.2220e+03, 1.1785e+03,
1.2575e+03],
[1.1566e+03, 1.1141e+03, 5.6951e+02, 3.8200e+02, 1.2393e+03, 1.1761e+03,
1.1487e+03],
[3.9768e+03, 3.4806e+03, 1.7094e+03, 2.0382e+03, 3.8790e+03, 4.2432e+03,
4.1328e+03],
[5.1714e+02, 5.6480e+02, 6.1144e+02, 5.8812e+02, 7.3109e+02, 7.2197e+02,
6.5099e+02],
[2.6130e+03, 2.7488e+03, 2.8462e+03, 2.7900e+03, 2.6310e+03, 2.7442e+03,
2.6760e+03],
[1.0802e+03, 1.1340e+03, 5.3125e+02, 1.4725e+02, 1.6258e+03, 1.1568e+03,
1.2272e+03],
[4.3299e+04, 2.3356e+04, 5.1015e+03, 3.5430e+03, 3.9182e+04, 5.1436e+04,
5.1934e+04],
[1.2720e+02, 1.2030e+02, 1.2810e+02, 1.3860e+02, 1.6110e+02, 1.6590e+02,
1.7220e+02],
[3.8550e+02, 3.9150e+02, 2.3238e+02, 1.0262e+02, 4.1488e+02, 4.2825e+02,
3.7788e+02],
[9.9600e+01, 1.5840e+02, 2.8500e+02, 1.2240e+02, 1.3020e+02, 3.7860e+02,
2.0040e+02],
[3.2876e+03, 3.4352e+03, 3.4078e+03, 3.5599e+03, 3.5036e+03, 3.4496e+03,
3.4626e+03],
[4.5550e+02, 4.0750e+02, 4.3950e+02, 4.2650e+02, 4.8400e+02, 4.7450e+02,
4.4925e+02],
[1.4558e+03, 1.4011e+03, 1.4960e+03, 1.2486e+03, 1.2721e+03, 1.4698e+03,
1.5194e+03],
[1.1138e+03, 9.7400e+02, 1.0333e+03, 1.0014e+03, 1.2019e+03, 1.1184e+03,
1.0849e+03],
[3.8085e+02, 4.0950e+02, 4.1220e+02, 1.5645e+02, 1.5810e+02, 4.2240e+02,
4.3800e+02],
[2.3150e+02, 2.4962e+02, 1.9200e+02, 2.1625e+02, 2.0200e+02, 1.6925e+02,
2.1825e+02],
[1.7680e+04, 1.7242e+04, 1.4014e+04, 1.3303e+04, 1.7064e+04, 1.6994e+04,
1.5360e+04],
[5.8699e+02, 6.2227e+02, 6.0845e+02, 6.1804e+02, 5.8656e+02, 5.9064e+02,
5.8764e+02],
[7.3350e+02, 7.4632e+02, 8.6275e+02, 8.5971e+02, 6.9766e+02, 7.0759e+02,
7.0873e+02],
[9.8420e+02, 9.8404e+02, 9.9685e+02, 9.6848e+02, 9.4029e+02, 9.5702e+02,
9.9533e+02],
[3.2761e+04, 3.3143e+04, 2.9130e+04, 2.7516e+04, 2.9792e+04, 2.9353e+04,
2.6305e+04],
[2.1624e+02, 2.1300e+02, 1.6707e+02, 1.6964e+02, 1.7825e+02, 1.7362e+02,
1.7035e+02],
[5.2478e+02, 5.2283e+02, 3.0174e+02, 2.5808e+02, 5.6009e+02, 5.4890e+02,
5.7898e+02],
[2.6234e+02, 2.4674e+02, 9.9280e+01, 8.0860e+01, 2.9206e+02, 2.8192e+02,
2.3914e+02],
[6.9935e+02, 7.3448e+02, 3.6166e+02, 2.5895e+02, 6.4098e+02, 6.7534e+02,
7.6642e+02],
[0.0000e+00, 0.0000e+00, 6.4000e+01, 9.3750e+00, 9.3750e+00, 9.3750e+00,
9.3750e+00],
[8.7610e+02, 3.0116e+02, 1.6609e+02, 1.7978e+02, 7.0635e+02, 8.1039e+02,
6.3517e+02],
[9.3986e+02, 9.5424e+02, 6.6546e+02, 2.9847e+02, 9.6926e+02, 1.0088e+03,
4.5387e+02],
[1.6494e+03, 1.9598e+03, 2.2036e+03, 1.6644e+03, 1.5173e+03, 1.5435e+03,
1.3588e+03],
[8.3550e+02, 7.8650e+02, 4.6250e+02, 4.8125e+02, 8.9162e+02, 7.9425e+02,
7.7512e+02],
[7.4144e+02, 6.6518e+02, 7.4306e+02, 8.1201e+02, 1.0432e+03, 9.7020e+02,
9.9778e+02],
[7.5533e+02, 6.8232e+02, 6.6468e+02, 6.5464e+02, 6.3852e+02, 6.7046e+02,
8.0126e+02],
[2.0685e+02, 1.9770e+02, 1.8405e+02, 2.2875e+02, 3.2550e+02, 2.8830e+02,
2.5545e+02],
[4.4413e+01, 4.4717e+01, 4.5022e+01, 4.6847e+01, 4.4717e+01, 4.5022e+01,
4.5022e+01],
[5.8755e+03, 5.6703e+03, 5.1266e+03, 4.5704e+03, 6.5678e+03, 6.6610e+03,
6.6457e+03],
[1.4850e+02, 1.0588e+02, 8.7250e+01, 8.2000e+01, 5.5575e+02, 4.1475e+02,
2.1638e+02],
[1.0840e+03, 1.1868e+03, 8.6562e+02, 6.6550e+02, 8.5775e+02, 9.4412e+02,
6.8650e+02],
[3.9384e+03, 3.5124e+03, 1.5000e+02, 1.5060e+02, 3.4770e+03, 3.4032e+03,
4.2810e+03],
[4.0687e+04, 3.3182e+04, 2.5645e+03, 3.5075e+03, 4.1414e+04, 4.3016e+04,
4.1641e+04],
[7.3008e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 2.7074e+01, 1.4297e+01,
3.0420e-01],
[7.0650e+02, 9.2625e+02, 1.7750e+01, 1.7625e+01, 1.7750e+01, 1.7875e+01,
1.7750e+01],
[1.8975e+02, 1.2125e+02, 8.0500e+01, 7.0500e+01, 3.5775e+02, 2.8475e+02,
2.5425e+02],
[3.8362e+02, 3.7262e+02, 3.7500e+02, 3.8300e+02, 4.2500e+02, 3.7300e+02,
3.3462e+02],
[5.5914e+04, 5.5315e+04, 5.4964e+04, 5.4704e+04, 5.5668e+04, 5.6167e+04,
5.7086e+04],
[1.1207e+05, 1.1388e+05, 1.1032e+05, 1.1228e+05, 1.1501e+05, 1.1098e+05,
1.0660e+05],
[3.6750e+02, 3.8500e+02, 3.0750e+02, 1.9688e+02, 3.8125e+02, 4.0775e+02,
3.6938e+02],
[3.3050e+02, 4.7475e+02, 2.3750e+02, 1.9200e+02, 4.8050e+02, 4.4700e+02,
4.3750e+02],
[4.7475e+02, 4.9000e+02, 4.5925e+02, 4.3700e+02, 5.0700e+02, 4.8125e+02,
4.7100e+02],
[1.5325e+03, 1.0725e+03, 7.0000e+01, 6.5500e+01, 2.0175e+03, 1.6040e+03,
1.2785e+03],
[5.3640e+02, 6.2720e+02, 1.3320e+02, 1.2740e+02, 4.4660e+02, 5.6040e+02,
6.5860e+02]], device='cuda:0')

I think it has to do with the fact that there are 0 values in the target, because when I change the GroupNormalizer transformation to count, or replace all 0s with a very small positive value, the code does run.
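The second workaround mentioned above can be sketched like this (a minimal standalone example on a hypothetical frame with a `target` column like the one in the report, using an arbitrary epsilon):

```python
import pandas as pd

# Hypothetical data: a target column containing exact zeros.
df = pd.DataFrame({"target": [0.0, 12.0, 0.0, 3.5]})

# Replace exact zeros with a tiny positive value so every target lies
# strictly inside the softplus support (0, inf). eps is arbitrary.
eps = 1e-6
df["target"] = df["target"].clip(lower=eps)

print((df["target"] > 0).all())  # every value is now strictly positive
```

This sidesteps the validation error, at the cost of slightly distorting the zero observations.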

As far as I know this is incorrect: when applying softplus and NormalDistributionLoss, I'd expect values from 0 upward to be allowed.

This might be related: pytorch/pytorch#59228

"the change in behavior from PyTorch 1.7 to 1.8 was that we turned on argument validation by default. You want to disable validation for a single distribution by passing validate_args=False to the constructor."
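The behavior can be reproduced outside pytorch-forecasting with a minimal sketch (not from the report; plain torch.distributions): a Normal pushed through a softplus transform has support strictly greater than 0, so with argument validation on (the PyTorch >= 1.8 default) `log_prob(0)` raises the same kind of ValueError, while `validate_args=False` suppresses the check.

```python
import torch
from torch.distributions import Normal, TransformedDistribution
from torch.distributions.transforms import SoftplusTransform

base = Normal(torch.tensor(0.0), torch.tensor(1.0))

# Support of the transformed distribution is GreaterThan(lower_bound=0.0),
# matching the constraint named in the error message; 0 is outside it.
dist = TransformedDistribution(base, [SoftplusTransform()])
try:
    dist.log_prob(torch.tensor(0.0))
except ValueError as e:
    print("validation rejected 0:", type(e).__name__)

# With validate_args=False the support check is skipped and log_prob
# evaluates (the result for exactly 0 is degenerate, not a finite number).
dist_noval = TransformedDistribution(
    base, [SoftplusTransform()], validate_args=False
)
out = dist_noval.log_prob(torch.tensor(0.0))
print("no validation, log_prob returned a tensor:", out.shape == torch.Size([]))
```

So disabling validation avoids the exception, but a more principled fix is a target transformation whose support actually includes 0.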

I can't share the notebook, but here is the traceback:

File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 543, in fit
call._call_and_handle_interrupt(
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 44, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 579, in _fit_impl
self._run(model, ckpt_path=ckpt_path)
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 986, in _run
results = self._run_stage()
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 1030, in _run_stage
self._run_sanity_check()
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 1059, in _run_sanity_check
val_loop.run()
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/lightning/pytorch/loops/utilities.py", line 182, in _decorator
return loop_run(self, *args, **kwargs)
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/lightning/pytorch/loops/evaluation_loop.py", line 135, in run
self._evaluation_step(batch, batch_idx, dataloader_idx, dataloader_iter)
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/lightning/pytorch/loops/evaluation_loop.py", line 396, in _evaluation_step
output = call._call_strategy_hook(trainer, hook_name, *step_args)
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 309, in _call_strategy_hook
output = fn(*args, **kwargs)
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/lightning/pytorch/strategies/strategy.py", line 412, in validation_step
return self.lightning_module.validation_step(*args, **kwargs)
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/pytorch_forecasting/models/base_model.py", line 630, in validation_step
log, out = self.step(x, y, batch_idx)
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/pytorch_forecasting/models/base_model.py", line 786, in step
loss = self.loss(prediction, y)
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/torchmetrics/metric.py", line 303, in forward
self._forward_cache = self._forward_reduce_state_update(*args, **kwargs)
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/torchmetrics/metric.py", line 372, in _forward_reduce_state_update
self.update(*args, **kwargs)
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/torchmetrics/metric.py", line 465, in wrapped_func
update(*args, **kwargs)
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/pytorch_forecasting/metrics/base_metrics.py", line 784, in update
losses = self.loss(y_pred, target)
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/pytorch_forecasting/metrics/base_metrics.py", line 932, in loss
loss = -distribution.log_prob(y_actual)
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/torch/distributions/transformed_distribution.py", line 163, in log_prob
self._validate_sample(value)
File "/anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages/torch/distributions/distribution.py", line 312, in _validate_sample
raise ValueError(
