Forecasting Mauna Loa CO2 dataset with SpectralMixtureKernel
#1825
-
I was reading the paper Gaussian Process Kernels for Pattern Discovery and Extrapolation which is implemented as I believe I am missing some steps to reproduce Figure 1a. exactly. Can someone help me with this? Attaching the code I have tried for this: import numpy as np
import pandas as pd
import torch
import gpytorch
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
# Config
n_restarts = 10
n_iter = 100
# Load dataset
# data = pd.read_csv(
# "https://raw.githubusercontent.com/patel-zeel/Adhoc/master/data/co2.csv"
# )
data = pd.read_csv("co2.csv")
print("Data is loaded")
# Train test split
X = data["0"].iloc[:290].values.reshape(-1, 1)
X_test = data["0"].iloc[290:].values.reshape(-1, 1)
y = data["1"].iloc[:290].values
y_test = data["1"].iloc[290:].values
# Scaling the dataset
Xscaler = StandardScaler()
X = Xscaler.fit_transform(X)
X_test = Xscaler.transform(X_test)
yscaler = StandardScaler()
y = yscaler.fit_transform(y.reshape(-1, 1)).ravel()
y_test = yscaler.transform(y_test.reshape(-1, 1)).ravel()
# convert to torch tensors
X = torch.tensor(X).to(torch.float32)
X_test = torch.tensor(X_test).to(torch.float32)
y = torch.tensor(y).to(torch.float32)
y_test = torch.tensor(y_test).to(torch.float32)
# Spectral mixture model from gpytorch
class ExactGPModel(gpytorch.models.ExactGP):
def __init__(self, train_x, train_y, likelihood):
super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
self.mean_module = gpytorch.means.ConstantMean()
self.covar_module = gpytorch.kernels.SpectralMixtureKernel(num_mixtures=10)
def forward(self, x):
mean_x = self.mean_module(x)
covar_x = self.covar_module(x)
return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
# Defining the model and optimizer
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ExactGPModel(X, y, likelihood)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
# Training the model
model.train()
likelihood.train()
best_loss = np.inf
for restart in range(n_restarts):
torch.manual_seed(restart)
# Initializing the model
for param in model.parameters():
torch.nn.init.normal_(param, mean=0, std=0.1)
for i in range(n_iter):
optimizer.zero_grad()
output = model(X)
loss = -mll(output, y)
# print(loss.item())
loss.backward()
optimizer.step()
if loss.item() < best_loss:
best_loss = loss.item()
best_state = model.state_dict()
print("restart", restart, "loss", loss.item(), "best_loss", best_loss)
# Activate best model state
model.load_state_dict(best_state)
# test the model
model.eval()
likelihood.eval()
with torch.no_grad(), gpytorch.settings.fast_pred_var():
preds = likelihood(model(X_test)).mean.cpu().numpy()
# plot the data
plt.plot(X, y, label="train")
plt.plot(X_test, y_test, label="test")
plt.plot(X_test, preds, label="prediction")
plt.legend()
plt.show() |
Beta Was this translation helpful? Give feedback.
Replies: 1 comment 3 replies
-
Ugh, this is pretty annoying because the optimization is so unstable, see Appendix D of this work. I was ultimately able to reproduce the results (but inconsistently, not sure why this is the case) with the following changes:
from botorch import fit_gpytorch_model
fit_gpytorch_model(mll, max_retries=10); |
Beta Was this translation helpful? Give feedback.
Ugh, this is pretty annoying because the optimization is so unstable, see Appendix D of this work. I was ultimately able to reproduce the results (but inconsistently, not sure why this is the case) with the following changes:
self.covar_module.initialize_from_data_empspect(train_x, train_y)
in the initialization (also important)