Commit
✨ Add Early Stopping to LSTM
EssamWisam committed Jul 6, 2023
1 parent 51262b7 commit 0641e72
Showing 3 changed files with 153 additions and 27 deletions.
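The change implements patience-based early stopping: track the best validation loss seen so far, checkpoint the model whenever it improves, and stop (restoring the checkpoint) after `patience` epochs without improvement. A standalone sketch of that logic on a simulated loss curve — everything below is illustrative, not the repository's code:

import numpy as np

rng = np.random.default_rng(0)
# Fake validation-loss curve that improves, then plateaus (toy data).
val_losses = 1.0 / np.arange(1, 31) + rng.normal(0, 0.02, size=30)

patience, bad_epochs = 5, 0
best_loss, best_epoch = np.inf, -1
for epoch, val_loss in enumerate(val_losses):
    if val_loss < best_loss:
        best_loss, best_epoch, bad_epochs = val_loss, epoch, 0  # "checkpoint"
    else:
        bad_epochs += 1
        if bad_epochs == patience:
            print(f"Stopping at epoch {epoch}; best was epoch {best_epoch}.")
            break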
91 changes: 77 additions & 14 deletions botiverse/models/LSTM/LSTM.ipynb
@@ -39,7 +39,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -49,6 +49,7 @@
"from torch.autograd import Variable\n",
"import numpy as np\n",
"from tqdm import tqdm\n",
"import os\n",
"\n",
"class LSTMCell(nn.Module): \n",
" '''\n",
@@ -111,7 +112,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -178,7 +179,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -194,34 +195,96 @@
" self.criterion = nn.CrossEntropyLoss()\n",
"\n",
" def forward(self, x):\n",
" '''\n",
" Forward pass of the LSTMClassifier which takes the input and passes it through all the LSTM layers and an output layer to produce an output.\n",
" :param x: The input to the LSTMClassifier which is of shape (batch_size, seq_len, input_size)\n",
" :return: The output of the LSTMClassifier which is of shape (batch_size, num_classes)\n",
" '''\n",
" out = self.lstm(x)\n",
" out = self.fc(out)\n",
" return out\n",
" \n",
" def fit(self, X, y, hidden_size=64, λ=0.001, num_epochs=100, val_size=0.0):\n",
" \n",
" def fit(self, X, y, λ=0.001, α=1e-3, max_epochs=100, patience=5, val_ratio=0.2):\n",
" '''\n",
" Fit the LSTMClassifier to the given data.\n",
" :param X: The input data of shape (batch_size, seq_len, input_size)\n",
" :param y: The labels of the data of shape (batch_size)\n",
" :param hidden_size: The size of the hidden state of the LSTM layer (default: 64)\n",
" :param λ: The learning rate (default: 0.001)\n",
" :param num_epochs: The number of epochs to train the model for (default: 100)\n",
" '''\n",
" Xt = torch.from_numpy(X)\n",
" yt = torch.from_numpy(y)\n",
" if val_ratio:\n",
" indices = torch.randperm(len(Xt))\n",
" Xt, yt = Xt[indices], yt[indices]\n",
" # split the data into train and validation sets\n",
" val_size = int(val_ratio * len(Xt))\n",
" Xt, Xv = Xt[:-val_size], Xt[-val_size:]\n",
" yt, yv = yt[:-val_size], yt[-val_size:]\n",
" \n",
" optimizer = torch.optim.Adam(self.parameters(), lr=λ)\n",
" pbar = tqdm(range(num_epochs))\n",
" \n",
" optimizer = torch.optim.Adam(self.parameters(), lr=λ, weight_decay=α)\n",
" print(\"Training the LSTMClassifier...\")\n",
" curr_dir = os.path.dirname(os.path.realpath(__file__))\n",
" bad_epochs = 0\n",
" val_accuracy = 0\n",
" val_loss = 0\n",
" best_loss = np.inf\n",
" pbar = tqdm(range(max_epochs))\n",
" for epoch in pbar:\n",
" outputs = self(Xt)\n",
" loss = self.criterion(outputs.squeeze(), yt)\n",
" pbar.set_description(f\"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}\")\n",
" pbar.set_description(f\"Epoch {epoch+1}/{max_epochs}, Loss: {loss.item()}\")\n",
" \n",
" optimizer.zero_grad()\n",
" loss.backward()\n",
" optimizer.step()\n",
" if val_ratio:\n",
" # randomly shuffle the data\n",
" val_accuracy = self.evaluate(Xv, yv)\n",
" with torch.no_grad():\n",
" val_loss = self.criterion(self(Xv).squeeze(), yv)\n",
" if val_loss < best_loss:\n",
" best_loss = val_loss\n",
" bad_epochs = 0\n",
" # save the model\n",
" torch.save(self.state_dict(), os.path.join(curr_dir, \"LSTMClassifier.pt\"))\n",
" else:\n",
" bad_epochs += 1\n",
" if bad_epochs == patience:\n",
" print(f\"{patience} epochs have passed without improvement. Early stopping...\")\n",
" self.load_state_dict(torch.load(os.path.join(curr_dir, \"LSTMClassifier.pt\")))\n",
" break\n",
" # every 5 epochs see\n",
" pbar.set_postfix({\"Validation Accuracy\": val_accuracy}) \n",
" \n",
"\n",
" def predict(self, X):\n",
" '''\n",
" Predict the labels of the given data by passing it through the LSTMClassifier.\n",
" :param X: The input data of shape (batch_size, seq_len, input_size)\n",
" :return: The predicted labels of the data of shape (batch_size)\n",
" '''\n",
" Xt = torch.from_numpy(X)\n",
" outputs = self(Xt)\n",
" outputs = torch.argmax(outputs, dim=1)\n",
" return outputs.detach().numpy()\n",
" pred = torch.argmax(outputs, dim=1)\n",
" softmax = nn.Softmax(dim=1)\n",
" prob = torch.max(softmax(outputs), dim=1)\n",
" return pred.detach().numpy(), prob.values.detach().numpy()\n",
" \n",
" def evaluate(self, X, y):\n",
" Xt = torch.from_numpy(X)\n",
" yt = torch.from_numpy(y)\n",
" def evaluate(self, Xt, yt):\n",
" '''\n",
" Evaluate the LSTMClassifier on the given data.\n",
" :param X: The input data of shape (batch_size, seq_len, input_size)\n",
" :param y: The labels of the data of shape (batch_size)\n",
" :return: The accuracy of the LSTMClassifier on the given data\n",
" '''\n",
" # check ig they are torch tensors\n",
" if not isinstance(Xt, torch.Tensor) or not isinstance(yt, torch.Tensor):\n",
" Xt = torch.from_numpy(Xt)\n",
" yt = torch.from_numpy(yt)\n",
" outputs = self(Xt)\n",
" outputs = torch.argmax(outputs, dim=1)\n",
" # compute the accuracy\n",
@@ -230,15 +293,15 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[NbConvertApp] Converting notebook LSTM.ipynb to script\n",
"[NbConvertApp] Writing 8093 bytes to LSTM.py\n"
"[NbConvertApp] Writing 11235 bytes to LSTM.py\n"
]
}
],
89 changes: 76 additions & 13 deletions botiverse/models/LSTM/LSTM.py
@@ -27,7 +27,7 @@
#
#

# In[2]:
# In[5]:


import torch
@@ -36,6 +36,7 @@
from torch.autograd import Variable
import numpy as np
from tqdm import tqdm
import os

class LSTMCell(nn.Module):
'''
@@ -90,7 +91,7 @@ def forward(self, input, h, c):
#
# Given an input sequence, each token passes through all the layers, and each layer keeps its own hidden state and cell state, which are its outputs from the previous token.
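# That stacking can be sketched shape-by-shape. The following is an
# illustrative aside with assumed sizes (batch=4, seq_len=10, input_size=8,
# hidden_size=64), using torch.nn.LSTMCell for brevity rather than this
# file's custom LSTMCell:

import torch
import torch.nn as nn

batch, seq_len, input_size, hidden_size = 4, 10, 8, 64
x = torch.randn(batch, seq_len, input_size)
cells = nn.ModuleList([nn.LSTMCell(input_size, hidden_size),
                       nn.LSTMCell(hidden_size, hidden_size)])
h = [torch.zeros(batch, hidden_size) for _ in cells]  # one hidden state per layer
c = [torch.zeros(batch, hidden_size) for _ in cells]  # one cell state per layer
for t in range(seq_len):                  # each token visits every layer in turn
    inp = x[:, t, :]
    for l, cell in enumerate(cells):
        h[l], c[l] = cell(inp, (h[l], c[l]))
        inp = h[l]                        # layer l's output feeds layer l+1
out = h[-1]                               # last layer's state after the final token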

# In[3]:
# In[6]:


class LSTMX(nn.Module):
@@ -154,7 +155,7 @@ def forward(self, input, hₒ=None):
return outs[-1]


# In[4]:
# In[7]:


class LSTMClassifier(nn.Module):
@@ -169,41 +170,103 @@ def __init__(self, input_size, hidden_size, num_classes):
self.criterion = nn.CrossEntropyLoss()

def forward(self, x):
'''
Forward pass of the LSTMClassifier: pass the input through all the LSTM layers, then through the output layer, to produce class scores.
:param x: The input to the LSTMClassifier which is of shape (batch_size, seq_len, input_size)
:return: The output of the LSTMClassifier which is of shape (batch_size, num_classes)
'''
out = self.lstm(x)
out = self.fc(out)
return out

def fit(self, X, y, hidden_size=64, λ=0.001, num_epochs=100, val_size=0.0):

def fit(self, X, y, λ=0.001, α=1e-3, max_epochs=100, patience=5, val_ratio=0.2):
'''
Fit the LSTMClassifier to the given data.
:param X: The input data of shape (batch_size, seq_len, input_size)
:param y: The labels of the data of shape (batch_size)
:param λ: The learning rate (default: 0.001)
:param α: The weight decay coefficient passed to Adam (default: 1e-3)
:param max_epochs: The maximum number of epochs to train for (default: 100)
:param patience: The number of epochs to wait without validation improvement before stopping early (default: 5)
:param val_ratio: The fraction of the data held out for validation (default: 0.2)
'''
Xt = torch.from_numpy(X)
yt = torch.from_numpy(y)
if val_ratio:
indices = torch.randperm(len(Xt))
Xt, yt = Xt[indices], yt[indices]
# split the data into train and validation sets
val_size = int(val_ratio * len(Xt))
Xt, Xv = Xt[:-val_size], Xt[-val_size:]
yt, yv = yt[:-val_size], yt[-val_size:]

optimizer = torch.optim.Adam(self.parameters(), lr=λ)
pbar = tqdm(range(num_epochs))

optimizer = torch.optim.Adam(self.parameters(), lr=λ, weight_decay=α)
print("Training the LSTMClassifier...")
curr_dir = os.path.dirname(os.path.realpath(__file__))
bad_epochs = 0
val_accuracy = 0
val_loss = 0
best_loss = np.inf
pbar = tqdm(range(max_epochs))
for epoch in pbar:
outputs = self(Xt)
loss = self.criterion(outputs.squeeze(), yt)
pbar.set_description(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")
pbar.set_description(f"Epoch {epoch+1}/{max_epochs}, Loss: {loss.item()}")

optimizer.zero_grad()
loss.backward()
optimizer.step()
if val_ratio:
# compute validation accuracy and loss for early stopping
val_accuracy = self.evaluate(Xv, yv)
with torch.no_grad():
val_loss = self.criterion(self(Xv).squeeze(), yv)
if val_loss < best_loss:
best_loss = val_loss
bad_epochs = 0
# save the model
torch.save(self.state_dict(), os.path.join(curr_dir, "LSTMClassifier.pt"))
else:
bad_epochs += 1
if bad_epochs == patience:
print(f"{patience} epochs have passed without improvement. Early stopping...")
self.load_state_dict(torch.load(os.path.join(curr_dir, "LSTMClassifier.pt")))
break
# report the current validation accuracy on the progress bar
pbar.set_postfix({"Validation Accuracy": val_accuracy})


def predict(self, X):
'''
Predict the labels of the given data by passing it through the LSTMClassifier.
:param X: The input data of shape (batch_size, seq_len, input_size)
:return: The predicted labels of shape (batch_size) and their softmax probabilities of shape (batch_size)
'''
Xt = torch.from_numpy(X)
outputs = self(Xt)
outputs = torch.argmax(outputs, dim=1)
return outputs.detach().numpy()
pred = torch.argmax(outputs, dim=1)
softmax = nn.Softmax(dim=1)
prob = torch.max(softmax(outputs), dim=1)
return pred.detach().numpy(), prob.values.detach().numpy()

def evaluate(self, X, y):
Xt = torch.from_numpy(X)
yt = torch.from_numpy(y)
def evaluate(self, Xt, yt):
'''
Evaluate the LSTMClassifier on the given data.
:param Xt: The input data of shape (batch_size, seq_len, input_size), as a numpy array or torch tensor
:param yt: The labels of the data of shape (batch_size), as a numpy array or torch tensor
:return: The accuracy of the LSTMClassifier on the given data
'''
# check if the inputs are torch tensors; convert from numpy where needed
if not isinstance(Xt, torch.Tensor):
    Xt = torch.from_numpy(Xt)
if not isinstance(yt, torch.Tensor):
    yt = torch.from_numpy(yt)
outputs = self(Xt)
outputs = torch.argmax(outputs, dim=1)
# compute the accuracy
return (outputs == yt).sum().item() / len(yt)


# In[5]:
# In[8]:


# if running from notebook
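For orientation, a hedged usage sketch of the updated API on synthetic data. The import path follows the file paths above; the shapes, class count, and hyperparameters are assumptions, not values from the repository. Since fit saves its best weights next to the module via __file__ (the LSTMClassifier.pt added in this commit), this assumes the class is used from the converted LSTM.py script rather than inside the notebook:

import numpy as np
from botiverse.models.LSTM.LSTM import LSTMClassifier  # import path assumed

# Toy data: 200 sequences of 10 timesteps with 8 features, 3 classes (all assumed).
X = np.random.randn(200, 10, 8).astype(np.float32)
y = np.random.randint(0, 3, size=200).astype(np.int64)

model = LSTMClassifier(input_size=8, hidden_size=64, num_classes=3)

# val_ratio=0.2 holds out 20% of the data; training stops after `patience`
# epochs without validation-loss improvement and reloads the best checkpoint.
model.fit(X, y, λ=1e-3, α=1e-3, max_epochs=100, patience=5, val_ratio=0.2)

# predict now returns both the argmax labels and their softmax probabilities.
preds, probs = model.predict(X)
print(preds[:5], probs[:5])
print("accuracy:", model.evaluate(X, y))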
Binary file added botiverse/models/LSTM/LSTMClassifier.pt
