Recurrent Neural Networks in PyTorch
Recurrent Neural Networks (RNNs) are a type of neural network designed for sequence prediction tasks. RNNs can be used for text data, speech data, classification problems, and generative models. Unlike standard ANNs, an RNN's prediction depends on its past predictions as well as the current input. RNNs are networks with loops in them, allowing information to persist.
Each node of an RNN takes two inputs:
- Memory unit
- Event unit
M(t-1) is the memory unit, i.e. the output of the previous prediction. E(t) is the current event, the information being provided at the present time. M(t) is the output of the current node, i.e. the output at the present time step in the sequence.
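To make the recurrence concrete, here is a minimal sketch of a single recurrent update, M(t) = tanh(W_m · M(t-1) + W_e · E(t) + b). This is my own illustration, not part of the notebook's model, and the sizes (memory of 4 units, events of 3 features) are hypothetical:

import torch

W_m = torch.randn(4, 4)   # memory-to-memory weights (hypothetical sizes)
W_e = torch.randn(4, 3)   # event-to-memory weights
b = torch.zeros(4)

M = torch.zeros(4)                            # M(0): the initial memory
events = [torch.randn(3) for _ in range(5)]   # E(1)..E(5)
for E in events:
    # the new memory depends on the previous memory and the current event
    M = torch.tanh(W_m @ M + W_e @ E + b)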
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
%matplotlib inline
In this notebook, I'm going to train a very simple LSTM model, which is a type of RNN architecture, to do time series prediction. Given some input data, it should be able to generate a prediction for the next step. I'll be using a sine wave as an example, as it's very easy to visualise its behaviour.
x = torch.linspace(0,799,800)
y = torch.sin(x*2*np.pi/40)
plt.figure(figsize=(12,4))
plt.xlim(-10,801)
plt.grid(True)
plt.xlabel("x")
plt.ylabel("sin")
plt.title("Sin plot")
plt.plot(y.numpy(),color='#8000ff')
plt.show()
test_size = 40
train_set = y[:-test_size]
test_set = y[-test_size:]
plt.figure(figsize=(12,4))
plt.xlim(-10,801)
plt.grid(True)
plt.xlabel("x")
plt.ylabel("sin")
plt.title("Sin plot")
plt.plot(train_set.numpy(),color='#8000ff')
plt.plot(range(760,800),test_set.numpy(),color="#ff8000")
plt.show()
While working with LSTM models, we divide the training sequence into a series of overlapping windows. The label used for comparison is the next value in the sequence. For example, if we have a series of 12 records and a window size of 3, we feed [x1, x2, x3] into the model and compare the prediction to x4. Then we backpropagate, update the parameters, and feed [x2, x3, x4] into the model, comparing the prediction to x5. To ease this process, I'm defining a function input_data(seq,ws) that creates a list of (seq, label) tuples. If ws is the window size, then the total number of (seq, label) tuples will be len(seq)-ws.
def input_data(seq,ws):
    out = []
    L = len(seq)
    for i in range(L-ws):
        window = seq[i:i+ws]        # ws consecutive values as the input sequence
        label = seq[i+ws:i+ws+1]    # the next value as the label
        out.append((window,label))
    return out
window_size = 40
train_data = input_data(train_set, window_size)
len(train_data)
train_data[0]
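As a quick sanity check (my own addition, not in the original notebook): with 760 training points and a window of 40 we expect 760 - 40 = 720 tuples, each pairing a 40-value window with a 1-value label:

assert len(train_data) == len(train_set) - window_size   # 760 - 40 = 720
seq0, label0 = train_data[0]
print(seq0.shape, label0.shape)   # torch.Size([40]) torch.Size([1])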
class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_size=50, out_size=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.linear = nn.Linear(hidden_size, out_size)
        # initial hidden and cell states, each of shape (num_layers, batch, hidden_size)
        self.hidden = (torch.zeros(1,1,hidden_size), torch.zeros(1,1,hidden_size))

    def forward(self, seq):
        # reshape the flat window to (seq_len, batch=1, input_size) as nn.LSTM expects
        lstm_out, self.hidden = self.lstm(seq.view(len(seq),1,-1), self.hidden)
        pred = self.linear(lstm_out.view(len(seq),-1))
        return pred[-1]   # only the prediction for the last time step
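The view calls above are worth unpacking. By default, nn.LSTM expects input of shape (seq_len, batch, input_size), so the flat 40-value window is reshaped to (40, 1, 1). A quick illustrative check with a dummy tensor (my own addition):

demo_seq = torch.randn(40)                         # one flat window of 40 values
print(demo_seq.view(len(demo_seq), 1, -1).shape)   # torch.Size([40, 1, 1])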
torch.manual_seed(42)
model = LSTM()
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
model
During training, I'm visualising the prediction process for the test data on the go. This gives a better understanding of how the training is being carried out in each epoch. The training sequence is represented in purple, while the predicted sequence is represented in orange.
epochs = 10
future = 40
for i in range(epochs):
    for seq, y_train in train_data:
        optimizer.zero_grad()
        # reset the hidden and cell states before each sequence
        model.hidden = (torch.zeros(1,1,model.hidden_size),
                        torch.zeros(1,1,model.hidden_size))
        y_pred = model(seq)
        loss = criterion(y_pred, y_train)
        loss.backward()
        optimizer.step()
    print(f"Epoch {i} Loss: {loss.item()}")

    # seed the forecast with the last training window, then predict one step
    # at a time, feeding each prediction back in as input
    preds = train_set[-window_size:].tolist()
    for f in range(future):
        seq = torch.FloatTensor(preds[-window_size:])
        with torch.no_grad():
            model.hidden = (torch.zeros(1,1,model.hidden_size),
                            torch.zeros(1,1,model.hidden_size))
            preds.append(model(seq).item())

    # compare the 40 forecast values with the true test range
    loss = criterion(torch.tensor(preds[-window_size:]), y[760:])
    print(f"Performance on test range: {loss}")

    plt.figure(figsize=(12,4))
    plt.xlim(700,801)
    plt.grid(True)
    plt.plot(y.numpy(),color='#8000ff')
    plt.plot(range(760,800),preds[window_size:],color='#ff8000')
    plt.show()
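So far the model has only seen a synthetic signal. Next, let's apply the same approach to a real-world dataset: monthly alcohol sales.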
df = pd.read_csv("/kaggle/input/for-simple-exercises-time-series-forecasting/Alcohol_Sales.csv", index_col = 0, parse_dates = True)
df.head()
df.dropna(inplace=True)
len(df)
plt.figure(figsize = (12,4))
plt.title('Alcohol Sales')
plt.ylabel('Sales in million dollars')
plt.grid(True)
plt.autoscale(axis='x',tight=True)
plt.plot(df['S4248SM144NCEN'],color='#8000ff')
plt.show()
y = df['S4248SM144NCEN'].values.astype(float)
# define a test size
test_size = 12
# create train and test splits
train_set = y[:-test_size]
test_set = y[-test_size:]
test_set
from sklearn.preprocessing import MinMaxScaler
# instantiate a scaler
scaler = MinMaxScaler(feature_range=(-1, 1))
# normalize the training set
train_norm = scaler.fit_transform(train_set.reshape(-1, 1))
train_norm = torch.FloatTensor(train_norm).view(-1)
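Note that the scaler is fitted on the training set only, so no information from the test set leaks into training. As a quick check (my own addition), the normalized values should now lie exactly in [-1, 1]:

print(train_norm.min().item(), train_norm.max().item())   # expected: -1.0 1.0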
# define a window size
window_size = 12
# define a function to create sequence/label tuples
def input_data(seq,ws):
    out = []
    L = len(seq)
    for i in range(L-ws):
        window = seq[i:i+ws]
        label = seq[i+ws:i+ws+1]
        out.append((window,label))
    return out
# apply input_data to train_norm
train_data = input_data(train_norm, window_size)
len(train_data)
train_data[0]
class LSTMnetwork(nn.Module):
    def __init__(self,input_size=1,hidden_size=100,output_size=1):
        super().__init__()
        self.hidden_size = hidden_size
        # add an LSTM layer:
        self.lstm = nn.LSTM(input_size,hidden_size)
        # add a fully-connected layer:
        self.linear = nn.Linear(hidden_size,output_size)
        # initialize h0 and c0:
        self.hidden = (torch.zeros(1,1,self.hidden_size),
                       torch.zeros(1,1,self.hidden_size))

    def forward(self,seq):
        lstm_out, self.hidden = self.lstm(
            seq.view(len(seq),1,-1), self.hidden)
        pred = self.linear(lstm_out.view(len(seq),-1))
        return pred[-1]
torch.manual_seed(42)
# instantiate
model = LSTMnetwork()
# loss
criterion = nn.MSELoss()
# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
model
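Out of curiosity (my own addition), we can count the trainable parameters. With input_size=1 and hidden_size=100, the LSTM layer alone holds 4*(100*1 + 100*100 + 100 + 100) = 41,200 of them:

# 41,200 LSTM parameters plus 101 in the linear layer
sum(p.numel() for p in model.parameters() if p.requires_grad)   # 41301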
epochs = 100
import time
start_time = time.time()

for epoch in range(epochs):
    for seq, y_train in train_data:
        optimizer.zero_grad()
        # reset the hidden and cell states before each sequence
        model.hidden = (torch.zeros(1,1,model.hidden_size),
                        torch.zeros(1,1,model.hidden_size))
        y_pred = model(seq)
        loss = criterion(y_pred, y_train)
        loss.backward()
        optimizer.step()
    print(f'Epoch: {epoch+1:2} Loss: {loss.item():10.8f}')

print(f'\nDuration: {time.time() - start_time:.0f} seconds')
future = 12
# seed the forecast with the last (normalized) training window
preds = train_norm[-window_size:].tolist()

model.eval()
for i in range(future):
    seq = torch.FloatTensor(preds[-window_size:])
    with torch.no_grad():
        model.hidden = (torch.zeros(1,1,model.hidden_size),
                        torch.zeros(1,1,model.hidden_size))
        preds.append(model(seq).item())
preds[window_size:]
df['S4248SM144NCEN'][-12:]
true_predictions = scaler.inverse_transform(np.array(preds[window_size:]).reshape(-1, 1))
true_predictions
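Since the predictions are now back on the original scale, we can quantify the error against the held-out 12 months. A simple RMSE computation (my own addition):

# root-mean-square error between the re-scaled forecasts and the true test values
rmse = np.sqrt(np.mean((true_predictions.flatten() - test_set)**2))
print(f"RMSE: {rmse:.2f}")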
x = np.arange('2018-02-01', '2019-02-01', dtype='datetime64[M]').astype('datetime64[D]')
plt.figure(figsize=(12,4))
plt.title('Alcohol Sales')
plt.ylabel('Sales in million dollars')
plt.grid(True)
plt.autoscale(axis='x',tight=True)
plt.plot(df['S4248SM144NCEN'], color='#8000ff')
plt.plot(x,true_predictions, color='#ff8000')
plt.show()
fig = plt.figure(figsize=(12,4))
plt.title('Alcohol Sales')
plt.ylabel('Sales in million dollars')
plt.grid(True)
plt.autoscale(axis='x',tight=True)
fig.autofmt_xdate()
plt.plot(df['S4248SM144NCEN']['2017-01-01':], color='#8000ff')
plt.plot(x,true_predictions, color='#ff8000')
plt.show()