import pandas as pd import torch import torch.nn as nn from sklearn.preprocessing import StandardScaler, MinMaxScaler from sklearn.model_selection import train_test_split import numpy as np import datetime as dt import matplotlib.pyplot as plt
# Plot the `tave` column of `df` against the calendar dates of its index.
x = list(map(dt.datetime.date, df.index))  # index entries -> plain dates

fig = plt.figure(figsize=(10, 5))
plt.plot(x, df.tave, "b-", label="Temperature")
plt.title('Temperature Changing with Time')
plt.ylabel('Temperature/K')
plt.grid(True)
plt.legend()
温度变化
对数据进行处理,分割为train和test:
def train_test(df, test_periods):
    """Split the ``tave`` column of ``df`` into train and test series.

    The last ``test_periods`` rows form the test series; every row before
    them forms the training series.

    Args:
        df: DataFrame that has a ``tave`` column.
        test_periods: Number of trailing rows to hold out for testing.

    Returns:
        A ``(train, test)`` tuple of Series.

    Raises:
        ValueError: If ``test_periods`` is negative.
    """
    if test_periods < 0:
        raise ValueError("test_periods must be non-negative")

    # Slice at an explicit position instead of using a negative offset:
    # with test_periods == 0, `df[:-0]` would be empty and `df[-0:]` would be
    # the whole frame, i.e. train and test would be swapped.
    split = max(len(df) - test_periods, 0)
    train = df.iloc[:split].tave
    test = df.iloc[split:].tave
    return train, test


test_periods = 30
train, test = train_test(df, test_periods)

# Convert the Series back to single-column DataFrames.
train = train.to_frame()
test = test.to_frame()
Original dimensions : torch.Size([14212, 1]) Correct dimensions : torch.Size([14212])
在深度学习模型中,我们需要成对的X和Y(输入序列和对应的预测目标),用下面的代码生成匹配的X和Y:
def get_x_y_pairs(train_scaled, train_periods, prediction_periods):
    """Cut a sequence into sliding (input window, target window) pairs.

    train_scaled - training sequence
    train_periods - how many consecutive data points form one input window
    prediction_periods - how many data points right after the input window
                         form the matching target window

    Pair ``i`` uses ``train_scaled[i:i+train_periods]`` as input and the
    following ``prediction_periods`` points as target. Exactly
    ``len(train_scaled) - train_periods - prediction_periods`` pairs are
    produced.

    Returns ``(x_train, y_train)``; row ``i`` of each tensor holds pair ``i``.
    """
    n_pairs = len(train_scaled) - train_periods - prediction_periods

    inputs, targets = [], []
    for start in range(n_pairs):
        split = start + train_periods
        inputs.append(train_scaled[start:split])
        targets.append(train_scaled[split:split + prediction_periods])

    # torch.stack joins the equally sized windows along a new leading
    # dimension, so every list element becomes one row.
    return torch.stack(inputs), torch.stack(targets)
# Window sizes: each sample uses 60 data points as input and predicts as many
# points as were held out for testing (test_periods).
train_periods = 60
prediction_periods = test_periods

x_train, y_train = get_x_y_pairs(train_scaled, train_periods, prediction_periods)

# Show the shapes of the stacked inputs and targets, one per line.
print(x_train.shape, y_train.shape, sep="\n")
torch.Size([14122, 60]) torch.Size([14122, 30])
模型
LSTM的原理网上有很多资料,可以随便找一篇看看。用下面的代码就可以定义一个LSTM模型:
class LSTM(nn.Module):
    """LSTM layer followed by a linear read-out for multi-step forecasting.

    Args:
        input_size: Number of features per time step. It is 1 in this
            example since there is only one predictor (a sequence of
            previous values).
        hidden_size: Size of the LSTM hidden and cell state; dictates how
            much hidden "long term memory" the network has.
        output_size: Number of values predicted per input sequence; equal to
            the ``prediction_periods`` input to ``get_x_y_pairs``.
        device: Kept for backward compatibility and stored as
            ``self.device``. The module is not moved by the constructor; the
            caller does that with ``.to(...)``. The initial hidden state is
            created on the device of the input passed to ``forward``.
    """

    def __init__(self, input_size, hidden_size, output_size, device='cuda'):
        super().__init__()
        self.hidden_size = hidden_size
        self.device = device

        self.lstm = nn.LSTM(input_size, hidden_size)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden=None):
        """Run one sequence through the network.

        Args:
            x: Input sequence; it is reshaped to ``(len(x), 1, -1)``, i.e.
                handled as a single sequence with batch size 1.
            hidden: Optional ``(h_0, c_0)`` tuple used as the initial state.
                When ``None``, a zero state is used.

        Returns:
            ``(prediction, hidden)``: the linear read-out of the last time
            step and the ``(h_n, c_n)`` state returned by the LSTM.
        """
        if hidden is None:
            # Build the zero state next to the input (same device and dtype)
            # instead of hard-coding 'cuda', so the model also runs on CPU.
            self.hidden = (
                torch.zeros(1, 1, self.hidden_size, dtype=x.dtype, device=x.device),
                torch.zeros(1, 1, self.hidden_size, dtype=x.dtype, device=x.device),
            )
        else:
            # Honour a caller-supplied state rather than silently ignoring it.
            self.hidden = hidden

        # Inputs need to be in the shape defined in the documentation:
        # https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html
        #
        # lstm_out    - contains the hidden states from all times in the sequence
        # self.hidden - contains the current hidden state and cell state
        lstm_out, self.hidden = self.lstm(x.view(len(x), 1, -1), self.hidden)

        # Map every time step's hidden state to `output_size` values and
        # return only the read-out of the final time step.
        predictions = self.linear(lstm_out.view(len(x), -1))
        return predictions[-1], self.hidden
# Build the network (one input feature, 50 hidden units, one output per
# held-out period) and move it to the GPU.
model = LSTM(input_size=1, hidden_size=50, output_size=test_periods, device='cuda')
model = model.to('cuda')

# Mean squared error between the predicted and the true target window.
criterion = nn.MSELoss()

# A bare `optim` name is not imported at the top of the file, so Adam is
# reached through the already imported `torch` package.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
迭代训练
# Train one (input window, target window) pair at a time.
epochs = 10
model.train()

for epoch in range(epochs + 1):  # runs epochs + 1 passes over the data
    for x, y in zip(x_train, y_train):
        x, y = x.to('cuda'), y.to('cuda')
        # debug: print(x.shape); print(y.shape)

        optimizer.zero_grad()
        y_hat, _ = model(x, None)  # None -> start from a fresh hidden state
        loss = criterion(y_hat, y)
        loss.backward()
        optimizer.step()

    # Report the loss of the last pair seen in this epoch.
    # if epoch%100==0:
    print(f'epoch: {epoch:4} loss:{loss.item():10.8f}')