In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error
import tensorflow as tf
from keras import Model
from keras.layers import Input, Dense, Dropout
from keras.layers import LSTM
import warnings
warnings.filterwarnings('ignore')
df = pd.read_csv('merged_data.csv')
df.head()
Out[4]:
|   | Date | Price | Open | High | Low | Vol. | Change % |
|---|------|-------|------|------|-----|------|----------|
| 0 | 2000-08-30 | 273.899994 | 273.899994 | 273.899994 | 273.899994 | 0 | 0 |
| 1 | 2000-08-31 | 278.299988 | 274.799988 | 278.299988 | 274.799988 | 0 | 0 |
| 2 | 2000-09-01 | 277.000000 | 277.000000 | 277.000000 | 277.000000 | 0 | 0 |
| 3 | 2000-09-05 | 275.799988 | 275.799988 | 275.799988 | 275.799988 | 2 | 0 |
| 4 | 2000-09-06 | 274.200012 | 274.200012 | 274.200012 | 274.200012 | 0 | 0 |
In [5]:
df.shape
df.info()
df.describe()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6064 entries, 0 to 6063
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   Date      6064 non-null   object
 1   Price     6064 non-null   float64
 2   Open      6064 non-null   float64
 3   High      6064 non-null   float64
 4   Low       6064 non-null   float64
 5   Vol.      6064 non-null   int64
 6   Change %  6064 non-null   int64
dtypes: float64(4), int64(2), object(1)
memory usage: 331.8+ KB
Out[5]:
|   | Price | Open | High | Low | Vol. | Change % |
|---|-------|------|------|-----|------|----------|
| count | 6064.000000 | 6064.000000 | 6064.000000 | 6064.000000 | 6064.000000 | 6064.0 |
| mean | 1150.724818 | 1150.766623 | 1156.481085 | 1144.814528 | 4262.785455 | 0.0 |
| std | 569.143985 | 569.078431 | 572.181256 | 565.977679 | 24284.937764 | 0.0 |
| min | 255.100006 | 255.000000 | 256.100006 | 255.000000 | 0.000000 | 0.0 |
| 25% | 628.274994 | 628.700012 | 630.875000 | 626.000000 | 20.000000 | 0.0 |
| 50% | 1231.199951 | 1231.049988 | 1237.450012 | 1226.049988 | 105.000000 | 0.0 |
| 75% | 1605.024963 | 1604.999969 | 1614.274994 | 1592.724976 | 395.000000 | 0.0 |
| max | 2788.500000 | 2774.600098 | 2789.000000 | 2774.600098 | 386334.000000 | 0.0 |
In [6]:
# Feature subset selection
# Since we will not use the Vol. or Change % features to predict the price, we drop both columns:
df.drop(['Vol.', 'Change %'], axis=1, inplace=True)
In [7]:
# The Date feature is stored as an object in the DataFrame. To speed up computation, we convert it to the datetime dtype, then sort the frame by date in ascending order:
df['Date'] = pd.to_datetime(df['Date'])
df.sort_values(by='Date', ascending=True, inplace=True)
df.reset_index(drop=True, inplace=True)
In [8]:
# The ',' thousands separator is redundant in the dataset. We first strip it from all numeric columns, then cast them to float:
NumCols = df.columns.drop(['Date'])
df[NumCols] = df[NumCols].replace({',': ''}, regex=True)
df[NumCols] = df[NumCols].astype('float64')
df.head()
# Count duplicated rows:
df.duplicated().sum()
# Count missing values:
df.isnull().sum().sum()
Out[8]:
0
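As a minimal illustration of this cleanup (on toy data, not the notebook's CSV), the comma-stripping and cast look like the sketch below; note that `pd.read_csv` can also strip thousands separators at load time via its `thousands` parameter.

```python
import pandas as pd

# Toy frame with the same comma-formatted numerics as the gold data
toy = pd.DataFrame({'Price': ['1,234.5', '987.0'], 'Open': ['1,230.0', '990.5']})
toy = toy.replace({',': ''}, regex=True).astype('float64')
print(toy.dtypes)  # Price float64, Open float64

# Equivalent at load time (same idea, hypothetical call for this dataset):
# df = pd.read_csv('merged_data.csv', thousands=',')
```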
In [9]:
# Visualize the gold price history
# An interactive gold price chart:
fig = px.line(y=df.Price, x=df.Date)
fig.update_traces(line_color='black')
fig.update_layout(xaxis_title="Date",
yaxis_title="Scaled Price",
title={'text': "Gold Price History Data", 'y':0.95, 'x':0.5, 'xanchor':'center', 'yanchor':'top'},
plot_bgcolor='rgba(255,223,0,0.8)')
In [10]:
# Split the data into training and test sets
# Because we cannot train on future observations, time-series data must not be split at random.
# In a time-series split, the test set always comes after the training set. We hold out the year 2022 for testing and use the rest for training:
test_size = df[df.Date.dt.year==2022].shape[0]
print(test_size)
# Plot the gold price training and test sets:
plt.figure(figsize=(15, 6), dpi=150)
plt.rcParams['axes.facecolor'] = 'yellow'
plt.rc('axes',edgecolor='white')
plt.plot(df.Date[:-test_size], df.Price[:-test_size], color='black', lw=2)
plt.plot(df.Date[-test_size:], df.Price[-test_size:], color='blue', lw=2)
plt.title('Gold Price Training and Test Sets', fontsize=15)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price', fontsize=12)
plt.legend(['Training set', 'Test set'], loc='upper left', prop={'size': 15})
plt.grid(color='white')
plt.show()
251
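For reference, the chronological split used below can be written as a small helper. This is a sketch mirroring the slicing in the following cells, not code from the original run:

```python
def chronological_split(prices, test_size):
    """Hold out the last `test_size` observations for testing.

    No shuffling: every test observation is later than every training one,
    so the model never trains on the future.
    """
    return prices[:-test_size], prices[-test_size:]

train_prices, test_prices = chronological_split(df.Price, test_size)
```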
In [11]:
# Scale the data
# Since the goal is to predict the price from its own history alone, we scale Price with MinMaxScaler to keep the inputs in a range that trains stably:
scaler = MinMaxScaler()
scaler.fit(df.Price.values.reshape(-1,1))
Out[11]:
MinMaxScaler()
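MinMaxScaler's default transform is simply `(x - min) / (max - min)` per feature. A quick sketch verifying this against the fitted scaler's `data_min_` and `data_max_` attributes:

```python
import numpy as np

x = df.Price.values.reshape(-1, 1)
# Manual min-max scaling should agree with the fitted transformer
manual = (x - scaler.data_min_) / (scaler.data_max_ - scaler.data_min_)
assert np.allclose(manual, scaler.transform(x))
```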
In [12]:
# Reframe the data with sliding windows
# Using previous time steps to predict the next one is called the sliding-window method.
# It lets us recast the time series as a supervised-learning problem:
# the previous time steps become the input variables and the next time step becomes the output variable.
# The number of previous time steps is called the window width; here we set it to 60.
# X_train and X_test are therefore nested lists, each inner list holding 60 consecutive prices,
# while y_train and y_test hold the next-day gold price corresponding to each list in X_train and X_test:
window_size = 60
# Training set:
train_data = df.Price[:-test_size]
train_data = scaler.transform(train_data.values.reshape(-1,1))
X_train = []
y_train = []
for i in range(window_size, len(train_data)):
    X_train.append(train_data[i-window_size:i, 0])
    y_train.append(train_data[i, 0])
# Test set (prepend the last window_size training prices so the first test window is complete):
test_data = df.Price[-test_size-window_size:]
test_data = scaler.transform(test_data.values.reshape(-1,1))
X_test = []
y_test = []
for i in range(window_size, len(test_data)):
    X_test.append(test_data[i-window_size:i, 0])
    y_test.append(test_data[i, 0])
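The same windows can be built without an explicit Python loop. A sketch using NumPy's `sliding_window_view`, which should produce arrays equal to `np.array(X_train)` and `np.array(y_train)`:

```python
from numpy.lib.stride_tricks import sliding_window_view

def make_windows(series_1d, window):
    # Each length-(window+1) slice yields one input window and its target.
    slices = sliding_window_view(series_1d, window + 1)
    return slices[:, :-1], slices[:, -1]

X_alt, y_alt = make_windows(train_data[:, 0], window_size)
assert X_alt.shape == (len(train_data) - window_size, window_size)
```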
In [13]:
# Convert the data to NumPy arrays
# X_train and X_test are currently nested (2-D) lists and y_train is a 1-D list. We convert them to higher-dimensional NumPy arrays, the format TensorFlow expects when training a neural network:
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
y_train = np.reshape(y_train, (-1,1))
y_test = np.reshape(y_test, (-1,1))
print('X_train Shape: ', X_train.shape)
print('y_train Shape: ', y_train.shape)
print('X_test Shape: ', X_test.shape)
print('y_test Shape: ', y_test.shape)
X_train Shape:  (5753, 60, 1)
y_train Shape:  (5753, 1)
X_test Shape:  (251, 60, 1)
y_test Shape:  (251, 1)
In [14]:
# Build the LSTM network
# We build an LSTM (Long Short-Term Memory) recurrent neural network, an architecture designed to mitigate the vanishing-gradient problem:
# Model definition:
def define_model():
    input1 = Input(shape=(window_size, 1))
    x = LSTM(units=64, return_sequences=True)(input1)
    x = Dropout(0.2)(x)
    x = LSTM(units=64, return_sequences=True)(x)
    x = Dropout(0.2)(x)
    x = LSTM(units=64)(x)
    x = Dropout(0.2)(x)
    # Note: softmax is an unusual choice for a regression hidden layer
    # (it forces the 32 activations to sum to 1); 'relu' would be more conventional.
    x = Dense(32, activation='softmax')(x)
    dnn_output = Dense(1)(x)
    model = Model(inputs=input1, outputs=[dnn_output])
    model.compile(loss='mean_squared_error', optimizer='Nadam')
    model.summary()
    return model
# Model training:
model = define_model()
history = model.fit(X_train, y_train, epochs=150, batch_size=32, validation_split=0.1, verbose=1)
# Model evaluation: next, we score the time-series forecast with MAPE (mean absolute percentage error).
# Note that y_test and y_pred are still in the scaled [0, 1] range here:
result = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)
MAPE = mean_absolute_percentage_error(y_test, y_pred)
Accuracy = 1 - MAPE
print("Test Loss:", result)
print("Test MAPE:", MAPE)
print("Test Accuracy:", Accuracy)
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 60, 1)] 0
lstm (LSTM) (None, 60, 64) 16896
dropout (Dropout) (None, 60, 64) 0
lstm_1 (LSTM) (None, 60, 64) 33024
dropout_1 (Dropout) (None, 60, 64) 0
lstm_2 (LSTM) (None, 64) 33024
dropout_2 (Dropout) (None, 64) 0
dense (Dense) (None, 32) 2080
dense_1 (Dense) (None, 1) 33
=================================================================
Total params: 85057 (332.25 KB)
Trainable params: 85057 (332.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
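The parameter counts in this summary can be reproduced by hand. An LSTM layer has four gates, each with an input kernel, a recurrent kernel, and a bias, giving `4 * (units * (input_dim + units) + units)` parameters. A quick check:

```python
def lstm_params(units, input_dim):
    # 4 gates x (input kernel + recurrent kernel + bias)
    return 4 * (units * (input_dim + units) + units)

print(lstm_params(64, 1))   # 16896 -> lstm
print(lstm_params(64, 64))  # 33024 -> lstm_1 and lstm_2
print(64 * 32 + 32)         # 2080  -> dense
print(32 * 1 + 1)           # 33    -> dense_1
# 16896 + 2*33024 + 2080 + 33 = 85057 total params
```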
Epoch 1/150
162/162 [==============================] - 18s 65ms/step - loss: 0.0106 - val_loss: 0.0088
Epoch 2/150
162/162 [==============================] - 8s 52ms/step - loss: 7.8897e-04 - val_loss: 0.0030
Epoch 3/150
162/162 [==============================] - 19s 116ms/step - loss: 5.2719e-04 - val_loss: 0.0022
Epoch 4/150
162/162 [==============================] - 19s 117ms/step - loss: 4.5727e-04 - val_loss: 0.0013
Epoch 5/150
162/162 [==============================] - 19s 120ms/step - loss: 3.9210e-04 - val_loss: 0.0012
Epoch 6/150
162/162 [==============================] - 19s 117ms/step - loss: 3.5666e-04 - val_loss: 8.6598e-04
Epoch 7/150
162/162 [==============================] - 19s 117ms/step - loss: 3.2906e-04 - val_loss: 9.2532e-04
Epoch 8/150
162/162 [==============================] - 20s 122ms/step - loss: 3.2668e-04 - val_loss: 6.5188e-04
Epoch 9/150
162/162 [==============================] - 19s 117ms/step - loss: 2.8803e-04 - val_loss: 5.3289e-04
Epoch 10/150
162/162 [==============================] - 19s 118ms/step - loss: 2.8474e-04 - val_loss: 6.1622e-04
Epoch 11/150
162/162 [==============================] - 19s 118ms/step - loss: 2.6626e-04 - val_loss: 4.7030e-04
Epoch 12/150
162/162 [==============================] - 19s 118ms/step - loss: 2.4347e-04 - val_loss: 4.7204e-04
Epoch 13/150
162/162 [==============================] - 20s 121ms/step - loss: 2.4051e-04 - val_loss: 7.5888e-04
Epoch 14/150
162/162 [==============================] - 19s 116ms/step - loss: 2.2499e-04 - val_loss: 5.3468e-04
Epoch 15/150
162/162 [==============================] - 19s 118ms/step - loss: 2.1704e-04 - val_loss: 3.3589e-04
Epoch 16/150
162/162 [==============================] - 12s 72ms/step - loss: 2.0513e-04 - val_loss: 3.1571e-04
Epoch 17/150
162/162 [==============================] - 9s 58ms/step - loss: 1.9693e-04 - val_loss: 3.2035e-04
Epoch 18/150
162/162 [==============================] - 9s 53ms/step - loss: 1.9078e-04 - val_loss: 3.2326e-04
Epoch 19/150
162/162 [==============================] - 8s 51ms/step - loss: 1.9439e-04 - val_loss: 4.5259e-04
Epoch 20/150
162/162 [==============================] - 9s 53ms/step - loss: 1.7575e-04 - val_loss: 3.0369e-04
Epoch 21/150
162/162 [==============================] - 9s 56ms/step - loss: 1.7230e-04 - val_loss: 3.4877e-04
Epoch 22/150
162/162 [==============================] - 9s 53ms/step - loss: 1.6820e-04 - val_loss: 2.4779e-04
Epoch 23/150
162/162 [==============================] - 11s 68ms/step - loss: 1.8030e-04 - val_loss: 2.5705e-04
Epoch 24/150
162/162 [==============================] - 21s 130ms/step - loss: 1.6870e-04 - val_loss: 2.4762e-04
Epoch 25/150
162/162 [==============================] - 21s 131ms/step - loss: 1.5947e-04 - val_loss: 2.0439e-04
Epoch 26/150
162/162 [==============================] - 17s 102ms/step - loss: 1.5602e-04 - val_loss: 2.1421e-04
Epoch 27/150
162/162 [==============================] - 9s 55ms/step - loss: 1.5293e-04 - val_loss: 2.4899e-04
Epoch 28/150
162/162 [==============================] - 16s 99ms/step - loss: 1.4568e-04 - val_loss: 2.1670e-04
Epoch 29/150
162/162 [==============================] - 9s 56ms/step - loss: 1.4614e-04 - val_loss: 3.2282e-04
Epoch 30/150
162/162 [==============================] - 10s 63ms/step - loss: 1.4187e-04 - val_loss: 1.9240e-04
Epoch 31/150
162/162 [==============================] - 9s 56ms/step - loss: 1.4024e-04 - val_loss: 2.1943e-04
Epoch 32/150
162/162 [==============================] - 10s 59ms/step - loss: 1.3924e-04 - val_loss: 2.2399e-04
Epoch 33/150
162/162 [==============================] - 11s 70ms/step - loss: 1.3581e-04 - val_loss: 1.8295e-04
Epoch 34/150
162/162 [==============================] - 20s 125ms/step - loss: 1.2495e-04 - val_loss: 1.6639e-04
Epoch 35/150
162/162 [==============================] - 21s 130ms/step - loss: 1.2901e-04 - val_loss: 3.4664e-04
Epoch 36/150
162/162 [==============================] - 20s 124ms/step - loss: 1.2061e-04 - val_loss: 1.8357e-04
Epoch 37/150
162/162 [==============================] - 9s 53ms/step - loss: 1.2016e-04 - val_loss: 1.5767e-04
Epoch 38/150
162/162 [==============================] - 9s 53ms/step - loss: 1.2104e-04 - val_loss: 3.2001e-04
Epoch 39/150
162/162 [==============================] - 8s 50ms/step - loss: 1.1567e-04 - val_loss: 1.7161e-04
Epoch 40/150
162/162 [==============================] - 8s 52ms/step - loss: 1.0733e-04 - val_loss: 1.6174e-04
Epoch 41/150
162/162 [==============================] - 8s 51ms/step - loss: 1.1083e-04 - val_loss: 1.7505e-04
Epoch 42/150
162/162 [==============================] - 8s 50ms/step - loss: 1.0501e-04 - val_loss: 1.4032e-04
Epoch 43/150
162/162 [==============================] - 8s 50ms/step - loss: 1.0446e-04 - val_loss: 1.4984e-04
Epoch 44/150
162/162 [==============================] - 8s 51ms/step - loss: 1.0508e-04 - val_loss: 1.4646e-04
Epoch 45/150
162/162 [==============================] - 8s 50ms/step - loss: 1.0283e-04 - val_loss: 1.3543e-04
Epoch 46/150
162/162 [==============================] - 9s 54ms/step - loss: 1.0530e-04 - val_loss: 2.0344e-04
Epoch 47/150
162/162 [==============================] - 15s 92ms/step - loss: 1.1172e-04 - val_loss: 1.5000e-04
Epoch 48/150
162/162 [==============================] - 19s 116ms/step - loss: 1.0076e-04 - val_loss: 1.2793e-04
Epoch 49/150
162/162 [==============================] - 19s 116ms/step - loss: 9.7277e-05 - val_loss: 1.8630e-04
Epoch 50/150
162/162 [==============================] - 19s 117ms/step - loss: 9.5232e-05 - val_loss: 1.2460e-04
Epoch 51/150
162/162 [==============================] - 19s 119ms/step - loss: 9.2612e-05 - val_loss: 3.2295e-04
Epoch 52/150
162/162 [==============================] - 19s 116ms/step - loss: 9.5978e-05 - val_loss: 1.4077e-04
Epoch 53/150
162/162 [==============================] - 19s 115ms/step - loss: 8.5737e-05 - val_loss: 1.3975e-04
Epoch 54/150
162/162 [==============================] - 18s 113ms/step - loss: 9.3604e-05 - val_loss: 1.1063e-04
Epoch 55/150
162/162 [==============================] - 19s 120ms/step - loss: 8.7095e-05 - val_loss: 1.6231e-04
Epoch 56/150
162/162 [==============================] - 8s 52ms/step - loss: 9.3599e-05 - val_loss: 1.2728e-04
Epoch 57/150
162/162 [==============================] - 9s 53ms/step - loss: 8.5044e-05 - val_loss: 9.9109e-05
Epoch 58/150
162/162 [==============================] - 8s 52ms/step - loss: 8.5785e-05 - val_loss: 2.5197e-04
Epoch 59/150
162/162 [==============================] - 8s 52ms/step - loss: 8.3934e-05 - val_loss: 1.1901e-04
Epoch 60/150
162/162 [==============================] - 9s 53ms/step - loss: 8.1970e-05 - val_loss: 1.0072e-04
Epoch 61/150
162/162 [==============================] - 8s 52ms/step - loss: 8.3421e-05 - val_loss: 9.8977e-05
Epoch 62/150
162/162 [==============================] - 8s 52ms/step - loss: 8.2547e-05 - val_loss: 1.0643e-04
Epoch 63/150
162/162 [==============================] - 8s 52ms/step - loss: 7.9152e-05 - val_loss: 1.0729e-04
Epoch 64/150
162/162 [==============================] - 8s 52ms/step - loss: 7.8550e-05 - val_loss: 1.0471e-04
Epoch 65/150
162/162 [==============================] - 8s 51ms/step - loss: 8.1759e-05 - val_loss: 1.2367e-04
Epoch 66/150
162/162 [==============================] - 8s 52ms/step - loss: 8.0237e-05 - val_loss: 1.2168e-04
Epoch 67/150
162/162 [==============================] - 9s 53ms/step - loss: 7.7877e-05 - val_loss: 1.2800e-04
Epoch 68/150
162/162 [==============================] - 9s 53ms/step - loss: 7.8707e-05 - val_loss: 3.8363e-04
Epoch 69/150
162/162 [==============================] - 9s 54ms/step - loss: 7.6098e-05 - val_loss: 9.7754e-05
Epoch 70/150
162/162 [==============================] - 9s 53ms/step - loss: 7.5200e-05 - val_loss: 9.7792e-05
Epoch 71/150
162/162 [==============================] - 9s 54ms/step - loss: 7.5631e-05 - val_loss: 1.1650e-04
Epoch 72/150
162/162 [==============================] - 9s 53ms/step - loss: 7.8414e-05 - val_loss: 1.2821e-04
Epoch 73/150
162/162 [==============================] - 9s 54ms/step - loss: 7.5443e-05 - val_loss: 1.5868e-04
Epoch 74/150
162/162 [==============================] - 9s 56ms/step - loss: 7.5302e-05 - val_loss: 9.0591e-05
Epoch 75/150
162/162 [==============================] - 8s 51ms/step - loss: 7.2389e-05 - val_loss: 1.0445e-04
Epoch 76/150
162/162 [==============================] - 9s 53ms/step - loss: 7.5871e-05 - val_loss: 1.4784e-04
Epoch 77/150
162/162 [==============================] - 8s 52ms/step - loss: 7.0340e-05 - val_loss: 2.7843e-04
Epoch 78/150
162/162 [==============================] - 8s 52ms/step - loss: 7.2507e-05 - val_loss: 9.0265e-05
Epoch 79/150
162/162 [==============================] - 9s 53ms/step - loss: 7.2328e-05 - val_loss: 1.6916e-04
Epoch 80/150
162/162 [==============================] - 9s 53ms/step - loss: 7.2294e-05 - val_loss: 1.0140e-04
Epoch 81/150
162/162 [==============================] - 9s 53ms/step - loss: 7.1531e-05 - val_loss: 1.2586e-04
Epoch 82/150
162/162 [==============================] - 9s 53ms/step - loss: 6.9363e-05 - val_loss: 1.0426e-04
Epoch 83/150
162/162 [==============================] - 8s 51ms/step - loss: 7.1142e-05 - val_loss: 1.1752e-04
Epoch 84/150
162/162 [==============================] - 9s 54ms/step - loss: 6.9948e-05 - val_loss: 2.3219e-04
Epoch 85/150
162/162 [==============================] - 9s 55ms/step - loss: 6.8561e-05 - val_loss: 1.1243e-04
Epoch 86/150
162/162 [==============================] - 9s 58ms/step - loss: 6.7515e-05 - val_loss: 1.1163e-04
Epoch 87/150
162/162 [==============================] - 9s 55ms/step - loss: 6.7472e-05 - val_loss: 8.7347e-05
Epoch 88/150
162/162 [==============================] - 9s 56ms/step - loss: 6.7838e-05 - val_loss: 1.2508e-04
Epoch 89/150
162/162 [==============================] - 9s 58ms/step - loss: 6.6798e-05 - val_loss: 1.7729e-04
Epoch 90/150
162/162 [==============================] - 11s 66ms/step - loss: 6.5392e-05 - val_loss: 1.0309e-04
Epoch 91/150
162/162 [==============================] - 10s 62ms/step - loss: 6.6740e-05 - val_loss: 9.1711e-05
Epoch 92/150
162/162 [==============================] - 9s 58ms/step - loss: 6.6058e-05 - val_loss: 9.4432e-05
Epoch 93/150
162/162 [==============================] - 9s 54ms/step - loss: 6.7823e-05 - val_loss: 1.5534e-04
Epoch 94/150
162/162 [==============================] - 9s 57ms/step - loss: 6.8266e-05 - val_loss: 2.6834e-04
Epoch 95/150
162/162 [==============================] - 9s 56ms/step - loss: 6.5091e-05 - val_loss: 1.6613e-04
Epoch 96/150
162/162 [==============================] - 10s 60ms/step - loss: 6.5533e-05 - val_loss: 3.0703e-04
Epoch 97/150
162/162 [==============================] - 9s 57ms/step - loss: 6.5492e-05 - val_loss: 9.0506e-05
Epoch 98/150
162/162 [==============================] - 9s 57ms/step - loss: 6.6845e-05 - val_loss: 1.0469e-04
Epoch 99/150
162/162 [==============================] - 9s 56ms/step - loss: 6.2633e-05 - val_loss: 9.5170e-05
Epoch 100/150
162/162 [==============================] - 9s 54ms/step - loss: 6.2065e-05 - val_loss: 9.7500e-05
Epoch 101/150
162/162 [==============================] - 9s 56ms/step - loss: 6.0304e-05 - val_loss: 8.6625e-05
Epoch 102/150
162/162 [==============================] - 9s 53ms/step - loss: 6.0777e-05 - val_loss: 8.5349e-05
Epoch 103/150
162/162 [==============================] - 9s 58ms/step - loss: 5.9733e-05 - val_loss: 9.3307e-05
Epoch 104/150
162/162 [==============================] - 9s 57ms/step - loss: 6.3923e-05 - val_loss: 7.3567e-05
Epoch 105/150
162/162 [==============================] - 9s 56ms/step - loss: 6.2343e-05 - val_loss: 7.7206e-05
Epoch 106/150
162/162 [==============================] - 17s 106ms/step - loss: 6.1902e-05 - val_loss: 2.2471e-04
Epoch 107/150
162/162 [==============================] - 19s 117ms/step - loss: 6.0830e-05 - val_loss: 7.1722e-05
Epoch 108/150
162/162 [==============================] - 19s 117ms/step - loss: 6.3875e-05 - val_loss: 8.6304e-05
Epoch 109/150
162/162 [==============================] - 19s 117ms/step - loss: 6.2178e-05 - val_loss: 8.8512e-05
Epoch 110/150
162/162 [==============================] - 19s 116ms/step - loss: 6.2979e-05 - val_loss: 7.9009e-05
Epoch 111/150
162/162 [==============================] - 19s 118ms/step - loss: 6.1746e-05 - val_loss: 6.8209e-05
Epoch 112/150
162/162 [==============================] - 19s 117ms/step - loss: 6.2086e-05 - val_loss: 7.5832e-05
Epoch 113/150
162/162 [==============================] - 19s 117ms/step - loss: 5.9551e-05 - val_loss: 8.6154e-05
Epoch 114/150
162/162 [==============================] - 19s 116ms/step - loss: 5.8919e-05 - val_loss: 8.4682e-05
Epoch 115/150
162/162 [==============================] - 19s 116ms/step - loss: 6.1298e-05 - val_loss: 2.0378e-04
Epoch 116/150
162/162 [==============================] - 19s 120ms/step - loss: 6.0438e-05 - val_loss: 1.0652e-04
Epoch 117/150
162/162 [==============================] - 19s 116ms/step - loss: 5.9773e-05 - val_loss: 1.0248e-04
Epoch 118/150
162/162 [==============================] - 18s 109ms/step - loss: 6.0398e-05 - val_loss: 2.2634e-04
Epoch 119/150
162/162 [==============================] - 10s 63ms/step - loss: 5.7438e-05 - val_loss: 8.5198e-05
Epoch 120/150
162/162 [==============================] - 20s 125ms/step - loss: 5.8431e-05 - val_loss: 7.4507e-05
Epoch 121/150
162/162 [==============================] - 21s 127ms/step - loss: 5.5843e-05 - val_loss: 4.5455e-04
Epoch 122/150
162/162 [==============================] - 20s 122ms/step - loss: 5.8847e-05 - val_loss: 8.9782e-05
Epoch 123/150
162/162 [==============================] - 20s 125ms/step - loss: 5.6163e-05 - val_loss: 8.4558e-05
Epoch 124/150
162/162 [==============================] - 21s 128ms/step - loss: 5.6389e-05 - val_loss: 1.1555e-04
Epoch 125/150
162/162 [==============================] - 21s 127ms/step - loss: 5.7521e-05 - val_loss: 8.0397e-05
Epoch 126/150
162/162 [==============================] - 20s 125ms/step - loss: 5.7754e-05 - val_loss: 1.4261e-04
Epoch 127/150
162/162 [==============================] - 21s 127ms/step - loss: 5.7707e-05 - val_loss: 7.5376e-05
Epoch 128/150
162/162 [==============================] - 14s 89ms/step - loss: 5.5630e-05 - val_loss: 6.8344e-05
Epoch 129/150
162/162 [==============================] - 8s 51ms/step - loss: 5.7338e-05 - val_loss: 1.3198e-04
Epoch 130/150
162/162 [==============================] - 17s 106ms/step - loss: 5.5127e-05 - val_loss: 7.6997e-05
Epoch 131/150
162/162 [==============================] - 19s 115ms/step - loss: 5.7834e-05 - val_loss: 6.6510e-05
Epoch 132/150
162/162 [==============================] - 19s 116ms/step - loss: 5.5019e-05 - val_loss: 8.3544e-05
Epoch 133/150
162/162 [==============================] - 19s 117ms/step - loss: 5.6976e-05 - val_loss: 7.3942e-05
Epoch 134/150
162/162 [==============================] - 19s 116ms/step - loss: 5.4899e-05 - val_loss: 1.0027e-04
Epoch 135/150
162/162 [==============================] - 19s 116ms/step - loss: 5.6005e-05 - val_loss: 1.0558e-04
Epoch 136/150
162/162 [==============================] - 19s 116ms/step - loss: 5.5624e-05 - val_loss: 8.0709e-05
Epoch 137/150
162/162 [==============================] - 13s 83ms/step - loss: 5.5474e-05 - val_loss: 7.9278e-05
Epoch 138/150
162/162 [==============================] - 8s 52ms/step - loss: 5.4928e-05 - val_loss: 7.7744e-05
Epoch 139/150
162/162 [==============================] - 8s 52ms/step - loss: 5.6508e-05 - val_loss: 9.0065e-05
Epoch 140/150
162/162 [==============================] - 9s 54ms/step - loss: 5.5559e-05 - val_loss: 1.1390e-04
Epoch 141/150
162/162 [==============================] - 9s 53ms/step - loss: 5.2906e-05 - val_loss: 8.1642e-05
Epoch 142/150
162/162 [==============================] - 9s 53ms/step - loss: 5.3843e-05 - val_loss: 7.0006e-05
Epoch 143/150
162/162 [==============================] - 8s 52ms/step - loss: 5.6863e-05 - val_loss: 1.2550e-04
Epoch 144/150
162/162 [==============================] - 8s 52ms/step - loss: 5.2277e-05 - val_loss: 1.1869e-04
Epoch 145/150
162/162 [==============================] - 8s 52ms/step - loss: 5.2586e-05 - val_loss: 7.4703e-05
Epoch 146/150
162/162 [==============================] - 8s 51ms/step - loss: 5.4492e-05 - val_loss: 9.7432e-05
Epoch 147/150
162/162 [==============================] - 8s 52ms/step - loss: 5.2374e-05 - val_loss: 7.8251e-05
Epoch 148/150
162/162 [==============================] - 9s 53ms/step - loss: 5.3602e-05 - val_loss: 6.7243e-05
Epoch 149/150
162/162 [==============================] - 17s 107ms/step - loss: 5.2564e-05 - val_loss: 6.9065e-05
Epoch 150/150
162/162 [==============================] - 14s 89ms/step - loss: 5.4822e-05 - val_loss: 8.2309e-05
8/8 [==============================] - 2s 23ms/step - loss: 0.0232
8/8 [==============================] - 2s 19ms/step
Test Loss: 0.023154638707637787
Test MAPE: 0.14413526581705413
Test Accuracy: 0.8558647341829458
In [15]:
# Visualize the results
# Return the actual and predicted Price values to their original scale:
y_test_true = scaler.inverse_transform(y_test)
y_test_pred = scaler.inverse_transform(y_pred)
# Examine how closely the model's predicted prices track the actual prices:
plt.figure(figsize=(15, 6), dpi=150)
plt.rcParams['axes.facecolor'] = 'yellow'
plt.rc('axes',edgecolor='white')
plt.plot(df['Date'].iloc[:-test_size], scaler.inverse_transform(train_data), color='black', lw=2)
plt.plot(df['Date'].iloc[-test_size:], y_test_true, color='blue', lw=2)
plt.plot(df['Date'].iloc[-test_size:], y_test_pred, color='red', lw=2)
plt.title('Model Performance on Gold Price Prediction', fontsize=15)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price', fontsize=12)
plt.legend(['Training Data', 'Actual Test Data', 'Predicted Test Data'], loc='upper left', prop={'size': 15})
plt.grid(color='white')
plt.show()
In [16]:
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, r2_score, explained_variance_score
# === Evaluation (on the scaled values) ===
MAPE = mean_absolute_percentage_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
MAE = np.mean(np.abs(y_test - y_pred))
R2 = r2_score(y_test, y_pred)
SMAPE = np.mean(2 * np.abs(y_test - y_pred) / (np.abs(y_test) + np.abs(y_pred))) * 100
EVS = explained_variance_score(y_test, y_pred)
print(f"MAPE: {MAPE * 100:.2f}%")
print(f"RMSE: {RMSE:.2f}")
print(f"MAE: {MAE:.2f}")
print(f"R² Score: {R2:.2f}")
print(f"SMAPE: {SMAPE:.2f}%")
print(f"Explained Variance Score: {EVS:.2f}")
MAPE: 14.41%
RMSE: 0.15
MAE: 0.12
R² Score: -1.89
SMAPE: 16.08%
Explained Variance Score: 0.02
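These metrics are computed on the MinMax-scaled values, which is why RMSE looks tiny while MAPE looks large (percentage errors blow up near zero in the scaled space). A sketch of the same headline metrics on the original price scale, using the already-fitted scaler:

```python
# Inverse-transform back to dollar prices before scoring
y_true_orig = scaler.inverse_transform(y_test)
y_pred_orig = scaler.inverse_transform(y_pred)
print(f"MAPE (original scale): {mean_absolute_percentage_error(y_true_orig, y_pred_orig) * 100:.2f}%")
print(f"RMSE (original scale): {np.sqrt(mean_squared_error(y_true_orig, y_pred_orig)):.2f}")
```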
In [17]:
import datetime
# Set the forecast horizon to the end of 2024
last_date = df['Date'].iloc[-1]
end_date = datetime.datetime(2024, 12, 31)
# Initialize the input sequence
future_predictions = []
current_input = test_data[-window_size:]  # seed with the last window of the test set
# Predict future time steps one at a time
while last_date < end_date:
    # Predict the next time step
    current_pred = model.predict(current_input.reshape(1, window_size, 1))
    future_predictions.append(current_pred[0, 0])
    # Update the input: drop the oldest value, append the new prediction
    current_input = np.append(current_input[1:], current_pred, axis=0)
    # Advance the date (assumes one observation per calendar day)
    last_date += datetime.timedelta(days=1)
# Scale the predictions back to the original price range
future_predictions = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1))
# Pair the predictions with their dates
future_dates = pd.date_range(start=df['Date'].iloc[-1] + datetime.timedelta(days=1),
                             end=end_date)
future_df = pd.DataFrame({
    'Date': future_dates,
    'Predicted Price': future_predictions.flatten()
})
# Plot the historical data together with the forecast
plt.figure(figsize=(15, 6))
plt.plot(df['Date'], df['Price'], label='Historical Prices', color='black')
plt.plot(future_df['Date'], future_df['Predicted Price'], label='Predicted Prices', color='blue')
plt.xlabel('Date')
plt.ylabel('Gold Price')
plt.title('Gold Price Prediction till 2024')
plt.legend()
plt.grid()
plt.show()
1/1 [==============================] - 0s 27ms/step
1/1 [==============================] - 0s 33ms/step
... (one predict-progress line per forecast day, through 2024-12-31)
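The loop above steps one calendar day at a time and logs one progress line per `model.predict` call. A hedged sketch of the same roll-forward as a reusable helper that forecasts only business days (`pd.bdate_range`; the `inclusive=` argument needs pandas ≥ 1.4) and silences the per-step logging with `verbose=0`:

```python
def recursive_forecast(model, seed_window, n_steps):
    """Roll the model forward n_steps, feeding each prediction back in.

    seed_window: array of shape (window_size, 1) in the scaled space.
    """
    window = seed_window.copy()
    preds = []
    for _ in range(n_steps):
        step = model.predict(window.reshape(1, -1, 1), verbose=0)  # no progress lines
        preds.append(step[0, 0])
        window = np.append(window[1:], step, axis=0)
    return np.array(preds).reshape(-1, 1)

future_bdays = pd.bdate_range(df['Date'].iloc[-1], end_date, inclusive='right')
scaled = recursive_forecast(model, test_data[-window_size:], len(future_bdays))
future_prices = scaler.inverse_transform(scaled).flatten()
```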
In [18]:
import datetime
import plotly.graph_objects as go
# Set the forecast horizon to 2024-10-12
forecast_end_date = datetime.datetime(2024, 10, 12)
last_date = df['Date'].iloc[-1]
# Initialize the input sequence
current_input = test_data[-window_size:]  # seed with the last window of the test set
future_predictions = []
future_dates = []
# Rolling forecast
while last_date < forecast_end_date:
    # Predict the next time step
    current_pred = model.predict(current_input.reshape(1, window_size, 1))
    future_predictions.append(current_pred[0, 0])
    # Update the input window
    current_input = np.append(current_input[1:], current_pred, axis=0)
    # Advance the date
    last_date += datetime.timedelta(days=1)
    future_dates.append(last_date)
# Scale the predictions back to the original price range
future_predictions = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1)).flatten()
# Concatenate historical and forecast data
full_dates = pd.concat([df['Date'], pd.Series(future_dates)], ignore_index=True)
full_prices = pd.concat([df['Price'], pd.Series(future_predictions)], ignore_index=True)
# Build an interactive chart
fig = go.Figure()
# Historical price trace
fig.add_trace(go.Scatter(x=df['Date'], y=df['Price'], mode='lines', name='Historical Prices', line=dict(color='black')))
# Forecast price trace
fig.add_trace(go.Scatter(x=future_dates, y=future_predictions, mode='lines', name='Predicted Prices', line=dict(color='blue', dash='dash')))
# Chart layout
fig.update_layout(
title='Gold Price Prediction till 2024-10-12',
xaxis_title='Date',
yaxis_title='Gold Price',
template='plotly_white',
plot_bgcolor='rgba(0,0,0,0)'
)
fig.show()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[18], line 27
     24     future_dates.append(last_date)
     26 # Scale the predictions back to the original price range
---> 27 future_predictions = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1)).flatten()
     29 # Concatenate historical and forecast data
     30 full_dates = pd.concat([df['Date'], pd.Series(future_dates)], ignore_index=True)

File d:\Python310\lib\site-packages\sklearn\preprocessing\_data.py:544, in MinMaxScaler.inverse_transform(self, X)
    530 """Undo the scaling of X according to feature_range.
    531
    532 Parameters
(...)
    540     Transformed data.
    541 """
    542 check_is_fitted(self)
--> 544 X = check_array(
    545     X, copy=self.copy, dtype=FLOAT_DTYPES, force_all_finite="allow-nan"
    546 )
    548 X -= self.min_
    549 X /= self.scale_

File d:\Python310\lib\site-packages\sklearn\utils\validation.py:967, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
    965 n_samples = _num_samples(array)
    966 if n_samples < ensure_min_samples:
--> 967     raise ValueError(
    968         "Found array with %d sample(s) (shape=%s) while a"
    969         " minimum of %d is required%s."
    970         % (n_samples, array.shape, ensure_min_samples, context)
    971     )

ValueError: Found array with 0 sample(s) (shape=(0, 1)) while a minimum of 1 is required.
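The traceback arises because the while loop never ran: `future_predictions` stayed empty, so `inverse_transform` received a `(0, 1)` array. That happens when the DataFrame's last date already falls on or after `forecast_end_date`. A minimal guard, as a sketch:

```python
# Fail fast with a clear message instead of an opaque sklearn error
if df['Date'].iloc[-1] >= forecast_end_date:
    raise ValueError(
        f"Nothing to forecast: the data already ends at "
        f"{df['Date'].iloc[-1]:%Y-%m-%d}, on or after {forecast_end_date:%Y-%m-%d}."
    )
# ...only then run the rolling forecast and the inverse_transform step.
```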