In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
# Load the BABA price history from a CSV file given by relative path; the
# later cells all work on its 'Adj Close' column.
df = pd.read_csv('BABA.csv')
In [2]:
# 1. 检查数据是否平稳。
from statsmodels.tsa.stattools import adfuller
def check_stationary(df, significance=0.05):
    """Run the Augmented Dickey-Fuller test on a series and print the verdict.

    The null hypothesis of the ADF test is that the series has a unit root
    (is non-stationary); it is rejected when the p-value is at or below
    ``significance``.

    Parameters
    ----------
    df : array-like
        One-dimensional series of observations passed straight to ``adfuller``.
    significance : float, default 0.05
        Significance level for rejecting the null hypothesis. The previous
        hard-coded cut-off of 0.5 would have labelled almost any series as
        stationary.

    Returns
    -------
    None
        The ADF statistic, the p-value and the conclusion are printed.
    """
    result = adfuller(df)
    adf_stat, p_value = result[0], result[1]
    print('ADF Statistic: %f' % adf_stat)
    print('p-value: %f' % p_value)
    if p_value <= significance:
        print('reject the null hypothesis. Data is stationary')
    else:
        print('Fail to reject the null hypothesis. Data is not stationary')
check_stationary(df['Adj Close'])
# The test output below reports that the series is not stationary.
# A SARIMA model is therefore chosen for the time-series analysis that follows.
ADF Statistic: -0.912328 p-vslue: 0.783886 Fail to reject the null hypothesis. Data is not stationary
In [3]:
# Overview of the price movement: additive decomposition of 'Adj Close' into
# trend, seasonal and residual components.
from statsmodels.tsa.seasonal import seasonal_decompose
# period=30 is the assumed cycle length in observations -- TODO confirm it
# matches the sampling frequency of the data.
decompose = seasonal_decompose(df['Adj Close'], model='additive', period=30)
# plot() returns the Figure. Leaving it as the cell's last expression makes the
# inline backend render the same figure twice (once as Out[...]), so bind it
# and show it explicitly instead.
decompose_fig = decompose.plot()
plt.show()
# Author's reading of the plot: the stock's trend is downward.
# The series shows a seasonal component, and some residuals are negative.
Out[3]:
In [4]:
# Differencing plots: compare the raw series with its 1st and 2nd differences
# (and the ACF of each) to choose the order of differencing.
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
import matplotlib.pyplot as plt

adj_close = df['Adj Close']
# (panel title, series) for each row of the grid.
panels = [
    ('Original Series', adj_close),
    ('1st Order Differencing', adj_close.diff()),
    ('2nd Order Differencing', adj_close.diff().diff()),
]

# Share the x-axis per column only: the left column is indexed by observation,
# the right column by lag, so sharing across columns would squash the ACF plots.
fig, axes = plt.subplots(3, 2, sharex='col')
for (title, series), (series_ax, acf_ax) in zip(panels, axes):
    series_ax.plot(series)
    series_ax.set_title(title)
    # diff() leaves leading NaNs, which plot_acf cannot handle.
    plot_acf(series.dropna(), ax=acf_ax)
fig.tight_layout()
# The call parentheses were missing, so the cell only echoed the function object.
plt.show()
# NOTE(review): the original note here concluded "P = 1" from these plots.
# Differencing plots inform d (the differencing order), not the AR order p --
# confirm which order was meant.
Out[4]:
<function matplotlib.pyplot.show(close=None, block=None)>
In [5]:
# Autocorrelation of the raw 'Adj Close' series, drawn twice (pandas helper and
# statsmodels with a 95% confidence band) and also kept as a table in x_acf.
# NOTE(review): the original note said this step yields d; the ACF is
# conventionally read for the MA order q -- confirm the intended meaning.
from statsmodels.tsa.stattools import acf
adj_close = df['Adj Close']
pd.plotting.autocorrelation_plot(adj_close)
plot_acf(adj_close, alpha=0.05)
acf_values = acf(adj_close)
x_acf = pd.DataFrame(acf_values)
In [6]:
# Partial autocorrelation of 'Adj Close' for the first 20 lags, with a 95% band.
# NOTE(review): the original note said this step yields q; the PACF is
# conventionally read for the AR order p -- confirm the intended meaning.
# Bind the returned Figure and show it explicitly; as a bare last expression it
# is rendered a second time as the cell's Out[...] value.
pacf_fig = plot_pacf(df['Adj Close'], lags=20, alpha=0.05)
plt.show()
# Orders chosen by the author for the model below:
# d = 3
# p = 1
# q = 2
Out[6]:
In [7]:
import statsmodels.api as sm

# Orders used below for both the non-seasonal and the seasonal part.
p, d, q = 1, 3, 2

# Seasonal ARIMA specification on 'Adj Close' with a seasonal period of 3.
# NOTE(review): the stored output of this cell shows a ConvergenceWarning and
# MA coefficients close to -2 and 1, which may indicate over-differencing
# (d = 3 and seasonal D = 3). The seasonal period of 3 also differs from the
# period=30 used for the decomposition. Confirm these orders are intended.
sarima_spec = sm.tsa.statespace.SARIMAX(
    endog=df['Adj Close'],
    order=(p, d, q),
    seasonal_order=(p, d, q, 3),
)

# `model` ends up bound to the fitted results object; the forecasting cell
# calls .predict() on it.
model = sarima_spec.fit()
print(model.summary())
SARIMAX Results
=========================================================================================
Dep. Variable: Adj Close No. Observations: 1089
Model: SARIMAX(1, 3, 2)x(1, 3, 2, 3) Log Likelihood -3731.533
Date: Mon, 23 Sep 2024 AIC 7477.066
Time: 12:36:56 BIC 7511.940
Sample: 0 HQIC 7490.273
- 1089
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.0452 0.025 -1.787 0.074 -0.095 0.004
ma.L1 -1.9951 0.057 -34.705 0.000 -2.108 -1.882
ma.L2 0.9978 0.058 17.328 0.000 0.885 1.111
ar.S.L3 -0.9855 0.013 -73.051 0.000 -1.012 -0.959
ma.S.L3 -0.6183 0.071 -8.660 0.000 -0.758 -0.478
ma.S.L6 -0.3742 0.044 -8.480 0.000 -0.461 -0.288
sigma2 33.1632 2.235 14.840 0.000 28.783 37.543
===================================================================================
Ljung-Box (L1) (Q): 0.98 Jarque-Bera (JB): 278.46
Prob(Q): 0.32 Prob(JB): 0.00
Heteroskedasticity (H): 0.15 Skew: -0.00
Prob(H) (two-sided): 0.00 Kurtosis: 5.49
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
D:\Python310\lib\site-packages\statsmodels\base\model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
In [11]:
# Out-of-sample forecast starting right after the last observation. `end` is
# inclusive in statsmodels' predict, so this yields 11 values.
n_obs = len(df['Adj Close'])
predictions = model.predict(start=n_obs, end=n_obs + 10)

# Visualise the forecast against the observed series.
fig, ax = plt.subplots(figsize=(15, 10))
ax.plot(df['Adj Close'], label='Actual')
ax.plot(predictions, color='red', label='Predicted')
# The x-axis is the row position of each observation; 'Adj Close' is the
# quantity on the y-axis (it was previously used as the x label).
ax.set_xlabel('Observation index')
ax.set_ylabel('Adj Close')
ax.set_title('Alibaba Stock Forecast')
ax.legend(loc='upper left')
plt.show()
In [ ]: