In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
plt.rcParams['font.sans-serif'] = ['SimHei'] #解决中文显示
plt.rcParams['axes.unicode_minus'] = False #解决符号无法显示
data = pd.read_csv('长江水质污染.csv')
data
Out[1]:
| 年份 | 排污总量 | |
|---|---|---|
| 0 | 1995 | 174.0 |
| 1 | 1996 | 179.0 |
| 2 | 1997 | 183.0 |
| 3 | 1998 | 189.0 |
| 4 | 1999 | 207.0 |
| 5 | 2000 | 234.0 |
| 6 | 2001 | 220.5 |
| 7 | 2002 | 256.0 |
| 8 | 2003 | 270.0 |
| 9 | 2004 | 285.0 |
In [4]:
#准备数据
plt.figure(figsize=(8,5))
y = data.groupby('年份')['排污总量'].sum().values.tolist()
x =range(10)
x_ticks = [f'{i}年' for i in data.groupby('年份')['排污总量'].sum().index.to_list()]
plt.xticks(x,x_ticks)
#添加标题
plt.title('各年份的排污总量')
plt.xlabel('年份')
plt.ylabel('排污总量')
plt.plot(x,y,marker='o',color='g')
for a,b in zip(data.groupby('年份')['排污总量'].sum().reset_index(drop=True).index,data.groupby('年份')['排污总量'].sum().reset_index(drop=True).values):
plt.text(a,b,'%d'%b,ha='center',va='bottom',fontsize=14,fontdict={'color':'red'})
plt.show()
In [5]:
# 原始序列x_0
x_0 = np.array(data["排污总量"].to_list())
# 计算一次累加生成序列x_1
x_1 = x_0.cumsum()
# 作图可视化
plt.figure(figsize=(8,5))
x_ticks = [f'{i}年' for i in data.groupby('年份')['排污总量'].sum().index.to_list()]
plt.xticks(x,x_ticks)
plt.xlabel('年份')
plt.ylabel('排污总量')
# 绘制x_1线图
plt.plot(range(10),x_1,marker='o',color='g',label='X(1)')
# 给图添加数值
for a,b in zip(data.groupby('年份')['排污总量'].sum().reset_index(drop=True).index,x_1):
plt.text(a,b,'%d'%b,ha='center',va='bottom',fontsize=14,fontdict={'color':'green'})
# 绘制x_0线图
plt.plot(x,y,marker='X',color='r',label='X(0)')
# 给图添加数值
for a,b in zip(data.groupby('年份')['排污总量'].sum().reset_index(drop=True).index,data.groupby('年份')['排污总量'].sum().reset_index(drop=True).values):
plt.text(a,b,'%d'%b,ha='center',va='bottom',fontsize=14,fontdict={'color':'red'})
# 添加标签
plt.legend()
plt.show()
In [9]:
import math as mt
def grade_ratio_test(x_0):
X0 = x_0
for i in range(len(X0)-1):
l = X0[i]/X0[i+1]
if l <= mt.exp(-2/(len(X0)+1)) or l >= mt.exp(2/(len(X0)+1)):
break
else:
pass
if i == len(X0)-2 and l > mt.exp(-2/(len(X0)+1)) and l < mt.exp(2/(len(X0)+1)):
print('级比检验通过!')
else:
print('级比检验不通过!')
grade_ratio_test(x_0)
级比检验通过!
In [11]:
# 1.计算一次累加生成序列x_1
x_1 = x_0.cumsum()
# 2.计算均值生成序列z_1
z_1 = (x_1[:-1] + x_1[1:]) / 2.0
# 3.计算B矩阵
B = np.vstack([-z_1, np.ones(len(x_0)-1)]).T
# 4.计算Y矩阵
Y = x_0[1:].reshape((-1, 1))
# 5.计算a,b
# a为发展系数 b为灰色作用量
[[a], [b]] = np.linalg.inv(B.T @ B) @ B.T @ Y # 计算参数
# 假设预测未来5年
x_1_predict = []
n = len(x_0)
k = 5
for i in range(n+k): # 如果预测k个未来年份 这里就n+k 假设预测未来5年就n+5
x_1_predict.append((x_0[0]-b/a)*np.exp(-a*i) + b/a) # 递推计算 第k+1个数 比如k=0的时候 就是第一个预测值
x_1_predict
# 7.还原数据
x_0_predict = np.hstack([x_0[0],np.diff(x_1_predict)])
x_0_predict
Out[11]:
array([174. , 172.80895647, 183.93550596, 195.77845411,
208.38392724, 221.80102163, 236.08199464, 251.28246834,
267.46164606, 284.68254307, 303.01223193, 322.52210378,
343.28814636, 365.39124002, 388.91747268])
In [12]:
import pandas as pd
result = pd.DataFrame({"原始数据":x_0,"预测数据":x_0_predict[:-k]})
# 残差:真实值 - 预测值
result["残差"] = result["原始数据"] - result["预测数据"]
# 相对误差
result["相对误差"] = (abs(result["原始数据"] - result["预测数据"]) / result["原始数据"]).map('{:.2%}'.format)
# 级比偏差
lambda_x = x_0[:-1] / x_0[1:]
result["级比偏差值"] = np.append(np.nan, abs(1-(1-0.5*a)/(1+0.5*a)*lambda_x))
result
Out[12]:
| 原始数据 | 预测数据 | 残差 | 相对误差 | 级比偏差值 | |
|---|---|---|---|---|---|
| 0 | 174.0 | 174.000000 | 0.000000 | 0.00% | NaN |
| 1 | 179.0 | 172.808956 | 6.191044 | 3.46% | 0.034676 |
| 2 | 183.0 | 183.935506 | -0.935506 | 0.51% | 0.041142 |
| 3 | 189.0 | 195.778454 | -6.778454 | 3.59% | 0.030617 |
| 4 | 207.0 | 208.383927 | -1.383927 | 0.67% | 0.028149 |
| 5 | 234.0 | 221.801022 | 12.198978 | 5.21% | 0.058408 |
| 6 | 220.5 | 236.081995 | -15.581995 | 7.07% | 0.129576 |
| 7 | 256.0 | 251.282468 | 4.717532 | 1.84% | 0.083195 |
| 8 | 270.0 | 267.461646 | 2.538354 | 0.94% | 0.009216 |
| 9 | 285.0 | 284.682543 | 0.317457 | 0.11% | 0.008387 |
In [13]:
result = result.set_index(data.年份)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# 设定 seaborn 风格
sns.set()
#用 matplotlib 画出每个序列的折线
plt.figure(figsize=(10,6))
plt.plot(result['原始数据'], label='Original data',marker='o',color='g')
plt.plot(result['预测数据'], label='Predicted data',marker='X',color='r')
# 设定图例和标题
plt.legend()
plt.title('Comparison of Original Data and Predicted Data')
# 显示图表
plt.show()
In [20]:
year = data["年份"].tolist()
for i in range(k):
year.append(year[-1]+1)
x_0_predict_more = pd.DataFrame({"未来预测":x_0_predict,"年份":year})
x_0_predict_more = x_0_predict_more.set_index("年份")
x_0_predict_more.iloc[0:n-1,:] = np.nan
#用 matplotlib 画出每个序列的折线
plt.figure(figsize=(10,6))
plt.plot(result['原始数据'], label='Original data',marker='o',color='g')
plt.plot(result['预测数据'], label='Predicted data',marker='X',color='r')
plt.plot(x_0_predict_more['未来预测'], label='Predicted Future data',marker='1',color='b',linestyle='--')
# 设定图例和标题
plt.legend()
plt.title('Comparison of Original Data and Predicted Data')
# 显示图表
plt.show()
In [ ]: