from __future__ import print_function
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA

"""
ARIMA模型Python实现

ARIMA模型基本假设:
    1.数据平稳性
    2.白噪声同方差
    3.数据无周期性

参考文献:
    https://www.cnblogs.com/junge-mike/p/9335054.html
    https://support.minitab.com/zh-cn/minitab/18/help-and-how-to/modeling-statistics/time-series/how-to/partial-autocorrelation/interpret-the-results/partial-autocorrelation-function-pacf/
    https://support.minitab.com/zh-cn/minitab/18/help-and-how-to/modeling-statistics/time-series/how-to/autocorrelation/interpret-the-results/autocorrelation-function-acf/
"""

####1. Load the data
# Read the CSV into a DataFrame; the "data" column holds the series values
# and the "sdate" column holds the dates.
df = pd.read_csv(
    r"C:\Users\ld\Desktop\yc18\train1.csv",
    encoding="cp936",
)
# Build the working series: values from "data", indexed by "sdate".
# The index holds the raw "sdate" values as read (no datetime parsing here).
dta = df.set_index("sdate")["data"]


####2. Differencing
# Difference the raw series and plot the result to eyeball stationarity.
# NOTE(review): Series.diff(3) is a lag-3 difference (x[t] - x[t-3]), not a
# third-order difference -- confirm this is the intended transformation.
diffs = dta.diff(periods=3)

fig, ax2 = plt.subplots(figsize=(12, 8))
diffs.plot(ax=ax2)
plt.show()

# The first three entries are NaN after the lag-3 difference; drop them so the
# later tests and model fits receive a clean series.
diffs = diffs.dropna()

####3. ADF unit-root test
# Judging stationarity from the plot alone is subjective, so the differenced
# series is also run through the Augmented Dickey-Fuller unit-root test; the
# outcome is used to settle d (the differencing order).
from statsmodels.tsa.stattools import adfuller

result = adfuller(diffs)
print(u'差分序列的ADF平稳性检验结果为:', result)

####4. Autocorrelation and partial-autocorrelation plots
# Once the differenced series is considered stationary, plot its ACF and PACF
# (30 lags each) to help pick p and q.
# How to read the plots: https://mp.csdn.net/mp_blog/creation/editor/122997522
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))

# Top panel: ACF; bottom panel: PACF. Both helpers draw onto the axes passed in
# and hand back the owning figure.
fig = sm.graphics.tsa.plot_acf(diffs, lags=30, ax=ax1)
fig = sm.graphics.tsa.plot_pacf(diffs, lags=30, ax=ax2)

plt.show()


####5. Choose p and q
# Reading p and q off the ACF/PACF plots is subjective as well, so a grid
# search over (p, q) is run and the pair with the smallest BIC is kept.
pmax = len(diffs) // 10    # rule of thumb: order should not exceed length / 10
qmax = len(diffs) // 10

# bic[i][j] holds the BIC of the model with p=i, q=j (NaN when the fit failed).
bic = []
for i in range(pmax + 1):
    item = []
    for j in range(qmax + 1):
        try:
            # d is 0 here because `diffs` is already differenced and the final
            # model below is fitted with order (p, 0, q) on the same series.
            # `order` is passed by keyword so it cannot be mistaken for `exog`.
            item.append(ARIMA(diffs, order=(i, 0, j)).fit().bic)
        except Exception:
            # Some (p, q) combinations fail to fit or converge; mark them as
            # missing rather than aborting the whole search.
            item.append(float("nan"))
    # One row per p: append only after the inner q-loop has finished, so the
    # row label of the resulting DataFrame is exactly p.
    bic.append(item)

bic = pd.DataFrame(bic, dtype=float)
print("bic :", bic)

# Failed fits stay NaN and are excluded from the search instead of being
# filled with a substitute value that could tie with the real minimum.
candidates = bic.stack().dropna()
if candidates.empty:
    raise RuntimeError("no ARIMA(p, 0, q) candidate could be fitted")
p, q = (int(order) for order in candidates.idxmin())
print('best p = %s , q = %s' % (p, q))


####6. White-noise test
# Run the Ljung-Box test (lag 1) on the differenced series and print the
# outcome.
# NOTE(review): a white-noise check on the fitted model would be run on its
# residuals; here the input is `diffs` itself -- confirm this is intended.
from statsmodels.stats.diagnostic import acorr_ljungbox

ljung_box_result = acorr_ljungbox(diffs, lags=1)
print(u'白噪声检验:', ljung_box_result)


####7. Seasonality check
# Decompose the series to inspect its trend and seasonal components.
# The CSV is read again here, keeping only the date and value columns, with
# "sdate" parsed to datetimes and used as the index.
dfs = (
    pd.read_csv(r"C:\Users\ld\Desktop\yc18\train1.csv", encoding="cp936")[["sdate", "data"]]
    .assign(sdate=lambda frame: pd.to_datetime(frame["sdate"]))
    .set_index("sdate")
)

# model='additive' selects the additive decomposition ('multiplicative' is the
# alternative). extrapolate_trend='freq' extrapolates the trend at both ends
# so that trend and resid do not start/end with missing values.
# Further options are described in the official documentation:
# https://www.statsmodels.org/stable/generated/statsmodels.tsa.seasonal.seasonal_decompose.html
decomposition = sm.tsa.seasonal_decompose(
    dfs,
    model='additive',
    extrapolate_trend='freq',
)

# Set the default figure size before plotting so the decomposition figure
# picks it up.
plt.rcParams['figure.figsize'] = (12, 8)
fig = decomposition.plot()
plt.show()

####8. Fit the ARIMA model
# With the three basic assumptions checked, fit the model using the p and q
# chosen in step 5. d is 0 because `diffs` has already been differenced.
# `order` is passed by keyword so it cannot be mistaken for another argument.
model = ARIMA(diffs, order=(p, 0, q)).fit()
# Print the summary explicitly: when run as a script, a bare expression
# statement would discard it and show nothing.
print(model.summary2())

####9. Forecast
# Pass the number of periods to forecast() and keep the first element of what
# it returns.
# NOTE(review): the model was fitted on `diffs`, so this value is presumably on
# the differenced scale rather than the scale of the raw series -- confirm
# before using it as a forecast of the original data.
horizon = 1
forecast_output = model.forecast(horizon)
pre = forecast_output[0]
print("pre:", pre)

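# NOTE: the lines below are footer text left over from the web page this
# script was copied from; they are not part of the program.
# Logo
#
# Provides developers with services such as learning and growth, sharing and
# exchange, ecosystem practice, and resources and tools, helping developers
# grow quickly.
#
# More recommendations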