Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                

TSA2

Download as pdf or txt
Download as pdf or txt
You are on page 1of 13

22/10/2024, 06:17 TSA2

In [2]: from datetime import datetime
from math import sqrt
from random import randrange

import numpy as np
import pandas as pd
import statsmodels.api as sm
from matplotlib import pyplot as plt
from pandas.plotting import lag_plot, autocorrelation_plot
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller

In [3]: df = pd.read_csv(r"C:\Users\Ankit Panday\Downloads\daily_minimum_temperatures.csv",

In [4]: df.head()

Out[4]: Daily minimum temperatures

Date

1981-01-01 20.7

1981-01-02 17.9

1981-01-03 18.8

1981-01-04 14.6

1981-01-05 15.8

In [5]: series = df.squeeze()

In [6]: series.head()

Date
Out[6]:
1981-01-01 20.7
1981-01-02 17.9
1981-01-03 18.8
1981-01-04 14.6
1981-01-05 15.8
Name: Daily minimum temperatures, dtype: float64

In [7]: series.tail()

Date
Out[7]:
1990-12-27 14.0
1990-12-28 13.6
1990-12-29 13.5
1990-12-30 15.7
1990-12-31 13.0
Name: Daily minimum temperatures, dtype: float64

In [8]: series.isna().sum()

0
Out[8]:

localhost:8888/nbconvert/html/Downloads/TSA2.ipynb?download=false 1/13
22/10/2024, 06:17 TSA2

In [9]: series.plot()
plt.xlabel('Year')
plt.ylabel('Temperature')
plt.show()

In [10]: series['1981'].plot()

<Axes: xlabel='Date'>
Out[10]:

localhost:8888/nbconvert/html/Downloads/TSA2.ipynb?download=false 2/13
22/10/2024, 06:17 TSA2

In [11]: series.info()

<class 'pandas.core.series.Series'>
DatetimeIndex: 3650 entries, 1981-01-01 to 1990-12-31
Series name: Daily minimum temperatures
Non-Null Count Dtype
-------------- -----
3650 non-null float64
dtypes: float64(1)
memory usage: 57.0 KB

In [12]: series.describe()

count 3650.000000
Out[12]:
mean 11.177753
std 4.071837
min 0.000000
25% 8.300000
50% 11.000000
75% 14.000000
max 26.300000
Name: Daily minimum temperatures, dtype: float64

In [13]: plt.hist(series)

(array([ 53., 181., 564., 862., 830., 670., 347., 102., 32., 9.]),
Out[13]:
array([ 0. , 2.63, 5.26, 7.89, 10.52, 13.15, 15.78, 18.41, 21.04,
23.67, 26.3 ]),
<BarContainer object of 10 artists>)

In [14]: groups = series.groupby(pd.Grouper(freq='A'))


years = pd.DataFrame()
for i,j in groups:
years[i.year] = j.values
years.plot(subplots=True ,legend=True)
plt.plot()

[]
Out[14]:

localhost:8888/nbconvert/html/Downloads/TSA2.ipynb?download=false 3/13
22/10/2024, 06:17 TSA2

In [15]: plt.boxplot(years)
plt.show()

In [16]: Values = pd.DataFrame(series.values)


dataframe = pd.concat([Values.shift(1),Values],axis=1)
dataframe.columns = ['t','t+1']
dataframe.head()

localhost:8888/nbconvert/html/Downloads/TSA2.ipynb?download=false 4/13
22/10/2024, 06:17 TSA2

Out[16]: t t+1

0 NaN 20.7

1 20.7 17.9

2 17.9 18.8

3 18.8 14.6

4 14.6 15.8

In [18]: # stationary check

series.plot()
plt.xlabel('Years')
plt.ylabel("Temperature")
plt.show()

In [19]: X= series.values
spilt = int(len(X)/2)
x1,x2 = X[:spilt],X[spilt:]
mean1, mean2 = x1.mean(),x2.mean()
var1, var2 = x1.var(),x2.var()
print('mean1=%.3f,mean2 = %.3f'%(mean1,mean2))
print('var1=%.3f, var2=%.3f'%(var1,var2))

mean1=11.044,mean2 = 11.312
var1=18.161, var2=14.954

In [23]: result = adfuller(series)


print('ADF statistics: ',{result[0]})
print('P_value: ',result[1])

if result[1]>0.05:
print("Non Stationary")
else:
print("stationary")

localhost:8888/nbconvert/html/Downloads/TSA2.ipynb?download=false 5/13
22/10/2024, 06:17 TSA2
ADF statistics: {-4.444804924611684}
P_value: 0.0002470826300361156
stationary

In [24]: # The series is stationary, so we take the differencing order d = 0.

In [31]: # Persistence Model

X =pd.DataFrame( series.values)
dataframe = pd.concat([X.shift(1), X],axis = 1)
dataframe.columns = ['t','t+1']
dataframe.head()

X = dataframe.values
train_size = int(len(X)*0.66)
train,test = X[1:train_size],X[train_size:]
train_x,train_y = train[:,0],train[:,1]
test_x,test_y = test[:,0],test[:,1]

def model_persistence(x):
return(x)

# walk forward
predictions = list()

for x in test_x:
yhat = model_persistence(x)
predictions.append(yhat)

rmse = sqrt(mean_squared_error(test_y,predictions))
print(rmse)

plt.figure(figsize=(11,6))
plt.plot(train_y)
plt.plot([None for i in train_y]+[x for x in test_y])
plt.plot([None for i in train_y]+[x for x in predictions])

2.61279779714526
[<matplotlib.lines.Line2D at 0x223a625f9d0>]
Out[31]:

localhost:8888/nbconvert/html/Downloads/TSA2.ipynb?download=false 6/13
22/10/2024, 06:17 TSA2

In [32]: plot_acf(series);
plot_pacf(series);

localhost:8888/nbconvert/html/Downloads/TSA2.ipynb?download=false 7/13
22/10/2024, 06:17 TSA2

In [36]: model = ARIMA(series,order=(1,0,1))


model_fit= model.fit()
model_prediction = model_fit.predict(steps=len(series))
MAPE = mean_absolute_percentage_error(series,model_prediction)
print("MAPE: ", MAPE*100)

c:\Users\Ankit Panday\anaconda3\Lib\site-packages\statsmodels\tsa\base\tsa_model.p
y:473: ValueWarning: A date index has been provided, but it has no associated freq
uency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
c:\Users\Ankit Panday\anaconda3\Lib\site-packages\statsmodels\tsa\base\tsa_model.p
y:473: ValueWarning: A date index has been provided, but it has no associated freq
uency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
c:\Users\Ankit Panday\anaconda3\Lib\site-packages\statsmodels\tsa\base\tsa_model.p
y:473: ValueWarning: A date index has been provided, but it has no associated freq
uency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
MAPE: 1416865131432253.5

In [38]: residual = series - model_prediction

In [40]: plt.plot(residual)
plt.axhline(0,color ='red',linestyle='--')
plt.title('Residual Plot')

Text(0.5, 1.0, 'Residual Plot')


Out[40]:

localhost:8888/nbconvert/html/Downloads/TSA2.ipynb?download=false 8/13
22/10/2024, 06:17 TSA2

In [41]: model_fit.resid.plot.hist()

<Axes: ylabel='Frequency'>
Out[41]:

In [42]: model_fit.summary()

localhost:8888/nbconvert/html/Downloads/TSA2.ipynb?download=false 9/13
22/10/2024, 06:17 TSA2

Out[42]: SARIMAX Results

Dep. Variable: Daily minimum temperatures No. Observations: 3650

Model: ARIMA(1, 0, 1) Log Likelihood -8605.425

Date: Tue, 22 Oct 2024 AIC 17218.851

Time: 00:28:46 BIC 17243.661

Sample: 0 HQIC 17227.686

- 3650

Covariance Type: opg

coef std err z P>|z| [0.025 0.975]

const 11.1965 0.240 46.721 0.000 10.727 11.666

ar.L1 0.8649 0.011 81.948 0.000 0.844 0.886

ma.L1 -0.2414 0.019 -12.604 0.000 -0.279 -0.204

sigma2 6.5349 0.143 45.767 0.000 6.255 6.815

Ljung-Box (L1) (Q): 8.97 Jarque-Bera (JB): 15.27

Prob(Q): 0.00 Prob(JB): 0.00

Heteroskedasticity (H): 0.85 Skew: 0.05

Prob(H) (two-sided): 0.00 Kurtosis: 3.30

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

In [43]: autocorrelation_plot(residual)

<Axes: xlabel='Lag', ylabel='Autocorrelation'>


Out[43]:

localhost:8888/nbconvert/html/Downloads/TSA2.ipynb?download=false 10/13
22/10/2024, 06:17 TSA2

In [44]: plot_acf(residual);
plot_pacf(residual);

localhost:8888/nbconvert/html/Downloads/TSA2.ipynb?download=false 11/13
22/10/2024, 06:17 TSA2

In [45]: model_fit.plot_diagnostics()

Out[45]:

localhost:8888/nbconvert/html/Downloads/TSA2.ipynb?download=false 12/13
22/10/2024, 06:17 TSA2

In [51]: # Date Time Featue

temps = pd.DataFrame(series.values)
dataframe = pd.DataFrame()
dataframe['month'] = [series.index[i].month for i in range(len(series))]
dataframe['day'] = [series.index[i].day for i in range(len(series))]
dataframe['temperature'] = [series[i] for i in range(len(series))]
dataframe.head()

Out[51]: month day temperature

0 1 1 20.7

1 1 2 17.9

2 1 3 18.8

3 1 4 14.6

4 1 5 15.8

In [ ]:

localhost:8888/nbconvert/html/Downloads/TSA2.ipynb?download=false 13/13

You might also like