관리 메뉴

log.Sehee

[데이터 취업 스쿨 스터디 노트] Prophet / forecast / EDA 시계열 분석 1 - 2 본문

Zerobase DS School

[데이터 취업 스쿨 스터디 노트] Prophet / forecast / EDA 시계열 분석 1 - 2

Sehe_e 2024. 8. 12. 21:56

 


 

forecast 시계열 분석

 

prophet 설치하기

conda install pandas-datareader
conda install -c conda-forge fbprophet
pip install prophet

 

import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

 

**kwargs로 함수 작성

def plotSinWave(**kwargs):
    """
    Plot a sine wave of the form y = a * sin(2 * pi * f * t + t_0) + b.

    Keyword Args:
        endTime: end of the time axis (exclusive, as in np.arange). Default 1.
        sampleTime: spacing between time samples. Default 0.01.
        amp: amplitude a. Default 1.
        freq: frequency f. Default 1.
        startTime: start of the time axis; the same value is also used as
            the phase term t_0 inside the sine. Default 0.
        bias: vertical offset b. Default 1.
        figsize: figure size handed to plt.figure. Default (12, 6).

    Returns:
        None. The figure is displayed with plt.show().
    """
    endTime = kwargs.get('endTime', 1)
    sampleTime = kwargs.get('sampleTime', 0.01)
    amp = kwargs.get('amp', 1)
    freq = kwargs.get('freq', 1)
    startTime = kwargs.get('startTime', 0)
    bias = kwargs.get('bias', 1)
    figsize = kwargs.get('figsize', (12, 6))

    time = np.arange(startTime, endTime, sampleTime)
    # startTime is reused as the phase offset t_0 of the docstring formula.
    result = amp * np.sin(2 * np.pi * freq * time + startTime) + bias

    plt.figure(figsize=figsize)
    plt.plot(time, result)
    plt.grid(True)
    plt.xlabel('time')
    plt.ylabel('sin')
    # The title mirrors the formula; the '*' between pi and the frequency was
    # previously missing, which rendered e.g. "sin(2*pi1*t+0)".
    plt.title('{}*sin(2*pi*{}*t+{})+{}'.format(amp, freq, startTime, bias))
    plt.show()

 

plotSinWave()

 

인자값을 지정하고 작성하면 해당 인자값으로 그린 sinwave를 볼 수 있다.

plotSinWave(amp=2, freq=0.5, endTime=10)

 

함수 모듈화

%%writefile ./drawSinWave.py

import matplotlib.pyplot as plt
import numpy as np

def plotSinWave(**kwargs):
    """
    Plot a sine wave of the form y = a * sin(2 * pi * f * t + t_0) + b.

    Keyword Args:
        endTime: end of the time axis (exclusive, as in np.arange). Default 1.
        sampleTime: spacing between time samples. Default 0.01.
        amp: amplitude a. Default 1.
        freq: frequency f. Default 1.
        startTime: start of the time axis; the same value is also used as
            the phase term t_0 inside the sine. Default 0.
        bias: vertical offset b. Default 1.
        figsize: figure size handed to plt.figure. Default (12, 6).

    Returns:
        None. The figure is displayed with plt.show().
    """
    endTime = kwargs.get('endTime', 1)
    sampleTime = kwargs.get('sampleTime', 0.01)
    amp = kwargs.get('amp', 1)
    freq = kwargs.get('freq', 1)
    startTime = kwargs.get('startTime', 0)
    bias = kwargs.get('bias', 1)
    figsize = kwargs.get('figsize', (12, 6))

    time = np.arange(startTime, endTime, sampleTime)
    # startTime is reused as the phase offset t_0 of the docstring formula.
    result = amp * np.sin(2 * np.pi * freq * time + startTime) + bias

    plt.figure(figsize=figsize)
    plt.plot(time, result)
    plt.grid(True)
    plt.xlabel('time')
    plt.ylabel('sin')
    # The title mirrors the formula; the '*' between pi and the frequency was
    # previously missing, which rendered e.g. "sin(2*pi1*t+0)".
    plt.title('{}*sin(2*pi*{}*t+{})+{}'.format(amp, freq, startTime, bias))
    plt.show()

# Demo plot, drawn only when this file is executed as a script (not on import).
if __name__ == '__main__':
    plotSinWave(amp=1, endTime=2)

 

import drawSinWave as dS
dS.plotSinWave()

 

한글 설정 모듈화

%%writefile ./set_matplotlib_hangul.py

import platform
import matplotlib.pyplot as plt
from matplotlib import font_manager, rc

# Use a font family that contains Hangul glyphs for all matplotlib text.
# NOTE(review): 'Arial Unicode MS' is hard-coded; confirm it is installed on
# the target machine (the imported `platform` module is not used here).
rc('font', family='Arial Unicode MS')
# Turn off the Unicode minus sign for axis labels.
# Presumably the chosen font lacks that glyph - confirm.
plt.rcParams['axes.unicode_minus'] = False
# NOTE(review): the two lines below import the very module this cell writes,
# so they presumably belong to a separate notebook cell that tests the
# setup - confirm and split if so.
import set_matplotlib_hangul
plt.title('한글 테스트')

 

 


 

prophet 기초

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# 730 evenly spaced samples on [0, 1].
time = np.linspace(0, 1, 365*2)
# Pure sine completing 12 cycles over the sampled interval.
result = np.sin(2*np.pi*12*time)
# One daily timestamp per sample, starting 2018-01-01.
ds = pd.date_range('2018-01-01', periods=365*2, freq='D')
# Columns are named 'ds' and 'y', the names the Prophet cells below fit on.
df = pd.DataFrame({'ds':ds, 'y':result})
df.head()

 

# Plot the 'y' column; the trailing ';' suppresses the notebook's text output.
df['y'].plot(figsize=(10, 6));

 

from prophet import Prophet

# Fit a Prophet model with yearly and daily seasonality enabled on df
# (columns 'ds' and 'y').
m = Prophet(yearly_seasonality=True, daily_seasonality=True)
m.fit(df);

# Build the frame to predict on (periods=30 beyond the fitted data), predict,
# and plot. The result variable was misspelled `fprecast`; it is named
# `forecast` here to match the later cells.
future = m.make_future_dataframe(periods=30)
forecast = m.predict(future)
fig = m.plot(forecast)

 

# 730 evenly spaced samples on [0, 1].
time = np.linspace(0, 1, 365*2)
# Same 12-cycle sine as before, plus a linear ramp (time itself) as a trend.
result = np.sin(2*np.pi*12*time) + time
# One daily timestamp per sample, starting 2018-01-01.
ds = pd.date_range('2018-01-01', periods=365*2, freq='D')
df = pd.DataFrame({'ds':ds, 'y':result})
df['y'].plot(figsize=(10, 6))

 

# Fit a fresh Prophet model (yearly and daily seasonality enabled) on df.
m = Prophet(yearly_seasonality=True, daily_seasonality=True)
m.fit(df)
# Predict over a future frame built with periods=30, then plot the forecast.
future = m.make_future_dataframe(periods=30)
forecast = m.predict(future)
m.plot(forecast);

 

# 730 evenly spaced samples on [0, 1].
time = np.linspace(0, 1, 365*2)
# Sine + linear ramp + standard-normal noise scaled by 1/4.
# No random seed is set in this cell, so the noise differs on every run.
result = np.sin(2*np.pi*12*time) + time + np.random.randn(365*2)/4
# One daily timestamp per sample, starting 2018-01-01.
ds = pd.date_range('2018-01-01', periods=365*2, freq='D')
df = pd.DataFrame({'ds':ds, 'y':result})
df['y'].plot(figsize=(10, 6));

 

# Fit a fresh Prophet model (yearly and daily seasonality enabled) on the
# noisy series.
m = Prophet(yearly_seasonality=True, daily_seasonality=True)
m.fit(df)
# Predict over a future frame built with periods=30, then plot the forecast.
future = m.make_future_dataframe(periods=30)
forecast = m.predict(future)
m.plot(forecast);

 

 


 

시계열 데이터 실전 이용해보기

 

https://pinkwink.kr/ 블로그의 트래픽 데이터를 사용

 

import pandas as pd
import numpy as np
import pandas_datareader as web
import matplotlib.pyplot as plt

from prophet import Prophet
from datetime import datetime

%matplotlib inline
# Load the web-traffic CSV. Column names are supplied explicitly as
# 'date' and 'hit'; ',' is treated as a thousands separator, and the first
# column ('date') becomes the index.
pinkwink_web = pd.read_csv(
    '../data/05_PinkWink_Web_Traffic.csv',
    encoding = 'utf-8',
    thousands = ',',
    names = ['date', 'hit'],
    index_col = 0,
)

# Keep only the rows that actually have a 'hit' value.
pinkwink_web = pinkwink_web[pinkwink_web['hit'].notnull()]
pinkwink_web.head()

 

# Plot the entire data set
pinkwink_web['hit'].plot(figsize=(12, 4), grid=True)

 

# Build the x-axis values used to visualize the trend analysis
# time: integer position 0..len-1 of each observation.
time = np.arange(0, len(pinkwink_web))
traffic = pinkwink_web['hit'].values
# fx: 1000 evenly spaced points over the same range, for drawing fitted curves.
fx = np.linspace(0, time[-1], 1000)

 

def error(f, x, y):
    residuals = f(x) - y
    mse = np.mean(residuals ** 2)
    return np.sqrt(mse)
# Fit polynomials of degree 1, 2, 3 and 15 to the traffic series and wrap the
# coefficients in np.poly1d so each model can be called like a function.
fp1 = np.polyfit(time, traffic, 1)
f1 = np.poly1d(fp1)

f2p = np.polyfit(time, traffic, 2)
f2 = np.poly1d(f2p)

# The closing parenthesis was missing on this call (SyntaxError).
f3p = np.polyfit(time, traffic, 3)
f3 = np.poly1d(f3p)

# NOTE(review): a degree-15 fit may be poorly conditioned and numpy may warn
# about it - confirm the warning is acceptable here.
f15p = np.polyfit(time, traffic, 15)
f15 = np.poly1d(f15p)

# Report the error() value of each model on the data it was fitted to.
print(error(f1, time, traffic))
print(error(f2, time, traffic))
print(error(f3, time, traffic))
print(error(f15, time, traffic))

 

# Scatter the raw traffic and overlay each fitted polynomial, evaluated on
# the fx grid.
plt.figure(figsize=(12, 4))
plt.scatter(time, traffic, s=10)
plt.plot(fx, f1(fx), lw=4, label='f1')
plt.plot(fx, f2(fx), lw=4, label='f2')
plt.plot(fx, f3(fx), lw=4, label='f3')
plt.plot(fx, f15(fx), lw=4, label='f15')

# color='0.75' is a matplotlib grayscale string; loc=2 is the upper-left corner.
plt.grid(True, linestyle='-', color='0.75')
plt.legend(loc=2)
plt.show()



# Build the two-column frame ('ds' = date, 'y' = hits) used for Prophet.
df = pd.DataFrame({'ds':pinkwink_web.index, 'y': pinkwink_web['hit']})
# Move the index into a regular column (removed again below as 'date').
df.reset_index(inplace=True)
# Parse date strings of the form 'yy. mm. dd.' into datetimes.
df['ds'] = pd.to_datetime(df['ds'], format='%y. %m. %d.')
# 'ds' already carries the dates, so the 'date' column is redundant.
del df['date']
df.head()

 

# Fit a Prophet model (yearly and daily seasonality enabled) on the traffic data.
m = Prophet(yearly_seasonality=True, daily_seasonality=True)
m.fit(df)

# Predict the data for the next 60 days
future = m.make_future_dataframe(periods=60)
future.tail()

 

# The prediction result comes with an upper/lower bound range.
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

 

# Plot the forecast; the trailing ';' suppresses the notebook's text output.
m.plot(forecast);

 

# Plot the individual components of the forecast as separate panels.
m.plot_components(forecast)

 

 


솔직히 오늘 들은 강의 중 알아들은 내용이 절반도 안 되는 것 같다. 코드 주석이 없는 이유가 바로 그것이다.

통계 수업할 때 다시 공부해야겠다.

 

 

내일의 학습 목표

EDA Naver API 1 - 2

 

 

Comments