log.Sehee
[데이터 취업 스쿨 스터디 노트] Prophet / forecast / EDA 시계열 분석 1 - 2 본문
Zerobase DS School
[데이터 취업 스쿨 스터디 노트] Prophet / forecast / EDA 시계열 분석 1 - 2
Sehe_e 2024. 8. 12. 21:56
forecast 시계열 분석
prophet 설치하기
conda install pandas-datareader
conda install -c conda-forge fbprophet
pip install prophet
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
**kwargs로 함수 작성
def plotSinWave(**kwargs):
    """
    Plot a sine wave y = a * sin(2 * pi * f * t + t_0) + b.

    Every setting is optional and is read from keyword arguments.

    Keyword Args:
        endTime: end of the time axis, exclusive (default 1).
        sampleTime: spacing between samples on the time axis (default 0.01).
        amp: amplitude a (default 1).
        freq: frequency f (default 1).
        startTime: start of the time axis; the same value is also used
            as the phase term t_0 inside the sine (default 0).
        bias: vertical offset b (default 1).
        figsize: figure size handed to plt.figure (default (12, 6)).

    Returns:
        None. The figure is shown with plt.show().
    """
    endTime = kwargs.get('endTime', 1)
    sampleTime = kwargs.get('sampleTime', 0.01)
    amp = kwargs.get('amp', 1)
    freq = kwargs.get('freq', 1)
    startTime = kwargs.get('startTime', 0)
    bias = kwargs.get('bias', 1)
    figsize = kwargs.get('figsize', (12, 6))

    time = np.arange(startTime, endTime, sampleTime)
    # startTime doubles as the phase offset here, not only as the axis start.
    result = amp * np.sin(2 * np.pi * freq * time + startTime) + bias

    plt.figure(figsize=figsize)
    plt.plot(time, result)
    plt.grid(True)
    plt.xlabel('time')
    plt.ylabel('sin')
    # The title spells out the formula with the values actually used.
    plt.title(str(amp) + '*sin(2*pi' + str(freq) + '*t+' + str(startTime) + ')+' + str(bias))
    plt.show()
plotSinWave()
인자값을 지정하고 작성하면 해당 인자값으로 그린 sinwave를 볼 수 있다.
plotSinWave(amp=2, freq=0.5, endTime=10)
함수 모듈화
%%writefile ./drawSinWave.py
import matplotlib.pyplot as plt
import numpy as np
def plotSinWave(**kwargs):
    """
    Plot a sine wave y = a * sin(2 * pi * f * t + t_0) + b.

    Every setting is optional and is read from keyword arguments.

    Keyword Args:
        endTime: end of the time axis, exclusive (default 1).
        sampleTime: spacing between samples on the time axis (default 0.01).
        amp: amplitude a (default 1).
        freq: frequency f (default 1).
        startTime: start of the time axis; the same value is also used
            as the phase term t_0 inside the sine (default 0).
        bias: vertical offset b (default 1).
        figsize: figure size handed to plt.figure (default (12, 6)).

    Returns:
        None. The figure is shown with plt.show().
    """
    endTime = kwargs.get('endTime', 1)
    sampleTime = kwargs.get('sampleTime', 0.01)
    amp = kwargs.get('amp', 1)
    freq = kwargs.get('freq', 1)
    startTime = kwargs.get('startTime', 0)
    bias = kwargs.get('bias', 1)
    figsize = kwargs.get('figsize', (12, 6))

    time = np.arange(startTime, endTime, sampleTime)
    # startTime doubles as the phase offset here, not only as the axis start.
    result = amp * np.sin(2 * np.pi * freq * time + startTime) + bias

    plt.figure(figsize=figsize)
    plt.plot(time, result)
    plt.grid(True)
    plt.xlabel('time')
    plt.ylabel('sin')
    # The title spells out the formula with the values actually used.
    plt.title(str(amp) + '*sin(2*pi' + str(freq) + '*t+' + str(startTime) + ')+' + str(bias))
    plt.show()
# Draw a demo wave only when this file is run as a script, not when it is imported.
if __name__ == '__main__':
    plotSinWave(amp=1, endTime=2)
import drawSinWave as dS
dS.plotSinWave()
한글 설정 모듈화
%%writefile ./set_matplotlib_hangul.py
import platform
import matplotlib.pyplot as plt
from matplotlib import font_manager, rc
# Choose a font that contains Hangul glyphs for the current operating system.
# 'Arial Unicode MS' only ships with macOS, so other systems need their own choice.
_system = platform.system()
if _system == 'Darwin':
    rc('font', family='Arial Unicode MS')
elif _system == 'Windows':
    rc('font', family='Malgun Gothic')
else:
    # NOTE(review): assumes the NanumGothic font is installed on Linux hosts - confirm.
    rc('font', family='NanumGothic')
# Draw minus signs with the ASCII hyphen rather than the Unicode minus sign.
plt.rcParams['axes.unicode_minus'] = False
import set_matplotlib_hangul
plt.title('한글 테스트')
prophet 기초
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Synthetic data: 730 samples of a sine that completes 12 cycles over t in [0, 1].
time = np.linspace(0, 1, 365*2)
result = np.sin(2*np.pi*12*time)
# One calendar day per sample, starting 2018-01-01.
ds = pd.date_range('2018-01-01', periods=365*2, freq='D')
# The frame uses the column names 'ds' (dates) and 'y' (values); it is later passed to Prophet's fit().
df = pd.DataFrame({'ds':ds, 'y':result})
df.head()
# Trailing semicolon keeps the notebook from echoing the returned Axes object.
df['y'].plot(figsize=(10, 6));
from prophet import Prophet
# Fit Prophet on the pure sine series with yearly and daily seasonality switched on.
m = Prophet(yearly_seasonality=True, daily_seasonality=True)
m.fit(df);
# Extend the date column by 30 periods and predict over the extended frame.
future = m.make_future_dataframe(periods=30)
forecast = m.predict(future)
fig = m.plot(forecast)
# Same sine as before, plus `time` itself as a linear upward trend.
time = np.linspace(0, 1, 365*2)
result = np.sin(2*np.pi*12*time) + time
ds = pd.date_range('2018-01-01', periods=365*2, freq='D')
df = pd.DataFrame({'ds':ds, 'y':result})
df['y'].plot(figsize=(10, 6))
# Refit a fresh model on the trended series and predict over a frame extended by 30 periods.
m = Prophet(yearly_seasonality=True, daily_seasonality=True)
m.fit(df)
future = m.make_future_dataframe(periods=30)
forecast = m.predict(future)
m.plot(forecast);
# Sine plus linear trend plus Gaussian noise (standard normal samples divided by 4).
# No random seed is set, so the noise differs on every run.
time = np.linspace(0, 1, 365*2)
result = np.sin(2*np.pi*12*time) + time + np.random.randn(365*2)/4
ds = pd.date_range('2018-01-01', periods=365*2, freq='D')
df = pd.DataFrame({'ds':ds, 'y':result})
df['y'].plot(figsize=(10, 6));
# Refit a fresh model on the noisy series and predict over a frame extended by 30 periods.
m = Prophet(yearly_seasonality=True, daily_seasonality=True)
m.fit(df)
future = m.make_future_dataframe(periods=30)
forecast = m.predict(future)
m.plot(forecast);
시계열 데이터 실전 이용해보기
https://pinkwink.kr/ 블로그의 트래픽 데이터를 사용
import pandas as pd
import numpy as np
import pandas_datareader as web
import matplotlib.pyplot as plt
from prophet import Prophet
from datetime import datetime
%matplotlib inline
# Load the blog traffic CSV. The two columns are named 'date' and 'hit',
# the first column ('date') becomes the index, and thousands=',' tells the
# parser to treat commas as thousands separators.
pinkwink_web = pd.read_csv(
'../data/05_PinkWink_Web_Traffic.csv',
encoding = 'utf-8',
thousands = ',',
names = ['date', 'hit'],
index_col = 0,
)
# Keep only the rows whose 'hit' value is not null.
pinkwink_web = pinkwink_web[pinkwink_web['hit'].notnull()]
pinkwink_web.head()
# Plot the whole series
pinkwink_web['hit'].plot(figsize=(12, 4), grid=True)
# Build the x-axis values used to visualize the trend analysis
time = np.arange(0, len(pinkwink_web))
traffic = pinkwink_web['hit'].values
# 1000 evenly spaced points over the same range; the fitted curves are evaluated on these when plotted.
fx = np.linspace(0, time[-1], 1000)
def error(f, x, y):
    """Return the root-mean-square error between f(x) and y.

    Args:
        f: callable that maps x to predicted values (e.g. an np.poly1d).
        x: inputs handed to f unchanged.
        y: observed values, same length as f(x).

    Returns:
        sqrt(mean((f(x) - y) ** 2)) as a NumPy float.
    """
    # Coerce both sides to float arrays so plain lists work as well as ndarrays.
    residuals = np.asarray(f(x), dtype=float) - np.asarray(y, dtype=float)
    mse = np.mean(residuals ** 2)
    return np.sqrt(mse)
# Least-squares polynomial fits of degree 1, 2, 3 and 15 to the traffic series.
# Each coefficient array is wrapped in np.poly1d so it can be called like a function.
fp1 = np.polyfit(time, traffic, 1)
f1 = np.poly1d(fp1)
f2p = np.polyfit(time, traffic, 2)
f2 = np.poly1d(f2p)
f3p = np.polyfit(time, traffic, 3)
f3 = np.poly1d(f3p)
f15p = np.polyfit(time, traffic, 15)
f15 = np.poly1d(f15p)
# Print the RMSE of every fitted polynomial, lowest degree first.
for fitted in (f1, f2, f3, f15):
    print(error(fitted, time, traffic))
# Scatter the raw traffic and overlay each fitted curve evaluated on the dense fx grid.
plt.figure(figsize=(12, 4))
plt.scatter(time, traffic, s=10)
for label, curve in (('f1', f1), ('f2', f2), ('f3', f3), ('f15', f15)):
    plt.plot(fx, curve(fx), lw=4, label=label)
plt.grid(True, linestyle='-', color='0.75')
plt.legend(loc=2)
plt.show()
# Build the two-column frame (ds = dates, y = hits) with a fresh integer index.
# Taking .values drops the date index up front, so no reset_index/del step is needed.
df = pd.DataFrame({'ds': pinkwink_web.index, 'y': pinkwink_web['hit'].values})
# Parse the date strings with the two-digit-year format '%y. %m. %d.'.
df['ds'] = pd.to_datetime(df['ds'], format='%y. %m. %d.')
df.head()
# Fit Prophet on the traffic frame with yearly and daily seasonality switched on.
m = Prophet(yearly_seasonality=True, daily_seasonality=True)
m.fit(df)
# Forecast 60 days of data
future = m.make_future_dataframe(periods=60)
future.tail()
# The prediction result includes the lower/upper bounds of the range.
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
m.plot(forecast);
# Plot the individual components of the forecast.
m.plot_components(forecast)
솔직히 오늘 들은 강의 중 알아들은 내용이 절반도 안되는 것 같다.. 코드 주석이 없는 이유가 바로 그것
통계 수업할 때 다시 공부해야겠다.
내일의 학습 목표
EDA Naver API 1 - 2
Comments