3. 미국 연도별 출생 빈도를 알아보자 (3)
앞으로 진행할 데이터 분석 과정을 위해 이전 포스팅에서의 전처리된 데이터를 가져오자.
import pandas as pd
# Load the raw births table; 'utf-8-sig' tolerates a leading byte-order mark.
birth = pd.read_csv("Data/births.csv", encoding='utf-8-sig')

# Keep only the first 15067 rows.
# NOTE(review): presumably the rows up to 1988 (per the variable name) that
# still carry a usable day value - confirm against the CSV.
birth_til_1988 = birth.iloc[:15067]

# Sum births per (year, month, day) and flatten the keys back into columns.
birth_date = pd.pivot_table(
    birth_til_1988,
    index=['year', 'month', 'day'],
    values=['births'],
    aggfunc={'births': 'sum'},
).reset_index()
birth_date['day'] = birth_date['day'].astype('int64')

import numpy as np

# Outlier filter centred on the median; 0.74 * IQR serves as the spread
# estimate, and only rows strictly within 5 spreads of the median are kept.
quartiles = np.percentile(birth_date['births'], [25, 50, 75])
mu = quartiles[1]
sig = 0.74 * (quartiles[2] - quartiles[0])
lower_bound = mu - 5 * sig
upper_bound = mu + 5 * sig
within_bounds = (birth_date['births'] > lower_bound) & (birth_date['births'] < upper_bound)
birth_date = birth_date[within_bounds]

# year/month/day become strings so they can be joined into a date string;
# births stays an integer column.
birth_date = birth_date.astype({'year': 'str', 'month': 'str', 'day': 'str'})
date_text = birth_date['year'].str.cat([birth_date['month'], birth_date['day']], sep='-')

# errors='raise' makes any date that cannot be parsed fail loudly.
birth_date['date'] = pd.to_datetime(date_text, errors='raise')

# Numeric weekday code: 0 = Monday ... 6 = Sunday.
birth_date['weekday'] = birth_date['date'].dt.dayofweek
# Weekday code (0 = Monday ... 6 = Sunday) -> three-letter English name.
_WEEKDAY_NAMES = {
    0: 'Mon',
    1: 'Tue',
    2: 'Wed',
    3: 'Thu',
    4: 'Fri',
    5: 'Sat',
    6: 'Sun',
}


def weekday_func(row):
    """Replace the numeric weekday code in ``row['weekday']`` with its name.

    Args:
        row: A mapping-like record (for example a DataFrame row passed by
            ``DataFrame.apply(..., axis=1)``) that has a ``'weekday'`` entry.

    Returns:
        The same ``row`` object. ``row['weekday']`` is overwritten in place
        with 'Mon' ... 'Sun' when it held 0 ... 6; any other value is left
        untouched.
    """
    name = _WEEKDAY_NAMES.get(row['weekday'])
    if name is not None:
        row['weekday'] = name
    return row
# Run weekday_func over every row (axis='columns' hands the function one row
# at a time) so the numeric weekday codes become 'Mon' ... 'Sun'.
birth_date = birth_date.apply(weekday_func, axis='columns')
birth_date.head()
>>
year month day births date weekday
0 1969 1 1 8486 1969-01-01 Wed
1 1969 1 2 9002 1969-01-02 Thu
2 1969 1 3 9542 1969-01-03 Fri
3 1969 1 4 8960 1969-01-04 Sat
4 1969 1 5 8390 1969-01-05 Sun
1. 1969년 일별로 출생 추이 파악하기
앞서 사용하지 않은 1969년도의 데이터를 바탕으로 일별 출생 추이를 파악해보자.
# Select the rows for 1969.
# The preprocessing above casts `year` to str (astype('str')), and a string
# never compares equal to the integer 1969, so the original filter would
# return no rows. Casting back to int makes the comparison work whether the
# column currently holds strings or integers.
birth_1969 = birth_date[birth_date['year'].astype('int64') == 1969]
birth_1969
>>
year month day births date weekday
0 1969 1 1 8486 1969-01-01 Wed
1 1969 1 2 9002 1969-01-02 Thu
2 1969 1 3 9542 1969-01-03 Fri
3 1969 1 4 8960 1969-01-04 Sat
4 1969 1 5 8390 1969-01-05 Sun
... ... ... ... ... ... ...
378 1969 12 27 9304 1969-12-27 Sat
379 1969 12 28 9004 1969-12-28 Sun
380 1969 12 29 10980 1969-12-29 Mon
381 1969 12 30 12232 1969-12-30 Tue
382 1969 12 31 11122 1969-12-31 Wed
365 rows Γ 6 columns
기가 막히게 딱 365개의 행을 가진다. 이런 걸 보면 기분이 참 좋다.
μ΄ λ°μ΄ν°νλ μλ μ€λ μ μ²λ¦¬ν΄μ£Όκ³ datetime ν¨μλ₯Ό μ μ©νμ.
import numpy as np
# Re-run the median/IQR outlier filter on the 1969 subset alone:
# keep rows strictly within 5 * (0.74 * IQR) of the median.
quartiles = np.percentile(birth_1969['births'], [25, 50, 75])
mu = quartiles[1]
sig = 0.74 * (quartiles[2] - quartiles[0])

# .copy() detaches the filtered rows from the frame they were selected from,
# so the column assignments below write to an independent DataFrame instead
# of a possible view (avoids SettingWithCopyWarning / lost writes).
birth_1969 = birth_1969.query('(births > @mu - 5*@sig) & (births < @mu + 5*@sig)').copy()

# Normalise `day` to an integer before stringifying the frame.
birth_1969['day'] = birth_1969['day'].astype('int64')

# Everything becomes str for the date concatenation; births is restored to int.
birth_1969 = birth_1969.astype('str')
birth_1969['births'] = birth_1969['births'].astype('int64')

# Rebuild the datetime column (astype('str') above turned it into text).
birth_1969['date'] = pd.to_datetime(
    birth_1969['year'] + '-' + birth_1969['month'] + '-' + birth_1969['day'],
    errors='raise',
)
birth_1969.head()
>>
year month day births date weekday
0 1969 1 1 8486 1969-01-01 Wed
1 1969 1 2 9002 1969-01-02 Thu
2 1969 1 3 9542 1969-01-03 Fri
3 1969 1 4 8960 1969-01-04 Sat
4 1969 1 5 8390 1969-01-05 Sun
마지막으로 plotly로 시각화해주자.
import plotly.graph_objects as go
import plotly.offline as pyo
pyo.init_notebook_mode()

# Line chart of daily births in 1969.
fig = go.Figure()
daily_births_trace = go.Scatter(
    x=birth_1969['date'],
    y=birth_1969['births'],
    marker_color='#025918',
)
fig.add_trace(daily_births_trace)

# Centred bold title, one x tick per month ('M1'), presentation template.
fig.update_layout(
    title={'text': '<b>Births in 1969</b>', 'font': {'size': 25}, 'x': 0.5, 'y': 0.92},
    xaxis={
        'showticklabels': True,
        'dtick': 'M1',
        'tickfont': {'size': 15},
        'title': {'text': 'Date', 'font': {'size': 20}},
    },
    yaxis={
        'showticklabels': True,
        'tickfont': {'size': 15},
        'title': {'text': 'Births', 'font': {'size': 20}},
    },
    template='presentation',
)

# Look shared by both call-outs: green arrow and box, white text.
callout_style = dict(
    arrowcolor='#025918',
    arrowhead=2,
    arrowsize=1,
    arrowwidth=2,
    font=dict(size=13, color='white'),
    bordercolor='#025918',
    borderpad=4,
    borderwidth=2,
    align='center',
    bgcolor='#025918',
    opacity=0.8,
)

# Call-out for the yearly maximum; the label sits above the point (ay < 0).
fig.add_annotation(
    x='1969-12-30', y=12250,
    text='MAX : 1969-12-30,12232',
    ax=20, ay=-40,
    **callout_style,
)

# Call-out for the yearly minimum; the label sits below the point (ay > 0).
fig.add_annotation(
    x='1969-4-20', y=7930,
    text='MIN : 1969-4-20, 7928',
    ax=20, ay=50,
    **callout_style,
)
fig.show()
4. Heatmap - 분기별 출생 빈도 (1969 ~ 2008)
마지막으로 전체적인 출생 빈도를 알아보기 위해 히트맵을 그려보자.
# Monthly birth totals over the full table, flattened and cast to str so the
# year and month columns can be concatenated into a date string.
birth_quarter = (
    birth.groupby(['year', 'month'])
    .agg({'births': 'sum'})
    .reset_index()
    .astype('str')
)

# 'YYYY-M' is parsed as the first day of that month.
year_month = birth_quarter['year'].str.cat(birth_quarter['month'], sep='-')
birth_quarter['Date'] = pd.to_datetime(year_month, errors='raise')
birth_quarter.head()
>>
year month births Date
0 1969 1 293940 1969-01-01
1 1969 2 270786 1969-02-01
2 1969 3 296550 1969-03-01
3 1969 4 282638 1969-04-01
4 1969 5 289124 1969-05-01
해당 월의 분기를 구하기 위해서 dt.quarter 메서드를 사용하자.
# Tag every month with its calendar quarter (1-4) and turn births back into
# integers (the frame was cast to str earlier) so they can be summed.
birth_quarter = birth_quarter.assign(
    Quarter=birth_quarter['Date'].dt.quarter,
    births=birth_quarter['births'].astype('int64'),
)

# Total births per (year, Quarter); the indexed frame is kept under
# birth_quarter and a flat copy is made for plotting.
birth_quarter = birth_quarter.groupby(['year', 'Quarter']).agg({'births': 'sum'})
birth_quarter_plotly = birth_quarter.reset_index()
birth_quarter_plotly.head()
>>
year Quarter births
0 1969 1 861276
1 1969 2 863372
2 1969 3 952010
3 1969 4 923548
4 1970 1 891214
깔끔하게 정리되었다!! 시각화하자.
# Heatmap of quarterly birth totals: year on x, quarter on y, colour = births.
fig = go.Figure()
fig.add_trace(
    go.Heatmap(
        x=birth_quarter_plotly['year'],
        y=birth_quarter_plotly['Quarter'],
        z=birth_quarter_plotly['births'],
        colorscale='blues',
    )
)

# The Korean labels were mis-encoded, and the x-axis title literal had been
# split across two lines (an unterminated string, i.e. a syntax error).
# Restored labels: title = "Births by quarter", x = "Year", y = "Quarter".
fig.update_layout(
    {
        'title': {'text': '<b>분기별 출생 정도</b>', 'font': {'size': 25}, 'x': 0.5, 'y': 0.92},
        'xaxis': {'showticklabels': True, 'title': {'text': '년도', 'font': {'size': 20}}},
        # dtick=1 puts one tick on each quarter.
        'yaxis': {'showticklabels': True, 'dtick': 1, 'title': {'text': '분기', 'font': {'size': 20}}},
    }
)
fig.show()
전체적인 년도와 분기를 보면 3분기의 출산 추이가 가장 높은 것을 확인할 수 있다. 또한 시간이 지날수록 전체적인 출산율이 높아지고 있음을 확인하자.
추가로 이렇게 복잡한 과정을 거치지 않고 pandas의 style 기능을 사용해서 히트맵을 그릴 수도 있다. 간단하고 직관적으로 분석할 때 좋은 방법이므로 알아두면 좋을 것 같다!!
# Take rows 128-159 of the quarterly table (32 rows) and pivot them into a
# year x Quarter grid.
# NOTE(review): presumably the last 8 years x 4 quarters - confirm the table
# has exactly 160 rows.
recent_quarters = birth_quarter_plotly.iloc[128:160]
birth_quarter_df = recent_quarters.set_index(['year', 'Quarter']).unstack('Quarter')

# Shade each cell by value with the 'Reds' colormap (pandas Styler).
birth_quarter_df.style.background_gradient(cmap='Reds')
최근 8년간의 추세를 보아도 계속해서 출산율이 높아지고 있음을 확인할 수 있다.
π μλ κ² ν΄μ CDC λ°μ΄ν° λΆμμ νμ°¨λ‘ λλλ€. λμ€ν¬ 곡백기 λλ¬Έμ μμ μ μ¬μ©νλ ν¨μλ€λ νλ²μ© μ°Ύμ보면μ μ¬μ©λ²μ λ€μ μ΅νλ μ€μ΄λΌ μμ°μ€λ½κ² μμ΄ μ‘°κΈ κ΅Όλ μ‘λ€π€¦ββοΈ. κΈ°μ΄λΆν° λ€μ ννν λ€μ§λ€λ μκ°μΌλ‘ μ΄μ¬ν λ¬λ €λ΄μΌκ² λ€πββοΈπββοΈπββοΈ!!
π λ΄μΌμ΄λ©΄ κ°κ°μ΄λ€. 5μ 곡 3νλ‘μ νΈμ 2022λ 1νκΈ°κ° λ κ² κ°λ€. λ νλ©΄ λκ² μ§λΌλ λ§μμΌλ‘ κ·Έλ₯ μ΄μ¬ν μ΄μλ΄μΌκ² λ€. μμμμ£!!
Leave a comment