Data/Dacon
집값 예측 분석...2
Kirok Kim
2022. 2. 8. 21:09
수치형 데이터 및 명목형 데이터 시각화
# Numeric columns: every integer/float dtype, whatever its bit width.
# `select_dtypes` is used instead of comparing each dtype to the builtin
# `int`/`float`, because that comparison resolves to a platform-dependent
# NumPy width and can silently miss columns (e.g. int64 where `int` maps
# to a 32-bit integer).
numeric_feature = data.select_dtypes(include="number").columns
# Categorical columns: those stored with the object dtype.
categorical_feature = data.columns[data.dtypes == "O"]
# Plotting setup used by the figures drawn further down.
import matplotlib.pyplot as plt
# IPython/Jupyter magic (not plain Python): render figures inline in the
# notebook output.
%matplotlib inline
# Apply Matplotlib's "ggplot" style sheet to the figures created afterwards.
plt.style.use("ggplot")
feature = numeric_feature

# Use box plots to inspect the distribution of every numeric column.
# Three plots per row; the row count is derived from the number of columns
# (e.g. 11 columns give the 4x3 grid) so the grid never runs out of slots
# when the column count changes.
n_cols = 3
n_rows = max(1, (len(feature) + n_cols - 1) // n_cols)  # ceiling division

plt.figure(figsize=(20, 15))
plt.suptitle("Boxplots", fontsize=40)
for position, column in enumerate(feature, start=1):
    # Subplot positions are 1-based, filled row by row.
    plt.subplot(n_rows, n_cols, position)
    # Title each subplot with its column name.
    plt.title(column)
    # Drop missing values first: NaNs would otherwise propagate into the
    # quartile computation and leave the box blank.
    plt.boxplot(data[column].dropna())
# Render the whole grid once every subplot has been drawn.
plt.show()
# Use bar charts of the per-category counts to inspect the distribution of
# every categorical column.
feature = categorical_feature

# Three plots per row; the row count grows with the number of columns, so
# more than three categorical columns no longer overflow a fixed 1x3 grid.
n_cols = 3
n_rows = max(1, (len(feature) + n_cols - 1) // n_cols)  # ceiling division

plt.figure(figsize=(20, 10))
plt.suptitle("Bar Plot", fontsize=40)
for position, column in enumerate(feature, start=1):
    # Subplot positions are 1-based, filled row by row.
    plt.subplot(n_rows, n_cols, position)
    plt.title(column, fontsize=20)
    # Frequency of each distinct value in the column.
    counts = data[column].value_counts()
    plt.bar(counts.index, counts.values, width=0.5, color="b", alpha=0.5)
    plt.xticks(counts.index, fontsize=12)
# Reserve the top 5% of the figure so the subplots do not overlap the
# suptitle.
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()
반응형