Data/Dacon

Dacon 와인품질 경진대회...2

Kirok Kim 2021. 12. 7. 23:09
클래스 분포 확인
# Number of training rows in each 'quality' class.
counted_values = train['quality'].value_counts()

# Bar chart of the class distribution on a 12x10 figure in the ggplot style.
plt.style.use('ggplot')
plt.figure(figsize=(12, 10))
value_bar_ax = sns.barplot(x=counted_values.index, y=counted_values)
# Title and tick labels are set on the Axes returned by seaborn, which is the
# current axes of the figure created above.
value_bar_ax.set_title('class counting', fontsize=30)
value_bar_ax.tick_params(labelsize=20)

와인 품질별 고유 특성 확인
# Plot the mean of every numeric feature per quality grade, one subplot per
# feature, to see how each feature moves with quality.

# Quality grades to compare. Defined once so the aggregation and the x-axes of
# the plots always agree.
quality_levels = list(range(4, 9))

# Numeric feature columns only; 'id' and the target 'quality' are excluded.
feature_columns = list(
    train.drop(['id', 'quality'], axis=1)
    .select_dtypes(include='number')
    .columns
)

# One row per quality grade, one column per feature. reindex() pins the rows to
# quality_levels: grades outside the list are dropped and a grade with no rows
# becomes an all-NaN row.
means = train.groupby('quality')[feature_columns].mean().reindex(quality_levels)

fig, axes = plt.subplots(4, 3, figsize=(25, 15))
fig.suptitle('mean values per quality', fontsize=40)

# zip() stops at the shorter sequence, so at most 12 features are drawn.
for ax, col in zip(axes.flat, means.columns):
    ax.plot(quality_levels, means[col])
    ax.scatter(quality_levels, means[col])
    ax.set_title(col, fontsize=20)
    ax.set_xticks(quality_levels)

# Hide grid cells that received no feature so the figure has no empty plots.
for unused_ax in axes.ravel()[len(means.columns):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

반응형