Data/Dacon
Dacon 와인품질 경진대회...2
Kirok Kim
2021. 12. 7. 23:09
클래스 분포 확인
# Class distribution: draw one bar per distinct 'quality' label showing how
# many rows of `train` carry that label.
plt.style.use('ggplot')
plt.figure(figsize=(12, 10))

# Grab the figure's axes explicitly so the title, bars and tick styling are
# all applied to the same Axes object.
value_bar_ax = plt.gca()
value_bar_ax.set_title('class counting', fontsize=30)

counted_values = train['quality'].value_counts()
sns.barplot(x=counted_values.index, y=counted_values, ax=value_bar_ax)
value_bar_ax.tick_params(labelsize=20)
와인 품질별 고유 특성 확인
# Per-quality feature profile: compute the mean of every numeric feature for
# each quality level and draw one line+scatter panel per feature.

# NOTE(review): `qualities` is never filled in this section; it is kept only
# in case code outside this view references it.
qualities = {}

# Single source of truth for the quality levels shown on every x-axis.
quality_levels = list(range(4, 9))

# Numeric feature columns only ('id' and the 'quality' label are excluded),
# which matches the columns describe() reports a mean for.
features = train.drop(['id', 'quality'], axis=1).select_dtypes(include='number')

# One row per quality level, one column per feature. reindex() guarantees a
# row for every level in `quality_levels` (NaN when a level has no samples),
# so the y data always lines up with the x positions used below.
means = features.groupby(train['quality']).mean().reindex(quality_levels)

fig, axes = plt.subplots(4, 3, figsize=(25, 15))
fig.suptitle('mean values per quality', fontsize=40)

# zip() stops at the shorter sequence, so any surplus panels in the 4x3 grid
# are simply left empty.
for ax, col in zip(axes.flat, means.columns):
    ax.plot(quality_levels, means[col])
    ax.scatter(quality_levels, means[col])
    ax.set_title(col, fontsize=20)

# Use the same integer ticks on every panel, including unused ones.
plt.setp(axes, xticks=quality_levels)
plt.tight_layout()
plt.show()
반응형