การวิเคราะห์ประสิทธิภาพของ Machine Learning Model ด้วย Learning Curve

4 min read · Sep 30, 2020

Learning Curve เป็นสิ่งที่แสดงถึงประสิทธิภาพการเรียนรู้ของ Model จาก Training Dataset ซึ่งแกน x ของกราฟจะเป็น Epoch และแกน y จะเป็นประสิทธิภาพของ Model โดยประสิทธิภาพของ Model จะถูกวัดหลังจากการปรับปรุง Weight และ Bias ด้วยข้อมูล 2 ชนิด ได้แก่

Training Dataset ที่ Model กำลังเรียนรู้
Validation Dataset ที่ไม่เคยถูกใช้สอน Model มาก่อน

ประสิทธิภาพของ Model จะวัดจาก Loss และ Accuracy โดยยิ่งค่า Loss หรือ Error ของ Model น้อย แสดงว่า Model มีการเรียนรู้ที่ดี แต่สำหรับค่า Accuracy ยิ่งค่า Accuracy มากแสดงว่า Model มีการเรียนรู้ที่ดี

import library

from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing import sequence
import plotly
import plotly.graph_objs as go
import plotly.express as px
from matplotlib import pyplot
import numpy
from sklearn.datasets import make_moons, make_circles, make_blobs
from pandas import DataFrame
from sklearn.model_selection import train_test_split
import pandas as pd

โหลด Dataset

# Load the first dataset from a CSV file in the working directory.
# (String literals use plain ASCII quotes; typographic quotes are a SyntaxError.)
dataset = pd.read_csv('games.csv')
# Inspect the number of rows/columns, then the table itself.
dataset.shape
dataset

แปลงผลเฉลยจาก String เป็น Integer

# Input features: both players' ratings and the number of turns (3 columns).
df = dataset[['white_rating', 'black_rating', 'turns']]
X = df
# Encode the 'winner' label as an integer class id so it really is a
# String -> Integer conversion (the values used to be the strings "0"/"1"/"2").
maps = {"draw": 0, "white": 1, "black": 2}
y = dataset['winner'].map(maps)

แบ่งข้อมูลเป็น 2 ชุด สำหรับ Train 80% และ Test 20%

# Shuffle the rows and hold out 20% of them; the remaining 80% is used for training.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
)
# Show the shapes of the four resulting pieces.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

แปลง Label เป็น One-hot Encoding (binary class matrix) ด้วย to_categorical

# One-hot encode the class ids for the 3-unit softmax output layer.
# num_classes is fixed at 3 so both splits always get the same number of
# columns, even if one split happens not to contain the highest class id
# (otherwise the width is inferred from the largest label in each split).
y_train = to_categorical(y_train, num_classes=3)
y_test = to_categorical(y_test, num_classes=3)

นิยาม model

# Fully connected classifier: 3 input features -> 50 -> 30 -> 3 class outputs.
# (String literals use plain ASCII quotes; typographic quotes are a SyntaxError.)
model = Sequential()
model.add(Dense(50, input_dim=3, activation='relu'))
model.add(Dense(30, activation='relu'))
model.add(Dense(3, activation='softmax'))
# Categorical cross-entropy pairs with the one-hot targets from to_categorical;
# accuracy is tracked so it can be plotted next to the loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Train Model

# Train for 200 epochs. The held-out split is passed as validation data, so the
# returned History object carries both the training and the validation curves.
his = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=200,
)

Plot Loss

# Plot training loss (blue) against validation loss (red), one point per epoch.
# (String literals use plain ASCII quotes; typographic quotes are a SyntaxError.)
plotly.offline.init_notebook_mode(connected=True)
h1 = go.Scatter(
    y=his.history['loss'],
    mode="lines",
    line=dict(width=2, color='blue'),
    name="loss",
)
h2 = go.Scatter(
    y=his.history['val_loss'],
    mode="lines",
    line=dict(width=2, color='red'),
    name="val_loss",
)
data = [h1, h2]
layout1 = go.Layout(
    title='Loss',
    xaxis=dict(title='epochs'),
    yaxis=dict(title=''),
)
fig1 = go.Figure(data=data, layout=layout1)
plotly.offline.iplot(fig1, filename="Intent Classification")

จะเห็นว่าทั้ง Training Loss และ Validation Loss มีค่าลดลงอย่างต่อเนื่อง จนถึงจุดหนึ่งจึงเริ่มคงที่

Plot Accuracy

# Plot training accuracy (blue) against validation accuracy (red) per epoch.
# (String literals use plain ASCII quotes; typographic quotes are a SyntaxError.)
h1 = go.Scatter(
    y=his.history['accuracy'],
    mode="lines",
    line=dict(width=2, color='blue'),
    name="acc",
)
h2 = go.Scatter(
    y=his.history['val_accuracy'],
    mode="lines",
    line=dict(width=2, color='red'),
    name="val_acc",
)
data = [h1, h2]
layout1 = go.Layout(
    title='Accuracy',
    xaxis=dict(title='epochs'),
    yaxis=dict(title=''),
)
fig1 = go.Figure(data=data, layout=layout1)
plotly.offline.iplot(fig1, filename="Intent Classification")

Load dataset ชุดที่ 2

# Load the second dataset from a CSV file in the working directory.
# (String literals use plain ASCII quotes; typographic quotes are a SyntaxError.)
dataset = pd.read_csv('abalone.data.csv')
# Inspect the number of rows/columns, then the table itself.
dataset.shape
dataset

แปลงผลเฉลยจาก String เป็น Integer

# Every column except 'Sex' is an input feature; 'Sex' is the target label.
X = dataset.drop(['Sex'], axis=1)
# Encode the label as an integer class id so it really is a
# String -> Integer conversion (the values used to be the strings "0"/"1"/"2").
maps = {"M": 0, "F": 1, "I": 2}
y = dataset['Sex'].map(maps)

แบ่งข้อมูลเป็น 2 ชุด สำหรับ Train 80% และ Test 20%

# Shuffle the rows and hold out 20% of them; the remaining 80% is used for training.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
)
# Show the shapes of the four resulting pieces.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

แปลง Label เป็น One-hot Encoding (binary class matrix) ด้วย to_categorical

# One-hot encode the class ids for the 3-unit softmax output layer.
# num_classes is fixed at 3 so both splits always get the same number of
# columns, even if one split happens not to contain the highest class id
# (otherwise the width is inferred from the largest label in each split).
y_train = to_categorical(y_train, num_classes=3)
y_test = to_categorical(y_test, num_classes=3)

นิยาม model

# Same architecture as before, but with 8 input features: 8 -> 50 -> 30 -> 3.
# (String literals use plain ASCII quotes; typographic quotes are a SyntaxError.)
model = Sequential()
model.add(Dense(50, input_dim=8, activation='relu'))
model.add(Dense(30, activation='relu'))
model.add(Dense(3, activation='softmax'))
# Categorical cross-entropy pairs with the one-hot targets from to_categorical;
# accuracy is tracked so it can be plotted next to the loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

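Train Model ชุดที่ 2 จำนวน 200 Epoch เหมือนเดิม

his = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=200)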

Plot Loss ชุด 2

# Plot training loss (blue) against validation loss (red) for the second dataset.
# NOTE(review): `his` must be the History returned by fitting the model built
# for this dataset; make sure model.fit has been re-run before this cell,
# otherwise the first dataset's curves are plotted again.
# (String literals use plain ASCII quotes; typographic quotes are a SyntaxError.)
plotly.offline.init_notebook_mode(connected=True)
h1 = go.Scatter(
    y=his.history['loss'],
    mode="lines",
    line=dict(width=2, color='blue'),
    name="loss",
)
h2 = go.Scatter(
    y=his.history['val_loss'],
    mode="lines",
    line=dict(width=2, color='red'),
    name="val_loss",
)
data = [h1, h2]
layout1 = go.Layout(
    title='Loss',
    xaxis=dict(title='epochs'),
    yaxis=dict(title=''),
)
fig1 = go.Figure(data=data, layout=layout1)
plotly.offline.iplot(fig1, filename="Intent Classification")

Plot Accuracy ชุดที่ 2

# Plot training accuracy (blue) against validation accuracy (red) for the
# second dataset.
# NOTE(review): `his` must be the History returned by fitting the model built
# for this dataset; make sure model.fit has been re-run before this cell,
# otherwise the first dataset's curves are plotted again.
# (String literals use plain ASCII quotes; typographic quotes are a SyntaxError.)
h1 = go.Scatter(
    y=his.history['accuracy'],
    mode="lines",
    line=dict(width=2, color='blue'),
    name="acc",
)
h2 = go.Scatter(
    y=his.history['val_accuracy'],
    mode="lines",
    line=dict(width=2, color='red'),
    name="val_acc",
)
data = [h1, h2]
layout1 = go.Layout(
    title='Accuracy',
    xaxis=dict(title='epochs'),
    yaxis=dict(title=''),
)
fig1 = go.Figure(data=data, layout=layout1)
plotly.offline.iplot(fig1, filename="Intent Classification")