Keras - MNIST Classification
[Deep Learning] MNIST classification with the SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax, and Nadam optimizers
import keras
Using TensorFlow backend.
Loading the MNIST data
from keras.datasets import mnist
import numpy as np
np.random.seed(777)
# x : 28 x 28 digit images
# y : labels
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train.shape
(60000, 28, 28)
x_test.shape
(10000, 28, 28)
# set up shape variables
n_train, width, height = x_train.shape
n_test, _, _ = x_test.shape
Inspecting the data
%matplotlib inline
import matplotlib.pyplot as plt
plt.figure(figsize=(12,6))
plt.subplot(131)
plt.imshow(x_train[0,], cmap='gray')
plt.subplot(132)
plt.imshow(x_train[1,], cmap='gray')
plt.subplot(133)
plt.imshow(x_train[2,], cmap='gray')
(figure: the first three training digits rendered as grayscale images)
y_train[:23,]
array([5, 0, 4, 1, 9, 2, 1, 3, 1, 4, 3, 5, 3, 6, 1, 7, 2, 8, 6, 9, 4, 0, 9], dtype=uint8)
Data preprocessing
Input
input_train = x_train.reshape(n_train, width*height) # flatten each 28 x 28 image into a 784-element vector
input_train.shape
(60000, 784)
input_train = input_train.astype('float32') # cast to float32 (astype returns a new array, so reassign it)
input_train
array([[ 0., 0., 0., ..., 0., 0., 0.],
[ 0., 0., 0., ..., 0., 0., 0.],
[ 0., 0., 0., ..., 0., 0., 0.],
...,
[ 0., 0., 0., ..., 0., 0., 0.],
[ 0., 0., 0., ..., 0., 0., 0.],
[ 0., 0., 0., ..., 0., 0., 0.]], dtype=float32)
x_train[0][6] # raw pixel values in row 6 of the first digit image
array([ 0, 0, 0, 0, 0, 0, 0, 0, 30, 36, 94, 154, 170,
253, 253, 253, 253, 253, 225, 172, 253, 242, 195, 64, 0, 0,
0, 0], dtype=uint8)
input_train = input_train / 255.0 # raw pixels are 0-255 intensity values, so scale to the 0-1 range
input_train.max()
1.0
Apply the same preprocessing to the test data
input_test = x_test.reshape(n_test, width*height)
input_test = input_test.astype('float32')
input_test = input_test / 255.0
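A quick sanity check that both preprocessed inputs line up (illustrative asserts, not from the original notebook):
assert input_train.shape == (60000, 784) and input_test.shape == (10000, 784)
assert input_train.max() <= 1.0 and input_test.max() <= 1.0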
Output
# Train
output_train = keras.utils.to_categorical(y_train, 10) # one-hot encode each label as a 10-element 0/1 vector (dummy coding)
output_train[1] # y_train[1] is 0, so position 0 holds the 1
array([ 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
# Test
output_test = keras.utils.to_categorical(y_test, 10)
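As a check on what to_categorical does, it is equivalent to picking rows of a 10 x 10 identity matrix. A minimal sketch using the np import from above (onehot_manual is an illustrative name):
onehot_manual = np.eye(10)[y_train]                 # row k of the identity matrix is the one-hot vector for class k
print(np.array_equal(onehot_manual, output_train))  # expected: True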
1. Simple Model
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import RMSprop
model = Sequential()
model.add(Dense(392, activation='tanh', input_shape=(784,))) # hidden layer: 392 units with tanh activation
model.add(Dense(10, activation='softmax'))
model.summary()
# 784 input, 392 hidden, and 10 output nodes.
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_1 (Dense) (None, 392) 307720
_________________________________________________________________
dense_2 (Dense) (None, 10) 3930
=================================================================
Total params: 311,650.0
Trainable params: 311,650
Non-trainable params: 0.0
_________________________________________________________________
model.compile(loss='categorical_crossentropy', # loss = cost function
optimizer=RMSprop(), # optimizer
metrics=['accuracy'])
Model training
batch_size = 128
epochs = 2
history = model.fit(input_train, output_train,
batch_size = batch_size,
epochs = epochs,
verbose = 1,
validation_data = (input_test, output_test))
Train on 60000 samples, validate on 10000 samples
Epoch 1/2
60000/60000 [==============================] - 12s - loss: 0.3335 - acc: 0.9028 - val_loss: 0.2440 - val_acc: 0.9269
Epoch 2/2
60000/60000 [==============================] - 13s - loss: 0.1745 - acc: 0.9494 - val_loss: 0.1442 - val_acc: 0.9578
history.history
{'acc': [0.90275000003178918, 0.94936666669845582],
'loss': [0.3335050887266795, 0.17452930697997412],
'val_acc': [0.92689999999999995, 0.95779999999999998],
'val_loss': [0.24401760475635528, 0.1441631257534027]}
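The per-epoch values stored in history.history can be plotted directly. A minimal sketch using the matplotlib import from above; the key names 'acc'/'val_acc' match the output shown here (newer Keras versions name them 'accuracy'/'val_accuracy'):
plt.plot(history.history['acc'], label='train acc')
plt.plot(history.history['val_acc'], label='val acc')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()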
Test set evaluation
score = model.evaluate(input_test, output_test, verbose=0) # loss value & metrics value
print('Accuracy :', score[1])
Accuracy : 0.9578
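To look at individual predictions rather than the aggregate score, a minimal sketch (pred_probs and pred_labels are illustrative names):
pred_probs = model.predict(input_test[:5])   # softmax output, shape (5, 10)
pred_labels = np.argmax(pred_probs, axis=1)  # most probable class per sample
print(pred_labels)  # predicted digits
print(y_test[:5])   # true digits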
2. Optimizer Experiments
https://keras.io/optimizers/#parameters-common-to-all-keras-optimizers
nn_model = Sequential()
nn_model.add(Dense(392, activation='relu', input_shape=(784,)))
nn_model.add(Dense(392, activation='relu'))
nn_model.add(Dense(10, activation='softmax')) # output
nn_model.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_3 (Dense) (None, 392) 307720
_________________________________________________________________
dense_4 (Dense) (None, 392) 154056
_________________________________________________________________
dense_5 (Dense) (None, 10) 3930
=================================================================
Total params: 465,706.0
Trainable params: 465,706
Non-trainable params: 0.0
_________________________________________________________________
from keras.optimizers import SGD, Adagrad, Adadelta, Adam, Adamax, Nadam
batch_size = 128
epochs = 3
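One caveat before the runs: the experiments below reuse the same nn_model, so each optimizer starts from the weights the previous run ended with; that is why the epoch-1 losses get steadily lower down the page. For a fair side-by-side comparison, one could rebuild a fresh model per optimizer. build_model below is a hypothetical helper sketching that, reusing the Sequential/Dense imports from above:
def build_model():
    # a fresh, randomly initialized copy of the two-hidden-layer network above
    m = Sequential()
    m.add(Dense(392, activation='relu', input_shape=(784,)))
    m.add(Dense(392, activation='relu'))
    m.add(Dense(10, activation='softmax'))
    return m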
Model 01 - SGD
- Stochastic gradient descent optimizer
- decay : how much the learning rate is reduced at each update step
- nesterov : whether to apply Nesterov momentum (update rule sketched below)
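A minimal numpy sketch of what these parameters control, minimizing f(w) = w^2; this is an illustration with assumed constants, not the exact Keras kernel, and it uses classical momentum (nesterov=True additionally evaluates the gradient at a look-ahead point):
w, v, lr = 5.0, 0.0, 0.1
for step in range(200):
    grad = 2 * w             # gradient of f(w) = w^2
    v = 0.9 * v - lr * grad  # momentum accumulates past gradients
    w = w + v
    lr = lr / (1.0 + 1e-6)   # decay: the learning rate shrinks a little each step
print(w)                     # close to the minimum at 0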
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
nn_model.compile(loss='categorical_crossentropy',
optimizer=sgd,
metrics=['accuracy'])
nn_model.fit(input_train, output_train,
batch_size = batch_size,
epochs = epochs)
score = nn_model.evaluate(input_test, output_test, verbose=0)
print('\nAccuracy :', score[1])
Epoch 1/3
60000/60000 [==============================] - 14s - loss: 0.4346 - acc: 0.8798
Epoch 2/3
60000/60000 [==============================] - 14s - loss: 0.2006 - acc: 0.9417
Epoch 3/3
60000/60000 [==============================] - 14s - loss: 0.1466 - acc: 0.9574
Accuracy : 0.9597
Model 02 - RMSprop
- Parameters other than the learning rate are usually best left at their default values.
- Generally performs well on recurrent neural networks (RNNs).
- rho : decay rate for the moving average of squared gradients
- epsilon : a small constant for numerical stability, not a learning rate (update sketched below)
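A minimal numpy sketch of the RMSprop update on f(w) = w^2 (illustrative constants, not the exact Keras kernel):
w, avg_sq, lr, rho, eps = 5.0, 0.0, 0.01, 0.9, 1e-8
for step in range(1000):
    grad = 2 * w
    avg_sq = rho * avg_sq + (1 - rho) * grad ** 2  # moving average of squared gradients; rho is its decay rate
    w = w - lr * grad / (np.sqrt(avg_sq) + eps)    # eps only guards against division by zero
print(w)                                           # close to the minimum at 0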
rmsprop = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
nn_model.compile(loss='categorical_crossentropy',
optimizer=rmsprop,
metrics=['accuracy'])
nn_model.fit(input_train, output_train,
batch_size = batch_size,
epochs = epochs)
score = nn_model.evaluate(input_test, output_test, verbose=0)
print('\nAccuracy :', score[1])
Epoch 1/3
60000/60000 [==============================] - 17s - loss: 0.1402 - acc: 0.9583
Epoch 2/3
60000/60000 [==============================] - 16s - loss: 0.0739 - acc: 0.9771
Epoch 3/3
60000/60000 [==============================] - 16s - loss: 0.0522 - acc: 0.9843
Accuracy : 0.9785
Model 03 - Adagrad
adagrad = Adagrad(lr=0.01, epsilon=1e-08, decay=0.0)
nn_model.compile(loss='categorical_crossentropy',
optimizer=adagrad,
metrics=['accuracy'])
nn_model.fit(input_train, output_train,
batch_size = batch_size,
epochs = epochs)
score = nn_model.evaluate(input_test, output_test, verbose=0)
print('\nAccuracy :', score[1])
Epoch 1/3
60000/60000 [==============================] - 16s - loss: 0.0906 - acc: 0.9845
Epoch 2/3
60000/60000 [==============================] - 16s - loss: 0.0132 - acc: 0.9966
Epoch 3/3
60000/60000 [==============================] - 16s - loss: 0.0079 - acc: 0.9982
Accuracy : 0.9841
Model 04 - Adadelta
adadelta = Adadelta(lr=1.0, rho=0.95, epsilon=1e-08, decay=0.0)
nn_model.compile(loss='categorical_crossentropy',
optimizer=adadelta,
metrics=['accuracy'])
nn_model.fit(input_train, output_train,
batch_size = batch_size,
epochs = epochs)
score = nn_model.evaluate(input_test, output_test, verbose=0)
print('\nAccuracy :', score[1])
Epoch 1/3
60000/60000 [==============================] - 22s - loss: 0.0055 - acc: 0.9988
Epoch 2/3
60000/60000 [==============================] - 22s - loss: 0.0040 - acc: 0.9992
Epoch 3/3
60000/60000 [==============================] - 23s - loss: 0.0032 - acc: 0.9993
Accuracy : 0.9849
Model 05 - Adam
- beta_1 & beta_2 : floats, 0 < beta < 1; generally close to 1 (update rule sketched below)
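A minimal numpy sketch of one Adam step on f(w) = w^2 (illustrative constants, not the exact Keras kernel):
w, m_t, v_t, t = 5.0, 0.0, 0.0, 0
lr, b1, b2, eps = 0.01, 0.9, 0.999, 1e-8
for _ in range(1000):
    t += 1
    grad = 2 * w
    m_t = b1 * m_t + (1 - b1) * grad       # 1st moment: running mean of gradients
    v_t = b2 * v_t + (1 - b2) * grad ** 2  # 2nd moment: running mean of squared gradients
    m_hat = m_t / (1 - b1 ** t)            # bias correction for the zero initialization
    v_hat = v_t / (1 - b2 ** t)
    w = w - lr * m_hat / (np.sqrt(v_hat) + eps)
print(w)                                   # close to the minimum at 0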
adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
nn_model.compile(loss='categorical_crossentropy',
optimizer=adam,
metrics=['accuracy'])
nn_model.fit(input_train, output_train,
batch_size = batch_size,
epochs = epochs)
score = nn_model.evaluate(input_test, output_test, verbose=0)
print('\nAccuracy :', score[1])
Epoch 1/3
60000/60000 [==============================] - 20s - loss: 0.0297 - acc: 0.9900
Epoch 2/3
60000/60000 [==============================] - 19s - loss: 0.0218 - acc: 0.9929
Epoch 3/3
60000/60000 [==============================] - 19s - loss: 0.0164 - acc: 0.9948
Accuracy : 0.9831
Model 06 - Adamax
- a variant of Adam based on the infinity norm (sketched below)
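A minimal numpy sketch of the Adamax update on f(w) = w^2: the second-moment average is replaced by an exponentially weighted infinity norm (illustrative constants, not the exact Keras kernel):
w, m_t, u_t, t = 5.0, 0.0, 0.0, 0
lr, b1, b2, eps = 0.01, 0.9, 0.999, 1e-8
for _ in range(1000):
    t += 1
    grad = 2 * w
    m_t = b1 * m_t + (1 - b1) * grad  # 1st moment, as in Adam
    u_t = max(b2 * u_t, abs(grad))    # infinity norm replaces Adam's 2nd moment
    w = w - (lr / (1 - b1 ** t)) * m_t / (u_t + eps)
print(w)                              # close to the minimum at 0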
adamax = Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
nn_model.compile(loss='categorical_crossentropy',
optimizer=adamax,
metrics=['accuracy'])
nn_model.fit(input_train, output_train,
batch_size = batch_size,
epochs = epochs)
score = nn_model.evaluate(input_test, output_test, verbose=0)
print('\nAccuracy :', score[1])
Epoch 1/3
60000/60000 [==============================] - 17s - loss: 0.0054 - acc: 0.9984
Epoch 2/3
60000/60000 [==============================] - 17s - loss: 0.0012 - acc: 0.9998
Epoch 3/3
60000/60000 [==============================] - 17s - loss: 6.6886e-04 - acc: 1.0000
Accuracy : 0.985
Model 07 - Nadam
- Nesterov Adam optimizer: Adam with Nesterov momentum
nadam = Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004)
nn_model.compile(loss='categorical_crossentropy',
optimizer=nadam,
metrics=['accuracy'])
nn_model.fit(input_train, output_train,
batch_size = batch_size,
epochs = epochs)
score = nn_model.evaluate(input_test, output_test, verbose=0)
print('\nAccuracy :', score[1])
Epoch 1/3
60000/60000 [==============================] - 22s - loss: 0.0438 - acc: 0.9869
Epoch 2/3
60000/60000 [==============================] - 23s - loss: 0.0311 - acc: 0.9902
Epoch 3/3
60000/60000 [==============================] - 23s - loss: 0.0276 - acc: 0.9917
Accuracy : 0.9776
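For reference, the test accuracies from the runs above (bearing in mind that the same nn_model kept training from one run to the next, so later optimizers start from already-trained weights):
Optimizer   Test accuracy
SGD         0.9597
RMSprop     0.9785
Adagrad     0.9841
Adadelta    0.9849
Adam        0.9831
Adamax      0.985
Nadam       0.9776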