
tf32: A Simple CNN Model: Training Facial Keypoints



Face-recognition tasks such as identity recognition, age estimation, gender classification, and expression recognition generally require face alignment as a first step, and face alignment in turn needs the facial keypoints:


Below is a simple piece of code written quite a while ago; you can substitute a better modern network as needed:

Data download: https://pan.baidu.com/s/1oAsq5eU (password: q952)
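For context, the alignment step itself is not covered by the code below, but once the 30 keypoints are predicted, the two eye centers alone are enough for a basic in-plane alignment. A minimal sketch of that idea (my own illustration, not part of the original files; align_by_eyes is a hypothetical helper, and the scipy rotation is just one convenient choice):

import numpy as np
from scipy.ndimage import rotate

def align_by_eyes(img, keypoints, image_size=96):
    # keypoints are the 30 normalized (x, y) values in the CSV column order
    # used below: indices 0-3 are left_eye_center_{x,y}, right_eye_center_{x,y}.
    lx, ly = keypoints[0] * image_size, keypoints[1] * image_size
    rx, ry = keypoints[2] * image_size, keypoints[3] * image_size
    # Angle of the line joining the eye centers; rotating the image by this
    # angle levels the eyes (flip the sign if your y-axis points the other way).
    angle = np.degrees(np.arctan2(ry - ly, rx - lx))
    return rotate(img.reshape(image_size, image_size), angle,
                  reshape=False, mode='nearest')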

facial_keypoints_detection.py

import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from util import *

import time
import sys


def error_measure(predictions, labels):
    return np.sum(np.power(predictions - labels, 2)) / (2 * predictions.shape[0])


if __name__ == '__main__':
    train_dataset, train_labels = load_data()
    test_dataset, _ = load_data(test=True)

    # Generate a validation set.
    validation_dataset = train_dataset[:VALIDATION_SIZE, ...]
    validation_labels = train_labels[:VALIDATION_SIZE]
    train_dataset = train_dataset[VALIDATION_SIZE:, ...]
    train_labels = train_labels[VALIDATION_SIZE:]

    train_size = train_labels.shape[0]
    print("train size is %d" % train_size)

    train_data_node = tf.placeholder(
        tf.float32,
        shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
    train_labels_node = tf.placeholder(tf.float32, shape=(BATCH_SIZE, NUM_LABELS))

    eval_data_node = tf.placeholder(
        tf.float32,
        shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))

    conv1_weights = tf.Variable(
        tf.truncated_normal([5, 5, NUM_CHANNELS, 32],  # 5x5 filter, depth 32.
                            stddev=0.1,
                            seed=SEED))
    conv1_biases = tf.Variable(tf.zeros([32]))

    conv2_weights = tf.Variable(
        tf.truncated_normal([5, 5, 32, 64],
                            stddev=0.1,
                            seed=SEED))
    conv2_biases = tf.Variable(tf.constant(0.1, shape=[64]))

    fc1_weights = tf.Variable(  # fully connected, depth 512.
                                tf.truncated_normal(
                                    [IMAGE_SIZE // 4 * IMAGE_SIZE // 4 * 64, 512],
                                    stddev=0.1,
                                    seed=SEED))
    fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))

    fc2_weights = tf.Variable(  # fully connected, depth 512.
                                tf.truncated_normal(
                                    [512, 512],
                                    stddev=0.1,
                                    seed=SEED))
    fc2_biases = tf.Variable(tf.constant(0.1, shape=[512]))

    fc3_weights = tf.Variable(
        tf.truncated_normal([512, NUM_LABELS],
                            stddev=0.1,
                            seed=SEED))
    fc3_biases = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS]))

    # We will replicate the model structure for the training subgraph, as well
    # as the evaluation subgraphs, while sharing the trainable parameters.
    def model(data, train=False):
        """The Model definition."""
        # 2D convolution, with 'SAME' padding (i.e. the output feature map has
        # the same size as the input). Note that {strides} is a 4D array whose
        # shape matches the data layout: [image index, y, x, depth].
        conv = tf.nn.conv2d(data,
                            conv1_weights,
                            strides=[1, 1, 1, 1],
                            padding='SAME')
        # conv = tf.Print(conv, [conv], "conv1: ", summarize=10)

        # Bias and rectified linear non-linearity.
        relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
        # relu = tf.Print(relu, [relu], "relu1: ", summarize=10)

        # Max pooling. The kernel size spec {ksize} also follows the layout of
        # the data. Here we have a pooling window of 2, and a stride of 2.
        pool = tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')
        # pool = tf.Print(pool, [pool], "pool1: ", summarize=10)

        conv = tf.nn.conv2d(pool,
                            conv2_weights,
                            strides=[1, 1, 1, 1],
                            padding='SAME')
        # conv = tf.Print(conv, [conv], "conv2: ", summarize=10)

        relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases))
        # relu = tf.Print(relu, [relu], "relu2: ", summarize=10)

        pool = tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')
        # pool = tf.Print(pool, [pool], "pool2: ", summarize=10)

        # Reshape the feature map cuboid into a 2D matrix to feed it to the
        # fully connected layers.
        pool_shape = pool.get_shape().as_list()
        reshape = tf.reshape(
            pool,
            [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])
        # reshape = tf.Print(reshape, [reshape], "reshape: ", summarize=10)

        # Fully connected layer. Note that the '+' operation automatically
        # broadcasts the biases.
        hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
        # hidden = tf.Print(hidden, [hidden], "hidden1: ", summarize=10)

        # Add a 50% dropout during training only. Dropout also scales
        # activations such that no rescaling is needed at evaluation time.
        if train:
            hidden = tf.nn.dropout(hidden, 0.5, seed=SEED)

        hidden = tf.nn.relu(tf.matmul(hidden, fc2_weights) + fc2_biases)
        # hidden = tf.Print(hidden, [hidden], "hidden2: ", summarize=10)

        if train:
            hidden = tf.nn.dropout(hidden, 0.5, seed=SEED)

        # return tf.nn.tanh(tf.matmul(hidden, fc3_weights) + fc3_biases)
        return tf.matmul(hidden, fc3_weights) + fc3_biases

    train_prediction = model(train_data_node, True)

    # Minimize the squared errors
    loss = tf.reduce_mean(tf.reduce_sum(tf.square(train_prediction - train_labels_node), 1))

    # L2 regularization for the fully connected parameters.
    regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                    tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases) +
                    tf.nn.l2_loss(fc3_weights) + tf.nn.l2_loss(fc3_biases))
    # Add the regularization term to the loss.
    loss += 1e-7 * regularizers

    # Predictions for the test and validation, which we'll compute less often.
    eval_prediction = model(eval_data_node)

    # Optimizer: set up a variable that's incremented once per batch and
    # controls the learning rate decay.
    global_step = tf.Variable(0, trainable=False)

    # Decay once per epoch, using an exponential schedule starting at 1e-3.
    learning_rate = tf.train.exponential_decay(
        1e-3,                      # Base learning rate.
        global_step * BATCH_SIZE,  # Current index into the dataset.
        train_size,                # Decay step.
        0.95,                      # Decay rate.
        staircase=True)
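    # With staircase=True the schedule above computes
    #   lr = 1e-3 * 0.95 ** floor((global_step * BATCH_SIZE) / train_size),
    # i.e. the learning rate drops by 5% once per epoch.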

    # train_step = tf.train.AdamOptimizer(5e-3).minimize(loss)
    # train_step = tf.train.GradientDescentOptimizer(1e-4).minimize(loss)
    # train_step = tf.train.MomentumOptimizer(1e-4, 0.95).minimize(loss)
    train_step = tf.train.AdamOptimizer(learning_rate, 0.95).minimize(loss, global_step=global_step)

    init = tf.global_variables_initializer()
    sess = tf.InteractiveSession()
    sess.run(init)

    loss_train_record = list() # np.zeros(n_epoch)
    loss_valid_record = list() # np.zeros(n_epoch)
    start_time = time.gmtime()

    # early stopping
    best_valid = np.inf
    best_valid_epoch = 0

    current_epoch = 0

    while current_epoch < NUM_EPOCHS:
        # Shuffle data
        shuffled_index = np.arange(train_size)
        np.random.shuffle(shuffled_index)
        train_dataset = train_dataset[shuffled_index]
        train_labels = train_labels[shuffled_index]

        for step in range(train_size // BATCH_SIZE):
            offset = step * BATCH_SIZE
            batch_data = train_dataset[offset:(offset + BATCH_SIZE), ...]
            batch_labels = train_labels[offset:(offset + BATCH_SIZE)]
            # This dictionary maps the batch data (as a numpy array) to the
            # node in the graph it should be fed to.
            feed_dict = {train_data_node: batch_data,
                         train_labels_node: batch_labels}
            _, loss_train, current_learning_rate = sess.run([train_step, loss, learning_rate], feed_dict=feed_dict)

        # After one epoch, make validation
        eval_result = eval_in_batches(validation_dataset, sess, eval_prediction, eval_data_node)
        loss_valid = error_measure(eval_result, validation_labels)

        print('Epoch %04d, train loss %.8f, validation loss %.8f, train/validation %0.8f, learning rate %0.8f' % (
            current_epoch,
            loss_train, loss_valid,
            loss_train / loss_valid,
            current_learning_rate
        ))
        loss_train_record.append(np.log10(loss_train))
        loss_valid_record.append(np.log10(loss_valid))
        sys.stdout.flush()

        if loss_valid < best_valid:
            best_valid = loss_valid
            best_valid_epoch = current_epoch
        elif best_valid_epoch + EARLY_STOP_PATIENCE < current_epoch:
            print("Early stopping.")
            print("Best valid loss was {:.6f} at epoch {}.".format(best_valid, best_valid_epoch))
            break

        current_epoch += 1

    print('train finish')
    end_time = time.gmtime()
    print(time.strftime('%H:%M:%S', start_time))
    print(time.strftime('%H:%M:%S', end_time))

    generate_submission(test_dataset, sess, eval_prediction, eval_data_node)

    # Show an example of comparison
    i = 0
    img = validation_dataset[i]
    lab_y = validation_labels[i]
    lab_p = eval_in_batches(validation_dataset, sess, eval_prediction, eval_data_node)[0]
    plot_sample(img, lab_p, lab_y)

    plot_learning_curve(loss_train_record, loss_valid_record)
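A note on units: load_data (in util.py below) divides the keypoint coordinates by 96, so the loss values printed above live in normalized coordinates. Assuming that normalization, the small helper below (my own addition, not part of the original files) converts the validation loss reported by error_measure back into an average pixel error:

import numpy as np

def pixel_rmse(valid_loss, num_labels=30, image_size=96):
    # error_measure sums squared errors over all 30 normalized coordinates
    # and divides by 2 * n_images, so the per-coordinate MSE is
    # valid_loss * 2 / num_labels; multiplying the RMSE by 96 undoes the
    # coordinate normalization in util.load_data.
    return image_size * np.sqrt(valid_loss * 2.0 / num_labels)

# Example: a validation loss of 0.0005 corresponds to roughly
# 96 * sqrt(0.001 / 30) ≈ 0.55 pixels of average keypoint error.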


util.py

import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import pandas as pd
import numpy as np
import joblib  # formerly sklearn.externals.joblib, removed in newer scikit-learn
from sklearn.model_selection import learning_curve  # sklearn.learning_curve was removed

FTRAIN = 'data/training.csv'
FTEST = 'data/test.csv'
FLOOKUP = 'data/IdLookupTable.csv'

BATCH_SIZE = 64
EVAL_BATCH_SIZE = 64
IMAGE_SIZE = 96
NUM_CHANNELS = 1
SEED = 66478  # Set to None for random seed.
NUM_LABELS = 30
NUM_EPOCHS = 1000
VALIDATION_SIZE = 100  # Size of the validation set.
EARLY_STOP_PATIENCE = 100


def load_data(test=False):
    fname = FTEST if test else FTRAIN
    df = pd.read_csv(fname)

    cols = df.columns[:-1]

    df['Image'] = df['Image'].apply(lambda im: np.fromstring(im, sep=' ') / 255.0)
    df = df.dropna()

    X = np.vstack(df['Image'])
    X = X.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1)
    if not test:
        # y = (df[cols].values -48) / 48.0
        y = df[cols].values / 96.0
        X, y = shuffle(X, y)
        joblib.dump(cols, 'data/cols.pkl', compress=3)

    else:
        y = None
    return X, y


def plot_sample(x, y, truth=None):
    img = x.reshape(96, 96)
    plt.imshow(img, cmap='gray')
    if y is not None:
        plt.scatter(y[0::2] * 96, y[1::2] * 96)
    if truth is not None:
        plt.scatter(truth[0::2] * 96, truth[1::2] * 96, c='r', marker='x')
    plt.savefig("data/img.png")


# Small utility function to evaluate a dataset by feeding batches of data to
# {eval_data} and pulling the results from {eval_predictions}.
# Saves memory and enables this to run on smaller GPUs.
def eval_in_batches(data, sess, eval_prediction, eval_data_node):
    """Get all predictions for a dataset by running it in small batches."""
    size = data.shape[0]
    if size < EVAL_BATCH_SIZE:
        raise ValueError("batch size for evals larger than dataset: %d" % size)
    predictions = np.ndarray(shape=(size, NUM_LABELS), dtype=np.float32)
    for begin in range(0, size, EVAL_BATCH_SIZE):
        end = begin + EVAL_BATCH_SIZE
        if end <= size:
            predictions[begin:end, :] = sess.run(
                eval_prediction,
                feed_dict={eval_data_node: data[begin:end, ...]})
        else:
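            # The final partial batch is handled by re-running the last
            # EVAL_BATCH_SIZE rows and keeping only the predictions for rows
            # not already covered (batch_predictions[begin - size:]).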
            batch_predictions = sess.run(
                eval_prediction,
                feed_dict={eval_data_node: data[-EVAL_BATCH_SIZE:, ...]})
            predictions[begin:, :] = batch_predictions[begin - size:, :]
    return predictions


def plot_learning_curve(loss_train_record, loss_valid_record):
    plt.figure()
    plt.plot(loss_train_record, label='train')
    plt.plot(loss_valid_record, c='r', label='validation')
    plt.ylabel("RMSE")
    plt.legend(loc='upper left', frameon=False)
    plt.savefig("data/learning_curve.png")


def generate_submission(test_dataset, sess, eval_prediction, eval_data_node):
    test_labels = eval_in_batches(test_dataset, sess, eval_prediction, eval_data_node)
    test_labels *= 96.0
    test_labels = test_labels.clip(0, 96)

    lookup_table = pd.read_csv(FLOOKUP)
    values = []

    cols = joblib.load('data/cols.pkl')
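    # Kaggle's lookup table requests one (ImageId, FeatureName) pair per row;
    # the cols array saved during training gives each feature name's column
    # index into the prediction matrix.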

    for index, row in lookup_table.iterrows():
        values.append((
            row['RowId'],
            test_labels[row.ImageId - 1][np.where(cols == row.FeatureName)[0][0]],
        ))
    submission = pd.DataFrame(values, columns=('RowId', 'Location'))
    submission.to_csv('data/submission.csv', index=False)


def generate_learning_curve(estimator, title, scoring,  X, y, ylim=None, cv=None,
                            n_jobs=1, train_sizes=np.linspace(.1, 1.0, 5)):
    """
    Generate a simple plot of the test and traning learning curve.
    Parameters
    ----------
    estimator : object type that implements the "fit" and "predict" methods
    An object of that type which is cloned for each validation.
    title : string
    Title for the chart.
    X : array-like, shape (n_samples, n_features)
    Training vector, where n_samples is the number of samples and
    n_features is the number of features.
    y : array-like, shape (n_samples) or (n_samples, n_features), optional
    Target relative to X for classification or regression;
    None for unsupervised learning.
    ylim : tuple, shape (ymin, ymax), optional
    Defines minimum and maximum yvalues plotted.
    cv : integer, cross-validation generator, optional
    If an integer is passed, it is the number of folds (defaults to 3).
    Specific cross-validation objects can be passed, see
    sklearn.cross_validation module for the list of possible objects
    n_jobs : integer, optional
    Number of jobs to run in parallel (default 1).
    """
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, scoring=scoring, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    plt.grid()
    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1,
                     color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1, color="g")
    plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
             label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
             label="Cross-validation score")
    plt.legend(loc="best")
    plt.savefig("data/learning_curve.png")


def make_submission(test_labels):
    test_labels *= 96.0
    test_labels = test_labels.clip(0, 96)

    lookup_table = pd.read_csv(FLOOKUP)
    values = []

    cols = joblib.load('data/cols.pkl')

    for index, row in lookup_table.iterrows():
        values.append((
            row['RowId'],
            test_labels[row.ImageId - 1][np.where(cols == row.FeatureName)[0][0]],
        ))
    submission = pd.DataFrame(values, columns=('RowId', 'Location'))
    submission.to_csv('data/submission.csv', index=False)


def load_dataframe(test=False):
    fname = FTEST if test else FTRAIN
    df = pd.read_csv(fname)
    cols = df.columns[:-1]
    df['Image'] = df['Image'].apply(lambda im: np.fromstring(im, sep=' ') / 255.0)

    if not test:
        df[cols] = df[cols].apply(lambda y: y / 96.0)
    return df


def extract_test_data(df):
    X = np.vstack(df['Image'].values)
    X = X.astype(np.float32)
    X = X.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1)
    return X

def extract_train_data(df, flip_indices, cols):
    data = df[list(cols) + ['Image']].copy()
    data = data.dropna()

    X = np.vstack(data['Image'].values)
    X = X.astype(np.float32)
    X = X.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1)

    y = data[data.columns[:-1]].values
    if len(flip_indices) != 0:
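        # Mirror each image left-right; since coordinates are normalized to
        # [0, 1], every x-coordinate (the even columns of y) becomes 1 - x,
        # and flip_indices swaps the left/right column pairs so the labels
        # keep their meaning.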
        X_flip = X[:, :, ::-1, :]
        X = np.vstack([X, X_flip])
        y_flip = y.copy()
        y_flip[:, ::2] *= -1
        y_flip[:, ::2] += 1
        for a, b in flip_indices:
            y_flip[:, [a, b]] = y_flip[:, [b, a]]

        y = np.vstack([y, y_flip])

    X, y = shuffle(X, y, random_state=42)  # shuffle train data
    y = y.astype(np.float32)

    return X, y


def create_submission(predicted_labels, columns):
    predicted_labels *= 96.0
    predicted_labels = predicted_labels.clip(0, 96)
    df = pd.DataFrame(predicted_labels, columns=columns)
    lookup_table = pd.read_csv(FLOOKUP)
    values = []

    for index, row in lookup_table.iterrows():
        values.append((
            row['RowId'],
            df.loc[row.ImageId - 1, row.FeatureName],
        ))

    submission = pd.DataFrame(values, columns=('RowId', 'Location'))
    submission.to_csv("data/submission.csv", index=False)


SPECIALIST_SETTINGS = [
    dict(
        columns=(
            'left_eye_center_x', 'left_eye_center_y',
            'right_eye_center_x', 'right_eye_center_y',
        ),
        flip_indices=((0, 2), (1, 3)),
        ),

    dict(
        columns=(
            'nose_tip_x', 'nose_tip_y',
        ),
        flip_indices=(),
        ),

    dict(
        columns=(
            'mouth_left_corner_x', 'mouth_left_corner_y',
            'mouth_right_corner_x', 'mouth_right_corner_y',
            'mouth_center_top_lip_x', 'mouth_center_top_lip_y',
        ),
        flip_indices=((0, 2), (1, 3)),
        ),

    dict(
        columns=(
            'mouth_center_bottom_lip_x',
            'mouth_center_bottom_lip_y',
        ),
        flip_indices=(),
        ),

    dict(
        columns=(
            'left_eye_inner_corner_x', 'left_eye_inner_corner_y',
            'right_eye_inner_corner_x', 'right_eye_inner_corner_y',
            'left_eye_outer_corner_x', 'left_eye_outer_corner_y',
            'right_eye_outer_corner_x', 'right_eye_outer_corner_y',
        ),
        flip_indices=((0, 2), (1, 3), (4, 6), (5, 7)),
        ),

    dict(
        columns=(
            'left_eyebrow_inner_end_x', 'left_eyebrow_inner_end_y',
            'right_eyebrow_inner_end_x', 'right_eyebrow_inner_end_y',
            'left_eyebrow_outer_end_x', 'left_eyebrow_outer_end_y',
            'right_eyebrow_outer_end_x', 'right_eyebrow_outer_end_y',
        ),
        flip_indices=((0, 2), (1, 3), (4, 6), (5, 7)),
        ),
    ]
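The flip_indices above pair up the label columns that exchange meaning under a horizontal flip (left eye becomes right eye, and so on), which is exactly what extract_train_data uses to double the training set. A short usage sketch, assuming the training CSV is in place:

df = load_dataframe()                       # normalized training dataframe
setting = SPECIALIST_SETTINGS[0]            # the eye-center specialist
X, y = extract_train_data(df, setting['flip_indices'], setting['columns'])
# X now holds each image twice (original + mirrored); in the mirrored half
# every x-coordinate became 1 - x and the column pairs in flip_indices were
# swapped, so the labels remain semantically correct.
print(X.shape, y.shape)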

The facial_keypoints_detection.py above can be replaced with the following version, which reaches a higher accuracy:

#coding=utf-8
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from util import *

import time
import sys
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"


def error_measure(predictions, labels):
    return np.sum(np.power(predictions - labels, 2)) / (2 * predictions.shape[0])

def fully_connected(prev_layer, num_units):

    layer = tf.layers.dense(prev_layer, num_units, use_bias=True, activation=None)
    # layer = tf.layers.batch_normalization(layer, training=is_training)
    layer = tf.nn.relu(layer)
    return layer


def conv_layer(prev_layer, layer_depth):

    strides = 2 if layer_depth % 3 == 0 else 1
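    # With the depths used below (2, 4, 8, 16) none is divisible by 3, so
    # strides stays 1 and all spatial downsampling comes from the max-pool
    # layers in model().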
    conv_layer = tf.layers.conv2d(prev_layer, layer_depth*16, 3, strides, 'same', use_bias=True, activation=None)
    # conv_layer = tf.layers.batch_normalization(conv_layer, training=is_training)
    conv_layer = tf.nn.relu(conv_layer)

    return conv_layer


if __name__ == '__main__':
    train_dataset, train_labels = load_data()
    test_dataset, _ = load_data(test=True)

    # Generate a validation set.
    validation_dataset = train_dataset[:VALIDATION_SIZE, ...]
    validation_labels = train_labels[:VALIDATION_SIZE]
    train_dataset = train_dataset[VALIDATION_SIZE:, ...]
    train_labels = train_labels[VALIDATION_SIZE:]

    train_size = train_labels.shape[0]
    print("train size is %d" % train_size)

    train_data_node = tf.placeholder(
        tf.float32,
        shape=(None, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
    train_labels_node = tf.placeholder(tf.float32, shape=(None, NUM_LABELS))

    eval_data_node = tf.placeholder(
        tf.float32,
        shape=(None, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))


    # We will replicate the model structure for the training subgraph, as well
    # as the evaluation subgraphs, while sharing the trainable parameters.
    def model(data, train=False):
        """The Model definition."""

        # Feed the inputs into a stack of convolution + max-pool blocks.
        layer = tf.reshape(data, shape=[-1, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS])
        for layer_i in [2, 4, 8, 16]:
            for n in range(1):
                layer = conv_layer(layer, layer_i)
            layer = tf.nn.max_pool(layer, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

        # Flatten the output from the convolutional layers
        orig_shape = layer.get_shape().as_list()
        layer = tf.reshape(layer, shape=[-1, orig_shape[1] * orig_shape[2] * orig_shape[3]])

        # Add one fully connected layer
        layer = fully_connected(layer, 512)
        # layer = tf.nn.dropout(layer, keep_prob_fc)
        if train:
            layer = tf.nn.dropout(layer, 0.5)
        else:
            layer = tf.nn.dropout(layer, 1.0)
        layer = fully_connected(layer, 512)

        if train:
            layer = tf.nn.dropout(layer, 0.5)
        else:
            layer = tf.nn.dropout(layer, 1.0)

        logits = tf.layers.dense(layer, NUM_LABELS)

        #if test:
        #    logits = tf.nn.sigmoid(logits)

        return logits


    train_prediction = model(train_data_node, True)

    # Minimize the squared errors
    loss = tf.reduce_mean(tf.reduce_sum(tf.square(train_prediction - train_labels_node), 1))

    # L2 regularization for the fully connected parameters.
    # regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
    #                tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases) +
    #                tf.nn.l2_loss(fc3_weights) + tf.nn.l2_loss(fc3_biases))
    # Add the regularization term to the loss.
    # loss += 1e-7 * regularizers

    # Predictions for the test and validation, which we'll compute less often.
    eval_prediction = model(eval_data_node)

    # Optimizer: set up a variable that's incremented once per batch and
    # controls the learning rate decay.
    global_step = tf.Variable(0, trainable=False)

    # With a decay rate of 1.0 the schedule below keeps the base rate of 1e-3 constant.
    learning_rate = tf.train.exponential_decay(
        1e-3,                      # Base learning rate.
        global_step * BATCH_SIZE,  # Current index into the dataset.
        train_size,                # Decay step.
        1.0,                      # Decay rate.
        staircase=True)

    # train_step = tf.train.AdamOptimizer(5e-3).minimize(loss)
    # train_step = tf.train.GradientDescentOptimizer(1e-4).minimize(loss)
    # train_step = tf.train.MomentumOptimizer(1e-4, 0.95).minimize(loss)
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

    init = tf.global_variables_initializer()
    sess = tf.InteractiveSession()
    sess.run(init)

    loss_train_record = list() # np.zeros(n_epoch)
    loss_valid_record = list() # np.zeros(n_epoch)
    start_time = time.gmtime()

    # early stopping
    best_valid = np.inf
    best_valid_epoch = 0

    current_epoch = 0

    while current_epoch < NUM_EPOCHS:
        # Shuffle data
        shuffled_index = np.arange(train_size)
        np.random.shuffle(shuffled_index)
        train_dataset = train_dataset[shuffled_index]
        train_labels = train_labels[shuffled_index]

        for step in range(train_size // BATCH_SIZE):
            offset = step * BATCH_SIZE
            batch_data = train_dataset[offset:(offset + BATCH_SIZE), ...]
            batch_labels = train_labels[offset:(offset + BATCH_SIZE)]
            # This dictionary maps the batch data (as a numpy array) to the
            # node in the graph it should be fed to.
            feed_dict = {train_data_node: batch_data,
                         train_labels_node: batch_labels}
            _, loss_train, current_learning_rate = sess.run([train_step, loss, learning_rate], feed_dict=feed_dict)

        # After one epoch, run validation on the dropout-free eval graph;
        # running the loss tensor directly would route the validation set
        # through the training graph, where dropout is active.
        eval_result = eval_in_batches(validation_dataset, sess, eval_prediction, eval_data_node)
        loss_valid = error_measure(eval_result, validation_labels)

        print('Epoch %04d, train loss %.8f, validation loss %.8f, train/validation %0.8f, learning rate %0.8f' % (
            current_epoch,
            loss_train, loss_valid,
            loss_train / loss_valid,
            current_learning_rate
        ))
        loss_train_record.append(np.log10(loss_train))
        loss_valid_record.append(np.log10(loss_valid))
        sys.stdout.flush()

        if loss_valid < best_valid:
            best_valid = loss_valid
            best_valid_epoch = current_epoch
        elif best_valid_epoch + EARLY_STOP_PATIENCE < current_epoch:
            print("Early stopping.")
            print("Best valid loss was {:.6f} at epoch {}.".format(best_valid, best_valid_epoch))
            break

        current_epoch += 1

    print('train finish')
    end_time = time.gmtime()
    print(time.strftime('%H:%M:%S', start_time))
    print(time.strftime('%H:%M:%S', end_time))

    # Generate test predictions through the dropout-free eval graph, not the
    # training graph.
    generate_submission(test_dataset, sess, eval_prediction, eval_data_node)

    # Show an example of comparison
    i = 0
    img = validation_dataset[i]
    lab_y = validation_labels[i]
    lab_p = eval_in_batches(validation_dataset, sess, eval_prediction, eval_data_node)[0]
    plot_sample(img, lab_p, lab_y)

    plot_learning_curve(loss_train_record, loss_valid_record)
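The batch-normalization calls are left commented out in fully_connected and conv_layer above. If you want to enable them in TF 1.x, a training flag and the moving-average update ops have to be wired in explicitly; below is a self-contained sketch (the is_training placeholder and conv_layer_bn are my additions, not part of the original script):

import tensorflow as tf

is_training = tf.placeholder(tf.bool, name='is_training')
images = tf.placeholder(tf.float32, shape=(None, 96, 96, 1))
targets = tf.placeholder(tf.float32, shape=(None, 30))

def conv_layer_bn(prev_layer, layer_depth):
    strides = 2 if layer_depth % 3 == 0 else 1
    # Batch norm supplies its own learned offset, so the conv bias is dropped.
    layer = tf.layers.conv2d(prev_layer, layer_depth * 16, 3, strides, 'same',
                             use_bias=False, activation=None)
    layer = tf.layers.batch_normalization(layer, training=is_training)
    return tf.nn.relu(layer)

net = images
for depth in [2, 4, 8, 16]:
    net = conv_layer_bn(net, depth)
    net = tf.nn.max_pool(net, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
net = tf.layers.dense(tf.layers.flatten(net), 30)
loss = tf.reduce_mean(tf.reduce_sum(tf.square(net - targets), 1))

# Batch norm's moving-average updates live in UPDATE_OPS and must run with
# each training step, otherwise the statistics used at inference never move.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_step = tf.train.AdamOptimizer(1e-3).minimize(loss)
# Feed is_training=True for training batches and False for evaluation.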


Recommended reading:

TensorFlow series:

1. Ubuntu 16.04: installing TensorFlow (with GPU support)
2. Implementing a neural network in Python
3. tf1: comment classification with an NN
4. tf2: comment classification with an NN and a CNN
5. tf3: RNN - MNIST recognition
6. tf4: CNN - MNIST recognition
7. tf5: Deep Q Network - game-playing AI
8. tf6: autoencoder - indoor positioning from WiFi fingerprints
9. tf7: RNN - classical Chinese poetry
10. tf8: RNN - music generation
11. tf9: PixelCNN
12. tf10: Google Deep Dream
13. tf11: retraining Google's Inception model
14. tf12: classifying male and female voices
15. tf13: a simple chatbot
16. tf14: colorizing black-and-white images
17. tf15: Chinese speech recognition
18. tf16: gender and age recognition from facial features
19. tf17: voice conversion
20. tf18: predicting gender from names
21. tf19: predicting railway passenger volume
22. tf20: CNN - recognizing character CAPTCHAs
23. tf21: ID card recognition - reading the ID number
24. tf22: OCR - recognizing variable-length digit strings
25. tf23: a face-detection "prank"
26. tf24: GANs - generating celebrity faces
27. tf25: reading comprehension and cloze tests with deep learning
28. tf26: an AI trader
29. tensorflow_cookbook--preface
30. 01 Getting started with TensorFlow (1)
31. 01 Getting started with TensorFlow (2)
32. 02 The TensorFlow Way (1)
33. 02 The TensorFlow Way (2)
34. 02 The TensorFlow Way (3)
35. 03 Linear Regression
36. 04 Support Vector Machines
37. tf API study 1: overview of tf.nn, tf.layers, tf.contrib
38. tf API study 2: math
39. Upsampling (unpool) and deconvolution (conv2d_transpose) in TensorFlow
40. tf API study 3: Building Graphs
41. tf API study 4: Inputs and Readers
42. tf API study 5: Data IO
43. tf API study 6: Running Graphs
44. The difference between tf.contrib.rnn.static_rnn and tf.nn.dynamic_rnn
45. Prediction and training in TensorFlow with pretrained resnet_v2_50, resnet_v2_101, resnet_v2_152 models
46. Configuring TensorFlow to use a specific GPU, multiple GPUs, or the CPU
47. Industrial component detection and recognition
48. Saving TF-trained weights as CKPT and PB, converting CKPT to PB, freezing weights into the graph, and predicting with the resulting model
49. Quickly fetching all variables and computing the L2 norm in TensorFlow
50. cnn+rnn+attention
51. Hands-on TensorFlow study notes
52. tf27: Deep Dream - applied to video
53. tf28: handwritten Chinese character recognition
54. tf29: visualizing inception_v4 with TensorBoard
55. tf30: center loss and its application to MNIST
56. tf31: predicting Tencent online user counts with a Keras LSTM
57. tf32: a simple CNN model: facial keypoint training
58. tf33: image denoising with a convolutional autoencoder