欢迎您访问程序员文章站!本站旨在为大家提供并分享程序员计算机编程知识。
您现在的位置是: 首页

利用深度学习进行时间序列分类

程序员文章站 2022-07-12 08:11:10
...

问题描述:有一批tsv格式(或csv格式)的时间序列数据,需要按照标签进行分类
所用语言:python
数据类型:tsv格式,可以用Excel打开,数据的第0列为类别标签,第1列往后为时间序列数据。
利用深度学习进行时间序列分类
代码中用到的示例数据下载地址:
代码:

from __future__ import print_function
 
from keras.models import Model
from keras.utils import np_utils
import numpy as np
import pandas as pd
import keras 
from keras.callbacks import ReduceLROnPlateau
import matplotlib.pyplot as plt
      

# Number of training epochs.
nb_epochs = 1000

# Load the training and test sets. Column 0 holds the class label and the
# remaining columns hold the time-series values.
# The files are tab-separated; for a CSV file change the delimiter to ','.
# Each file is parsed once and then sliced, rather than being read a second
# time just to extract the label column.
train_data = np.loadtxt('./Adiac/Ham_TRAIN.tsv', delimiter='\t')
test_data = np.loadtxt('./Adiac/Ham_TEST.tsv', delimiter='\t')
x_train, y_train = train_data[:, 1:], train_data[:, 0]
x_test, y_test = test_data[:, 1:], test_data[:, 0]



# Distinct class labels (np.unique returns the sorted, de-duplicated values).
# Both splits are considered so the class count stays correct even when a
# class happens to be missing from one of them.
class_labels = np.unique(np.concatenate((y_train, y_test)))
nb_classes = len(class_labels)

# Mini-batch size: a tenth of the training set, capped at 16. The floor
# division (//) yields 0 for fewer than 10 samples, so clamp to at least 1.
batch_size = max(1, min(x_train.shape[0] // 10, 16))

# Map each raw label to its index in class_labels, giving integers in
# 0 .. nb_classes-1. Unlike min-max rescaling this also works when the raw
# labels are not evenly spaced or when only one class is present.
y_train = np.searchsorted(class_labels, y_train)
y_test = np.searchsorted(class_labels, y_test)

# Convert the integer class indices to one-hot vectors.
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

# z-score standardisation using the mean and standard deviation of the
# training set; the same statistics are applied to the test set.
x_train_mean = x_train.mean()
x_train_std = x_train.std()
x_train = (x_train - x_train_mean) / x_train_std
x_test = (x_test - x_train_mean) / x_train_std

# Append two singleton axes: (samples, length) -> (samples, length, 1, 1),
# the 4-D layout consumed by the Conv2D layers.
x_train = x_train.reshape(x_train.shape + (1, 1))
x_test = x_test.reshape(x_test.shape + (1, 1))

# Convolutional classifier: three Conv -> BatchNorm -> ReLU stages, global
# average pooling, then a softmax layer with one unit per class.
# The Keras 2 argument names are used (kernel_size tuple, padding=, inputs=,
# outputs=) instead of the deprecated Keras 1 spellings. The original
# positional (8, 1) / (5, 1) / (3, 1) arguments were kernel rows/cols, so the
# kernels span 8, 5 and 3 steps along the first (time) axis.
x = keras.layers.Input(x_train.shape[1:])

conv1 = keras.layers.Conv2D(128, (8, 1), padding='same')(x)
conv1 = keras.layers.BatchNormalization()(conv1)
conv1 = keras.layers.Activation('relu')(conv1)

conv2 = keras.layers.Conv2D(256, (5, 1), padding='same')(conv1)
conv2 = keras.layers.BatchNormalization()(conv2)
conv2 = keras.layers.Activation('relu')(conv2)

conv3 = keras.layers.Conv2D(128, (3, 1), padding='same')(conv2)
conv3 = keras.layers.BatchNormalization()(conv3)
conv3 = keras.layers.Activation('relu')(conv3)

full = keras.layers.GlobalAveragePooling2D()(conv3)
out = keras.layers.Dense(nb_classes, activation='softmax')(full)

model = Model(inputs=x, outputs=out)

optimizer = keras.optimizers.Adam()
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

# Halve the learning rate when the training loss has stopped improving for
# 50 epochs, but never go below 1e-4.
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.5,
                              patience=50, min_lr=0.0001)

# `epochs` is the Keras 2 name of the deprecated `nb_epoch` argument.
# NOTE: the test split is passed as validation data, so the reported
# validation metrics are test-set metrics.
history = model.fit(x_train, Y_train, batch_size=batch_size, epochs=nb_epochs,
                    verbose=1, validation_data=(x_test, Y_test),
                    callbacks=[reduce_lr])

# Persist the trained model (architecture + weights) to an HDF5 file.
model.save('FCN_CBF_1500.h5')
# Per-epoch metrics recorded by fit(). Depending on the Keras release the
# accuracy metric is stored under 'acc' or 'accuracy', so accept either
# instead of failing with a KeyError.
metrics = history.history
acc_key = 'acc' if 'acc' in metrics else 'accuracy'
acc = metrics[acc_key]
val_acc = metrics['val_' + acc_key]
loss = metrics['loss']
val_loss = metrics['val_loss']

epochs = range(len(acc))

# Figure 1: training (dots) vs. validation (line) accuracy.
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

# Figure 2: training (dots) vs. validation (line) loss.
plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()


结果:
训练1000个epoch后的结果如下:
利用深度学习进行时间序列分类
利用深度学习进行时间序列分类

另外一个版本的代码:
(更详细)转自格拉迪沃的博客
数据存放路径:
数据放在Ham文件夹下(包括训练集和测试集),代码放在文件夹外面,与文件夹并列。
或者直接更改代码中的路径即可。

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 30 20:11:19 2016

@author: stephen
"""
 
from __future__ import print_function
 
from keras.models import Model
from keras.utils import np_utils
import numpy as np
import pandas as pd
import keras 
from keras.callbacks import ReduceLROnPlateau
import matplotlib.pyplot as plt
      
def readucr(filename, delimiter='\t'):
    """Load a delimited numeric text file and split it into data and labels.

    Column 0 of the file is treated as the class label and every following
    column as the time-series values of that sample.

    Args:
        filename: Path of the file to read.
        delimiter: Field separator. Defaults to a tab (TSV); pass ',' for a
            CSV file.

    Returns:
        A tuple ``(X, Y)`` where ``X`` is the 2-D array of all columns from
        index 1 onwards and ``Y`` is the 1-D array holding column 0.
    """
    # ndmin=2 keeps a single-row file two-dimensional, so the column slicing
    # below works for one sample as well as for many.
    data = np.loadtxt(filename, delimiter=delimiter, ndmin=2)
    Y = data[:, 0]
    X = data[:, 1:]
    return X, Y
  
# Number of training epochs.
nb_epochs = 1000

# Dataset folder names to process; edit this list as needed. For each name
# the files <name>/<name>_TRAIN.tsv and <name>/<name>_TEST.tsv are read.
flist = ['Ham']
for each in flist:
    fname = each
    x_train, y_train = readucr(fname+'/'+fname+'_TRAIN.tsv')
    x_test, y_test = readucr(fname+'/'+fname+'_TEST.tsv')

    # Distinct class labels over both splits, so the class count stays
    # correct even when a class is missing from one of them.
    class_labels = np.unique(np.concatenate((y_train, y_test)))
    nb_classes = len(class_labels)

    # Mini-batch size: a tenth of the training set, capped at 16. Floor
    # division keeps it an integer under Python 3 (plain / yields a float),
    # and the clamp avoids a batch size of 0 for tiny training sets.
    batch_size = max(1, min(x_train.shape[0] // 10, 16))

    # Map each raw label to its index in class_labels (0 .. nb_classes-1).
    # Unlike min-max rescaling this also works for unevenly spaced labels
    # and for a single class.
    y_train = np.searchsorted(class_labels, y_train)
    y_test = np.searchsorted(class_labels, y_test)

    # One-hot encode the integer class indices.
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    # z-score standardisation with training-set statistics, applied to both
    # splits.
    x_train_mean = x_train.mean()
    x_train_std = x_train.std()
    x_train = (x_train - x_train_mean) / x_train_std
    x_test = (x_test - x_train_mean) / x_train_std

    # (samples, length) -> (samples, length, 1, 1) for the Conv2D layers.
    x_train = x_train.reshape(x_train.shape + (1, 1))
    x_test = x_test.reshape(x_test.shape + (1, 1))

    # Three Conv -> BatchNorm -> ReLU stages, global average pooling and a
    # softmax classifier. Keras 2 argument names (kernel_size tuple,
    # padding=, inputs=, outputs=, epochs=) replace the deprecated Keras 1
    # spellings; the original positional pairs were kernel rows/cols.
    x = keras.layers.Input(x_train.shape[1:])

    conv1 = keras.layers.Conv2D(128, (8, 1), padding='same')(x)
    conv1 = keras.layers.BatchNormalization()(conv1)
    conv1 = keras.layers.Activation('relu')(conv1)

    conv2 = keras.layers.Conv2D(256, (5, 1), padding='same')(conv1)
    conv2 = keras.layers.BatchNormalization()(conv2)
    conv2 = keras.layers.Activation('relu')(conv2)

    conv3 = keras.layers.Conv2D(128, (3, 1), padding='same')(conv2)
    conv3 = keras.layers.BatchNormalization()(conv3)
    conv3 = keras.layers.Activation('relu')(conv3)

    full = keras.layers.GlobalAveragePooling2D()(conv3)
    out = keras.layers.Dense(nb_classes, activation='softmax')(full)

    model = Model(inputs=x, outputs=out)

    optimizer = keras.optimizers.Adam()
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    # Halve the learning rate when the training loss has stopped improving
    # for 50 epochs, but never go below 1e-4.
    reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.5,
                                  patience=50, min_lr=0.0001)
    # NOTE: the test split is passed as validation data, so the reported
    # validation metrics are test-set metrics.
    history = model.fit(x_train, Y_train, batch_size=batch_size,
                        epochs=nb_epochs, verbose=1,
                        validation_data=(x_test, Y_test),
                        callbacks=[reduce_lr])
    # NOTE: the same file name is used on every iteration, so with several
    # datasets in flist only the last trained model remains on disk.
    model.save('FCN_CBF_1500.h5')
# Per-epoch metrics of the most recent fit() call (the last dataset in
# flist). Depending on the Keras release the accuracy metric is stored under
# 'acc' or 'accuracy', so accept either instead of failing with a KeyError.
metrics = history.history
acc_key = 'acc' if 'acc' in metrics else 'accuracy'
acc = metrics[acc_key]
val_acc = metrics['val_' + acc_key]
loss = metrics['loss']
val_loss = metrics['val_loss']

epochs = range(len(acc))

# Figure 1: training (dots) vs. validation (line) accuracy.
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

# Figure 2: training (dots) vs. validation (line) loss.
plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()


运行结果:
利用深度学习进行时间序列分类