Speech Preprocessing (Comparing the Same Utterance from Different Speakers)
Visualize the same utterance spoken by two different speakers and compare the results.
Waveform plot
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import wavfile

"""Read a mono waveform and plot it."""
f = wavfile.read("./test1.wav")
nframes = len(f[1])   # number of samples
nchannels = 1         # number of channels (mono)
framerate = f[0]      # sampling rate
wave_data = f[1]      # audio samples
wave_data = np.reshape(wave_data, [nframes, nchannels])
# Compute the time of each sample from the sample count and the sampling rate
time = np.arange(0, nframes) * (1.0 / framerate)

plt.figure()
plt.subplot(3, 1, 1)
plt.plot(time, wave_data[:, 0])
plt.xlabel("time (seconds)")
plt.ylabel("Amplitude")
plt.title("people_one")
plt.grid()  # grid lines

plt.subplot(3, 1, 3)
f = wavfile.read("./test2.wav")
nframes = len(f[1])   # number of samples
framerate = f[0]      # sampling rate
wave_data = f[1]      # audio samples
wave_data = np.reshape(wave_data, [nframes, nchannels])
time = np.arange(0, nframes) * (1.0 / framerate)
plt.plot(time, wave_data[:, 0], c="g")
plt.xlabel("time (seconds)")
plt.ylabel("Amplitude")
plt.title("people_two")
plt.grid()
plt.show()
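The snippet above assumes mono input. For stereo files, wavfile.read returns an (nframes, 2) array, and the reshape with nchannels = 1 would fail. A minimal sketch (using the same test file, assuming it may be stereo) that averages the two channels down to mono before plotting:

import numpy as np
from scipy.io import wavfile

rate, data = wavfile.read("./test1.wav")
if data.ndim == 2:
    # average the channels into one mono signal; cast first to avoid int16 overflow
    data = data.astype(np.float64).mean(axis=1)
time = np.arange(len(data)) / rate  # time axis in seconds, ready for plt.plot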
Convert from the time domain to the frequency domain and see what the result looks like.
import numpy as np
from scipy.io import wavfile
import matplotlib.pyplot as plt

def fft(path):
    sampling_freq, audio = wavfile.read(path)   # read the file
    audio = audio / np.max(np.abs(audio))       # normalize to [-1, 1]
    # apply the Fourier transform
    fft_signal = np.fft.fft(audio)
    fft_signal = abs(fft_signal)
    # build the frequency axis in kHz so it matches the x-label below
    freq = np.arange(len(fft_signal)) * sampling_freq / len(fft_signal) / 1000.0
    return freq, fft_signal

plt.figure()
plt.subplot(3, 1, 1)
test1_freq, test1_signal = fft("./test1.wav")
plt.plot(test1_freq, test1_signal, color='blue')
plt.xlabel('Freq (in kHz)')
plt.ylabel('Amplitude')
plt.title("people_one")
plt.grid()  # grid lines

plt.subplot(3, 1, 3)
test2_freq, test2_signal = fft("./test2.wav")
plt.plot(test2_freq, test2_signal, color='g')
plt.xlabel('Freq (in kHz)')
plt.ylabel('Amplitude')
plt.title("people_two")
plt.grid()
plt.show()
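Note that the full FFT of a real signal is symmetric, so the plot mirrors itself around the Nyquist frequency. A minimal variant (same mono test files assumed) that keeps only the one-sided spectrum via np.fft.rfft:

import numpy as np
from scipy.io import wavfile

def one_sided_fft(path):
    sampling_freq, audio = wavfile.read(path)
    audio = audio / np.max(np.abs(audio))
    spectrum = np.abs(np.fft.rfft(audio))  # non-negative frequencies only
    # matching frequency axis, converted to kHz
    freq_khz = np.fft.rfftfreq(len(audio), d=1.0 / sampling_freq) / 1000.0
    return freq_khz, spectrum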
Generate a spectrogram
import numpy as np
import matplotlib.pyplot as plt
import wave

def test(path):
    f = wave.open(path, 'rb')
    params = f.getparams()
    nchannels, sampwidth, framerate, nframes = params[:4]
    strdata = f.readframes(nframes)
    wavedata = np.frombuffer(strdata, dtype=np.int16)   # np.fromstring is deprecated
    wavedata = wavedata * 1.0 / np.max(np.abs(wavedata))  # normalize to [-1, 1]
    wavedata = np.reshape(wavedata, [nframes, nchannels]).T
    f.close()
    return wavedata, framerate

plt.figure()
plt.subplot(3, 1, 1)
wave_1, framerate_1 = test('test1.wav')
plt.specgram(wave_1[0], Fs=framerate_1, scale_by_freq=True, sides='default')
plt.ylabel('Frequency (Hz)')
plt.xlabel('Time (s)')
plt.grid()  # grid lines

plt.subplot(3, 1, 3)
wave_2, framerate_2 = test('test2.wav')
plt.specgram(wave_2[0], Fs=framerate_2, scale_by_freq=True, sides='default')
plt.ylabel('Frequency (Hz)')
plt.xlabel('Time (s)')
plt.grid()
plt.show()
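plt.specgram computes and draws in one call. If you also want the underlying time/frequency matrix for further processing, scipy provides the same decomposition. A minimal sketch under the same assumptions (mono test1.wav):

import numpy as np
from scipy.io import wavfile
from scipy import signal
import matplotlib.pyplot as plt

rate, audio = wavfile.read('test1.wav')
freqs, times, Sxx = signal.spectrogram(audio, fs=rate)  # Sxx: (freq bins, time frames)
# plot on a dB scale so quieter harmonics remain visible
plt.pcolormesh(times, freqs, 10 * np.log10(Sxx + 1e-10))
plt.ylabel('Frequency (Hz)')
plt.xlabel('Time (s)')
plt.show()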
Extract the MFCC feature map
from scipy.io import wavfile
import matplotlib.pyplot as plt
from python_speech_features import mfcc

def test(path):
    # read the input audio file
    sampling_freq, audio = wavfile.read(path)
    # extract MFCC features; rows become coefficients after transposing
    mfcc_features = mfcc(audio, sampling_freq)
    mfcc_features = mfcc_features.T
    return mfcc_features

mfcc_1 = test('test1.wav')
plt.matshow(mfcc_1)
# after the transpose, shape[0] is the number of coefficients and shape[1] the number of windows
plt.xlabel('MFCC_features_length ' + str(mfcc_1.shape[0]) + ' ' + 'MFCC_windows ' + str(mfcc_1.shape[1]))
plt.grid()  # grid lines

mfcc_2 = test('test2.wav')
plt.matshow(mfcc_2)
plt.xlabel('MFCC_features_length ' + str(mfcc_2.shape[0]) + ' ' + 'MFCC_windows ' + str(mfcc_2.shape[1]))
plt.grid()
plt.show()
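The feature maps above are compared by eye. As a rough numeric follow-up (a sketch only, not a proper speaker-verification metric), one can average the MFCC frames of each file and measure the distance between the resulting vectors:

import numpy as np
from scipy.io import wavfile
from python_speech_features import mfcc

def mean_mfcc(path):
    sampling_freq, audio = wavfile.read(path)
    feats = mfcc(audio, sampling_freq)  # shape: (num_windows, 13)
    return feats.mean(axis=0)           # one 13-dim vector per file

v1 = mean_mfcc('test1.wav')
v2 = mean_mfcc('test2.wav')
print('Euclidean distance between mean MFCC vectors:', np.linalg.norm(v1 - v2))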
Original article: https://blog.csdn.net/qq_40703471/article/details/109860777