对python中Librosa的mfcc步骤详解
程序员文章站
2023-09-04 12:11:25
1.对语音数据归一化
如16000hz的数据,会将每个点/32768
2.计算窗函数:(*注意librosa中不进行预处理)
3.进行数据扩展填充,他进行的是镜像填充...
1.对语音数据归一化
如16000hz的数据,会将每个点/32768
2.计算窗函数:(*注意librosa中不进行预处理)
3.进行数据扩展填充,他进行的是镜像填充("reflect")
如原数据为 12345 -》 填充为4的,左右各填充4 即:5432123454321 即:5432-12345-4321
4.分帧
5.加窗:对每一帧进行加窗,
6.进行fft傅里叶变换
librosa中fft计算,可以使用.net中的system.numerics
mathnet.numerics.integraltransforms.fourier.forward(fft_frame, fourieroptions.matlab) 计算,结果相同
7.mel计算(每一帧取20个特征点)
imports system.numerics imports mathnet.numerics imports mathnet.numerics.integraltransforms module mfcc_module public class librosa end class dim pi as double = 3.1415926535897931 public function spectrum(fft_data(,) as complex) as double(,) dim new_data(fft_data.getlength(0) - 1, fft_data.getlength(1) - 1) as double for n = 0 to fft_data.getlength(0) - 1 ' debug.print("////////////////////////spectrum//////////////////") ' debug.print("////////////////////////spectrum//////////////////") for i = 0 to fft_data.getlength(1) - 1 new_data(n, i) = fft_data(n, i).magnitudesquared ' debug.write(new_data(n, i) & " ") next next return new_data end function public function fft(data as double(,)) as complex(,) dim result(data.getlength(0) - 1, 1024) as complex '2049 加了一个 数组类型 0 开始 dim fft_frame as complex() = new complex(data.getlength(1) - 1) {} for n = 0 to data.getlength(0) - 1 for i as integer = 0 to data.getlength(1) - 1 fft_frame(i) = data(n, i) next mathnet.numerics.integraltransforms.fourier.forward(fft_frame, fourieroptions.matlab) for k = 0 to 1024 result(n, k) = fft_frame(k) next 'debug.print("fft **************") 'for each mem in fft_frame ' debug.print(mem.tostring & " ") 'next next n return result end function public function _mfcc(dct_ as double(,), power_to_db_ as double(,)) as double(,) 'dct 20,128 'power_to_db 5,128 'result = 20,5 dim result(dct_.getlength(0) - 1, power_to_db_.getlength(1) - 1) as double dim r1, r2 as double for n = 0 to dct_.getlength(0) - 1 '20 for i = 0 to power_to_db_.getlength(1) - 1 '5 r2 = 0 for k = 0 to dct_.getlength(1) - 1 '128 r1 = dct_(n, k) * power_to_db_(k, i) r2 = r2 + r1 next result(n, i) = r2 next next return result end function public function dct(n_filters as integer, n_input as integer) as double(,) dim t1 as double = 2 * n_input dim samples(n_input - 1) as double dim basis(n_filters - 1, n_input - 1) as double dim n as integer = 1 for i = 0 to n_input - 1 samples(i) = n * pi / (2 * n_input) n = n + 2 next i for i = 0 to n_input - 1 basis(0, i) = 1 / math.sqrt(n_input) next for n = 1 to n_filters - 1 for i = 0 to n_input - 1 basis(n, i) = math.cos(n * samples(i)) * math.sqrt(2 / n_input) next next return basis end function '1e-10 = 0.0000000001 public function power_to_db(s as double(,), optional ref as double = 1, optional admin as double = 0.0000000001, optional top_db as double = 80) as double(,) dim result(s.getlength(0) - 1, s.getlength(1) - 1) as double dim log_spec as double for n = 0 to s.getlength(0) - 1 for i = 0 to s.getlength(1) - 1 log_spec = 10 * math.log10(math.max(admin, s(n, i))) result(n, i) = log_spec - 10 * math.log10(math.max(admin, ref)) next next 'if top_db <> 0 then ' for n = 0 to s.getlength(0) - 1 ' for i = 0 to s.getlength(1) - 1 ' 'result(n, i) = math.max(result(n, i), result(n, i) - top_db) ' next ' next 'end if return result end function public function melspectrogram(mel_basis(,) as double, s(,) as double) as double(,) 'mel_basis 128,1025 's 5 ,1025 -> 1025,5 ' result 128,5 dim result(mel_basis.getlength(0) - 3, s.getlength(0) - 1) as double dim r1, r2 as double for n = 0 to mel_basis.getlength(0) - 3 for i = 0 to s.getlength(0) - 1 for k = 0 to mel_basis.getlength(1) - 1 r1 = mel_basis(n, k) * s(i, k) r2 = r2 + r1 next result(n, i) = r2 r2 = 0 next next return result end function public function normal(mel_f as double(), weights(,) as double) as double(,) dim enorm(mel_f.length - 2) as double ' debug.print("*************normal//////////////") ' debug.print("*************normal//////////////") for i = 0 to mel_f.length - 3 enorm(i) = 2 / (mel_f(2 + i) - mel_f(i)) next for i = 0 to weights.getlength(1) - 1 for n = 0 to weights.getlength(0) - 2 weights(n, i) = weights(n, i) * enorm(n) next next return weights end function public function weight(a as double(,), fdiff as double()) as double(,) dim lower, upper as double dim data(a.getlength(0) - 1, a.getlength(1) - 1) as double for n = 0 to a.getlength(0) - 3 for i = 0 to a.getlength(1) - 1 lower = -(a(n, i) / fdiff(n)) upper = a(n + 2, i) / fdiff(n + 1) data(n, i) = math.max(0, math.min(lower, upper)) next next return data end function public function ramps(a as double(), b as double()) as double(,) dim data(a.length - 1, b.length - 1) as double ' debug.print("ramps*********************") for n = 0 to a.length - 1 'debug.print("******") 'debug.print("------") for i = 0 to b.length - 1 data(n, i) = a(n) - b(i) 'debug.write(data(n, i) & " ") next next return data end function public function diff(arr as double()) as double() dim data(arr.length - 2) as double for i = 1 to arr.length - 1 data(i - 1) = arr(i) - arr(i - 1) 'debug.print(data(i - 1)) next return data end function '分帧 算法2 public function frame2(y as double(), optional n_ftt as integer = 2048, optional hop as integer = 512) as double(,) dim tim as integer = math.floor((y.length - n_ftt) / hop) + 1 dim new_buff(tim - 1, n_ftt - 1) as double dim copypos as integer = 0 for i = 0 to tim - 1 for k = 0 to n_ftt - 1 new_buff(i, k) = y(copypos + k) next copypos = copypos + hop next 'for k = 0 to tim - 1 ' debug.print("//////////////////////////////////////") ' debug.print("///////////////fram2///////////////////////" & k) ' for i = 0 to n_ftt - 1 ' debug.print(new_buff(k, i) & " ") ' next 'next k return new_buff end function ' public function frame(y as double(), optional n_ftt as integer = 2048, optional hop as integer = 512) as double() dim tim as integer = math.floor((y.length - n_ftt) / hop) + 1 dim new_buff(tim * n_ftt) as double dim pos as integer = 0 dim copypos as integer = 0 for i = 0 to tim - 1 array.copy(y, copypos, new_buff, pos, n_ftt) 'buffer.blockcopy(y, 0, new_buff, pos, n_ftt) copypos = copypos + hop pos = pos + n_ftt next for k = 0 to tim - 1 'debug.print("//////////////////////////////////////") 'debug.print("//////////////////////////////////////") for i = 0 to n_ftt - 1 debug.write(new_buff(k * n_ftt + i) & " ") next next k return new_buff end function public function melfilter() as double() dim filter_points(128 + 1) as integer '40个滤波器,需要41点 const samplerate as integer = 16000 '采样频率 16000 const filternum as integer = 128 '滤波器数量 取40个 const framesize as integer = 512 '帧长512 dim fremax as double = samplerate / 2 '实际最大频率 dim fremin as double = 0 '实际最小频率 dim melfremax as double = hz_to_mel(fremax) '将实际频率转换成梅尔频率 dim melfremin as double = 1125 * math.log(1 + fremin / 700) dim k as double = (melfremax - melfremin) / (filternum + 1) dim m as double() = new double(filternum + 1) {} dim h as double() = new double(filternum + 1) {} for i as integer = 0 to filternum + 1 m(i) = melfremin + k * i 'h(i) = 700 * (math.exp(m(i) / 1125) - 1) '将梅尔频率转换成实际频率 filter_points(i) = mel_to_hz(m(i)) 'debug.print(m(i)) next dim hzs as double() = mel_to_hz2(m) 'for i = 0 to filternum + 1 ' ' debug.print(hzs(i)) 'next return hzs end function public function hz_to_mel(frequencies as double, optional htk as boolean = false) as double dim mels as double if htk then mels = 1125 * math.log(1 + frequencies / 700) else dim f_min as double = 0.0 dim f_sp as double = 200.0 / 3 dim min_log_hz as double = 1000.0 ' beginning of log region (hz) dim min_log_mel as double = (min_log_hz - f_min) / f_sp ' same (mels) dim logstep as double = math.log(6.4) / 27.0 ' step size for log region mels = min_log_mel + math.log(frequencies / min_log_hz) / logstep end if return mels end function public function mel_to_hz2(mel() as double, optional htk as boolean = false) as double() dim hz(mel.length - 1) as double dim f_min as double = 0.0 dim f_sp as double = 200.0 / 3 dim freqs(mel.length - 1) as double for i = 0 to mel.length - 1 freqs(i) = f_min + f_sp * mel(i) next i dim min_log_hz as double = 1000.0 ' beginning of log region (hz) dim min_log_mel as double = (min_log_hz - f_min) / f_sp ' same (mels) dim logstep as double = math.log(6.4) / 27.0 for i = 0 to mel.length - 1 if (mel(i) > min_log_mel) then freqs(i) = min_log_hz * math.exp(logstep * (mel(i) - min_log_mel)) end if next 'hz = min_log_hz * math.exp(logstep * (mel - min_log_mel)) return freqs end function public function mel_to_hz(mel as double, optional htk as boolean = false) as double dim hz as double if htk then hz = 700 * (math.exp(mel) / 1125) - 1 else dim f_min as double = 0.0 dim f_sp as double = 200.0 / 3 dim freqs = f_min + f_sp * mel dim min_log_hz as double = 1000.0 ' beginning of log region (hz) dim min_log_mel as double = (min_log_hz - f_min) / f_sp ' same (mels) dim logstep as double = math.log(6.4) / 27.0 hz = min_log_hz * math.exp(logstep * (mel - min_log_mel)) 'hz = min_log_hz * math.exp(logstep * (mel - min_log_mel)) end if return hz end function public function fft_frequencies(sr as integer, n_fft as integer) as double() dim fft_data(n_fft / 2) as double for i = 0 to n_fft / 2 fft_data(i) = i * sr / n_fft next return fft_data end function '左右填充,优化 public function padreflect2(data() as double, num as integer) 'pad 10 ,10 dim tim(data.length - 3) as double for i = 0 to data.length - 3 tim(i) = data(data.length - 2 - i) next dim dump() as double = data.concat(tim).toarray() 'for each i in dump ' debug.write(i) end function public function padreflect(data() as double, num as integer) 'pad 10 ,10 dim tim(data.length - 3) as double for i = 0 to data.length - 3 tim(i) = data(data.length - 2 - i) next dim dump() as double = data.concat(tim).toarray() 'for each i in dump ' debug.write(i) 'next 'left_edge ' debug.print("***************************") dim left_edge(num - 1) as double _copydup(left_edge, dump, true) 'for i = 0 to num - 1 ' debug.write(left_edge(i)) 'next 'right_edge 'debug.print("***************************") dim right_edge(num + data.length) as double _copydup(right_edge, dump, false) 'for i = 0 to num - 1 ' debug.write(right_edge(i)) 'next 'debug.print("***************************") dim result as double() = left_edge.concat(right_edge).toarray() return result end function 'copy tim to data dumply public function _copydup(data() as double, tim() as double, optional left as boolean = true) dim last as integer = data.length mod tim.length dim times as integer = math.floor(data.length / tim.length) dim pos as integer if left then array.copy(tim, tim.length - last, data, 0, last) pos = last for i = 0 to times - 1 array.copy(tim, 0, data, pos, tim.length) pos = pos + tim.length next else 'right pos = 0 for i = 0 to times - 1 array.copy(tim, 0, data, pos, tim.length) pos = pos + tim.length next array.copy(tim, 0, data, pos, last) end if end function public function general_cosine(m as integer, alpha as double(), sym as boolean) as double() if not sym then m = m + 1 end if dim tim as double = (2 * pi) / (m - 1) dim x(m) as double dim w(m) as double 'debug.print("ine") for i = 0 to m - 1 x(i) = -pi + tim * i 'debug.write(x(i) & " ") next 'debug.print("******") for i = 0 to alpha.getlength(0) - 1 for k = 0 to m - 1 w(k) = w(k) + alpha(i) * math.cos(i * x(k)) 'debug.write(w(k) & " ") next next return w end function ''' <summary> ''' 汉明窗 ''' </summary> ''' <param name="m"> 窗长</param> ''' <returns></returns> public function general_hamming(m as integer) as double() dim db as double() = {0.5, 1 - 0.5} return general_cosine(m, db, false) '进行加1 ,若sys为false end function public function get_window(m as integer) as double() return general_hamming(m) end function end module
imports system.io imports system.numerics imports tensorflow 'install-package tensorflowsharp public class keyworddetect dim graph as tfgraph dim session as tfsession '加载模型 public sub new() dim model as byte() = file.readallbytes("f:\graph1.pb") '导入graphdef graph = new tfgraph() graph.import(model, "") session = new tfsession(graph) ' threading.threadpool.setmaxthreads(5, 5) end sub protected overrides sub finalize() session.closesession() end sub '将声音数据变为mfcc byte数据 public function databtomfcc(datab() as byte) as double(,) dim buff16(datab.length / 2 - 1) as int16 buffer.blockcopy(datab, 0, buff16, 0, datab.length - 1) dim result(,) as double = mfcc(buff16) return result end function '将声音数据变为mfcc public function datatomfcc(datai() as int16) as double(,) dim result(,) as double = mfcc(datai) return result end function '将mfcc变为输入数据格式 public function mfcctovect(mfcc as double(,)) as double(,,) dim data(0, 1, 129) as double dim n as integer = 0, m as integer = 0 for i = 0 to mfcc.getlength(0) - 1 for k = 0 to mfcc.getlength(1) - 1 data(0, m, n) = mfcc(i, k) n = n + 1 next if n = 130 then m = 1 n = 0 end if next return data end function dim output dim runner as tfsession.runner dim result dim rshape '关键字检测 public function detected(data(,,) as double) as double ' dim tensor as tftensor = new tftensor(data) runner = session.getrunner() runner.addinput(graph("input")(0), data).fetch(graph("out")(0)) output = runner.run() result = output(0) rshape = result.shape dim rt as double rt = result.getvalue(true)(0)(0) 'for k = 0 to rshape.getvalue(0) - 1 ' rt = result.getvalue(true)(k)(0) ' 'debug.print(rt) ' if (rt > 0.8) then ' debug.print("-----------recogxili") ' ' msgbox("recgo") ' end if 'next return rt end function 'public function runb(datab() as byte) ' dim mfccd as double(,) = databtomfcc(datab) ' dim inputx as double(,,) = mfcctovect(mfccd) ' detected(inputx) 'end function 'public function threadpoolrun(datai() as int16) ' threading.threadpool.queueuserworkitem(run(datai), datai) ' ' dim thrd1 as new threading.thread(new threading.parameterizedthreadstart(addressof run)) ' ' thrd1.start(datai) 'end function 'delegate function delgrun(datai() as int16) 'public function threadrun(datai() as int16) ' ' dim drun as new delgrun(addressof run) ' dim thrd1 as new threading.thread(new threading.parameterizedthreadstart(addressof run)) ' thrd1.start(datai) 'end function public function run(datai() as int16) as double ' debug.print("thread *****1") dim mfccd as double(,) = datatomfcc(datai) dim inputx as double(,,) = mfcctovect(mfccd) return detected(inputx) end function public function mfcc(buff16() as int16) as double(,) dim datalen as integer = buff16.length * 2 dim double_buff(datalen / 2 - 1) as double dim len as integer = datalen / 2 array.copy(buff16, double_buff, len) '****************** for i = 0 to double_buff.length - 1 double_buff(i) = double_buff(i) / 32768 ' debug.print(double_buff(i)) next '汉明窗create dim hann_window as double() = get_window(2048) 'debug.print("--------------------------") 'debug.print("hann_window**************") for each i in hann_window 'debug.print(i & " ") next 'debug.print("--------------------------") 'debug.print("*************pad reflect**************") dim y as double() = padreflect(double_buff, 1024) ' dim y as double() = double_buff 'for each i in y ' 'debug.print(i & " ") 'next 'debug.print("--------------------------") 'debug.print("***************frame************") dim frams as double(,) = frame2(y) dim tim as integer = frams.getlength(0) 'debug.print("--------------------------") 'debug.print("**********hann * data**************") dim hanndata(tim - 1, 2047) as double for n = 0 to tim - 1 for i = 0 to 2048 - 1 hanndata(n, i) = frams(n, i) * hann_window(i) ' debug.print(hanndata(i) & " ") next next n '\\\\\\\\\\\\\\\\melspecture dim specturm1(,) as complex = fft(hanndata) 'for i = 0 to specturm1.getlength(0) - 1 ' debug.print("--------------------------------------") ' debug.print("--------------------------------------") ' for k = 0 to specturm1.getlength(1) - 1 ' debug.print(specturm1(i, k).real & " " & specturm1(i, k).imaginary) ' next 'next dim s as double(,) = spectrum(specturm1) dim fftfreqs() as double = fft_frequencies(16000, 2048) 'debug.print("***************fftfreqs*****************") 'debug.print("***************fftfreqs*****************") 'debug.print("fftfreqs.shape", fftfreqs.length) 'for i = 0 to fftfreqs.length - 1 ' 'debug.write(fftfreqs(i) & " ") 'next ''''''''''''''''mel * specturm1 'debug.print("**************") 'debug.print("****滤波器创建**********") dim mel_f as double() = melfilter() 'debug.print("--------------------------") 'debug.print("hann_window**************") 'debug.print("diff") dim fdiff as double() = diff(mel_f) dim ramps_ as double(,) = ramps(mel_f, fftfreqs) dim weights(,) as double = weight(ramps_, fdiff) normal(mel_f, weights) 's*weight = melspectrogram 'weight 128,1025 's 5 ,1025 dim melspectrogram_(,) as double = melspectrogram(weights, s) dim power_to_db_ as double(,) = power_to_db(melspectrogram_) dim dct_ as double(,) = dct(20, 128) return _mfcc(dct_, power_to_db_) end function end class
以上这篇对python中librosa的mfcc步骤详解就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持。
下一篇: VBS教程:函数-Int、Fix 函数