对python中Librosa的mfcc步骤详解

程序员文章站 2023-09-04 12:11:25

1.对语音数据归一化如16000hz的数据，会将每个点/32768 2.计算窗函数：（*注意librosa中不进行预处理） 3.进行数据扩展填充，他进行的是镜像填充...

1.对语音数据归一化

例如16000Hz采样的16位PCM数据，会将每个采样点除以32768（即2^15），使取值落在[-1, 1)范围内

2.计算窗函数：（*注意librosa中不进行预处理）

3.进行数据扩展填充，它进行的是镜像填充（"reflect"）

例如原数据为 12345，填充长度为4时，左右各填充4个点（以端点为轴做镜像，端点本身不重复），结果为：5432123454321，即：5432-12345-4321

4.分帧

5.加窗：对每一帧进行加窗，

6.进行fft傅里叶变换

librosa中的fft计算，在.net中可以使用system.numerics的complex类型，并通过

mathnet.numerics.integraltransforms.fourier.forward(fft_frame, fourieroptions.matlab) 计算，结果相同

7.mel计算（每一帧取20个特征点）

imports system.numerics
imports mathnet.numerics
imports mathnet.numerics.integraltransforms
 
module mfcc_module
 
  ' Empty placeholder class: it declares no members, and nothing in the visible
  ' source references it.
  public class librosa
 
  end class
 
  ' Module-level value of pi, read by dct() and general_cosine().
  dim pi as double = 3.1415926535897931
 
 
 
 
 
  ''' <summary>
  ''' Converts a complex spectrum into a power spectrum, element by element.
  ''' </summary>
  ''' <param name="fft_data">2-D array of complex values.</param>
  ''' <returns>Array of the same shape holding the squared magnitude of each entry.</returns>
  public function spectrum(fft_data(,) as complex) as double(,)
    dim rows as integer = fft_data.getlength(0)
    dim cols as integer = fft_data.getlength(1)
    dim power(rows - 1, cols - 1) as double

    for r as integer = 0 to rows - 1
      for c as integer = 0 to cols - 1
        power(r, c) = fft_data(r, c).magnitudesquared
      next c
    next r

    return power
  end function
 
 
  ''' <summary>
  ''' Runs a forward FFT on every row (frame) of <paramref name="data"/> and keeps
  ''' the non-redundant half of each spectrum.
  ''' </summary>
  ''' <param name="data">Frames, one per row; the row length is the FFT size.</param>
  ''' <returns>
  ''' Array with one row per frame and (frame length \ 2) + 1 columns, i.e. bins
  ''' 0 .. frame length \ 2. For 2048-sample frames this is 1025 columns, the same
  ''' as the previously hard-coded size.
  ''' </returns>
  public function fft(data as double(,)) as complex(,)
    dim frames as integer = data.getlength(0)
    dim n_fft as integer = data.getlength(1)

    ' Nothing to transform: return an array with zero columns.
    if n_fft = 0 then
      return new complex(frames - 1, -1) {}
    end if

    ' Highest bin index kept. Derived from the frame length instead of the old
    ' literal 1024, which overran the buffer for frames shorter than 1025 samples.
    dim last_bin as integer = n_fft \ 2
    dim result(frames - 1, last_bin) as complex

    ' Scratch buffer reused for every frame; it is fully overwritten each time.
    dim fft_frame as complex() = new complex(n_fft - 1) {}

    for n as integer = 0 to frames - 1
      for i as integer = 0 to n_fft - 1
        fft_frame(i) = data(n, i)
      next i

      ' In-place transform using the matlab scaling convention.
      mathnet.numerics.integraltransforms.fourier.forward(fft_frame, fourieroptions.matlab)

      for k as integer = 0 to last_bin
        result(n, k) = fft_frame(k)
      next k
    next n

    return result
  end function
 
  ''' <summary>
  ''' Matrix product of the DCT basis with the log-power mel spectrogram.
  ''' </summary>
  ''' <param name="dct_">Left operand, shape (coefficients, bands).</param>
  ''' <param name="power_to_db_">Right operand, shape (bands, frames).</param>
  ''' <returns>Product of shape (coefficients, frames).</returns>
  public function _mfcc(dct_ as double(,), power_to_db_ as double(,)) as double(,)
    dim n_coeff as integer = dct_.getlength(0)
    dim n_bands as integer = dct_.getlength(1)
    dim n_frames as integer = power_to_db_.getlength(1)
    dim coeffs(n_coeff - 1, n_frames - 1) as double

    for row as integer = 0 to n_coeff - 1
      for col as integer = 0 to n_frames - 1
        ' Dot product of one basis row with one spectrogram column.
        dim acc as double = 0
        for k as integer = 0 to n_bands - 1
          acc = acc + dct_(row, k) * power_to_db_(k, col)
        next k
        coeffs(row, col) = acc
      next col
    next row

    return coeffs
  end function
 
  ''' <summary>
  ''' Builds a cosine-transform basis matrix.
  ''' </summary>
  ''' <param name="n_filters">Number of basis rows to produce.</param>
  ''' <param name="n_input">Number of columns (length of each basis vector).</param>
  ''' <returns>
  ''' Array of shape (n_filters, n_input). Row 0 is the constant 1/sqrt(n_input);
  ''' row k (k >= 1) is cos(k * (2i+1) * pi / (2 * n_input)) * sqrt(2 / n_input).
  ''' </returns>
  public function dct(n_filters as integer, n_input as integer) as double(,)
    dim basis(n_filters - 1, n_input - 1) as double
    dim angles(n_input - 1) as double

    ' Sample angles at the odd multiples 1, 3, 5, ... of pi / (2 * n_input).
    for col as integer = 0 to n_input - 1
      angles(col) = (2 * col + 1) * pi / (2 * n_input)
    next col

    ' Row 0: constant term.
    dim dc as double = 1 / math.sqrt(n_input)
    for col as integer = 0 to n_input - 1
      basis(0, col) = dc
    next col

    ' Remaining rows: scaled cosines.
    dim scale as double = math.sqrt(2 / n_input)
    for k as integer = 1 to n_filters - 1
      for col as integer = 0 to n_input - 1
        basis(k, col) = math.cos(k * angles(col)) * scale
      next col
    next k

    return basis
  end function
 
 
  '1e-10 = 0.0000000001
  ''' <summary>
  ''' Converts a power spectrogram to decibels: 10*log10(max(admin, s)) - 10*log10(max(admin, ref)).
  ''' </summary>
  ''' <param name="s">Power values.</param>
  ''' <param name="ref">Reference power that maps to 0 dB.</param>
  ''' <param name="admin">Lower bound applied to s and ref before taking the logarithm (default 1e-10).</param>
  ''' <param name="top_db">
  ''' Dynamic-range limit: when greater than 0, every output value is raised to at
  ''' least (largest output value - top_db). Pass 0 (or a negative value) to
  ''' disable clipping. Previously this argument was accepted but ignored.
  ''' </param>
  ''' <returns>Array with the same shape as <paramref name="s"/>, in dB.</returns>
  public function power_to_db(s as double(,), optional ref as double = 1, optional admin as double = 0.0000000001, optional top_db as double = 80) as double(,)
    dim rows as integer = s.getlength(0)
    dim cols as integer = s.getlength(1)
    dim result(rows - 1, cols - 1) as double

    ' The reference term does not depend on the element, so compute it once.
    dim ref_db as double = 10 * math.log10(math.max(admin, ref))
    dim peak as double = double.negativeinfinity

    for n as integer = 0 to rows - 1
      for i as integer = 0 to cols - 1
        dim db as double = 10 * math.log10(math.max(admin, s(n, i))) - ref_db
        result(n, i) = db
        if db > peak then peak = db
      next i
    next n

    ' Clip everything that lies more than top_db below the loudest value.
    if top_db > 0 andalso rows > 0 andalso cols > 0 then
      dim floor_db as double = peak - top_db
      for n as integer = 0 to rows - 1
        for i as integer = 0 to cols - 1
          if result(n, i) < floor_db then result(n, i) = floor_db
        next i
      next n
    end if

    return result
  end function
 
 
 
  ''' <summary>
  ''' Projects a power spectrogram onto a filter bank.
  ''' </summary>
  ''' <param name="mel_basis">
  ''' Filter weights, shape (rows, bins). Only the first (rows - 2) rows are used.
  ''' </param>
  ''' <param name="s">Power spectrogram, shape (frames, bins).</param>
  ''' <returns>
  ''' Array of shape (rows - 2, frames); element (b, f) is the dot product of
  ''' filter row b with frame f.
  ''' </returns>
  public function melspectrogram(mel_basis(,) as double, s(,) as double) as double(,)
    dim n_bands as integer = mel_basis.getlength(0) - 2
    dim n_bins as integer = mel_basis.getlength(1)
    dim n_frames as integer = s.getlength(0)
    dim mel_power(n_bands - 1, n_frames - 1) as double

    for band as integer = 0 to n_bands - 1
      for f as integer = 0 to n_frames - 1
        dim acc as double = 0
        for bin as integer = 0 to n_bins - 1
          acc = acc + mel_basis(band, bin) * s(f, bin)
        next bin
        mel_power(band, f) = acc
      next f
    next band

    return mel_power
  end function
 
  ''' <summary>
  ''' Scales filter rows in place by 2 / (mel_f(row + 2) - mel_f(row)).
  ''' </summary>
  ''' <param name="mel_f">Band-edge frequencies.</param>
  ''' <param name="weights">
  ''' Filter weights, modified in place. Rows 0 .. rows - 2 are scaled; the last
  ''' row is left untouched. Rows without a computed factor are multiplied by 0.
  ''' </param>
  ''' <returns>The same <paramref name="weights"/> array instance.</returns>
  public function normal(mel_f as double(), weights(,) as double) as double(,)
    ' One slot more than is filled below; the unfilled tail stays at 0.
    dim scale(mel_f.length - 2) as double
    for band as integer = 0 to mel_f.length - 3
      scale(band) = 2 / (mel_f(band + 2) - mel_f(band))
    next band

    dim last_row as integer = weights.getlength(0) - 2
    dim last_col as integer = weights.getlength(1) - 1
    for col as integer = 0 to last_col
      for row as integer = 0 to last_row
        weights(row, col) = weights(row, col) * scale(row)
      next row
    next col

    return weights
  end function
 
 
  ''' <summary>
  ''' Builds triangular filter weights from a table of differences.
  ''' </summary>
  ''' <param name="a">Difference table, shape (rows, bins); see ramps().</param>
  ''' <param name="fdiff">Spacing between consecutive band edges; see diff().</param>
  ''' <returns>
  ''' Array with the same shape as <paramref name="a"/>. For rows 0 .. rows - 3 each
  ''' element is max(0, min(-a(n, i) / fdiff(n), a(n + 2, i) / fdiff(n + 1)));
  ''' the last two rows stay 0.
  ''' </returns>
  public function weight(a as double(,), fdiff as double()) as double(,)
    dim rows as integer = a.getlength(0)
    dim cols as integer = a.getlength(1)
    dim filters(rows - 1, cols - 1) as double

    for band as integer = 0 to rows - 3
      for bin as integer = 0 to cols - 1
        dim rising as double = -(a(band, bin) / fdiff(band))
        dim falling as double = a(band + 2, bin) / fdiff(band + 1)
        filters(band, bin) = math.max(0, math.min(rising, falling))
      next bin
    next band

    return filters
  end function
 
  ''' <summary>
  ''' Computes the table of pairwise differences a(n) - b(i).
  ''' </summary>
  ''' <param name="a">Values indexed by the row of the result.</param>
  ''' <param name="b">Values indexed by the column of the result.</param>
  ''' <returns>Array of shape (a.length, b.length).</returns>
  public function ramps(a as double(), b as double()) as double(,)
    dim table(a.length - 1, b.length - 1) as double

    for row as integer = 0 to a.length - 1
      dim minuend as double = a(row)
      for col as integer = 0 to b.length - 1
        table(row, col) = minuend - b(col)
      next col
    next row

    return table
  end function
  ''' <summary>
  ''' First-order differences of an array: result(i) = arr(i + 1) - arr(i).
  ''' </summary>
  ''' <param name="arr">Input values.</param>
  ''' <returns>Array with one element fewer than <paramref name="arr"/>.</returns>
  public function diff(arr as double()) as double()
    dim steps(arr.length - 2) as double

    for i as integer = 0 to arr.length - 2
      steps(i) = arr(i + 1) - arr(i)
    next i

    return steps
  end function
 
  ''' <summary>
  ''' Framing, variant 2: slices a signal into overlapping frames, one per row.
  ''' </summary>
  ''' <param name="y">Input samples.</param>
  ''' <param name="n_ftt">Frame length in samples.</param>
  ''' <param name="hop">Distance in samples between the starts of consecutive frames.</param>
  ''' <returns>
  ''' Array of shape (floor((y.length - n_ftt) / hop) + 1, n_ftt); row f holds
  ''' y(f * hop) .. y(f * hop + n_ftt - 1).
  ''' </returns>
  public function frame2(y as double(), optional n_ftt as integer = 2048, optional hop as integer = 512) as double(,)
    dim n_frames as integer = math.floor((y.length - n_ftt) / hop) + 1
    dim frames(n_frames - 1, n_ftt - 1) as double

    for f as integer = 0 to n_frames - 1
      dim start as integer = f * hop
      for k as integer = 0 to n_ftt - 1
        frames(f, k) = y(start + k)
      next k
    next f

    return frames
  end function
 
  '
  ''' <summary>
  ''' Slices a signal into overlapping frames and returns them concatenated in a
  ''' single flat array (frame 0 first, then frame 1, ...).
  ''' </summary>
  ''' <param name="y">Input samples.</param>
  ''' <param name="n_ftt">Frame length in samples.</param>
  ''' <param name="hop">Distance in samples between the starts of consecutive frames.</param>
  ''' <returns>
  ''' Array of exactly frames * n_ftt samples, where
  ''' frames = floor((y.length - n_ftt) / hop) + 1. An empty array is returned
  ''' when the signal is too short to hold one frame.
  ''' </returns>
  public function frame(y as double(), optional n_ftt as integer = 2048, optional hop as integer = 512) as double()
    dim tim as integer = math.floor((y.length - n_ftt) / hop) + 1
    if tim <= 0 then
      return new double() {}
    end if

    ' Upper bound is count - 1; the previous size left a stray trailing 0.
    dim new_buff(tim * n_ftt - 1) as double

    for i as integer = 0 to tim - 1
      array.copy(y, i * hop, new_buff, i * n_ftt, n_ftt)
    next i

    ' The per-sample debug dump that used to run here was removed: it wrote
    ' every sample of every frame to the debug listeners on each call.
    return new_buff
  end function
 
 
  ''' <summary>
  ''' Returns the band-edge frequencies (in Hz) of a 128-band filter bank covering
  ''' 0 Hz .. 8000 Hz (half of the 16000 Hz sample rate).
  ''' </summary>
  ''' <returns>
  ''' 130 frequencies (128 filters need 128 + 2 edge points), equally spaced on
  ''' the mel scale and converted back to Hz with mel_to_hz2().
  ''' </returns>
  public function melfilter() as double()
    const samplerate as integer = 16000 ' sample rate in Hz
    const filternum as integer = 128    ' number of filters

    dim fremax as double = samplerate / 2 ' highest frequency covered
    dim fremin as double = 0              ' lowest frequency covered

    dim melfremax as double = hz_to_mel(fremax)
    ' Linear-region formula of the same (non-htk) scale that hz_to_mel() uses;
    ' valid while fremin is below 1000 Hz. Yields 0 for fremin = 0, as before.
    dim melfremin as double = fremin / (200.0 / 3)

    ' Equal mel spacing between the two limits.
    dim k as double = (melfremax - melfremin) / (filternum + 1)

    dim m as double() = new double(filternum + 1) {}
    for i as integer = 0 to filternum + 1
      m(i) = melfremin + k * i
    next i

    return mel_to_hz2(m)
  end function
 
  ''' <summary>
  ''' Converts a frequency in Hz to mels.
  ''' </summary>
  ''' <param name="frequencies">Frequency in Hz.</param>
  ''' <param name="htk">
  ''' True to use the logarithmic formula 1125 * ln(1 + f / 700); false (default)
  ''' to use the piecewise scale that is linear below 1000 Hz and logarithmic above.
  ''' </param>
  ''' <returns>The mel value.</returns>
  public function hz_to_mel(frequencies as double, optional htk as boolean = false) as double
    if htk then
      return 1125 * math.log(1 + frequencies / 700)
    end if

    dim f_min as double = 0.0
    dim f_sp as double = 200.0 / 3
    dim min_log_hz as double = 1000.0                        ' beginning of log region (hz)
    dim min_log_mel as double = (min_log_hz - f_min) / f_sp  ' same (mels)
    dim logstep as double = math.log(6.4) / 27.0             ' step size for log region

    if frequencies >= min_log_hz then
      return min_log_mel + math.log(frequencies / min_log_hz) / logstep
    end if

    ' Linear region. Previously the log formula was applied here as well, which
    ' returned -Infinity for 0 Hz and wrong values for everything below 1000 Hz.
    return (frequencies - f_min) / f_sp
  end function
 
  ''' <summary>
  ''' Converts an array of mel values to frequencies in Hz.
  ''' </summary>
  ''' <param name="mel">Mel values.</param>
  ''' <param name="htk">
  ''' True to invert the formula 1125 * ln(1 + f / 700) used by hz_to_mel(htk:=true);
  ''' false (default) to invert the piecewise linear/logarithmic scale.
  ''' Previously this flag was ignored.
  ''' </param>
  ''' <returns>A new array of the same length, in Hz.</returns>
  public function mel_to_hz2(mel() as double, optional htk as boolean = false) as double()
    dim freqs(mel.length - 1) as double

    if htk then
      for i as integer = 0 to mel.length - 1
        freqs(i) = 700 * (math.exp(mel(i) / 1125) - 1)
      next i
      return freqs
    end if

    dim f_min as double = 0.0
    dim f_sp as double = 200.0 / 3
    dim min_log_hz as double = 1000.0                        ' beginning of log region (hz)
    dim min_log_mel as double = (min_log_hz - f_min) / f_sp  ' same (mels)
    dim logstep as double = math.log(6.4) / 27.0             ' step size for log region

    for i as integer = 0 to mel.length - 1
      if mel(i) > min_log_mel then
        ' Logarithmic region.
        freqs(i) = min_log_hz * math.exp(logstep * (mel(i) - min_log_mel))
      else
        ' Linear region.
        freqs(i) = f_min + f_sp * mel(i)
      end if
    next i

    return freqs
  end function
 
  ''' <summary>
  ''' Converts a single mel value to a frequency in Hz.
  ''' </summary>
  ''' <param name="mel">Mel value.</param>
  ''' <param name="htk">
  ''' True to invert 1125 * ln(1 + f / 700); false (default) to invert the
  ''' piecewise scale that is linear below 1000 Hz and logarithmic above.
  ''' </param>
  ''' <returns>Frequency in Hz.</returns>
  public function mel_to_hz(mel as double, optional htk as boolean = false) as double
    if htk then
      ' Inverse of 1125 * ln(1 + f / 700). The division belongs inside exp();
      ' the earlier form 700 * (exp(mel) / 1125) - 1 was mis-parenthesised.
      return 700 * (math.exp(mel / 1125) - 1)
    end if

    dim f_min as double = 0.0
    dim f_sp as double = 200.0 / 3
    dim min_log_hz as double = 1000.0                        ' beginning of log region (hz)
    dim min_log_mel as double = (min_log_hz - f_min) / f_sp  ' same (mels)
    dim logstep as double = math.log(6.4) / 27.0             ' step size for log region

    if mel > min_log_mel then
      return min_log_hz * math.exp(logstep * (mel - min_log_mel))
    end if

    ' Linear region. This value used to be computed and then discarded in
    ' favour of the log formula, which is wrong below 1000 Hz.
    return f_min + f_sp * mel
  end function
 
 
  ''' <summary>
  ''' Centre frequencies of the non-redundant FFT bins.
  ''' </summary>
  ''' <param name="sr">Sample rate in Hz.</param>
  ''' <param name="n_fft">FFT size.</param>
  ''' <returns>
  ''' (n_fft \ 2) + 1 values, where element i equals i * sr / n_fft.
  ''' </returns>
  public function fft_frequencies(sr as integer, n_fft as integer) as double()
    ' Integer division: the former "n_fft / 2" is a floating-point division whose
    ' result is rounded half-to-even when used as an array bound, which
    ' over-allocates for some odd sizes (e.g. 7 -> 3.5 -> 4).
    dim last_bin as integer = n_fft \ 2
    dim fft_data(last_bin) as double

    for i as integer = 0 to last_bin
      ' Promote to double first so the product cannot overflow an integer.
      fft_data(i) = cdbl(i) * sr / n_fft
    next i

    return fft_data
  end function
 
 
 
  ''' <summary>
  ''' Left/right padding, optimised variant: mirror-pads a signal on both sides
  ''' ("reflect" mode, the end samples themselves are not repeated) by computing
  ''' every output index directly, without intermediate arrays.
  ''' </summary>
  ''' <param name="data">Input samples; must contain at least one element.</param>
  ''' <param name="num">Number of samples to add on each side; must not be negative.</param>
  ''' <returns>
  ''' Array of data.length + 2 * num samples. Example: {1,2,3,4,5} padded by 4
  ''' gives {5,4,3,2, 1,2,3,4,5, 4,3,2,1}.
  ''' </returns>
  ''' <remarks>
  ''' NOTE(review): the earlier body built a temporary array and then fell off the
  ''' end without returning anything; the behaviour here follows the function's
  ''' header comment and padreflect() — confirm that this is the intended contract.
  ''' </remarks>
  public function padreflect2(data() as double, num as integer) as double()
    if data is nothing orelse data.length = 0 then
      throw new argumentexception("data must contain at least one sample", "data")
    end if
    if num < 0 then
      throw new argumentoutofrangeexception("num", "num must not be negative")
    end if

    dim n as integer = data.length
    dim padded(n + 2 * num - 1) as double

    ' A single sample has nothing to mirror around: replicate it.
    if n = 1 then
      for j as integer = 0 to padded.length - 1
        padded(j) = data(0)
      next j
      return padded
    end if

    ' The mirrored signal repeats with this period: forward pass plus the
    ' backward pass without the two end points.
    dim period as integer = 2 * (n - 1)

    for j as integer = 0 to padded.length - 1
      dim k as integer = (j - num) mod period
      if k < 0 then k = k + period     ' mod keeps the sign of the dividend
      if k >= n then k = period - k    ' second half of the period runs backwards
      padded(j) = data(k)
    next j

    return padded
  end function
 
  ''' <summary>
  ''' Mirror-pads a signal on both sides ("reflect" mode: the end samples
  ''' themselves are not repeated).
  ''' </summary>
  ''' <param name="data">Input samples; at least two are required.</param>
  ''' <param name="num">Number of samples to add on each side.</param>
  ''' <returns>
  ''' Array of data.length + 2 * num samples. Example: {1,2,3,4,5} padded by 4
  ''' gives {5,4,3,2, 1,2,3,4,5, 4,3,2,1}.
  ''' </returns>
  public function padreflect(data() as double, num as integer) as double()

    ' Interior of the signal in reverse order (both end points excluded).
    dim tim(data.length - 3) as double
    for i as integer = 0 to data.length - 3
      tim(i) = data(data.length - 2 - i)
    next i

    ' One full period of the mirrored signal: forward pass + backward pass.
    dim dump() as double = data.concat(tim).toarray()

    ' Left pad: the last num samples of the periodic sequence.
    dim left_edge(num - 1) as double
    _copydup(left_edge, dump, true)

    ' Original signal followed by the right pad: data.length + num samples.
    ' The upper bound is count - 1; the previous bound was one too high and
    ' appended a stray extra sample to the result.
    dim right_edge(num + data.length - 1) as double
    _copydup(right_edge, dump, false)

    dim result as double() = left_edge.concat(right_edge).toarray()
    return result

  end function
 
  ''' <summary>
  ''' Fills <paramref name="data"/> by repeating <paramref name="tim"/> cyclically.
  ''' </summary>
  ''' <param name="data">Destination array, overwritten in place.</param>
  ''' <param name="tim">Pattern to repeat.</param>
  ''' <param name="left">
  ''' True: the partial repetition comes first and is taken from the END of the
  ''' pattern, so the destination ends on a pattern boundary.
  ''' False: whole repetitions come first and the partial one is taken from the
  ''' START of the pattern, so the destination begins on a pattern boundary.
  ''' </param>
  ''' <returns>Nothing; the result is written into <paramref name="data"/>.</returns>
  public function _copydup(data() as double, tim() as double, optional left as boolean = true)
    dim remainder as integer = data.length mod tim.length
    dim whole as integer = data.length \ tim.length
    dim cursor as integer = 0

    ' Left mode: leading partial copy, taken from the tail of the pattern.
    if left then
      array.copy(tim, tim.length - remainder, data, 0, remainder)
      cursor = remainder
    end if

    ' Whole repetitions of the pattern.
    for rep as integer = 1 to whole
      array.copy(tim, 0, data, cursor, tim.length)
      cursor = cursor + tim.length
    next rep

    ' Right mode: trailing partial copy, taken from the head of the pattern.
    if not left then
      array.copy(tim, 0, data, cursor, remainder)
    end if

    return nothing
  end function
 
 
 
  ''' <summary>
  ''' Generic weighted sum-of-cosines window:
  ''' w(k) = sum over i of alpha(i) * cos(i * x(k)), with x running from -pi.
  ''' </summary>
  ''' <param name="m">Number of window samples to return.</param>
  ''' <param name="alpha">Cosine weights; alpha(0) is the constant term.</param>
  ''' <param name="sym">
  ''' True for a symmetric window (x spans -pi .. pi over m points); false for a
  ''' periodic window (x spans -pi .. pi over m + 1 points, last point dropped).
  ''' </param>
  ''' <returns>
  ''' Exactly <paramref name="m"/> samples. Earlier versions returned one or two
  ''' extra trailing elements because the buffer was over-allocated; the first m
  ''' values are unchanged. An empty array is returned for m &lt; 1 and {1} for
  ''' m = 1 (the general formula would divide by zero there).
  ''' </returns>
  public function general_cosine(m as integer, alpha as double(), sym as boolean) as double()
    if m < 1 then
      return new double() {}
    end if
    if m = 1 then
      return new double() {1.0}
    end if

    ' A periodic window is a symmetric window of m + 1 points minus its last point.
    dim points as integer = if(sym, m, m + 1)
    dim stepsize as double = (2 * math.pi) / (points - 1)

    dim w(m - 1) as double
    for k as integer = 0 to m - 1
      dim x as double = -math.pi + stepsize * k
      for i as integer = 0 to alpha.length - 1
        w(k) = w(k) + alpha(i) * math.cos(i * x)
      next i
    next k

    return w
  end function
 
  ''' <summary>
  ''' Builds a periodic two-term cosine window with weights 0.5 and 0.5.
  ''' Note: despite the function name, these weights are those of a Hann window.
  ''' </summary>
  ''' <param name="m">Window length.</param>
  ''' <returns>The window returned by general_cosine() for weights {0.5, 0.5} and sym = false.</returns>
  public function general_hamming(m as integer) as double()
    ' sym = false selects the periodic form (general_cosine extends m by one).
    dim weights as double() = new double() {0.5, 0.5}
    return general_cosine(m, weights, false)
  end function
 
  ''' <summary>
  ''' Returns the analysis window used by the pipeline; forwards to general_hamming().
  ''' </summary>
  ''' <param name="m">Window length.</param>
  ''' <returns>Whatever general_hamming(m) returns.</returns>
  public function get_window(m as integer) as double()
    dim window as double() = general_hamming(m)
    return window
  end function
 
 
 
end module

 
imports system.io
imports system.numerics
imports tensorflow
 
'install-package tensorflowsharp
 
public class keyworddetect
 
  ' Graph that the constructor fills by importing the model file.
  dim graph as tfgraph
  ' Session created on that graph by the constructor; closed in finalize().
  dim session as tfsession
 
  ''' <summary>
  ''' Loads the model: reads a serialized graph definition from disk, imports it
  ''' into a new graph and opens a session on it.
  ''' </summary>
  ''' <param name="modelpath">
  ''' Path of the model file. Defaults to the location that used to be hard-coded,
  ''' so existing parameterless construction keeps working.
  ''' </param>
  public sub new(optional modelpath as string = "f:\graph1.pb")
    dim model as byte() = file.readallbytes(modelpath)

    ' Import the graph definition (empty prefix).
    graph = new tfgraph()
    graph.import(model, "")

    session = new tfsession(graph)
  end sub
 
  ''' <summary>
  ''' Closes the session when the object is collected.
  ''' </summary>
  protected overrides sub finalize()
    try
      ' session is still nothing when the constructor failed (for example
      ' because the model file could not be read).
      if session isnot nothing then
        session.closesession()
      end if
    catch ex as exception
      ' An exception escaping a finalizer terminates the process; closing the
      ' session here is best-effort only.
    finally
      mybase.finalize()
    end try
  end sub
 
  ''' <summary>
  ''' Converts raw audio bytes to MFCC features.
  ''' </summary>
  ''' <param name="datab">
  ''' Raw bytes that are reinterpreted as 16-bit samples in the machine's byte
  ''' order. A trailing odd byte, if any, is ignored.
  ''' </param>
  ''' <returns>The result of mfcc() for the decoded samples.</returns>
  public function databtomfcc(datab() as byte) as double(,)
    ' Integer division: number of complete 16-bit samples.
    dim samples as integer = datab.length \ 2
    dim buff16(samples - 1) as int16

    ' Copy every byte of every complete sample. The previous count of
    ' datab.length - 1 dropped the final byte, i.e. the high byte of the last sample.
    buffer.blockcopy(datab, 0, buff16, 0, samples * 2)

    return mfcc(buff16)
  end function
 
 
  ''' <summary>
  ''' Converts 16-bit audio samples to MFCC features; forwards to mfcc().
  ''' </summary>
  ''' <param name="datai">Audio samples.</param>
  ''' <returns>The result of mfcc(datai).</returns>
  public function datatomfcc(datai() as int16) as double(,)
    return mfcc(datai)
  end function
 
 
  ''' <summary>
  ''' Repacks an MFCC matrix into the 1 x 2 x 130 array that is fed to the model.
  ''' </summary>
  ''' <param name="mfcc">MFCC matrix; read row by row.</param>
  ''' <returns>
  ''' Array of shape (1, 2, 130). Values are written consecutively into the first
  ''' block of 130; once a row ends with exactly 130 values written, writing
  ''' continues at the start of the second block. Unwritten slots stay 0.
  ''' </returns>
  public function mfcctovect(mfcc as double(,)) as double(,,)
    dim vect(0, 1, 129) as double
    dim block as integer = 0
    dim slot as integer = 0

    for row as integer = 0 to mfcc.getlength(0) - 1
      for col as integer = 0 to mfcc.getlength(1) - 1
        vect(0, block, slot) = mfcc(row, col)
        slot = slot + 1
      next col

      ' The switch is only checked at row boundaries.
      if slot = 130 then
        block = 1
        slot = 0
      end if
    next row

    return vect
  end function
 
  ' Scratch fields written by detected() on every call. Those declared without
  ' a type are untyped and accessed through late binding.
  dim output ' value returned by runner.run()
  dim runner as tfsession.runner ' obtained from session.getrunner()
  dim result ' first element of output
  dim rshape ' result.shape; assigned but not read by the active code
 
  ' Keyword detection.
  ' Feeds `data` to output 0 of the graph node named "input", fetches output 0 of
  ' the node named "out", runs the session and returns element (0)(0) of the
  ' first fetched value.
  ' NOTE(review): the meaning of the returned number (presumably a score for the
  ' keyword, given the commented-out 0.8 threshold below) is not established by
  ' this code - confirm against the model.
  public function detected(data(,,) as double) as double
 
    ' dim tensor as tftensor = new tftensor(data)
    ' A fresh runner is requested for every call and kept in a field.
    runner = session.getrunner()
 
    runner.addinput(graph("input")(0), data).fetch(graph("out")(0))
 
    output = runner.run()
 
 
    ' Only the first fetched value is inspected.
    result = output(0)
    rshape = result.shape
    dim rt as double
    ' Late-bound call; the value is indexed twice, so it is expected to be
    ' nested at least two levels deep.
    rt = result.getvalue(true)(0)(0)
    'for k = 0 to rshape.getvalue(0) - 1
    '  rt = result.getvalue(true)(k)(0)
    '  'debug.print(rt)
    '  if (rt > 0.8) then
    '    debug.print("-----------recogxili")
    '    ' msgbox("recgo")
    '  end if
    'next
 
    return rt
 
  end function
 
 
 
  'public function runb(datab() as byte)
  '  dim mfccd as double(,) = databtomfcc(datab)
  '  dim inputx as double(,,) = mfcctovect(mfccd)
  '  detected(inputx)
  'end function
 
 
  'public function threadpoolrun(datai() as int16)
 
  '  threading.threadpool.queueuserworkitem(run(datai), datai)
  '  '  dim thrd1 as new threading.thread(new threading.parameterizedthreadstart(addressof run))
  '  ' thrd1.start(datai)
  'end function
  'delegate function delgrun(datai() as int16)
  'public function threadrun(datai() as int16)
  '  ' dim drun as new delgrun(addressof run)
 
  '  dim thrd1 as new threading.thread(new threading.parameterizedthreadstart(addressof run))
  '  thrd1.start(datai)
 
  'end function
 
 
  ''' <summary>
  ''' Full pipeline for one block of audio: samples -> MFCC -> model input -> detection.
  ''' </summary>
  ''' <param name="datai">Audio samples.</param>
  ''' <returns>The value returned by detected() for these samples.</returns>
  public function run(datai() as int16) as double
    dim features as double(,) = datatomfcc(datai)
    dim model_input as double(,,) = mfcctovect(features)
    dim score as double = detected(model_input)
    return score
  end function
 
  ''' <summary>
  ''' Computes MFCC features for 16-bit audio samples by chaining the helpers of
  ''' mfcc_module: normalise, window, pad, frame, FFT, power spectrum,
  ''' filter bank, dB conversion and cosine transform.
  ''' </summary>
  ''' <param name="buff16">Audio samples.</param>
  ''' <returns>The matrix produced by _mfcc() from a 20 x 128 dct() basis.</returns>
  public function mfcc(buff16() as int16) as double(,)
    const n_fft as integer = 2048

    ' 1. Normalise: scale the 16-bit samples by 1/32768.
    dim samples(buff16.length - 1) as double
    for i as integer = 0 to buff16.length - 1
      samples(i) = buff16(i) / 32768
    next i

    ' 2. Analysis window of one frame length.
    dim window as double() = get_window(n_fft)

    ' 3. Mirror-pad half a frame on each side.
    dim padded as double() = padreflect(samples, n_fft \ 2)

    ' 4. Split into frames (default frame length and hop of frame2).
    dim frames as double(,) = frame2(padded)
    dim n_frames as integer = frames.getlength(0)

    ' 5. Apply the window to every frame.
    dim windowed(n_frames - 1, n_fft - 1) as double
    for f as integer = 0 to n_frames - 1
      for k as integer = 0 to n_fft - 1
        windowed(f, k) = frames(f, k) * window(k)
      next k
    next f

    ' 6. FFT and power spectrum.
    dim complex_spec(,) as complex = fft(windowed)
    dim power as double(,) = spectrum(complex_spec)

    ' 7. Build the filter bank from band edges and FFT bin frequencies.
    dim fftfreqs() as double = fft_frequencies(16000, n_fft)
    dim mel_f as double() = melfilter()
    dim fdiff as double() = diff(mel_f)
    dim ramps_ as double(,) = ramps(mel_f, fftfreqs)
    dim weights(,) as double = weight(ramps_, fdiff)
    normal(mel_f, weights) ' scales weights in place

    ' 8. Filter-bank energies, converted to dB.
    dim mel_power(,) as double = melspectrogram(weights, power)
    dim mel_db as double(,) = power_to_db(mel_power)

    ' 9. Cosine transform: 20 coefficients from 128 bands.
    dim dct_ as double(,) = dct(20, 128)
    return _mfcc(dct_, mel_db)
  end function
 
end class

以上这篇对python中librosa的mfcc步骤详解就是小编分享给大家的全部内容了，希望能给大家一个参考，也希望大家多多支持。

上一篇： PHP数组排序函数合集以及它们之间的联系分析

下一篇： VBS教程：函数-Int、Fix 函数

对python中Librosa的mfcc步骤详解

对python多线程中互斥锁Threading.Lock的简单应用详解

详解Python2.x中对Unicode编码的使用

Python编程中归并排序算法的实现步骤详解

对python中list的拷贝与numpy的array的拷贝详解

对python中的乘法dot和对应分量相乘multiply详解

对Python中gensim库word2vec的使用详解

对Python中TKinter模块中的Label组件实例详解

对Python中创建进程的两种方式以及进程池详解

对Python中实现两个数的值交换的集中方法详解

对python模块中多个类的用法详解