欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页  >  IT编程

对python中Librosa的mfcc步骤详解

程序员文章站 2023-09-04 12:11:25
1.对语音数据归一化 如16000hz的数据,会将每个点/32768 2.计算窗函数:(*注意librosa中不进行预处理) 3.进行数据扩展填充,他进行的是镜像填充...

1.对语音数据归一化

如16000hz的数据,会将每个点/32768

2.计算窗函数:(*注意librosa中不进行预处理)

3.进行数据扩展填充,他进行的是镜像填充("reflect")

如原数据为 12345 -》 填充为4的,左右各填充4 即:5432123454321 即:5432-12345-4321

4.分帧

5.加窗:对每一帧进行加窗,

6.进行fft傅里叶变换

librosa中fft计算,可以使用.net中的system.numerics

mathnet.numerics.integraltransforms.fourier.forward(fft_frame, fourieroptions.matlab) 计算,结果相同

7.mel计算(每一帧取20个特征点)

imports system.numerics
imports mathnet.numerics
imports mathnet.numerics.integraltransforms
 
module mfcc_module
 
  public class librosa
 
  end class
 
  dim pi as double = 3.1415926535897931
 
 
 
 
 
  public function spectrum(fft_data(,) as complex) as double(,)
    dim new_data(fft_data.getlength(0) - 1, fft_data.getlength(1) - 1) as double
 
 
    for n = 0 to fft_data.getlength(0) - 1
      ' debug.print("////////////////////////spectrum//////////////////")
      ' debug.print("////////////////////////spectrum//////////////////")
      for i = 0 to fft_data.getlength(1) - 1
        new_data(n, i) = fft_data(n, i).magnitudesquared
        ' debug.write(new_data(n, i) & "  ")
      next
    next
 
    return new_data
 
  end function
 
 
  public function fft(data as double(,)) as complex(,)
 
    dim result(data.getlength(0) - 1, 1024) as complex
    '2049 加了一个 数组类型 0 开始
    dim fft_frame as complex() = new complex(data.getlength(1) - 1) {}
 
    for n = 0 to data.getlength(0) - 1
      for i as integer = 0 to data.getlength(1) - 1
        fft_frame(i) = data(n, i)
      next
      mathnet.numerics.integraltransforms.fourier.forward(fft_frame, fourieroptions.matlab)
 
      for k = 0 to 1024
        result(n, k) = fft_frame(k)
      next
 
      'debug.print("fft **************")
      'for each mem in fft_frame
      '  debug.print(mem.tostring & "  ")
      'next
 
 
    next n
 
 
 
    return result
 
 
  end function
 
  public function _mfcc(dct_ as double(,), power_to_db_ as double(,)) as double(,)
    'dct 20,128
    'power_to_db 5,128
    'result = 20,5
    dim result(dct_.getlength(0) - 1, power_to_db_.getlength(1) - 1) as double
    dim r1, r2 as double
    for n = 0 to dct_.getlength(0) - 1 '20
      for i = 0 to power_to_db_.getlength(1) - 1 '5
        r2 = 0
        for k = 0 to dct_.getlength(1) - 1 '128
          r1 = dct_(n, k) * power_to_db_(k, i)
          r2 = r2 + r1
 
        next
        result(n, i) = r2
      next
    next
 
    return result
  end function
 
  public function dct(n_filters as integer, n_input as integer) as double(,)
 
    dim t1 as double = 2 * n_input
 
    dim samples(n_input - 1) as double
    dim basis(n_filters - 1, n_input - 1) as double
 
    dim n as integer = 1
    for i = 0 to n_input - 1
      samples(i) = n * pi / (2 * n_input)
      n = n + 2
    next i
 
    for i = 0 to n_input - 1
      basis(0, i) = 1 / math.sqrt(n_input)
    next
    for n = 1 to n_filters - 1
      for i = 0 to n_input - 1
        basis(n, i) = math.cos(n * samples(i)) * math.sqrt(2 / n_input)
 
      next
    next
 
    return basis
  end function
 
 
  '1e-10 = 0.0000000001
  public function power_to_db(s as double(,), optional ref as double = 1, optional admin as double = 0.0000000001, optional top_db as double = 80) as double(,)
 
    dim result(s.getlength(0) - 1, s.getlength(1) - 1) as double
 
    dim log_spec as double
 
 
    for n = 0 to s.getlength(0) - 1
      for i = 0 to s.getlength(1) - 1
 
        log_spec = 10 * math.log10(math.max(admin, s(n, i)))
        result(n, i) = log_spec - 10 * math.log10(math.max(admin, ref))
      next
    next
 
 
 
    'if top_db <> 0 then
    '  for n = 0 to s.getlength(0) - 1
    '    for i = 0 to s.getlength(1) - 1
    '      'result(n, i) = math.max(result(n, i), result(n, i) - top_db)
    '    next
    '  next
 
    'end if
 
    return result
 
 
 
  end function
 
 
 
  public function melspectrogram(mel_basis(,) as double, s(,) as double) as double(,)
    'mel_basis 128,1025
    's 5 ,1025 -> 1025,5
    ' result 128,5
    dim result(mel_basis.getlength(0) - 3, s.getlength(0) - 1) as double
    dim r1, r2 as double
 
    for n = 0 to mel_basis.getlength(0) - 3
 
      for i = 0 to s.getlength(0) - 1
        for k = 0 to mel_basis.getlength(1) - 1
 
 
          r1 = mel_basis(n, k) * s(i, k)
          r2 = r2 + r1
        next
        result(n, i) = r2
        r2 = 0
      next
 
    next
    return result
 
  end function
 
  public function normal(mel_f as double(), weights(,) as double) as double(,)
    dim enorm(mel_f.length - 2) as double
 
    ' debug.print("*************normal//////////////")
    ' debug.print("*************normal//////////////")
    for i = 0 to mel_f.length - 3
      enorm(i) = 2 / (mel_f(2 + i) - mel_f(i))
    next
 
 
    for i = 0 to weights.getlength(1) - 1
      for n = 0 to weights.getlength(0) - 2
        weights(n, i) = weights(n, i) * enorm(n)
      next
    next
    return weights
  end function
 
 
  public function weight(a as double(,), fdiff as double()) as double(,)
    dim lower, upper as double
 
    dim data(a.getlength(0) - 1, a.getlength(1) - 1) as double
 
    for n = 0 to a.getlength(0) - 3
      for i = 0 to a.getlength(1) - 1
        lower = -(a(n, i) / fdiff(n))
        upper = a(n + 2, i) / fdiff(n + 1)
        data(n, i) = math.max(0, math.min(lower, upper))
      next
    next
    return data
  end function
 
  public function ramps(a as double(), b as double()) as double(,)
    dim data(a.length - 1, b.length - 1) as double
 
    ' debug.print("ramps*********************")
    for n = 0 to a.length - 1
      'debug.print("******")
      'debug.print("------")
      for i = 0 to b.length - 1
        data(n, i) = a(n) - b(i)
        'debug.write(data(n, i) & "  ")
      next
    next
    return data
 
  end function
  public function diff(arr as double()) as double()
    dim data(arr.length - 2) as double
    for i = 1 to arr.length - 1
      data(i - 1) = arr(i) - arr(i - 1)
      'debug.print(data(i - 1))
    next
 
    return data
  end function
 
  '分帧 算法2
  public function frame2(y as double(), optional n_ftt as integer = 2048, optional hop as integer = 512) as double(,)
    dim tim as integer = math.floor((y.length - n_ftt) / hop) + 1
    dim new_buff(tim - 1, n_ftt - 1) as double
    dim copypos as integer = 0
    for i = 0 to tim - 1
      for k = 0 to n_ftt - 1
        new_buff(i, k) = y(copypos + k)
      next
      copypos = copypos + hop
    next
 
    'for k = 0 to tim - 1
    '  debug.print("//////////////////////////////////////")
    '  debug.print("///////////////fram2///////////////////////" & k)
    '  for i = 0 to n_ftt - 1
    '    debug.print(new_buff(k, i) & " ")
    '  next
    'next k
 
    return new_buff
 
 
  end function
 
  '
  public function frame(y as double(), optional n_ftt as integer = 2048, optional hop as integer = 512) as double()
    dim tim as integer = math.floor((y.length - n_ftt) / hop) + 1
    dim new_buff(tim * n_ftt) as double
    dim pos as integer = 0
    dim copypos as integer = 0
    for i = 0 to tim - 1
      array.copy(y, copypos, new_buff, pos, n_ftt)
      'buffer.blockcopy(y, 0, new_buff, pos, n_ftt)
      copypos = copypos + hop
      pos = pos + n_ftt
    next
 
    for k = 0 to tim - 1
      'debug.print("//////////////////////////////////////")
      'debug.print("//////////////////////////////////////")
      for i = 0 to n_ftt - 1
        debug.write(new_buff(k * n_ftt + i) & " ")
      next
    next k
 
    return new_buff
 
  end function
 
 
  public function melfilter() as double()
    dim filter_points(128 + 1) as integer '40个滤波器,需要41点
    const samplerate as integer = 16000 '采样频率 16000
    const filternum as integer = 128 '滤波器数量 取40个
    const framesize as integer = 512 '帧长512
 
    dim fremax as double = samplerate / 2  '实际最大频率 
    dim fremin as double = 0  '实际最小频率 
    dim melfremax as double = hz_to_mel(fremax)   '将实际频率转换成梅尔频率 
    dim melfremin as double = 1125 * math.log(1 + fremin / 700)
 
    dim k as double = (melfremax - melfremin) / (filternum + 1)
 
    dim m as double() = new double(filternum + 1) {}
    dim h as double() = new double(filternum + 1) {}
 
    for i as integer = 0 to filternum + 1
      m(i) = melfremin + k * i
      'h(i) = 700 * (math.exp(m(i) / 1125) - 1)
      '将梅尔频率转换成实际频率 
      filter_points(i) = mel_to_hz(m(i))
 
      'debug.print(m(i))
    next
 
    dim hzs as double() = mel_to_hz2(m)
    'for i = 0 to filternum + 1
    '  ' debug.print(hzs(i))
    'next
    return hzs
 
 
  end function
 
  public function hz_to_mel(frequencies as double, optional htk as boolean = false) as double
 
    dim mels as double
 
    if htk then
      mels = 1125 * math.log(1 + frequencies / 700)
    else
      dim f_min as double = 0.0
      dim f_sp as double = 200.0 / 3
      dim min_log_hz as double = 1000.0             ' beginning of log region (hz)
      dim min_log_mel as double = (min_log_hz - f_min) / f_sp  ' same (mels)
      dim logstep as double = math.log(6.4) / 27.0        ' step size for log region
      mels = min_log_mel + math.log(frequencies / min_log_hz) / logstep
    end if
    return mels
  end function
 
  public function mel_to_hz2(mel() as double, optional htk as boolean = false) as double()
    dim hz(mel.length - 1) as double
 
    dim f_min as double = 0.0
    dim f_sp as double = 200.0 / 3
    dim freqs(mel.length - 1) as double
 
    for i = 0 to mel.length - 1
      freqs(i) = f_min + f_sp * mel(i)
    next i
 
    dim min_log_hz as double = 1000.0             ' beginning of log region (hz)
    dim min_log_mel as double = (min_log_hz - f_min) / f_sp  ' same (mels)
    dim logstep as double = math.log(6.4) / 27.0
 
    for i = 0 to mel.length - 1
      if (mel(i) > min_log_mel) then
        freqs(i) = min_log_hz * math.exp(logstep * (mel(i) - min_log_mel))
      end if
 
    next
    'hz = min_log_hz * math.exp(logstep * (mel - min_log_mel))
 
 
    return freqs
  end function
 
  public function mel_to_hz(mel as double, optional htk as boolean = false) as double
    dim hz as double
    if htk then
      hz = 700 * (math.exp(mel) / 1125) - 1
    else
      dim f_min as double = 0.0
      dim f_sp as double = 200.0 / 3
      dim freqs = f_min + f_sp * mel
 
      dim min_log_hz as double = 1000.0             ' beginning of log region (hz)
      dim min_log_mel as double = (min_log_hz - f_min) / f_sp  ' same (mels)
      dim logstep as double = math.log(6.4) / 27.0
      hz = min_log_hz * math.exp(logstep * (mel - min_log_mel))
      'hz = min_log_hz * math.exp(logstep * (mel - min_log_mel))
 
    end if
    return hz
  end function
 
 
  public function fft_frequencies(sr as integer, n_fft as integer) as double()
    dim fft_data(n_fft / 2) as double
    for i = 0 to n_fft / 2
      fft_data(i) = i * sr / n_fft
    next
    return fft_data
  end function
 
 
 
  '左右填充,优化
  public function padreflect2(data() as double, num as integer)
    'pad 10 ,10 
    dim tim(data.length - 3) as double
    for i = 0 to data.length - 3
      tim(i) = data(data.length - 2 - i)
    next
 
    dim dump() as double = data.concat(tim).toarray()
 
    'for each i in dump
    '  debug.write(i)
  end function
 
  public function padreflect(data() as double, num as integer)
 
    'pad 10 ,10 
    dim tim(data.length - 3) as double
    for i = 0 to data.length - 3
      tim(i) = data(data.length - 2 - i)
    next
 
    dim dump() as double = data.concat(tim).toarray()
 
    'for each i in dump
    '  debug.write(i)
    'next
 
    'left_edge
 
    ' debug.print("***************************")
    dim left_edge(num - 1) as double
    _copydup(left_edge, dump, true)
    'for i = 0 to num - 1
    '  debug.write(left_edge(i))
    'next
 
    'right_edge
    'debug.print("***************************")
    dim right_edge(num + data.length) as double
    _copydup(right_edge, dump, false)
    'for i = 0 to num - 1
    '  debug.write(right_edge(i))
    'next
    'debug.print("***************************")
    dim result as double() = left_edge.concat(right_edge).toarray()
    return result
 
  end function
 
  'copy tim to data dumply
  public function _copydup(data() as double, tim() as double, optional left as boolean = true)
    dim last as integer = data.length mod tim.length
    dim times as integer = math.floor(data.length / tim.length)
    dim pos as integer
    if left then
      array.copy(tim, tim.length - last, data, 0, last)
      pos = last
      for i = 0 to times - 1
        array.copy(tim, 0, data, pos, tim.length)
        pos = pos + tim.length
      next
 
    else
 
      'right
      pos = 0
      for i = 0 to times - 1
        array.copy(tim, 0, data, pos, tim.length)
        pos = pos + tim.length
      next
 
      array.copy(tim, 0, data, pos, last)
 
    end if
 
 
  end function
 
 
 
  public function general_cosine(m as integer, alpha as double(), sym as boolean) as double()
 
    if not sym then
      m = m + 1
    end if
 
 
    dim tim as double = (2 * pi) / (m - 1)
    dim x(m) as double
    dim w(m) as double
 
    'debug.print("ine")
    for i = 0 to m - 1
      x(i) = -pi + tim * i
      'debug.write(x(i) & "  ")
    next
    'debug.print("******")
    for i = 0 to alpha.getlength(0) - 1
      for k = 0 to m - 1
        w(k) = w(k) + alpha(i) * math.cos(i * x(k))
        'debug.write(w(k) & "  ")
      next
 
    next
 
    return w
 
  end function
 
  ''' <summary>
  ''' 汉明窗
  ''' </summary>
  ''' <param name="m"> 窗长</param>
  ''' <returns></returns>
  public function general_hamming(m as integer) as double()
    dim db as double() = {0.5, 1 - 0.5}
    return general_cosine(m, db, false)  '进行加1 ,若sys为false
  end function
 
  public function get_window(m as integer) as double()
    return general_hamming(m)
 
  end function
 
 
 
end module
 
imports system.io
imports system.numerics
imports tensorflow
 
'install-package tensorflowsharp
 
public class keyworddetect
 
  dim graph as tfgraph
  dim session as tfsession
 
  '加载模型
  public sub new()
    dim model as byte() = file.readallbytes("f:\graph1.pb")
    '导入graphdef
 
    graph = new tfgraph()
    graph.import(model, "")
 
    session = new tfsession(graph)
 
    ' threading.threadpool.setmaxthreads(5, 5)
  end sub
 
  protected overrides sub finalize()
 
    session.closesession()
 
 
  end sub
 
  '将声音数据变为mfcc byte数据
  public function databtomfcc(datab() as byte) as double(,)
    dim buff16(datab.length / 2 - 1) as int16
    buffer.blockcopy(datab, 0, buff16, 0, datab.length - 1)
 
    dim result(,) as double = mfcc(buff16)
    return result
  end function
 
 
  '将声音数据变为mfcc
  public function datatomfcc(datai() as int16) as double(,)
 
    dim result(,) as double = mfcc(datai)
    return result
  end function
 
 
  '将mfcc变为输入数据格式
  public function mfcctovect(mfcc as double(,)) as double(,,)
    dim data(0, 1, 129) as double
 
    dim n as integer = 0, m as integer = 0
    for i = 0 to mfcc.getlength(0) - 1
      for k = 0 to mfcc.getlength(1) - 1
        data(0, m, n) = mfcc(i, k)
        n = n + 1
      next
      if n = 130 then
 
        m = 1
        n = 0
      end if
    next
    return data
  end function
 
  dim output
  dim runner as tfsession.runner
  dim result
  dim rshape
 
  '关键字检测
  public function detected(data(,,) as double) as double
 
    ' dim tensor as tftensor = new tftensor(data)
    runner = session.getrunner()
 
    runner.addinput(graph("input")(0), data).fetch(graph("out")(0))
 
    output = runner.run()
 
 
    result = output(0)
    rshape = result.shape
    dim rt as double
    rt = result.getvalue(true)(0)(0)
    'for k = 0 to rshape.getvalue(0) - 1
    '  rt = result.getvalue(true)(k)(0)
    '  'debug.print(rt)
    '  if (rt > 0.8) then
    '    debug.print("-----------recogxili")
    '    ' msgbox("recgo")
    '  end if
    'next
 
    return rt
 
  end function
 
 
 
  'public function runb(datab() as byte)
  '  dim mfccd as double(,) = databtomfcc(datab)
  '  dim inputx as double(,,) = mfcctovect(mfccd)
  '  detected(inputx)
  'end function
 
 
  'public function threadpoolrun(datai() as int16)
 
  '  threading.threadpool.queueuserworkitem(run(datai), datai)
  '  '  dim thrd1 as new threading.thread(new threading.parameterizedthreadstart(addressof run))
  '  ' thrd1.start(datai)
  'end function
  'delegate function delgrun(datai() as int16)
  'public function threadrun(datai() as int16)
  '  ' dim drun as new delgrun(addressof run)
 
  '  dim thrd1 as new threading.thread(new threading.parameterizedthreadstart(addressof run))
  '  thrd1.start(datai)
 
  'end function
 
 
  public function run(datai() as int16) as double
    ' debug.print("thread *****1")
    dim mfccd as double(,) = datatomfcc(datai)
    dim inputx as double(,,) = mfcctovect(mfccd)
    return detected(inputx)
  end function
 
  public function mfcc(buff16() as int16) as double(,)
    dim datalen as integer = buff16.length * 2
    dim double_buff(datalen / 2 - 1) as double
    dim len as integer = datalen / 2
    array.copy(buff16, double_buff, len)
 
    '******************
    for i = 0 to double_buff.length - 1
      double_buff(i) = double_buff(i) / 32768
      ' debug.print(double_buff(i))
    next
 
 
    '汉明窗create
    dim hann_window as double() = get_window(2048)
    'debug.print("--------------------------")
    'debug.print("hann_window**************")
    for each i in hann_window
      'debug.print(i & "  ")
    next
 
    'debug.print("--------------------------")
    'debug.print("*************pad reflect**************")
    dim y as double() = padreflect(double_buff, 1024)
    ' dim y as double() = double_buff
    'for each i in y
    '  'debug.print(i & "  ")
    'next
 
    'debug.print("--------------------------")
    'debug.print("***************frame************")
    dim frams as double(,) = frame2(y)
 
    dim tim as integer = frams.getlength(0)
 
    'debug.print("--------------------------")
    'debug.print("**********hann * data**************")
    dim hanndata(tim - 1, 2047) as double
 
 
 
    for n = 0 to tim - 1
      for i = 0 to 2048 - 1
        hanndata(n, i) = frams(n, i) * hann_window(i)
        ' debug.print(hanndata(i) & "  ")
      next
 
    next n
 
 
    '\\\\\\\\\\\\\\\\melspecture 
    dim specturm1(,) as complex = fft(hanndata)
 
 
    'for i = 0 to specturm1.getlength(0) - 1
    '  debug.print("--------------------------------------")
    '  debug.print("--------------------------------------")
    '  for k = 0 to specturm1.getlength(1) - 1
    '    debug.print(specturm1(i, k).real & "  " & specturm1(i, k).imaginary)
    '  next
    'next
 
    dim s as double(,) = spectrum(specturm1)
 
    dim fftfreqs() as double = fft_frequencies(16000, 2048)
    'debug.print("***************fftfreqs*****************")
    'debug.print("***************fftfreqs*****************")
    'debug.print("fftfreqs.shape", fftfreqs.length)
    'for i = 0 to fftfreqs.length - 1
    '  'debug.write(fftfreqs(i) & "  ")
    'next
 
    ''''''''''''''''mel * specturm1
    'debug.print("**************")
    'debug.print("****滤波器创建**********")
    dim mel_f as double() = melfilter()
 
 
    'debug.print("--------------------------")
    'debug.print("hann_window**************")
    'debug.print("diff")
    dim fdiff as double() = diff(mel_f)
 
    dim ramps_ as double(,) = ramps(mel_f, fftfreqs)
 
    dim weights(,) as double = weight(ramps_, fdiff)
 
    normal(mel_f, weights)
 
    's*weight = melspectrogram
    'weight 128,1025
    's 5 ,1025
    dim melspectrogram_(,) as double = melspectrogram(weights, s)
    dim power_to_db_ as double(,) = power_to_db(melspectrogram_)
 
    dim dct_ as double(,) = dct(20, 128)
 
    return _mfcc(dct_, power_to_db_)
  end function
 
end class

以上这篇对python中librosa的mfcc步骤详解就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持。