文章标题
程序员文章站
2024-03-01 23:57:58
...
根据8K16bit语音分离结果对.wav文件字节进行重组的方法
废话不多说,直接上代码
/**
 * Rebuilds up to two WAV files ("A" and "B") from the engine's separation result and
 * stores their file ids on the task identified by {@code originFileId}.
 *
 * <p>The JSON is parsed into {@link VoiceData} entries, each entry's total length is
 * computed as the sum of its segments' lengths, and the entries are sorted by that total
 * in descending order. The entry with the largest total becomes "A"; the second largest,
 * when present, becomes "B". When the parsed list is empty the task is updated with
 * {@code formatFileId} as "A" and an empty "B".
 *
 * <p>Any exception is logged and not rethrown.
 *
 * @param originFileId id of the original file, used as the key for the task update
 * @param jobInfoJson  JSON array returned by the engine, parsed as {@code List<VoiceData>}
 * @param formatFileId id of the attachment whose bytes are split
 */
@Override
public void saveSplitVoiceByBatch(String originFileId, String jobInfoJson,
        String formatFileId) {
    try {
        if (!StringUtils.isNotBlank(originFileId)
                || !StringUtils.isNotBlank(jobInfoJson)) {
            logger.info("引擎未返回有效数据");
            return;
        }
        // Deserialize the engine result.
        List<VoiceData> voiceDataList = JSON.parseArray(jobInfoJson,
                VoiceData.class);
        // Store the summed segment length on every entry so the entries can be ranked.
        for (VoiceData voiceData : voiceDataList) {
            int voiceDataLength = 0;
            for (VoiceDataDTO voiceD : voiceData.getSegs()) {
                voiceDataLength += voiceD.getLength();
            }
            voiceData.setLength(voiceDataLength);
        }
        // Sort by total length, largest first.
        Collections.sort(voiceDataList, new Comparator<VoiceData>() {
            @Override
            public int compare(VoiceData data1, VoiceData data2) {
                return data2.getLength().compareTo(data1.getLength());
            }
        });
        // Look up the attachment record of the file to split.
        AttachFile attachFile = attachFileDao.select(formatFileId);
        if (voiceDataList.isEmpty()) {
            // Nothing was separated: register the formatted file itself as "A".
            regTaskDao.updateBatchSplit(originFileId, formatFileId, "");
            return;
        }
        if (attachFile == null) {
            logger.error("No attachment found for formatFileId=" + formatFileId);
            return;
        }
        byte[] waveFile = getVoiceFromFdfs(attachFile);
        if (waveFile == null) {
            logger.error("Could not load voice bytes for formatFileId=" + formatFileId);
            return;
        }
        // Assemble and upload "A" from the entry with the largest total length.
        List<VoiceDataDTO> voiceA = voiceDataList.get(0).getSegs();
        InputStream inputStreamA = splitVoiceInfo(waveFile, voiceA);
        String attachFileIdA = uploadSplitVoice(attachFile, "A",
                inputStreamA);
        String attachFileIdB = "";
        // Only touch index 1 when a second entry exists; entries beyond the first two
        // are ignored.
        if (voiceDataList.size() > 1) {
            List<VoiceDataDTO> voiceB = voiceDataList.get(1).getSegs();
            InputStream inputStreamB = splitVoiceInfo(waveFile, voiceB);
            attachFileIdB = uploadSplitVoice(attachFile, "B", inputStreamB);
        }
        // Record the new A/B file ids on the task keyed by the original file id.
        regTaskDao.updateBatchSplit(originFileId, attachFileIdA,
                attachFileIdB);
        logger.info("形成新的A、B语音处理结束,A、B语音对应的文件主键分别为:A:"
                + attachFileIdA + " B:" + attachFileIdB);
    } catch (Exception e) {
        logger.error("Failed to save split voice for originFileId=" + originFileId, e);
    }
}
/**
 * Uploads one rebuilt voice stream and returns the id of the stored file.
 *
 * <p>The uploaded file is named after the original: {@code <base>.<fileName>说话人语音<ext>},
 * where {@code <ext>} is everything from the last {@code '.'} of the original name. When
 * the original name has no {@code '.'}, the whole name is used as the base and no
 * extension is appended. The stream is always closed before returning.
 *
 * @param attachFile  attachment record of the original voice file; only its original
 *                    name is read here
 * @param fileName    label inserted into the new file name (the caller passes "A" or "B")
 * @param inputStream content to upload; {@code available()} is used as the file size, which
 *                    is exact for in-memory streams but only an estimate for others
 * @return id returned by the attachment service, or {@code null} when an
 *         {@link IOException} occurred
 */
public String uploadSplitVoice(AttachFile attachFile, String fileName, InputStream inputStream) {
    AttachFile attach = new AttachFile();
    String attachFileId = null;
    try {
        attach.setFileSize((long) (inputStream.available()));
        String originalName = attachFile.getOriginalName();
        int dotIndex = originalName.lastIndexOf(".");
        // lastIndexOf returns -1 for names without an extension; substring(0, -1) would throw.
        String baseName = dotIndex >= 0 ? originalName.substring(0, dotIndex) : originalName;
        String extension = dotIndex >= 0 ? originalName.substring(dotIndex) : "";
        attach.setOriginalName(baseName
                + "."
                + fileName
                + "说话人语音"
                + extension);
        attachFileId = attachFileService.uploadAndConfirm(attach, inputStream);
    } catch (IOException e) {
        logger.error("Failed to upload split voice " + fileName, e);
    } finally {
        // Close on every path, including when the upload itself throws.
        try {
            inputStream.close();
        } catch (IOException e) {
            logger.error("Failed to close split voice stream " + fileName, e);
        }
    }
    return attachFileId;
}
/**
 * Downloads the content of a stored voice file into memory.
 *
 * @param attachFile attachment record of the voice file; it is cast to {@code FdfsFile}
 *                   and its store location is passed to {@code fdfsClient}
 * @return the downloaded bytes, or {@code null} when the download failed (the failure is
 *         logged)
 * @throws ClassCastException if {@code attachFile} is not an {@code FdfsFile}
 */
public byte[] getVoiceFromFdfs(AttachFile attachFile) {
    FdfsFile fdfsFile = (FdfsFile) attachFile;
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try {
        fdfsClient.downloadFile(out, fdfsFile.getStoreLocation());
        return out.toByteArray();
    } catch (Exception e) {
        // Keep the null-on-failure contract, but do not hide the cause.
        logger.error("Failed to download voice file via fdfsClient", e);
        return null;
    }
}
/**
 * Builds a new WAV stream from selected segments of an existing WAV byte array.
 *
 * <p>The first 44 bytes of {@code waveFile} are taken as the header. For every segment,
 * the bytes from {@code begin * 16} (inclusive) to {@code end * 16} (exclusive) are copied
 * out of {@code waveFile} and appended in list order; 16 is the number of bytes per
 * millisecond of 8 kHz / 16-bit audio. Segments whose end offset lies beyond the end of
 * {@code waveFile} are skipped. The header and the collected audio are then passed to
 * {@link #changeHead(byte[], byte[])}, and the result is returned as a stream.
 *
 * <p>NOTE(review): offsets are applied from the start of {@code waveFile}, i.e. including
 * the 44 header bytes. If the engine's offsets are relative to the audio data, every
 * segment is shifted by 44 bytes. Confirm against the engine contract.
 *
 * @param waveFile  bytes of the original WAV file, header included
 * @param voiceData segments to keep; {@code begin}/{@code end} are multiplied by
 *                  bytes-per-millisecond, so they are treated as millisecond offsets
 * @return an in-memory stream holding the rebuilt WAV file
 * @throws NullPointerException     if {@code waveFile} is {@code null}
 * @throws IllegalArgumentException if a segment inside the file has a negative begin or a
 *                                  begin greater than its end
 */
public InputStream splitVoiceInfo(byte[] waveFile,
        List<VoiceDataDTO> voiceData) {
    // WAV header of the original file.
    byte[] dataHead = Arrays.copyOfRange(waveFile, 0, 44);
    // Bytes per millisecond for 8 kHz, 16-bit audio.
    int lenPerMs = 8000 * 16 / 8 / 1000;
    // Growable buffer: appending is linear, not a full re-copy of the audio per segment.
    ByteArrayOutputStream data = new ByteArrayOutputStream();
    for (VoiceDataDTO voice : voiceData) {
        // long arithmetic so very large offsets cannot wrap around before the bounds check
        long from = voice.getBegin() * (long) lenPerMs;
        long to = voice.getEnd() * (long) lenPerMs;
        if (to > waveFile.length) {
            continue;
        }
        if (from < 0 || from > to) {
            throw new IllegalArgumentException("Invalid segment range: begin="
                    + voice.getBegin() + ", end=" + voice.getEnd());
        }
        data.write(waveFile, (int) from, (int) (to - from));
    }
    logger.info("准备 进入语音头部信息转换:");
    dataHead = changeHead(dataHead, data.toByteArray());
    return new ByteArrayInputStream(dataHead);
}
/**
 * Concatenates a WAV header with audio data and rewrites the header's size fields to
 * match the new total length.
 *
 * <p>The fields are written at fixed offsets: bytes 4-7 receive {@code totalLength - 8},
 * bytes 36-39 receive the ASCII tag {@code "data"} and bytes 40-43 receive
 * {@code totalLength - 44}. These offsets assume a 44-byte header; callers must pass one
 * of that size. The values before and after the rewrite are logged.
 *
 * <p>Integer/byte conversion is delegated to {@code CommonUtils.toLH} and
 * {@code CommonUtils.lBytesToInt} (presumably little-endian, as WAV requires; verify
 * against that class).
 *
 * @param headBytes header bytes copied from the original file
 * @param data      audio bytes that follow the header
 * @return a new array containing the patched header followed by {@code data}
 */
public byte[] changeHead(byte[] headBytes, byte[] data) {
    logger.debug("进入语音头部信息转换:");
    // Merge header and audio data into one array.
    int validLen = headBytes.length + data.length;
    byte[] validBytes = new byte[validLen];
    System.arraycopy(headBytes, 0, validBytes, 0, headBytes.length);
    System.arraycopy(data, 0, validBytes, headBytes.length, data.length);
    // Log the header fields as inherited from the original file.
    byte[] orgPreSize = Arrays.copyOfRange(validBytes, 4, 8);
    byte[] orgNexSize = Arrays.copyOfRange(validBytes, 40, 44);
    byte[] orgDataType = Arrays.copyOfRange(validBytes, 36, 40);
    logger.info("原始头部信息中: " + CommonUtils.lBytesToInt(orgPreSize) + " ,"
            + CommonUtils.lBytesToInt(orgNexSize) + " ,"
            + new String(orgDataType, java.nio.charset.StandardCharsets.US_ASCII));
    // Patch the header for the new length.
    int dataPreSize = validLen - 8;
    int dataNextSize = validLen - 44;
    String dataType = "data";
    byte[] dataPreSizeBytes = CommonUtils.toLH(dataPreSize);
    byte[] dataNextSizeBytes = CommonUtils.toLH(dataNextSize);
    // The chunk tag is ASCII by definition; do not depend on the platform default charset.
    byte[] dataTypeBytes = dataType.getBytes(java.nio.charset.StandardCharsets.US_ASCII);
    System.arraycopy(dataPreSizeBytes, 0, validBytes, 4,
            dataPreSizeBytes.length);
    System.arraycopy(dataNextSizeBytes, 0, validBytes, 40,
            dataNextSizeBytes.length);
    System.arraycopy(dataTypeBytes, 0, validBytes, 36, dataTypeBytes.length);
    // Log the same fields again after patching.
    byte[] nowPreSize = Arrays.copyOfRange(validBytes, 4, 8);
    byte[] nowNexSize = Arrays.copyOfRange(validBytes, 40, 44);
    byte[] nowDataType = Arrays.copyOfRange(validBytes, 36, 40);
    logger.info("新头部信息中: " + CommonUtils.lBytesToInt(nowPreSize) + " ,"
            + CommonUtils.lBytesToInt(nowNexSize) + " ,"
            + new String(nowDataType, java.nio.charset.StandardCharsets.US_ASCII));
    return validBytes;
}
/**
 * One entry of the array returned by the engine: a list of segments plus the values
 * attached to that list.
 */
public class VoiceData {

    /** Segments that belong to this entry. */
    private List<VoiceDataDTO> segs;

    /** The {@code spk} value of this entry (presumably a speaker index; confirm with the engine). */
    private Integer spk;

    /** Total length of this entry; the caller sets it to the sum of the segment lengths. */
    private Integer length;

    /**
     * @return the segments of this entry
     */
    public List<VoiceDataDTO> getSegs() {
        return this.segs;
    }

    /**
     * @return the {@code spk} value
     */
    public Integer getSpk() {
        return this.spk;
    }

    /**
     * @return the total length
     */
    public Integer getLength() {
        return this.length;
    }

    /**
     * @param segs the segments to set
     */
    public void setSegs(List<VoiceDataDTO> segs) {
        this.segs = segs;
    }

    /**
     * @param spk the {@code spk} value to set
     */
    public void setSpk(Integer spk) {
        this.spk = spk;
    }

    /**
     * @param length the total length to set
     */
    public void setLength(Integer length) {
        this.length = length;
    }
}
/**
 * One voice segment reported by the engine.
 */
public class VoiceDataDTO {
    /**
     * Start of the segment. {@code splitVoiceInfo} multiplies this by bytes-per-millisecond,
     * so it is treated as a millisecond offset there.
     */
    private Integer begin;
    /**
     * End of the segment (exclusive), in the same unit as {@link #begin}.
     */
    private Integer end;
    /**
     * Type of the segment as reported by the engine.
     */
    private String type;
    /**
     * Explicitly supplied length; only used when {@link #begin} or {@link #end} is unset.
     */
    private Integer length;

    /**
     * @return the begin
     */
    public Integer getBegin() {
        return begin;
    }

    /**
     * @param begin the begin to set
     */
    public void setBegin(Integer begin) {
        this.begin = begin;
    }

    /**
     * @return the end
     */
    public Integer getEnd() {
        return end;
    }

    /**
     * @param end the end to set
     */
    public void setEnd(Integer end) {
        this.end = end;
    }

    /**
     * @return the type
     */
    public String getType() {
        return type;
    }

    /**
     * @param type the type to set
     */
    public void setType(String type) {
        this.type = type;
    }

    /**
     * Returns the length of the segment.
     *
     * @return {@code end - begin} when both are set; otherwise the value passed to
     *         {@link #setLength(Integer)}, which may be {@code null}
     */
    public Integer getLength() {
        // Unboxing a null begin/end would throw; fall back to the stored value instead.
        if (this.begin != null && this.end != null) {
            return this.end - this.begin;
        }
        return this.length;
    }

    /**
     * @param length the length to use when begin/end are not both set
     */
    public void setLength(Integer length) {
        this.length = length;
    }
}
wav头文件格式:
8KHz采样、16比特量化的线性PCM语音信号的WAVE文件头格式表(共44字节)
偏移地址 字节数 数据类型 内容 文件头定义为
00H 4 char “RIFF” char riff_id[4]=”RIFF”
04H 4 long int 文件总长-8 long int size0=文件总长-8
08H 8 char “WAVEfmt ” char wave_fmt[8]
10H 4 long int 10 00 00 00H(PCM) long int size1=0x10
14H 2 int 01 00H int fmttag=0x01
16H 2 int 声道数(单声道为1,双声道为2) int channel=1 或2
18H 4 long int 采样率 long int samplespersec
1CH 4 long int 每秒播放字节数 long int bytepersec
20H 2 int 采样一次占字节数 int blockalign=声道数*量化数/8
22H 2 int 量化数 int bitpersamples=8或16
24H 4 char “data” char data_id[4]=”data”
28H 4 long int 采样数据字节数 long int size2=文件总长-44
2CH 到文尾 char 采样数据