欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识。
您现在的位置是: 首页

文章标题

程序员文章站 2024-03-01 23:57:58
...

根据语音分离结果对 8kHz/16bit 的 .wav 文件字节进行重组的方法

废话不多说,直接上代码

/**
     * Rebuilds per-speaker wave files from the separation engine's result and
     * records the uploaded file ids on the task.
     *
     * <p>The JSON is parsed into a list of {@code VoiceData}; each entry's total
     * length is computed as the sum of its segment lengths, and the entries are
     * ordered by that total, largest first. Only the two largest entries are
     * used: the largest becomes speaker "A", the second largest (if any)
     * becomes speaker "B". When the engine returns no entries, the task is
     * updated with {@code formatFileId} as the "A" file and an empty "B" id.
     *
     * <p>Failures are logged and not propagated to the caller.
     *
     * @param originFileId id of the original file, used as the key for the task update
     * @param jobInfoJson  JSON array returned by the engine
     * @param formatFileId id of the file whose bytes are cut into speaker files
     */
    @Override
    public void saveSplitVoiceByBatch(String originFileId, String jobInfoJson,
            String formatFileId) {
        if (!(StringUtils.isNotBlank(originFileId)
                && StringUtils.isNotBlank(jobInfoJson))) {
            logger.info("引擎未返回有效数据");
            return;
        }
        try {
            List<VoiceData> voiceDataList = JSON.parseArray(jobInfoJson,
                    VoiceData.class);

            // A null or empty result means there is nothing to split: keep the
            // formatted file as the only output.
            if (voiceDataList == null || voiceDataList.isEmpty()) {
                regTaskDao.updateBatchSplit(originFileId, formatFileId, "");
                return;
            }

            // Total length per entry = sum of its segment lengths.
            for (VoiceData voiceData : voiceDataList) {
                int totalLength = 0;
                for (VoiceDataDTO segment : voiceData.getSegs()) {
                    totalLength += segment.getLength();
                }
                voiceData.setLength(totalLength);
            }

            // Largest total first.
            Collections.sort(voiceDataList, new Comparator<VoiceData>() {
                @Override
                public int compare(VoiceData data1, VoiceData data2) {
                    return data2.getLength().compareTo(data1.getLength());
                }
            });

            AttachFile attachFile = attachFileDao.select(formatFileId);
            byte[] waveFile = getVoiceFromFdfs(attachFile);
            if (waveFile == null) {
                // getVoiceFromFdfs returns null when the download fails; without
                // the source bytes no speaker file can be assembled.
                logger.error("Unable to load source wave file, formatFileId="
                        + formatFileId + ", originFileId=" + originFileId);
                return;
            }

            // Speaker A: the entry with the largest total length.
            InputStream inputStreamA = splitVoiceInfo(waveFile,
                    voiceDataList.get(0).getSegs());
            String attachFileIdA = uploadSplitVoice(attachFile, "A",
                    inputStreamA);

            // Speaker B: only read index 1 when a second entry actually exists.
            String attachFileIdB = "";
            if (voiceDataList.size() > 1) {
                InputStream inputStreamB = splitVoiceInfo(waveFile,
                        voiceDataList.get(1).getSegs());
                attachFileIdB = uploadSplitVoice(attachFile, "B", inputStreamB);
            }

            regTaskDao.updateBatchSplit(originFileId, attachFileIdA,
                    attachFileIdB);
            logger.info("形成新的A、B语音处理结束,A、B语音对应的文件主键分别为:A:"
                    + attachFileIdA + "    B:" + attachFileIdB);
        } catch (Exception e) {
            logger.error("Failed to save split voice files, originFileId="
                    + originFileId + ", formatFileId=" + formatFileId, e);
        }
    }

    /**
     * Uploads one assembled speaker file and returns the id of the stored file.
     *
     * <p>The uploaded file's name is derived from the source file's original
     * name: {@code <base>.<fileName>说话人语音<extension>}. When the original
     * name has no extension, the whole name is used as the base and no
     * extension is appended. The stream is always closed before returning.
     *
     * @param attachFile
     *            source file record; only its original name is read
     * @param fileName
     *            label inserted into the new file name (the caller passes "A" or "B")
     * @param inputStream
     *            content of the assembled speaker file
     * @return id returned by the upload service, or {@code null} when an
     *         {@link IOException} occurred
     */
    public String uploadSplitVoice(AttachFile attachFile, String fileName, InputStream inputStream) {
        AttachFile attach = new AttachFile();
        String attachFileId = null;
        try {
            attach.setFileSize((long) (inputStream.available()));

            String originalName = attachFile.getOriginalName();
            int dotIndex = originalName.lastIndexOf(".");
            // lastIndexOf returns -1 for a name without an extension; substring
            // would throw on that, so treat the whole name as the base.
            String baseName = dotIndex >= 0 ? originalName.substring(0, dotIndex) : originalName;
            String extension = dotIndex >= 0 ? originalName.substring(dotIndex) : "";
            attach.setOriginalName(baseName + "." + fileName + "说话人语音" + extension);

            attachFileId = attachFileService.uploadAndConfirm(attach, inputStream);
        } catch (IOException e) {
            logger.error("Failed to upload split voice file for speaker " + fileName, e);
        } finally {
            // Close on every path, including when the upload throws.
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (IOException e) {
                    logger.warn("Failed to close split voice stream for speaker " + fileName, e);
                }
            }
        }

        return attachFileId;
    }

    /**
     * Downloads the stored file into memory and returns its bytes.
     *
     * @param attachFile
     *            file record; must be an {@code FdfsFile}, whose store location
     *            is passed to the download client
     * @return the downloaded bytes, or {@code null} when the download failed
     *         (the failure is logged)
     */
    public byte[] getVoiceFromFdfs(AttachFile attachFile) {
        FdfsFile fdfsFile = (FdfsFile) attachFile;
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            fdfsClient.downloadFile(out, fdfsFile.getStoreLocation());
            return out.toByteArray();
        } catch (Exception e) {
            // Callers rely on the null return to detect failure; log the cause
            // instead of discarding it so the failure can be diagnosed.
            logger.error("Failed to download voice file from storage", e);
            return null;
        }
    }

    /**
     * Assembles a new wave file for one speaker from the source file's bytes.
     *
     * <p>The first 44 bytes of {@code waveFile} are taken as the header. Each
     * segment's begin/end value is multiplied by the bytes-per-millisecond of
     * 8 kHz / 16-bit audio (16) to obtain a byte range in {@code waveFile};
     * the ranges are concatenated in list order. Segments whose end offset
     * lies beyond the end of {@code waveFile} are skipped. The header is then
     * combined with the collected bytes by {@code changeHead}.
     *
     * @param waveFile
     *            bytes of the source wave file
     * @param voiceData
     *            segments belonging to one speaker
     * @return stream over the assembled file (header followed by sample data)
     */
    public InputStream splitVoiceInfo(byte[] waveFile,
            List<VoiceDataDTO> voiceData) {
        final int headerLength = 44;
        // Bytes per millisecond for 8 kHz, 16-bit audio.
        final int lenPerMs = 8000 * 16 / 8 / 1000;

        byte[] dataHead = Arrays.copyOfRange(waveFile, 0, headerLength);

        // NOTE(review): byte offsets are measured from the start of the file,
        // header included, so every segment is shifted by the 44 header bytes
        // unless the engine's positions already account for it - confirm.
        ByteArrayOutputStream samples = new ByteArrayOutputStream();
        for (VoiceDataDTO voice : voiceData) {
            int endOffset = voice.getEnd() * lenPerMs;
            if (endOffset <= waveFile.length) {
                int beginOffset = voice.getBegin() * lenPerMs;
                // Append in place; the buffer grows geometrically instead of
                // re-copying everything collected so far for each segment.
                samples.write(waveFile, beginOffset, endOffset - beginOffset);
            }
        }

        logger.info("准备        进入语音头部信息转换:");
        byte[] assembled = changeHead(dataHead, samples.toByteArray());
        return new ByteArrayInputStream(assembled);
    }

    /**
     * Joins a wave header with sample data and rewrites the header's size
     * fields to match the combined length.
     *
     * <p>Three header fields are overwritten in the combined array: bytes 4-7
     * receive the total length minus 8, bytes 40-43 receive the total length
     * minus 44, and bytes 36-39 receive the text {@code data}. The header
     * values before and after the rewrite are logged.
     *
     * @param headBytes header bytes placed at the start of the result
     * @param data      sample bytes appended after the header
     * @return a new array holding the header followed by the data, with the
     *         size fields updated
     */
    public byte[] changeHead(byte[] headBytes, byte[] data) {
        logger.debug("进入语音头部信息转换:");

        // Header first, sample data right behind it.
        int totalLength = headBytes.length + data.length;
        byte[] merged = Arrays.copyOf(headBytes, totalLength);
        System.arraycopy(data, 0, merged, headBytes.length, data.length);

        logger.info("原始头部信息中: " + describeWaveHeader(merged));

        // Encoded by CommonUtils.toLH; presumably low-byte-first - confirm.
        byte[] riffSizeField = CommonUtils.toLH(totalLength - 8);
        byte[] dataSizeField = CommonUtils.toLH(totalLength - 44);
        byte[] dataTagField = "data".getBytes();

        System.arraycopy(riffSizeField, 0, merged, 4, riffSizeField.length);
        System.arraycopy(dataSizeField, 0, merged, 40, dataSizeField.length);
        System.arraycopy(dataTagField, 0, merged, 36, dataTagField.length);

        logger.info("新头部信息中: " + describeWaveHeader(merged));
        return merged;
    }

    /**
     * Formats the values at bytes 4-7, 40-43 and 36-39 of a wave file for logging.
     */
    private String describeWaveHeader(byte[] wave) {
        byte[] riffSize = Arrays.copyOfRange(wave, 4, 8);
        byte[] dataSize = Arrays.copyOfRange(wave, 40, 44);
        byte[] dataTag = Arrays.copyOfRange(wave, 36, 40);
        return CommonUtils.lBytesToInt(riffSize) + " ,"
                + CommonUtils.lBytesToInt(dataSize) + " ,"
                + new String(dataTag);
    }

/**
 * One entry of the separation engine's result: a group of voice segments
 * together with a total length.
 */
public class VoiceData {

    /** Segments that make up this entry. */
    private List<VoiceDataDTO> segs;

    /** Value of the {@code spk} property; presumably a speaker index - confirm. */
    private Integer spk;

    /** Total length of this entry, as assigned through {@link #setLength(Integer)}. */
    private Integer length;

    /** @return the segments of this entry */
    public List<VoiceDataDTO> getSegs() {
        return this.segs;
    }

    /** @param segs the segments of this entry */
    public void setSegs(List<VoiceDataDTO> segs) {
        this.segs = segs;
    }

    /** @return the {@code spk} value */
    public Integer getSpk() {
        return this.spk;
    }

    /** @param spk the {@code spk} value */
    public void setSpk(Integer spk) {
        this.spk = spk;
    }

    /** @return the total length of this entry */
    public Integer getLength() {
        return this.length;
    }

    /** @param length the total length of this entry */
    public void setLength(Integer length) {
        this.length = length;
    }
}

/**
 * One voice segment returned by the separation engine, described by a begin
 * and an end position.
 */
public class VoiceDataDTO {
    /**
     * Start position of the segment. NOTE(review): appears to be in
     * milliseconds rather than bytes - confirm against the engine's output.
     */
    private Integer begin;
    /**
     * End position of the segment, in the same unit as {@link #begin}.
     */
    private Integer end;
    /**
     * Segment type as reported by the engine.
     */
    private String type;

    /**
     * Explicitly assigned length; only used when begin/end are not both set.
     */
    private Integer length;

    /**
     * @return the begin
     */
    public Integer getBegin() {
        return begin;
    }

    /**
     * @param begin the begin to set
     */
    public void setBegin(Integer begin) {
        this.begin = begin;
    }

    /**
     * @return the end
     */
    public Integer getEnd() {
        return end;
    }

    /**
     * @param end the end to set
     */
    public void setEnd(Integer end) {
        this.end = end;
    }

    /**
     * @return the type
     */
    public String getType() {
        return type;
    }

    /**
     * @param type the type to set
     */
    public void setType(String type) {
        this.type = type;
    }

    /**
     * Returns the segment length.
     *
     * <p>When both bounds are present the length is derived as
     * {@code end - begin}. Otherwise the explicitly assigned length is
     * returned, or 0 when none was assigned, so a partially populated segment
     * no longer fails with a NullPointerException on unboxing.
     *
     * @return the segment length, never {@code null}
     */
    public Integer getLength() {
        if (this.begin != null && this.end != null) {
            return this.end - this.begin;
        }
        return this.length != null ? this.length : Integer.valueOf(0);
    }

    /**
     * @param length the length to use when begin/end are not both set
     */
    public void setLength(Integer length) {
        this.length = length;
    }

}


wav头文件格式:

8 kHz 采样、16 比特量化的线性 PCM 语音信号的 WAVE 文件头格式表(共 44 字节)

| 偏移地址 | 字节数 | 数据类型 | 内容 | 文件头定义 |
| --- | --- | --- | --- | --- |
| 00H | 4 | char | "RIFF" | char riff_id[4] = "RIFF" |
| 04H | 4 | long int | 文件总长 - 8 | long int size0 = 文件总长 - 8 |
| 08H | 8 | char | "WAVEfmt " | char wave_fmt[8] |
| 10H | 4 | long int | 10 00 00 00H(PCM) | long int size1 = 0x10 |
| 14H | 2 | int | 01 00H | int fmttag = 0x01 |
| 16H | 2 | int | 声道数 | int channel = 1 或 2 |
| 18H | 4 | long int | 采样率 | long int samplespersec |
| 1CH | 4 | long int | 每秒播放字节数 | long int bytepersec |
| 20H | 2 | int | 采样一次占字节数 | int blockalign = 声道数 * 量化数 / 8 |
| 22H | 2 | int | 量化数 | int bitpersamples = 8 或 16 |
| 24H | 4 | char | "data" | char data_id[4] = "data" |
| 28H | 4 | long int | 采样数据字节数 | long int size2 = 文件总长 - 44 |
| 2CH | 到文件尾 | char | 采样数据 | |

相关标签: wav文件组装