欢迎您访问程序员文章站!本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页  >  IT编程

openOffice word转pdf,pdf转图片优化版

程序员文章站 2022-03-31 12:38:39
之前写了一个版本的,不过代码繁琐而且不好用,效率有些问题。尤其pdf转图片速度太慢。下面是优化版本的代码。 spring boot 版本信息:2.0.1.RELEASE 1、配置信息: application.yml 2、转换入口 pdf 转图片参考 https://gitee.com/cycmy ......

之前写了一个版本的,不过代码繁琐而且不好用,效率有些问题。尤其pdf转图片速度太慢。下面是优化版本的代码。

spring boot 版本信息:2.0.1.RELEASE

1、配置信息:

package com.yunfatong.conf;

import lombok.extern.slf4j.slf4j;
import org.apache.commons.lang3.arrayutils;
import org.apache.commons.lang3.stringutils;
import org.apache.commons.lang3.math.numberutils;
import org.jodconverter.documentconverter;
import org.jodconverter.localconverter;
import org.jodconverter.office.localofficemanager;
import org.jodconverter.office.officemanager;
import org.springframework.boot.autoconfigure.condition.conditionalonbean;
import org.springframework.boot.autoconfigure.condition.conditionalonclass;
import org.springframework.boot.autoconfigure.condition.conditionalonmissingbean;
import org.springframework.boot.autoconfigure.condition.conditionalonproperty;
import org.springframework.boot.context.properties.enableconfigurationproperties;
import org.springframework.context.annotation.bean;
import org.springframework.context.annotation.configuration;

import java.util.hashset;
import java.util.set;

/**
 * Spring configuration that builds a local office manager and a document converter
 * from the values held by {@code jodconverterproperties}.
 *
 * <p>The configuration is only considered when {@code documentconverter} is on the
 * classpath and the {@code jodconverter.enabled} property is {@code true} or absent.
 *
 * @author liran
 * @date 20190517
 */
@configuration
@conditionalonclass({documentconverter.class})
@conditionalonproperty(prefix = "jodconverter", name = {"enabled"}, havingvalue = "true", matchifmissing = true)
@enableconfigurationproperties({jodconverterproperties.class})
@slf4j
public class jodconverterautoconfiguration {
    private final jodconverterproperties properties;

    public jodconverterautoconfiguration(jodconverterproperties properties) {
        this.properties = properties;
    }

    /**
     * Translates the bound properties into a {@code localofficemanager}.
     *
     * @return the office manager produced by the builder (not yet started)
     */
    private officemanager createofficemanager() {
        localofficemanager.builder builder = localofficemanager.builder();

        string configuredports = this.properties.getportnumbers();
        if (!stringutils.isblank(configuredports)) {
            // A set is used, so a port listed twice is only passed once.
            set<integer> ports = new hashset<>();
            for (string token : stringutils.split(configuredports, ", ")) {
                // Tokens that are not valid integers fall back to 2002.
                ports.add(numberutils.toint(token, 2002));
            }
            integer[] boxedports = ports.toarray(new integer[ports.size()]);
            builder.portnumbers(arrayutils.toprimitive(boxedports));
        }

        builder.officehome(this.properties.getofficehome());
        builder.workingdir(this.properties.getworkingdir());
        builder.templateprofiledir(this.properties.gettemplateprofiledir());
        builder.killexistingprocess(this.properties.iskillexistingprocess());
        builder.processtimeout(this.properties.getprocesstimeout());
        builder.processretryinterval(this.properties.getprocessretryinterval());
        builder.taskexecutiontimeout(this.properties.gettaskexecutiontimeout());
        builder.maxtasksperprocess(this.properties.getmaxtasksperprocess());
        builder.taskqueuetimeout(this.properties.gettaskqueuetimeout());
        return builder.build();
    }

    /**
     * Registers the office manager bean unless one is already defined; the container
     * calls {@code start} after creation and {@code stop} on shutdown.
     */
    @bean(initmethod = "start", destroymethod = "stop")
    @conditionalonmissingbean
    public officemanager officemanager() {
        return this.createofficemanager();
    }

    /**
     * Registers a document converter backed by the given office manager, unless a
     * converter bean is already defined.
     */
    @bean
    @conditionalonmissingbean
    @conditionalonbean({officemanager.class})
    public documentconverter jodconverter(officemanager officemanager) {
        return localconverter.make(officemanager);
    }
}
package com.yunfatong.conf;

import org.springframework.boot.context.properties.configurationproperties;

import java.util.regex.pattern;

/**
 * Settings bound from the {@code jodconverter.*} configuration properties.
 *
 * <p>Every field has a getter/setter pair; the initial values assigned below are
 * used when the corresponding property is not supplied.
 *
 * @author liran
 * @date 20190517
 */
@configurationproperties("jodconverter")
public class jodconverterproperties {
    /** Value used by the sample application.yml; treated the same as "not configured". */
    private static final string office_home_placeholder = "linuxorwindows";
    /** Office home used on linux and on any operating system that is not windows. */
    private static final string default_office_home_unix = "/opt/openoffice4";
    /** Office home used when {@code os.name} starts with "windows" (any letter case). */
    private static final string default_office_home_windows = "c:\\program files (x86)\\openoffice 4";
    /** Case-insensitive because the JVM reports names such as "Windows 10". */
    private static final pattern windows_os_name = pattern.compile("windows.*", pattern.case_insensitive);

    private boolean enabled;
    private string officehome;
    private string portnumbers = "2002";
    private string workingdir;
    private string templateprofiledir;
    private boolean killexistingprocess = true;
    private long processtimeout = 120000l;
    private long processretryinterval = 250l;
    private long taskexecutiontimeout = 120000l;
    private int maxtasksperprocess = 200;
    private long taskqueuetimeout = 30000l;

    public jodconverterproperties() {
    }

    public boolean isenabled() {
        return this.enabled;
    }

    public void setenabled(boolean enabled) {
        this.enabled = enabled;
    }

    /**
     * Returns the office installation directory.
     *
     * <p>An explicitly configured value wins. When the value is missing, blank, or the
     * sample placeholder {@code linuxorwindows}, a per-OS default is returned instead.
     * The getter no longer overwrites the stored field, so a value passed to
     * {@link #setofficehome} is not lost.
     *
     * @return the configured office home, or the default for the current operating system
     */
    public string getofficehome() {
        if (isexplicitlyconfigured(this.officehome)) {
            return this.officehome;
        }
        return defaultofficehome();
    }

    /** Tells whether the given value is a real setting rather than blank or the placeholder. */
    private static boolean isexplicitlyconfigured(string value) {
        if (value == null) {
            return false;
        }
        string trimmed = value.trim();
        return !trimmed.isempty() && !office_home_placeholder.equalsignorecase(trimmed);
    }

    /** Picks the default office home from the {@code os.name} system property. */
    private static string defaultofficehome() {
        string osname = system.getproperty("os.name");
        if (osname != null && windows_os_name.matcher(osname).matches()) {
            return default_office_home_windows;
        }
        // linux and every other operating system share the same default.
        return default_office_home_unix;
    }

    public void setofficehome(string officehome) {
        this.officehome = officehome;
    }

    public string getportnumbers() {
        return this.portnumbers;
    }

    public void setportnumbers(string portnumbers) {
        this.portnumbers = portnumbers;
    }

    public string getworkingdir() {
        return this.workingdir;
    }

    public void setworkingdir(string workingdir) {
        this.workingdir = workingdir;
    }

    public string gettemplateprofiledir() {
        return this.templateprofiledir;
    }

    public void settemplateprofiledir(string templateprofiledir) {
        this.templateprofiledir = templateprofiledir;
    }

    public boolean iskillexistingprocess() {
        return this.killexistingprocess;
    }

    public void setkillexistingprocess(boolean killexistingprocess) {
        this.killexistingprocess = killexistingprocess;
    }

    public long getprocesstimeout() {
        return this.processtimeout;
    }

    public void setprocesstimeout(long processtimeout) {
        this.processtimeout = processtimeout;
    }

    public long getprocessretryinterval() {
        return this.processretryinterval;
    }

    public void setprocessretryinterval(long procesretryinterval) {
        this.processretryinterval = procesretryinterval;
    }

    public long gettaskexecutiontimeout() {
        return this.taskexecutiontimeout;
    }

    public void settaskexecutiontimeout(long taskexecutiontimeout) {
        this.taskexecutiontimeout = taskexecutiontimeout;
    }

    public int getmaxtasksperprocess() {
        return this.maxtasksperprocess;
    }

    public void setmaxtasksperprocess(int maxtasksperprocess) {
        this.maxtasksperprocess = maxtasksperprocess;
    }

    public long gettaskqueuetimeout() {
        return this.taskqueuetimeout;
    }

    public void settaskqueuetimeout(long taskqueuetimeout) {
        this.taskqueuetimeout = taskqueuetimeout;
    }
}

application.yml 

jodconverter:
  enabled: true
  office-home: linuxorwindows
  # linux default: /opt/openoffice4 (no change needed); windows default: c:\program files (x86)\openoffice 4 (no change needed)
  port-numbers: 2002
  max-tasks-per-process: 10

 

2、转换入口

package com.yunfatong.ojd.util.pdf;

import cn.hutool.core.date.datepattern;
import com.yunfatong.ojd.common.exception.commonexception;
import com.yunfatong.ojd.service.filesystemstorageservice;
import com.yunfatong.ojd.util.springutil;
import lombok.extern.slf4j.slf4j;
import org.apache.commons.io.fileutils;
import org.apache.commons.lang.stringutils;
import org.jodconverter.documentconverter;
import org.springframework.beans.factory.annotation.autowired;
import org.springframework.stereotype.component;

import java.io.file;
import java.time.localdatetime;
import java.time.format.datetimeformatter;
import java.util.arraylist;
import java.util.list;

/**
* word 转pdf
*
* @author lr
*/
@component
@slf4j
public class transferutil {
//这里没有@autowired 主要是配置不启用的话 无法注入
private documentconverter documentconverter;
@autowired
private filesystemstorageservice filesystemstorageservice;
/**
* filesystemstorageservice 就是拼接出本地路径的作用
* storage.winlocation=d:\\ojd\\upload\\images\\
* ##上传图片linux存储路径
* storage.linuxlocation=/home/ojd/upload/images/
*/

final static string word_suffix_doc = "doc";
final static string word_suffix_docx = "docx";

/**
* word ->pdf
*
* @param webpath 浏览器可访问路径(数据库存的)如 /test/wd.word
* @return 相同文件夹下的转换后的pdf 路径 如/test/wd_20190517151515333.pdf
* @throws exception
*/
public string transferwordtopdf(string webpath) throws exception {
if(documentconverter==null){
documentconverter = springutil.getbean(documentconverter.class);
}
//转换成本地实际磁盘路径
string originlocalfilepath = filesystemstorageservice.getlocation(webpath);
file inputfile = new file(originlocalfilepath);
if (!inputfile.exists() || !inputfile.isfile() || (!stringutils.contains(inputfile.getname(), word_suffix_doc) && !stringutils.contains(inputfile.getname(), word_suffix_docx))) {
throw new commonexception("word -> pdf转换错误 当前文件不是word或 文件不存在: " + webpath);
}

datetimeformatter formatter = datetimeformatter.ofpattern(datepattern.pure_datetime_ms_pattern);
string timenow = formatter.format(localdatetime.now());
string newpdfwebpath = stringutils.substringbeforelast(webpath, ".") + "_" + timenow + ".pdf";
try {
file outputfile = new file(filesystemstorageservice.getlocation(newpdfwebpath));
documentconverter.convert(inputfile).to(outputfile).execute();
} catch (exception e) {
log.error("word->pdf 转换错误------------> exception:{}", e);
throw e;
}
return newpdfwebpath;
}

public list<string> transferpdftoimage(string webpath) throws exception {
string originlocalfilepath = filesystemstorageservice.getlocation(webpath);
file inputfile = new file(originlocalfilepath);
if (!inputfile.exists() || !inputfile.isfile() || webpath.lastindexof(".pdf") < 0) {
throw new commonexception("pdf-> img 源文件不是pdf文件 或者文件不存在!" + webpath);
}
string localpdfpath = filesystemstorageservice.getlocation(webpath);
string newimgwebpathpresuffix = stringutils.substringbeforelast(webpath, ".");
string localimgpath = filesystemstorageservice.getlocation(newimgwebpathpresuffix);
pdftransferutil pdftranfer = new pdftransferutil();
list<byte[]> ins = pdftranfer.pdf2image(localpdfpath, "png", 1.5f);
list<string> webpaths = new arraylist<>(ins.size());
for (int i = 0; i < ins.size(); i++) {
byte[] data = ins.get(i);
string pathreal = localimgpath + "_ojd_" + i + ".png";
fileutils.writebytearraytofile(new file(pathreal), data);
webpaths.add(pathreal);
}
return webpaths;
}

}

 pdf 转图片参考 https://gitee.com/cycmy/pdftranfer.git

package com.yunfatong.ojd.util.pdf;

import lombok.extern.slf4j.slf4j;
import org.icepdf.core.pobjects.document;
import org.icepdf.core.pobjects.page;
import org.icepdf.core.util.graphicsrenderinghints;

import javax.imageio.imageio;
import javax.imageio.stream.imageoutputstream;
import java.awt.image.bufferedimage;
import java.io.bytearrayoutputstream;
import java.io.inputstream;
import java.util.arraylist;
import java.util.list;
import java.util.concurrent.callable;
import java.util.concurrent.executorservice;
import java.util.concurrent.executors;
import java.util.concurrent.future;

/**
 * Renders the pages of a pdf document to encoded images using icepdf.
 *
 * @author lr
 *  Original article: https://blog.csdn.net/qq_35974759/article/details/83149734
 */
@slf4j
public class pdftransferutil {

    /** Number of worker threads used to render pages in parallel. */
    private static final int render_threads = 5;

    /** Image format used when the caller passes a null or blank image type. */
    private static final string default_image_type = "png";

    //*********************************pdf to image **********************************************************

    /**
     * Converts a pdf held in a byte array to one encoded image per page.
     *
     * @param pdfbytes  pdf content
     * @param imagetype image format name; png when null or blank
     * @param zoom      scale factor, 1 means no scaling, 0.3 shrinks to 30%
     * @return encoded images in page order
     * @throws exception when the document cannot be loaded or a page cannot be rendered
     */
    public list<byte[]> pdf2image(byte[] pdfbytes, string imagetype, float zoom) throws exception {
        document document = new document();
        document.setbytearray(pdfbytes, 0, pdfbytes.length, null);
        return pageextraction(document, imagetype, 0f, zoom);
    }

    /**
     * Converts a pdf read from an input stream to one encoded image per page.
     *
     * @param inputpdf  pdf content stream
     * @param imagetype image format name; png when null or blank
     * @param zoom      scale factor, 1 means no scaling, 0.3 shrinks to 30%
     * @return encoded images in page order
     * @throws exception when the document cannot be loaded or a page cannot be rendered
     */
    public list<byte[]> pdf2image(inputstream inputpdf, string imagetype, float zoom) throws exception {
        document document = new document();
        document.setinputstream(inputpdf, null);
        return pageextraction(document, imagetype, 0f, zoom);
    }

    /**
     * Converts a pdf file to one encoded image per page.
     *
     * @param pdfpath   path of the source file, e.g. d:/test.pdf
     * @param imagetype image format name; png when null or blank
     * @param zoom      scale factor, 1 means no scaling, 0.3 shrinks to 30%
     * @return encoded images in page order
     * @throws exception when the document cannot be loaded or a page cannot be rendered
     */
    public list<byte[]> pdf2image(string pdfpath, string imagetype, float zoom) throws exception {
        document document = new document();
        document.setfile(pdfpath);
        return pageextraction(document, imagetype, 0f, zoom);
    }
    //*********************************pdf to image **********************************************************

    /**
     * Renders all pages on a fixed thread pool and always disposes the document afterwards.
     *
     * <p>Failures are logged with their stack trace and rethrown instead of being
     * turned into a {@code null} result.
     */
    private list<byte[]> pageextraction(document document, string imagetype, float rotation, float zoom) throws exception {
        string format = (imagetype == null || imagetype.trim().isempty()) ? default_image_type : imagetype;
        executorservice executorservice = executors.newfixedthreadpool(render_threads);
        try {
            int pages = document.getnumberofpages();
            list<callable<byte[]>> callables = new arraylist<callable<byte[]>>(pages);
            for (int i = 0; i < pages; i++) {
                callables.add(new capturepage(document, i, format, rotation, zoom));
            }
            // invokeall blocks until every page task has finished.
            list<byte[]> result = new arraylist<byte[]>(pages);
            for (future<byte[]> future : executorservice.invokeall(callables)) {
                result.add(future.get());
            }
            return result;
        } catch (interruptedexception ex) {
            // Restore the flag so callers further up can observe the interruption.
            thread.currentthread().interrupt();
            log.error(" pdf 转换图片错误  error handling pdf document ", ex);
            throw ex;
        } catch (exception ex) {
            log.error(" pdf 转换图片错误  error handling pdf document ", ex);
            throw ex;
        } finally {
            executorservice.shutdown();
            new documentcloser(document).call();
        }
    }

    /**
     * Renders a single page and encodes it in the requested image format.
     */
    public class capturepage implements callable<byte[]> {
        private final document document;
        private final int pagenumber;
        private final string imagetype;
        private final float rotation;
        private final float zoom;

        private capturepage(document document, int pagenumber, string imagetype, float rotation, float zoom) {
            this.document = document;
            this.pagenumber = pagenumber;
            this.imagetype = imagetype;
            this.rotation = rotation;
            this.zoom = zoom;
        }

        @override
        public byte[] call() throws exception {
            bufferedimage image = (bufferedimage) document.getpageimage(pagenumber, graphicsrenderinghints.screen, page.boundary_cropbox, rotation, zoom);
            try {
                bytearrayoutputstream bs = new bytearrayoutputstream();
                // The outputstream overload opens and closes its own image stream,
                // so no image output stream is left open here.
                if (!imageio.write(image, imagetype, bs)) {
                    // write returns false when no writer exists for the format.
                    throw new illegalargumentexception("unsupported image type: " + imagetype);
                }
                return bs.tobytearray();
            } finally {
                image.flush();
            }
        }
    }

    /**
     * disposes the document.
     */
    public class documentcloser implements callable<void> {
        private final document document;

        private documentcloser(document document) {
            this.document = document;
        }

        @override
        public void call() {
            if (document != null) {
                document.dispose();
                log.info("document disposed");
            }
            return null;
        }
    }

}

springutil

package com.yunfatong.ojd.util;

/**
 * @auther liran
 * @date 2018/8/30 14:49
 * @description
 */
import org.springframework.beans.beansexception;
import org.springframework.context.applicationcontext;
import org.springframework.context.applicationcontextaware;
import org.springframework.stereotype.component;

@component
public class springutil implements applicationcontextaware {

    /** Context stored by the first {@code setapplicationcontext} callback. */
    private static applicationcontext applicationcontext;

    /**
     * Stores the context in the static field if none has been stored yet, then
     * prints a banner containing the stored context.
     */
    @override
    public void setapplicationcontext(applicationcontext applicationcontext) throws beansexception {
        boolean nothingstoredyet = springutil.applicationcontext == null;
        if (nothingstoredyet) {
            springutil.applicationcontext = applicationcontext;
        }
        string banner = "========applicationcontext配置成功,在普通类可以通过调用springutils.getappcontext()获取applicationcontext对象,applicationcontext="
                + springutil.applicationcontext
                + "========";
        system.out.println(banner);
    }

    /** Returns the stored application context. */
    public static applicationcontext getapplicationcontext() {
        return applicationcontext;
    }

    /** Looks a bean up by name. */
    public static object getbean(string name) {
        applicationcontext context = getapplicationcontext();
        return context.getbean(name);
    }

    /** Looks a bean up by type. */
    public static <t> t getbean(class<t> clazz) {
        applicationcontext context = getapplicationcontext();
        return context.getbean(clazz);
    }

    /** Looks a bean up by name and returns it as the given type. */
    public static <t> t getbean(string name, class<t> clazz) {
        applicationcontext context = getapplicationcontext();
        return context.getbean(name, clazz);
    }

}

 

pom.xml

 

  <!-- word to pdf conversion: begin -->
        <dependency>
            <groupid>org.jodconverter</groupid>
            <artifactid>jodconverter-core</artifactid>
            <version>4.2.2</version>
        </dependency>

        <dependency>
            <groupid>org.jodconverter</groupid>
            <artifactid>jodconverter-local</artifactid>
            <version>4.2.2</version>
        </dependency>
        <dependency>
            <groupid>org.jodconverter</groupid>
            <artifactid>jodconverter-spring-boot-starter</artifactid>
            <version>4.2.2</version>
        </dependency>
        <!-- word to pdf conversion: end -->

        <!-- pdf to image conversion: begin -->
        <dependency>
            <groupid>org.icepdf.os</groupid>
            <artifactid>icepdf-core</artifactid>
            <version>6.2.2</version>
            <exclusions>
                <exclusion>
                    <groupid>javax.media</groupid>
                    <artifactid>jai_core</artifactid>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupid>org.icepdf.os</groupid>
            <artifactid>icepdf-viewer</artifactid>
            <version>6.2.2</version>
        </dependency>
        <!-- pdf to image conversion: end -->

 

3、调用测试:

import com.yunfatong.ojd.service.filesystemstorageservice;
import lombok.extern.slf4j.slf4j;
import org.junit.test;
import org.junit.runner.runwith;
import org.springframework.beans.factory.annotation.autowired;
import org.springframework.boot.test.context.springboottest;
import org.springframework.test.context.junit4.springrunner;

import java.util.list;

@runwith(springrunner.class)
@springboottest
@slf4j
public class wordtransferpdfutiltest {
    @autowired
    transferutil wordtransferpdfutil;
    @autowired
    filesystemstorageservice filesystemstorageservice;

    /**
     * Runs a word to pdf conversion followed by a pdf to image conversion and logs
     * how long each step took, in whole seconds.
     *
     * <p>Exceptions propagate so that a failed conversion fails the test instead of
     * only printing a stack trace.
     *
     * @throws exception when either conversion fails
     */
    @test
    public void transferlocalfile() throws exception {
        /*******************word to pdf******************/
        long time = system.currenttimemillis();
        system.out.println("start :======" + time);
        wordtransferpdfutil.transferwordtopdf("courtchongqing/test_new/555.docx");
        log.info(system.currenttimemillis() + " time============================== :" + ((system.currenttimemillis() - time) / 1000));

        /*******************pdf to image******************/
        long time2 = system.currenttimemillis();
        list<string> pdfimages2 = wordtransferpdfutil.transferpdftoimage("courtchongqing/test_new/333.pdf");
        for (string pdfimage : pdfimages2) {
            log.info(pdfimage);
        }
        log.info(" time===============================22222222 :" + ((system.currenttimemillis() - time2) / 1000));
    }
}