openOffice word转pdf,pdf转图片优化版
程序员文章站
2022-03-31 12:38:39
之前写了一个版本的,不过代码繁琐而且不好用,效率有些问题。尤其pdf转图片速度太慢。下面是优化版本的代码。 Spring Boot 版本信息:2.0.1.RELEASE 1、配置信息: application.yml 2、转换入口 pdf 转图片参考 https://gitee.com/cycmy ......
之前写了一个版本的,不过代码繁琐而且不好用,效率有些问题。尤其pdf转图片速度太慢。下面是优化版本的代码。
Spring Boot 版本信息:2.0.1.RELEASE
1、配置信息:
package com.yunfatong.conf;

import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.jodconverter.DocumentConverter;
import org.jodconverter.LocalConverter;
import org.jodconverter.office.LocalOfficeManager;
import org.jodconverter.office.OfficeManager;
import org.springframework.boot.autoconfigure.condition.ConditionalOnBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import java.util.HashSet;
import java.util.Set;

/**
 * OpenOffice (JODConverter) configuration.
 *
 * <p>Active when {@link DocumentConverter} is on the classpath and the property
 * {@code jodconverter.enabled} is {@code true} or missing. Registers an
 * {@link OfficeManager} built from {@link JodConverterProperties} and a
 * {@link DocumentConverter} bound to it.
 *
 * @author liran
 * @date 20190517
 */
@Configuration
@ConditionalOnClass({DocumentConverter.class})
@ConditionalOnProperty(prefix = "jodconverter", name = {"enabled"}, havingValue = "true", matchIfMissing = true)
@EnableConfigurationProperties({JodConverterProperties.class})
@Slf4j
public class JodConverterAutoConfiguration {

    /** Port used for any entry of the port list that is not a valid integer. */
    private static final int DEFAULT_PORT = 2002;

    private final JodConverterProperties properties;

    public JodConverterAutoConfiguration(JodConverterProperties properties) {
        this.properties = properties;
    }

    /**
     * Builds a local office manager from the bound properties.
     *
     * @return a configured manager; it is not started here
     */
    private OfficeManager createOfficeManager() {
        LocalOfficeManager.Builder builder = LocalOfficeManager.builder();

        if (!StringUtils.isBlank(this.properties.getPortNumbers())) {
            // Commas and spaces are both separators; the Set drops duplicate ports.
            Set<Integer> ports = new HashSet<>();
            for (String portNumber : StringUtils.split(this.properties.getPortNumbers(), ", ")) {
                ports.add(NumberUtils.toInt(portNumber, DEFAULT_PORT));
            }
            builder.portNumbers(ArrayUtils.toPrimitive(ports.toArray(new Integer[0])));
        }

        builder.officeHome(this.properties.getOfficeHome());
        builder.workingDir(this.properties.getWorkingDir());
        builder.templateProfileDir(this.properties.getTemplateProfileDir());
        builder.killExistingProcess(this.properties.isKillExistingProcess());
        builder.processTimeout(this.properties.getProcessTimeout());
        builder.processRetryInterval(this.properties.getProcessRetryInterval());
        builder.taskExecutionTimeout(this.properties.getTaskExecutionTimeout());
        builder.maxTasksPerProcess(this.properties.getMaxTasksPerProcess());
        builder.taskQueueTimeout(this.properties.getTaskQueueTimeout());
        return builder.build();
    }

    /**
     * Office manager bean; the container calls {@code start} after creation and
     * {@code stop} on shutdown.
     */
    @Bean(initMethod = "start", destroyMethod = "stop")
    @ConditionalOnMissingBean
    public OfficeManager officeManager() {
        return this.createOfficeManager();
    }

    /**
     * Document converter bound to the office manager bean.
     *
     * @param officeManager the manager that executes the conversions
     */
    @Bean
    @ConditionalOnMissingBean
    @ConditionalOnBean({OfficeManager.class})
    public DocumentConverter jodConverter(OfficeManager officeManager) {
        return LocalConverter.make(officeManager);
    }
}
package com.yunfatong.conf;

import org.springframework.boot.context.properties.ConfigurationProperties;

import java.util.regex.Pattern;

/**
 * OpenOffice (JODConverter) settings bound from the {@code jodconverter.*} properties.
 *
 * @author liran
 * @date 20190517
 */
@ConfigurationProperties("jodconverter")
public class JodConverterProperties {

    /**
     * Matches the {@code os.name} system property on Windows. Case-insensitive,
     * because the JVM reports names such as "Windows 10" and a lowercase-only
     * pattern would never match them.
     */
    private static final Pattern WINDOWS_OS = Pattern.compile("windows.*", Pattern.CASE_INSENSITIVE);

    private boolean enabled;
    private String officeHome;
    private String portNumbers = "2002";
    private String workingDir;
    private String templateProfileDir;
    private boolean killExistingProcess = true;
    private long processTimeout = 120000L;
    private long processRetryInterval = 250L;
    private long taskExecutionTimeout = 120000L;
    private int maxTasksPerProcess = 200;
    private long taskQueueTimeout = 30000L;

    public JodConverterProperties() {
    }

    public boolean isEnabled() {
        return this.enabled;
    }

    public void setEnabled(boolean enabled) {
        this.enabled = enabled;
    }

    /**
     * Returns the office installation directory for the current operating system.
     *
     * <p>The stored value is overwritten on every call, so whatever was bound from
     * {@code jodconverter.office-home} or passed to {@link #setOfficeHome(String)}
     * is ignored: Windows gets the default OpenOffice 4 install directory, every
     * other system (Linux included) gets {@code /opt/openoffice4}.
     *
     * @return the OS-specific default office home
     */
    public String getOfficeHome() {
        String osName = System.getProperty("os.name");
        if (WINDOWS_OS.matcher(osName).matches()) {
            this.officeHome = "c:\\program files (x86)\\openoffice 4";
        } else {
            // Linux and any unrecognised system share the same default.
            this.officeHome = "/opt/openoffice4";
        }
        return this.officeHome;
    }

    public void setOfficeHome(String officeHome) {
        this.officeHome = officeHome;
    }

    public String getPortNumbers() {
        return this.portNumbers;
    }

    public void setPortNumbers(String portNumbers) {
        this.portNumbers = portNumbers;
    }

    public String getWorkingDir() {
        return this.workingDir;
    }

    public void setWorkingDir(String workingDir) {
        this.workingDir = workingDir;
    }

    public String getTemplateProfileDir() {
        return this.templateProfileDir;
    }

    public void setTemplateProfileDir(String templateProfileDir) {
        this.templateProfileDir = templateProfileDir;
    }

    public boolean isKillExistingProcess() {
        return this.killExistingProcess;
    }

    public void setKillExistingProcess(boolean killExistingProcess) {
        this.killExistingProcess = killExistingProcess;
    }

    public long getProcessTimeout() {
        return this.processTimeout;
    }

    public void setProcessTimeout(long processTimeout) {
        this.processTimeout = processTimeout;
    }

    public long getProcessRetryInterval() {
        return this.processRetryInterval;
    }

    public void setProcessRetryInterval(long processRetryInterval) {
        this.processRetryInterval = processRetryInterval;
    }

    public long getTaskExecutionTimeout() {
        return this.taskExecutionTimeout;
    }

    public void setTaskExecutionTimeout(long taskExecutionTimeout) {
        this.taskExecutionTimeout = taskExecutionTimeout;
    }

    public int getMaxTasksPerProcess() {
        return this.maxTasksPerProcess;
    }

    public void setMaxTasksPerProcess(int maxTasksPerProcess) {
        this.maxTasksPerProcess = maxTasksPerProcess;
    }

    public long getTaskQueueTimeout() {
        return this.taskQueueTimeout;
    }

    public void setTaskQueueTimeout(long taskQueueTimeout) {
        this.taskQueueTimeout = taskQueueTimeout;
    }
}
application.yml
jodconverter:
  enabled: true
  # Placeholder only: the office home is chosen per operating system in code.
  # Linux default:   /opt/openoffice4                        (no need to change)
  # Windows default: c:\program files (x86)\openoffice 4     (no need to change)
  office-home: linuxorwindows
  port-numbers: 2002
  max-tasks-per-process: 10
2、转换入口
package com.yunfatong.ojd.util.pdf;
import cn.hutool.core.date.DatePattern;
import com.yunfatong.ojd.common.exception.CommonException;
import com.yunfatong.ojd.service.FileSystemStorageService;
import com.yunfatong.ojd.util.SpringUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.jodconverter.DocumentConverter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.io.File;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
/**
* word 转pdf
*
* @author lr
*/
@component
@slf4j
public class transferutil {
//这里没有@autowired 主要是配置不启用的话 无法注入
private documentconverter documentconverter;
@autowired
private filesystemstorageservice filesystemstorageservice;
/**
* filesystemstorageservice 就是拼接出本地路径的作用
* storage.winlocation=d:\\ojd\\upload\\images\\
* ##上传图片linux存储路径
* storage.linuxlocation=/home/ojd/upload/images/
*/
final static string word_suffix_doc = "doc";
final static string word_suffix_docx = "docx";
/**
* word ->pdf
*
* @param webpath 浏览器可访问路径(数据库存的)如 /test/wd.word
* @return 相同文件夹下的转换后的pdf 路径 如/test/wd_20190517151515333.pdf
* @throws exception
*/
public string transferwordtopdf(string webpath) throws exception {
if(documentconverter==null){
documentconverter = springutil.getbean(documentconverter.class);
}
//转换成本地实际磁盘路径
string originlocalfilepath = filesystemstorageservice.getlocation(webpath);
file inputfile = new file(originlocalfilepath);
if (!inputfile.exists() || !inputfile.isfile() || (!stringutils.contains(inputfile.getname(), word_suffix_doc) && !stringutils.contains(inputfile.getname(), word_suffix_docx))) {
throw new commonexception("word -> pdf转换错误 当前文件不是word或 文件不存在: " + webpath);
}
datetimeformatter formatter = datetimeformatter.ofpattern(datepattern.pure_datetime_ms_pattern);
string timenow = formatter.format(localdatetime.now());
string newpdfwebpath = stringutils.substringbeforelast(webpath, ".") + "_" + timenow + ".pdf";
try {
file outputfile = new file(filesystemstorageservice.getlocation(newpdfwebpath));
documentconverter.convert(inputfile).to(outputfile).execute();
} catch (exception e) {
log.error("word->pdf 转换错误------------> exception:{}", e);
throw e;
}
return newpdfwebpath;
}
public list<string> transferpdftoimage(string webpath) throws exception {
string originlocalfilepath = filesystemstorageservice.getlocation(webpath);
file inputfile = new file(originlocalfilepath);
if (!inputfile.exists() || !inputfile.isfile() || webpath.lastindexof(".pdf") < 0) {
throw new commonexception("pdf-> img 源文件不是pdf文件 或者文件不存在!" + webpath);
}
string localpdfpath = filesystemstorageservice.getlocation(webpath);
string newimgwebpathpresuffix = stringutils.substringbeforelast(webpath, ".");
string localimgpath = filesystemstorageservice.getlocation(newimgwebpathpresuffix);
pdftransferutil pdftranfer = new pdftransferutil();
list<byte[]> ins = pdftranfer.pdf2image(localpdfpath, "png", 1.5f);
list<string> webpaths = new arraylist<>(ins.size());
for (int i = 0; i < ins.size(); i++) {
byte[] data = ins.get(i);
string pathreal = localimgpath + "_ojd_" + i + ".png";
fileutils.writebytearraytofile(new file(pathreal), data);
webpaths.add(pathreal);
}
return webpaths;
}
}
pdf 转图片参考 https://gitee.com/cycmy/pdftranfer.git
package com.yunfatong.ojd.util.pdf;

import lombok.extern.slf4j.Slf4j;
import org.icepdf.core.pobjects.Document;
import org.icepdf.core.pobjects.Page;
import org.icepdf.core.util.GraphicsRenderingHints;

import javax.imageio.ImageIO;
import javax.imageio.stream.ImageOutputStream;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

/**
 * Renders the pages of a PDF document to encoded images with ICEpdf.
 *
 * @author lr
 * Original article: https://blog.csdn.net/qq_35974759/article/details/83149734
 */
@Slf4j
public class PdfTransferUtil {

    /** Number of worker threads used to render the pages of one document. */
    private static final int RENDER_THREADS = 5;

    /** Image format used when the caller passes a null or blank type. */
    private static final String DEFAULT_IMAGE_TYPE = "png";

    //*********************************pdf to image **********************************************************

    /**
     * Converts a PDF held in a byte array to one encoded image per page.
     *
     * @param pdfBytes  content of the PDF
     * @param imageType target image format; png when null or blank
     * @param zoom      scale factor: 1 means no scaling, 0.3 shrinks to 30%
     * @return the encoded pages in page order, or {@code null} when rendering failed
     * @throws Exception if the document cannot be loaded
     */
    public List<byte[]> pdf2Image(byte[] pdfBytes, String imageType, float zoom) throws Exception {
        Document document = new Document();
        document.setByteArray(pdfBytes, 0, pdfBytes.length, null);
        return pageExtraction(document, imageType, 0f, zoom);
    }

    /**
     * Converts a PDF read from an input stream to one encoded image per page.
     *
     * @param inputPdf  stream delivering the PDF
     * @param imageType target image format; png when null or blank
     * @param zoom      scale factor: 1 means no scaling, 0.3 shrinks to 30%
     * @return the encoded pages in page order, or {@code null} when rendering failed
     * @throws Exception if the document cannot be loaded
     */
    public List<byte[]> pdf2Image(InputStream inputPdf, String imageType, float zoom) throws Exception {
        Document document = new Document();
        document.setInputStream(inputPdf, null);
        return pageExtraction(document, imageType, 0f, zoom);
    }

    /**
     * Converts a PDF file to one encoded image per page.
     *
     * @param pdfPath   path of the source file, e.g. d:/test.pdf
     * @param imageType target image format; png when null or blank
     * @param zoom      scale factor: 1 means no scaling, 0.3 shrinks to 30%
     * @return the encoded pages in page order, or {@code null} when rendering failed
     * @throws Exception if the document cannot be loaded
     */
    public List<byte[]> pdf2Image(String pdfPath, String imageType, float zoom) throws Exception {
        Document document = new Document();
        document.setFile(pdfPath);
        return pageExtraction(document, imageType, 0f, zoom);
    }

    //*********************************pdf to image **********************************************************

    /**
     * Renders all pages of the document in parallel and disposes the document
     * afterwards, whether or not rendering succeeded.
     *
     * @return the encoded pages in page order, or {@code null} when any page failed
     *         (the failure is logged, not rethrown)
     */
    private List<byte[]> pageExtraction(Document document, String imageType, float rotation, float zoom) {
        String format = (imageType == null || imageType.trim().isEmpty()) ? DEFAULT_IMAGE_TYPE : imageType;
        ExecutorService executorService = Executors.newFixedThreadPool(RENDER_THREADS);
        try {
            int pages = document.getNumberOfPages();
            List<Callable<byte[]>> callables = new ArrayList<>(pages);
            for (int i = 0; i < pages; i++) {
                callables.add(new CapturePage(document, i, format, rotation, zoom));
            }
            // invokeAll returns only once every page task has finished, in submission order.
            List<byte[]> result = new ArrayList<>(pages);
            for (Future<byte[]> future : executorService.invokeAll(callables)) {
                result.add(future.get());
            }
            return result;
        } catch (InterruptedException ex) {
            // Restore the interrupt flag so callers further up can react to it.
            Thread.currentThread().interrupt();
            log.error(" pdf 转换图片错误 error handling pdf document ", ex);
        } catch (Exception ex) {
            log.error(" pdf 转换图片错误 error handling pdf document ", ex);
        } finally {
            executorService.shutdown();
            // Release the document on the failure path as well.
            new DocumentCloser(document).call();
        }
        return null;
    }

    /**
     * Renders a single page and encodes it in the requested image format.
     */
    public static class CapturePage implements Callable<byte[]> {
        private final Document document;
        private final int pageNumber;
        private final String imageType;
        private final float rotation;
        private final float zoom;

        private CapturePage(Document document, int pageNumber, String imageType, float rotation, float zoom) {
            this.document = document;
            this.pageNumber = pageNumber;
            this.imageType = imageType;
            this.rotation = rotation;
            this.zoom = zoom;
        }

        @Override
        public byte[] call() throws Exception {
            BufferedImage image = (BufferedImage) document.getPageImage(pageNumber, GraphicsRenderingHints.SCREEN, Page.BOUNDARY_CROPBOX, rotation, zoom);
            try {
                ByteArrayOutputStream bytes = new ByteArrayOutputStream();
                // Closing the image stream pushes any cached data into the byte
                // buffer and releases the cache ImageIO may have allocated.
                try (ImageOutputStream imageOut = ImageIO.createImageOutputStream(bytes)) {
                    if (!ImageIO.write(image, imageType, imageOut)) {
                        throw new IllegalArgumentException("No ImageIO writer available for format: " + imageType);
                    }
                }
                return bytes.toByteArray();
            } finally {
                image.flush();
            }
        }
    }

    /**
     * Disposes the document.
     */
    public static class DocumentCloser implements Callable<Void> {
        private final Document document;

        private DocumentCloser(Document document) {
            this.document = document;
        }

        @Override
        public Void call() {
            if (document != null) {
                document.dispose();
                log.info("document disposed");
            }
            return null;
        }
    }
}
SpringUtil
package com.yunfatong.ojd.util; /** * @auther liran * @date 2018/8/30 14:49 * @description */ import org.springframework.beans.beansexception; import org.springframework.context.applicationcontext; import org.springframework.context.applicationcontextaware; import org.springframework.stereotype.component; @component public class springutil implements applicationcontextaware { private static applicationcontext applicationcontext; @override public void setapplicationcontext(applicationcontext applicationcontext) throws beansexception { if(springutil.applicationcontext == null) { springutil.applicationcontext = applicationcontext; } system.out.println("========applicationcontext配置成功,在普通类可以通过调用springutils.getappcontext()获取applicationcontext对象,applicationcontext="+springutil.applicationcontext+"========"); } //获取applicationcontext public static applicationcontext getapplicationcontext() { return applicationcontext; } //通过name获取 bean. public static object getbean(string name){ return getapplicationcontext().getbean(name); } //通过class获取bean. public static <t> t getbean(class<t> clazz){ return getapplicationcontext().getbean(clazz); } //通过name,以及clazz返回指定的bean public static <t> t getbean(string name,class<t> clazz){ return getapplicationcontext().getbean(name, clazz); } }
pom.xml
<!-- word to pdf: begin -->
<dependency>
    <groupId>org.jodconverter</groupId>
    <artifactId>jodconverter-core</artifactId>
    <version>4.2.2</version>
</dependency>
<dependency>
    <groupId>org.jodconverter</groupId>
    <artifactId>jodconverter-local</artifactId>
    <version>4.2.2</version>
</dependency>
<dependency>
    <groupId>org.jodconverter</groupId>
    <artifactId>jodconverter-spring-boot-starter</artifactId>
    <version>4.2.2</version>
</dependency>
<!-- word to pdf: end -->
<!-- pdf to image: begin -->
<dependency>
    <groupId>org.icepdf.os</groupId>
    <artifactId>icepdf-core</artifactId>
    <version>6.2.2</version>
    <exclusions>
        <exclusion>
            <groupId>javax.media</groupId>
            <artifactId>jai_core</artifactId>
        </exclusion>
    </exclusions>
</dependency>
<dependency>
    <groupId>org.icepdf.os</groupId>
    <artifactId>icepdf-viewer</artifactId>
    <version>6.2.2</version>
</dependency>
<!-- pdf to image: end -->
3、调用测试:
import com.yunfatong.ojd.service.filesystemstorageservice; import lombok.extern.slf4j.slf4j; import org.junit.test; import org.junit.runner.runwith; import org.springframework.beans.factory.annotation.autowired; import org.springframework.boot.test.context.springboottest; import org.springframework.test.context.junit4.springrunner; import java.util.list; @runwith(springrunner.class) @springboottest @slf4j public class wordtransferpdfutiltest { @autowired transferutil wordtransferpdfutil; @autowired filesystemstorageservice filesystemstorageservice; @test public void transferlocalfile() {
try {
/*******************word 转pdf******************/
long time = system.currenttimemillis();
system.out.println("start :======" + time);
wordtransferpdfutil.transferwordtopdf("courtchongqing/test_new/555.docx");
log.error(system.currenttimemillis() + " time============================== :" + ((system.currenttimemillis() - time) / 1000));
/*******************pdf转图片******************/
long time2 = system.currenttimemillis();
list<string> pdfimages2 = wordtransferpdfutil.transferpdftoimage("courtchongqing/test_new/333.pdf");
for (string pdfimage : pdfimages2) {
log.error(pdfimage);
}
log.error(" time===============================22222222 :" + ((system.currenttimemillis() - time2) / 1000));
// system.out.println("pdf path =============" + path);
} catch (exception e) {
e.printstacktrace();
}
} }
推荐阅读
-
C#实现HTML转WORD及WORD转PDF的方法
-
Vue项目pdf(base64)转图片遇到的问题及解决方法
-
pdf转txt工具 图片转换文字识别软件图文教程
-
比较好用用的pdf转txt文本文件 图片文字提取工具使用介绍
-
Java实现Word/Pdf/TXT转html
-
『Pdf转Word』Easy PDF to Word Converter V2.0.3(汉化版)
-
php用windows COM组件调用openoffice接口实现word转pdf文件时报错的解决办法
-
用python 制作图片转pdf工具
-
Java实现Word/Excel/TXT转PDF
-
如何pdf转成ppt(免费pdf转word软件推荐)