PDF格式文件的读取
程序员文章站
2022-05-28 16:54:35
...
1、导入坐标
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.21</version>
</dependency>
2、编写业务类
public class Test {
public static String getTextFromPDF(String pdfFilePath) {
String result = null;
FileInputStream is = null;
PDDocument document = null;
try {
is = new FileInputStream(pdfFilePath);
PDFParser parser = new PDFParser(new RandomAccessBuffer(is));
parser.parse();
document = parser.getPDDocument();
PDFTextStripper stripper = new PDFTextStripper();
result = stripper.getText(document);
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
if (is != null) {
try {
is.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (document != null) {
try {
document.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return result;
}
public static void main(String[] args) {
String str = Test.getTextFromPDF("D:\\Download\\Google\\123_20201106142215.pdf");
System.out.println(str);
}
}
上一篇: 小哥哥们不能错过的话题,什么补肾最好
下一篇: 关于java生成PDF格式文件