欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

itext,jxl实现pdf转为txt,txt转excel

程序员文章站 2022-04-11 10:21:28
...

itext,jxl实现pdf转为txt,txt转excel

pom.xml配置

<!--管理依赖的版本号-->
<properties>
    <com.itextpdf.version>5.3.2</com.itextpdf.version>
    <org.bouncycastle.version>1.52</org.bouncycastle.version>
    <jxl.version>1.0</jxl.version>
</properties>
   
<!--依赖-->
<dependencies>
   <dependency>
       <groupId>com.itextpdf</groupId>
       <artifactId>itextpdf</artifactId>
       <version>${com.itextpdf.version}</version>
       <scope>compile</scope>
   </dependency>
   <!--读取pdf-->
   <dependency>
       <groupId>org.bouncycastle</groupId>
       <artifactId>bcpg-jdk15on</artifactId>
       <version>${org.bouncycastle.version}</version>
   </dependency>
   <!--text-->
   <dependency>
       <groupId>jxl</groupId>
       <artifactId>jxl</artifactId>
       <version>${jxl.version}</version>
   </dependency>
</dependencies>

itext 读取pdf->txt

package itext;


import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;

import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.FilteredTextRenderListener;
import com.itextpdf.text.pdf.parser.LocationTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
import com.itextpdf.text.pdf.parser.RegionTextRenderFilter;
import com.itextpdf.text.pdf.parser.RenderFilter;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;

public class ReadPdfByiText {

    public static void main(String[] args) throws IOException {
        String outputPath = "D:\\developcodespace\\PdfContent_1.txt";
        PrintWriter writer = new PrintWriter(new FileOutputStream(outputPath));
        String fileName = "D:\\developcodespace\\20190323175137823782.pdf";

        readPdf(writer, fileName);//直接读全PDF面

        //readPdf_filter(fileName);//读取PDF面的某个区域

    }

    public static void readPdf(PrintWriter writer,String fileName){
        String pageContent = "";
        try {
            PdfReader reader = new PdfReader(fileName);
            int pageNum = reader.getNumberOfPages();
            for(int i=1;i<=pageNum;i++){
                String textFromPage = PdfTextExtractor.getTextFromPage(reader, i);
                pageContent += textFromPage;//读取第i页的文档内容
//                pageContent += PdfTextExtractor.getTextFromPage(reader, i);//读取第i页的文档内容
            }
            writer.write(pageContent);
        } catch (Exception e) {
            e.printStackTrace();
        }finally{
            writer.close();
        }
    }

    public static void readPdf_filter(PrintWriter writer,String fileName){
        String pageContent = "";
        try {
            Rectangle rect = new Rectangle(90, 0, 450, 40);
            RenderFilter filter = new RegionTextRenderFilter(rect);
            PdfReader reader = new PdfReader(fileName);
            int pageNum = reader.getNumberOfPages();
            TextExtractionStrategy strategy;
            for (int i = 1; i <= pageNum; i++) {
                strategy = new FilteredTextRenderListener(new LocationTextExtractionStrategy(), filter);
                pageContent +=PdfTextExtractor.getTextFromPage(reader, i, strategy);
            }
			/*String[] split = pageContent.split(" ");
			for(String ss : split){
				System.out.println(ss.substring(ss.lastIndexOf(":")+1, ss.length()));
			}*/
            writer.write(pageContent);
        } catch (Exception e) {
            e.printStackTrace();
        }finally{
            writer.close();
        }
    }
}

jxl读取txt->excel

package itext;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;

import jxl.Workbook;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import jxl.write.WriteException;
import jxl.write.biff.RowsExceededException;
public class TextToExcel {

    public static void main(String[] args) {

        File file = new File("D:\\developcodespace\\PdfContent_1.txt");// 将读取的txt文件
        File file2 = new File("D:\\developcodespace\\work.xls");// 将生成的excel表格

        if (file.exists() && file.isFile()) {

            InputStreamReader read = null;
            String line = "";
            BufferedReader input = null;
            WritableWorkbook wbook = null;
            WritableSheet sheet;

            try {
                read = new InputStreamReader(new FileInputStream(file), "utf-8");
                input = new BufferedReader(read);

                wbook = Workbook.createWorkbook(file2);// 根据路径生成excel文件
                sheet = wbook.createSheet("first", 0);// 新标签页

                try {
                    Label company = new Label(0, 0, "公司名称");// 如下皆为列名
                    sheet.addCell(company);
                    Label position = new Label(1, 0, "岗位");
                    sheet.addCell(position);
                    Label salary = new Label(2, 0, "薪资");
                    sheet.addCell(salary);
                    Label status = new Label(3, 0, "状态");
                    sheet.addCell(status);
                } catch (RowsExceededException e) {
                    e.printStackTrace();
                } catch (WriteException e) {
                    e.printStackTrace();
                }

                int m = 1;// excel行数
                int n = 0;// excel列数
                Label t;
                while ((line = input.readLine()) != null) {
                    if(!line.startsWith("014")){
                        continue;
                    }
                    String[] words = line.split("[ \t]");// 把读出来的这行根据空格或tab分割开

                    for (int i = 0; i < words.length; i++) {
                        if (!words[i].matches("\\s*")) { // 当不是空行时
                            t = new Label(n, m, words[i].trim());
                            sheet.addCell(t);
                            n++;
                        }
                    }
                    n = 0;// 回到列头部
                    m++;// 向下移动一行
                }
            } catch (UnsupportedEncodingException e) {
                e.printStackTrace();
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            } catch (RowsExceededException e) {
                e.printStackTrace();
            } catch (WriteException e) {
                e.printStackTrace();
            } finally {
                try {
                    wbook.write();
                    wbook.close();
                    input.close();
                    read.close();
                } catch (IOException e) {
                    e.printStackTrace();
                } catch (WriteException e) {
                    e.printStackTrace();
                }
            }
            System.out.println("over!");
            System.exit(0);
        } else {
            System.out.println("file is not exists or not a file");
            System.exit(0);
        }
    }
}