欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识。
您现在的位置是: 首页

iText 读取 PDF 的各级标题和标题对应页码 博客分类: 文档处理 / Java Web 标签: iText、word、标题、页码

程序员文章站 2024-03-25 19:04:16
...
   因为要做全文检索的项目,需要获取 PDF 文件的指定标题和标题对应的页码。上网查了相关资料后,决定使用 iText 开发包。废话不多说,直接上代码。
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
 
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.SimpleBookmark;
 
/**
 * Small command-line demo that walks the bookmark (outline) tree of a PDF with
 * iText's {@code SimpleBookmark} and prints, in two separate passes, every
 * bookmark title and then every page number that a "GoTo" bookmark points to.
 */
public class TestPdf {

    /** PDF that is opened when no path is supplied on the command line. */
    private static final String DEFAULT_PDF_PATH = "e:/test.pdf";

    /**
     * Opens the PDF, prints all bookmark titles, then prints all bookmark page numbers.
     *
     * @param args optional; when present, {@code args[0]} is the path of the PDF to read,
     *             otherwise {@code e:/test.pdf} is used
     * @throws Exception if the PDF cannot be opened or its outline cannot be read
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;
        PdfReader reader = new PdfReader(path);
        try {
            List<HashMap<String, Object>> list = SimpleBookmark.getBookmark(reader);
            // getBookmark yields null for a document without any outline; nothing to print then.
            if (list == null) {
                return;
            }

            for (HashMap<String, Object> bookmark : list) {
                showBookmark(bookmark);
            }
            for (HashMap<String, Object> bookmark : list) {
                getPageNumbers(bookmark);
            }
        } finally {
            // Release the underlying file even when reading the outline fails.
            reader.close();
        }
    }

    /**
     * Prints the title of the given bookmark and, depth-first, the titles of all
     * of its descendants.
     *
     * @param bookmark one bookmark entry; {@code null} is ignored
     */
    private static void showBookmark(HashMap<String, Object> bookmark) {
        if (bookmark == null) {
            return;
        }
        System.out.println(bookmark.get("Title"));

        List<HashMap<String, Object>> kids = kidsOf(bookmark);
        if (kids == null) {
            return;
        }
        for (HashMap<String, Object> kid : kids) {
            showBookmark(kid);
        }
    }

    /**
     * Prints the target page number of the given bookmark (when it is a "GoTo"
     * bookmark carrying a "Page" entry) and then does the same, depth-first, for
     * all of its descendants.
     *
     * <p>Children are visited regardless of the parent's action, so a parent that
     * has no destination of its own does not hide the page numbers of its subtree.
     *
     * @param bookmark one bookmark entry; {@code null} is ignored
     * @throws NumberFormatException if the leading token of the "Page" entry is not an integer
     */
    public static void getPageNumbers(HashMap<String, Object> bookmark) {
        if (bookmark == null) {
            return;
        }

        if ("GoTo".equals(bookmark.get("Action"))) {
            String page = (String) bookmark.get("Page");
            if (page != null) {
                page = page.trim();
                // The page number is the first space-separated token of the entry.
                int idx = page.indexOf(' ');
                if (idx < 0) {
                    // Label text (including the space before the colon) is kept as originally emitted.
                    System.out.println("pageNum :" + Integer.parseInt(page));
                } else {
                    System.out.println("pageNum:" + Integer.parseInt(page.substring(0, idx)));
                }
            }
        }

        List<HashMap<String, Object>> kids = kidsOf(bookmark);
        if (kids == null) {
            return;
        }
        for (HashMap<String, Object> kid : kids) {
            getPageNumbers(kid);
        }
    }

    /**
     * Returns the child bookmarks stored under the "Kids" key, or {@code null}
     * when the bookmark has none.
     */
    @SuppressWarnings("unchecked") // the "Kids" entry is stored as Object; the list element type is assumed
    private static List<HashMap<String, Object>> kidsOf(HashMap<String, Object> bookmark) {
        return (List<HashMap<String, Object>>) bookmark.get("Kids");
    }
}