使用java为pdf添加书签的方法(pdf书签制作)
我经常下载 PDF 格式的电子书,但有些好书下载下来没有书签,读起来缺乏整体感。所以决定自己写一个小工具:把特定格式的文本解析成书签,再写入 PDF 文件中。
整体思路是:豆瓣、京东、当当、亚马逊等网站的图书介绍里通常带有目录信息,可以直接复制出来作为输入。以《HTTP权威指南》为例:
目录的结构如:
第1章 http 概述 3
1.1 http——因特网的多媒体信使 4
1.2 web 客户端和服务器 4
1.3 资源 5
1.3.1 媒体类型 6
1.3.2 uri 7
1.3.3 url 7
1.3.4 urn 8
1.4 事务 9
1.4.1 方法 9
1.4.2 状态码 10
1.4.3 web 页面中可以包含多个对象 10
1.5 报文 11
1.6 连接 13
每一行的末尾都是页码,并且与标题之间用空格分隔;标题开头编号中“.”的个数决定书签的层级(例如“第1章”为第 0 层,“1.3”为第 1 层,“1.3.1”为第 2 层)。
处理之后,结果为:
主要的逻辑为:
package org.fra.pdf.bussiness;
import java.io.bufferedreader;
import java.io.fileoutputstream;
import java.io.ioexception;
import java.util.arraylist;
import java.util.hashmap;
import java.util.list;
import java.util.stack;
import com.itextpdf.text.documentexception;
import com.itextpdf.text.pdf.inthashtable;
import com.itextpdf.text.pdf.pdfarray;
import com.itextpdf.text.pdf.pdfdictionary;
import com.itextpdf.text.pdf.pdfindirectreference;
import com.itextpdf.text.pdf.pdfname;
import com.itextpdf.text.pdf.pdfnumber;
import com.itextpdf.text.pdf.pdfobject;
import com.itextpdf.text.pdf.pdfreader;
import com.itextpdf.text.pdf.pdfstamper;
import com.itextpdf.text.pdf.pdfstring;
import com.itextpdf.text.pdf.simplebookmark;
/**
 * Builds pdf bookmarks (outlines) from a plain-text table of contents and
 * writes them into a copy of an existing pdf.
 *
 * <p>Each input line is expected to look like {@code "1.3.1 title text 6"}:
 * a title whose first token may carry a dotted section number, followed by a
 * space and the target page. The number of dots in the first token is the
 * nesting level of the bookmark.
 *
 * <p>NOTE(review): iText's SimpleBookmark documents its map keys as
 * "Title" / "Action" / "Page" / "Kids"; the all-lowercase literals used here
 * are kept exactly as they appear in the rest of this listing - confirm the
 * spelling before relying on them.
 *
 * <p>Instances keep parsing state in a field and are therefore not meant to
 * be shared between concurrent calls.
 */
public class addpdfoutlinefromtxt {
    /**
     * Parents whose child list is currently being filled, innermost on top.
     * Each entry remembers the list, the entry and the level to return to.
     */
    private final stack<outlineinfo> parentoutlinestack = new stack<outlineinfo>();

    /**
     * Reads {@code sourcepdf}, builds the outline tree from {@code bufread}
     * and writes the result to {@code destpdf}.
     *
     * @param destpdf   path of the pdf to create
     * @param sourcepdf path of the pdf to read
     * @param bufread   table-of-contents text, one bookmark per line
     * @param pattern   one of the {@code addbookmarkconstants.reserved_*}
     *                  values: keep all existing bookmarks, keep only the
     *                  first existing bookmark, or keep none; any other value
     *                  makes this method return without doing anything
     * @throws ioexception       if reading the text or either pdf fails
     * @throws documentexception if the stamper cannot write the document
     */
    public void createpdf(string destpdf, string sourcepdf,
            bufferedreader bufread, int pattern) throws ioexception,
            documentexception {
        if (pattern != addbookmarkconstants.reserved_old_outline
                && pattern != addbookmarkconstants.reserved_none
                && pattern != addbookmarkconstants.reserved_first_outline) {
            return;
        }
        // Parents left over from an earlier run must not leak into this one.
        parentoutlinestack.clear();
        // Open the source pdf.
        pdfreader reader = new pdfreader(sourcepdf);
        try {
            list<hashmap<string, object>> outlines = new arraylist<hashmap<string, object>>();
            if (pattern == addbookmarkconstants.reserved_old_outline) {
                // A document without bookmarks yields null here, not an empty list.
                list<hashmap<string, object>> existing = simplebookmark
                        .getbookmark(reader);
                if (existing != null) {
                    outlines.addall(existing);
                }
            } else if (pattern == addbookmarkconstants.reserved_first_outline) {
                addfirstoutlinereservedpdf(outlines, reader);
            }
            addbookmarks(bufread, outlines, null, 0);
            // Create the stamper; the stream is closed even if stamping fails.
            fileoutputstream out = new fileoutputstream(destpdf);
            try {
                pdfstamper stamper = new pdfstamper(reader, out);
                stamper.setoutlines(outlines);
                stamper.close();
            } finally {
                out.close();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Consumes every remaining line of {@code bufread} and hangs the parsed
     * bookmarks into the outline tree.
     *
     * <p>The tree is built iteratively, so the reader does not need
     * mark/reset support and long tables of contents do not deepen the call
     * stack. Blank lines are skipped.
     *
     * @param bufread    source of table-of-contents lines
     * @param outlines   list that receives entries of level {@code prelevel}
     * @param preoutline entry added just before the first line, or
     *                   {@code null} when there is none
     * @param prelevel   level of {@code preoutline} / of {@code outlines}
     * @throws ioexception if reading a line fails
     */
    private void addbookmarks(bufferedreader bufread,
            list<hashmap<string, object>> outlines,
            hashmap<string, object> preoutline, int prelevel)
            throws ioexception {
        list<hashmap<string, object>> siblings = outlines;
        hashmap<string, object> previous = preoutline;
        int currentlevel = prelevel;
        string contentformatline;
        while ((contentformatline = bufread.readline()) != null) {
            if (contentformatline.trim().length() == 0) {
                continue;
            }
            formattedbookmark bookmark = parseformmattedtext(contentformatline);
            hashmap<string, object> map = parsebookmarktohashmap(bookmark);
            int level = bookmark.getlevel();

            // Shallower than the current list: the children are complete, so
            // climb back up. Stop before a parent that is itself shallower
            // than this line (a level was skipped on the way down); the line
            // then joins the current list instead of replacing that parent's
            // existing children.
            while (level < currentlevel && !parentoutlinestack.isempty()
                    && parentoutlinestack.peek().getprelevel() >= level) {
                outlineinfo parent = parentoutlinestack.pop();
                siblings = parent.getoutlines();
                previous = parent.getpreoutline();
                currentlevel = parent.getprelevel();
            }

            // Deeper than the previous line: it is a child of that line, so
            // open a fresh child list and remember where to return to. With
            // no previous entry there is nothing to nest under and the line
            // stays in the current list.
            if (level > currentlevel && previous != null) {
                list<hashmap<string, object>> kids = new arraylist<hashmap<string, object>>();
                previous.put("kids", kids);
                parentoutlinestack.push(new outlineinfo(previous, siblings,
                        currentlevel));
                siblings = kids;
            }

            siblings.add(map);
            previous = map;
            currentlevel = level;
        }
    }

    /**
     * Converts a parsed line into the map form handed to the stamper: a
     * "goto" action pointing at the bookmark's page with " fit" appended.
     *
     * @param bookmark parsed table-of-contents line
     * @return a new map with the "title", "action" and "page" entries
     */
    private hashmap<string, object> parsebookmarktohashmap(
            formattedbookmark bookmark) {
        hashmap<string, object> map = new hashmap<string, object>();
        map.put("title", bookmark.gettitle());
        map.put("action", "goto");
        map.put("page", bookmark.getpage() + " fit");
        return map;
    }

    /**
     * Splits one table-of-contents line into title, page and level.
     *
     * <p>The text after the last space is the page when it consists only of
     * digits. A line without such a trailing page number (typically the book
     * title, if the input is well formed) is kept whole as the title and
     * points at page 1. Surrounding whitespace is ignored.
     *
     * @param contentformatline raw line, not {@code null}
     * @return the parsed bookmark
     */
    private formattedbookmark parseformmattedtext(string contentformatline) {
        formattedbookmark bookmark = new formattedbookmark();
        string line = contentformatline.trim();
        string title = line;
        string destpage = "1";
        int lastspaceindex = line.lastindexof(" ");
        if (lastspaceindex != -1) {
            string candidate = line.substring(lastspaceindex + 1);
            if (ispagenumber(candidate)) {
                title = line.substring(0, lastspaceindex).trim();
                destpage = candidate;
            }
        }
        // The level is the number of dots in the leading section number,
        // e.g. "1.3.1" is level 2; a first token without dots is level 0.
        string[] titlesplit = title.split(" ");
        int dotcount = titlesplit[0].split("\\.").length - 1;
        bookmark.setlevel(dotcount);
        bookmark.setpage(destpage);
        bookmark.settitle(title);
        return bookmark;
    }

    /** Returns true when {@code text} is non-empty and made of ASCII digits only. */
    private boolean ispagenumber(string text) {
        if (text.length() == 0) {
            return false;
        }
        for (int i = 0; i < text.length(); i++) {
            char c = text.charat(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /**
     * Copies the first existing bookmark of {@code reader} into
     * {@code outlines} as a "goto" entry with " fit" appended to its page.
     *
     * <p>Nothing is added when the document has no outline dictionary, no
     * first entry, or when the first entry's target is not an explicit
     * destination array that resolves to a page number of at least 1.
     *
     * @param outlines list that receives the copied bookmark
     * @param reader   the opened source pdf
     */
    private void addfirstoutlinereservedpdf(
            list<hashmap<string, object>> outlines, pdfreader reader) {
        pdfdictionary catalog = reader.getcatalog();
        pdfobject obj = pdfreader.getpdfobjectrelease(catalog
                .get(pdfname.outlines));
        // No bookmarks at all.
        if (obj == null || !obj.isdictionary()) {
            return;
        }
        pdfdictionary outlinesdictionary = (pdfdictionary) obj;
        // Fetch the first bookmark.
        pdfobject first = pdfreader.getpdfobjectrelease(outlinesdictionary
                .get(pdfname.first));
        if (first == null || !first.isdictionary()) {
            return;
        }
        pdfdictionary firstoutline = (pdfdictionary) first;
        pdfstring titleobj = firstoutline.getasstring(pdfname.title);
        string title = titleobj == null ? "" : titleobj.tounicodestring();

        // The target is either a direct destination or the destination of a
        // goto action; only the explicit array form is handled here.
        pdfarray dest = firstoutline.getasarray(pdfname.dest);
        if (dest == null) {
            pdfobject action = pdfreader.getpdfobjectrelease(firstoutline
                    .get(pdfname.a));
            if (action != null && action.isdictionary()) {
                pdfdictionary actiondictionary = (pdfdictionary) action;
                if (pdfname.goto.equals(pdfreader
                        .getpdfobjectrelease(actiondictionary.get(pdfname.s)))) {
                    pdfobject target = pdfreader
                            .getpdfobjectrelease(actiondictionary
                                    .get(pdfname.d));
                    if (target != null && target.isarray()) {
                        dest = (pdfarray) target;
                    }
                }
            }
        }
        if (dest == null) {
            return;
        }

        string deststr = parsedeststring(dest, reader);
        if (deststr.length() == 0) {
            return;
        }
        // The destination string starts with the page number.
        string[] decodestr = deststr.split(" ");
        int num = integer.parseint(decodestr[0]);
        // A page below 1 means the reference could not be mapped to a page.
        if (num < 1) {
            return;
        }
        hashmap<string, object> map = new hashmap<string, object>();
        map.put("title", title);
        map.put("action", "goto");
        map.put("page", num + " fit");
        outlines.add(map);
    }

    /**
     * Renders a destination as a string that starts with the page number.
     *
     * @param dest   destination object, may be {@code null}
     * @param reader the opened source pdf, used to map page references to
     *               page numbers
     * @return the destination string, or "" when it cannot be rendered
     */
    private string parsedeststring(pdfarray dest, pdfreader reader) {
        string deststr = "";
        if (dest == null) {
            return deststr;
        }
        if (dest.isstring()) {
            deststr = dest.tostring();
        } else if (dest.isname()) {
            deststr = pdfname.decodename(dest.tostring());
        } else if (dest.isarray()) {
            // Map the object number of every page to its 1-based page index.
            inthashtable pages = new inthashtable();
            int numpages = reader.getnumberofpages();
            for (int k = 1; k <= numpages; ++k) {
                pages.put(reader.getpageorigref(k).getnumber(), k);
                reader.releasepage(k);
            }
            deststr = makebookmarkparam(dest, pages);
        }
        return deststr;
    }

    /**
     * Renders a destination array as "page fittype [params...]".
     *
     * @param dest  destination array
     * @param pages object number of each page mapped to its page index
     * @return the rendered string, or "" when the array is empty or its first
     *         element is neither a number nor an indirect reference
     */
    private string makebookmarkparam(pdfarray dest, inthashtable pages) {
        if (dest.size() == 0) {
            return "";
        }
        stringbuffer s = new stringbuffer();
        pdfobject obj = dest.getpdfobject(0);
        if (obj.isnumber()) {
            // A numeric page is zero-based in the array, one-based in the string.
            s.append(((pdfnumber) obj).intvalue() + 1);
        } else if (obj.isindirect()) {
            s.append(pages.get(getnumber((pdfindirectreference) obj)));
        } else {
            return "";
        }
        if (dest.size() > 1) {
            // Drop the first character of the fit-type element's text.
            s.append(' ').append(
                    dest.getpdfobject(1).tostring().substring(1));
        }
        for (int k = 2; k < dest.size(); ++k) {
            s.append(' ').append(dest.getpdfobject(k).tostring());
        }
        return s.tostring();
    }

    /**
     * Returns the object number a destination reference stands for. When the
     * reference points at a dictionary of type "pages" that has kids, the
     * number of its first kid is returned instead.
     *
     * @param indirect reference taken from a destination array
     * @return the object number to look up in the page table
     */
    private int getnumber(pdfindirectreference indirect) {
        pdfobject target = pdfreader.getpdfobjectrelease(indirect);
        if (target != null && target.isdictionary()) {
            pdfdictionary pdfobj = (pdfdictionary) target;
            if (pdfname.pages.equals(pdfobj.get(pdfname.type))
                    && pdfobj.contains(pdfname.kids)) {
                pdfarray kids = (pdfarray) pdfobj.get(pdfname.kids);
                indirect = (pdfindirectreference) kids.getpdfobject(0);
            }
        }
        return indirect.getnumber();
    }
}
下一篇: 字符串与数组