欢迎您访问程序员文章站,本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

PDF转化为TXT简单工具(适合程序员使用)

程序员文章站 2022-04-11 10:28:57
...

支持python3版本的pdfminer,请参见
https://pypi.python.org/pypi/pdfminer3k/

1、下载pdfminer

http://www.unixuser.org/~euske/python/pdfminer/
然后解压缩

[root@node1 ~]# unzip pdfminer-master.zip   
[root@node1 ~]# cd pdfminer-master

2、安装pdfminer

(1)python2.x
pdfminer提示需要python2.x环境
Install Python 2.4 or newer. (Python 3 is not supported.)

[root@node1 ~]# python --version
Python 2.6.6
[root@node1 ~]#

(2)安装pdfminer

[root@node1 pdfminer-master]# ls
cmaprsrc  docs  LICENSE  Makefile  MANIFEST.in  pdfminer  README.md  samples  setup.py  tools
[root@node1 pdfminer-master]# python setup.py install
/usr/lib64/python2.6/distutils/dist.py:266: UserWarning: Unknown distribution option: 'install_requires'
  warnings.warn(msg)
running install
running build
running build_py
creating build

running install_scripts
copying build/scripts-2.6/pdf2txt.py -> /usr/bin
copying build/scripts-2.6/dumppdf.py -> /usr/bin
copying build/scripts-2.6/latin2ascii.py -> /usr/bin
changing mode of /usr/bin/pdf2txt.py to 755
changing mode of /usr/bin/dumppdf.py to 755
changing mode of /usr/bin/latin2ascii.py to 755
running install_egg_info
Writing /usr/lib/python2.6/site-packages/pdfminer-20140328-py2.6.egg-info
[root@node1 pdfminer-master]#

3、pdf转化txt测试

[root@node1  ~]# pdf2txt.py a.pdf > a.txt
[root@node1  ~]# cat a.txt
工 作 简 报  
第 2 期(总第 2 期) 
送:各普通会员单位 
[root@node1  ~]#