欢迎您访问程序员文章站,本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

TESS4J

程序员文章站 2022-03-05 10:21:35
...
需要导入的类:
import java.awt.Color;
import java.awt.image.BufferedImage;
import javax.imageio.ImageIO;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import java.io.IOException;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileDescriptor;
import java.io.PrintStream;
import java.io.FileNotFoundException;
import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.BufferedOutputStream;
import java.lang.RuntimeException;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.util.CellRangeAddress;

程序主体代码:
/**
 * Batch OCR tool built on Tess4J and Apache POI.
 *
 * <p>The program runs in two phases:
 * <ol>
 *   <li>Every image in {@link #SOURCE_DIR} is cleaned (pure-black pixels are
 *       painted white), cropped to its top-left region (half the width, a
 *       quarter of the height), scaled up 2x and saved as
 *       {@code <index>.png} in {@link #CLEANED_DIR}.</li>
 *   <li>Every file in {@link #CLEANED_DIR} is OCR'd twice (English and
 *       Simplified Chinese); a registration number and a company name are
 *       extracted from the recognized text and written to one row of an
 *       Excel sheet saved at {@link #RESULT_FILE}.</li>
 * </ol>
 */
public class aa
{
	/** Directory holding the raw input images. */
	private static final String SOURCE_DIR = "D:\\new\\tmall2";

	/** Directory receiving the pre-processed images; also the OCR input directory. */
	private static final String CLEANED_DIR = "D:\\new\\tmall";

	/** Path handed to {@code ITesseract.setDatapath}. */
	private static final String TESSDATA_DIR = "F:\\MyDownloads\\Tess4J";

	/** Destination of the generated Excel workbook. */
	private static final String RESULT_FILE = "D:\\new\\result.xls";

	/** Number of characters collected before a registration number is accepted. */
	private static final int REGISTRATION_NUMBER_LENGTH = 18;

	/** Factor by which the cropped image is enlarged before being saved. */
	private static final float RESIZE_FACTOR = 2.0f;

	/**
	 * Program entry point: pre-processes the images, runs OCR and writes the workbook.
	 *
	 * <p>A {@link TesseractException} aborts the OCR phase but the rows collected
	 * so far are still written to {@link #RESULT_FILE}.
	 *
	 * @param args unused
	 * @throws IOException if a directory cannot be listed/created, or an image or
	 *                     the workbook cannot be read or written
	 */
	public static void main(String[] args) throws IOException
	{
		preprocessImages(new File(SOURCE_DIR), new File(CLEANED_DIR));

		HSSFWorkbook wkb = new HSSFWorkbook();
		try {
			HSSFSheet sheet = createSheetWithHeader(wkb);
			try {
				recognizeInto(sheet, new File(CLEANED_DIR));
			} catch (TesseractException e) {
				e.printStackTrace();
			}
			// Write once, after all rows exist; try-with-resources closes the stream.
			try (FileOutputStream output = new FileOutputStream(RESULT_FILE)) {
				wkb.write(output);
			}
		} finally {
			wkb.close();
		}
	}

	/**
	 * Cleans, crops and enlarges every image in {@code sourceDir}, saving the
	 * results as {@code 1.png}, {@code 2.png}, ... in {@code targetDir}. The
	 * number is the 1-based position of the entry in the directory listing.
	 */
	private static void preprocessImages(File sourceDir, File targetDir) throws IOException
	{
		File[] sources = listDirectory(sourceDir);
		if (!targetDir.isDirectory() && !targetDir.mkdirs()) {
			throw new IOException("Cannot create output directory: " + targetDir);
		}

		int index = 0;
		for (File source : sources) {
			index++;
			if (!source.isFile()) {
				continue;
			}
			// ImageIO.read returns null when no registered reader understands the file.
			BufferedImage img = ImageIO.read(source);
			if (img == null) {
				System.err.println("Skipping non-image file: " + source);
				continue;
			}

			int cropWidth = img.getWidth() / 2;
			int cropHeight = img.getHeight() / 4;
			if (cropWidth == 0 || cropHeight == 0) {
				// getSubimage rejects empty regions.
				System.err.println("Skipping image too small to crop: " + source);
				continue;
			}

			whitenBlackPixels(img);
			BufferedImage cropped = img.getSubimage(0, 0, cropWidth, cropHeight);
			BufferedImage enlarged = enlarge(cropped, RESIZE_FACTOR);
			ImageIO.write(enlarged, "png", new File(targetDir, index + ".png"));
		}
	}

	/** Paints every pixel whose red, green and blue components are all 0 white (watermark removal). */
	private static void whitenBlackPixels(BufferedImage img)
	{
		final int white = new Color(255, 255, 255).getRGB();
		for (int x = 0; x < img.getWidth(); x++) {
			for (int y = 0; y < img.getHeight(); y++) {
				// Only the colour channels are compared; alpha is ignored.
				if ((img.getRGB(x, y) & 0x00FFFFFF) == 0) {
					img.setRGB(x, y, white);
				}
			}
		}
	}

	/** Returns a smooth-scaled {@code TYPE_INT_RGB} copy of {@code src} enlarged by {@code factor}. */
	private static BufferedImage enlarge(BufferedImage src, float factor)
	{
		int width = (int) (src.getWidth() * factor);
		int height = (int) (src.getHeight() * factor);
		BufferedImage result = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
		java.awt.Graphics g = result.getGraphics();
		try {
			g.drawImage(src.getScaledInstance(width, height, java.awt.Image.SCALE_SMOOTH), 0, 0, null);
		} finally {
			g.dispose();
		}
		return result;
	}

	/** Creates the result sheet with its header row; each column title spans three merged cells. */
	private static HSSFSheet createSheetWithHeader(HSSFWorkbook wkb)
	{
		HSSFSheet sheet = wkb.createSheet("企业表");
		HSSFRow header = sheet.createRow(0);
		header.createCell(0).setCellValue("企业名称");
		header.createCell(3).setCellValue("企业注册号");
		sheet.addMergedRegion(new CellRangeAddress(0, 0, 0, 2));
		sheet.addMergedRegion(new CellRangeAddress(0, 0, 3, 5));
		return sheet;
	}

	/**
	 * Runs OCR on every regular file in {@code imageDir} and appends one row per
	 * file to {@code sheet}: company name in column 0, registration number in
	 * column 3. A cell is left empty when the value cannot be extracted.
	 */
	private static void recognizeInto(HSSFSheet sheet, File imageDir)
			throws IOException, TesseractException
	{
		ITesseract instance = new Tesseract();
		instance.setDatapath(TESSDATA_DIR);

		int rowIndex = 0;
		for (File image : listDirectory(imageDir)) {
			if (!image.isFile()) {
				continue;
			}
			rowIndex++;
			HSSFRow row = sheet.createRow(rowIndex);
			row.createCell(0);
			row.createCell(3);
			sheet.addMergedRegion(new CellRangeAddress(rowIndex, rowIndex, 0, 2));
			sheet.addMergedRegion(new CellRangeAddress(rowIndex, rowIndex, 3, 5));

			// English model for the alphanumeric registration number.
			instance.setLanguage("eng");
			String englishText = instance.doOCR(image);
			// Simplified-Chinese model for the company name.
			instance.setLanguage("chi_sim");
			String chineseText = instance.doOCR(image);

			String number = extractRegistrationNumber(englishText);
			if (number != null) {
				row.getCell(3).setCellValue(number);
			}
			String name = extractCompanyName(chineseText);
			if (name != null) {
				row.getCell(0).setCellValue(name);
			}
		}
	}

	/**
	 * Collects the digits and upper-case ASCII letters found between the first
	 * and second {@code ':'} of {@code ocrText}.
	 *
	 * @return the first {@link #REGISTRATION_NUMBER_LENGTH} such characters, or
	 *         {@code null} if fewer are present before the second colon
	 */
	private static String extractRegistrationNumber(String ocrText)
	{
		StringBuilder number = new StringBuilder(REGISTRATION_NUMBER_LENGTH);
		int colons = 0;
		for (int i = 0; i < ocrText.length(); i++) {
			char ch = ocrText.charAt(i);
			if (ch == ':') {
				colons++;
				continue;
			}
			if (colons > 1) {
				break;
			}
			boolean accepted = (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z');
			if (colons == 1 && accepted) {
				number.append(ch);
				if (number.length() == REGISTRATION_NUMBER_LENGTH) {
					return number.toString();
				}
			}
		}
		return null;
	}

	/**
	 * Collects the text between the second and third {@code ':'} of
	 * {@code ocrText}, up to and including the first {@code '司'}.
	 *
	 * @return the collected name, or {@code null} if no {@code '司'} occurs in that span
	 */
	private static String extractCompanyName(String ocrText)
	{
		StringBuilder name = new StringBuilder();
		int colons = 0;
		for (int i = 0; i < ocrText.length(); i++) {
			char ch = ocrText.charAt(i);
			if (ch == ':') {
				colons++;
				continue;
			}
			if (colons > 2) {
				break;
			}
			if (colons == 2) {
				name.append(ch);
				if (ch == '司') {
					return name.toString();
				}
			}
		}
		return null;
	}

	/** Lists {@code dir}, turning the {@code null} that {@code File.listFiles} returns on failure into an exception. */
	private static File[] listDirectory(File dir) throws IOException
	{
		File[] entries = dir.listFiles();
		if (entries == null) {
			throw new IOException("Not a readable directory: " + dir);
		}
		return entries;
	}
}