java自动根据文件内容的编码来读取避免乱码
程序员文章站
2022-06-14 14:07:58
...
通过 cpdetector 这个开源的 jar 包，可以自动检测文件内容的编码，从而在读取时选用正确的编码，避免乱码问题。
原创不易,转载请注明出处:java自动根据文件内容的编码来读取避免乱码
测试结果,提供截图:
GBK文件内容
UTF8文件内容
运行结果:
package com.zuidaima.test; import info.monitorenter.cpdetector.io.ASCIIDetector; import info.monitorenter.cpdetector.io.CodepageDetectorProxy; import info.monitorenter.cpdetector.io.JChardetFacade; import info.monitorenter.cpdetector.io.ParsingDetector; import info.monitorenter.cpdetector.io.UnicodeDetector; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader; public class Main { public static String getContent(String path) throws Exception { File file = new File(path); CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance(); detector.add(new ParsingDetector(false)); detector.add(JChardetFacade.getInstance()); detector.add(ASCIIDetector.getInstance()); detector.add(UnicodeDetector.getInstance()); java.nio.charset.Charset charset = null; try { charset = detector.detectCodepage(file.toURI().toURL()); } catch (Exception ex) { ex.printStackTrace(); } String charsetName = null; if (charset != null) { charsetName = charset.name(); } else { charsetName = "UTF-8"; } BufferedReader reader = new BufferedReader(new InputStreamReader( new FileInputStream(file), charsetName)); String line = null; String lines = ""; while ((line = reader.readLine()) != null) { lines += line + "\n"; } reader.close(); return lines; } public static void main(String[] args) throws Exception { System.out.println(getContent("bin/gbk.txt")); System.out.println(getContent("bin/utf8.txt")); } }