判断一个字符串中是否有乱码
程序员文章站
2022-03-07 15:13:30
...
package com.test; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * @description 判断中文是否乱码 */ public class MessyCodeCheck { public static boolean isChinese(char c) { Character.UnicodeBlock ub = Character.UnicodeBlock.of(c); if (ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) { return true; } return false; } public static boolean isMessyCode(String strName) { Pattern p = Pattern.compile("\\s*|\t*|\r*|\n*"); Matcher m = p.matcher(strName); String after = m.replaceAll(""); String temp = after.replaceAll("\\p{P}", ""); char[] ch = temp.trim().toCharArray(); float chLength = ch.length; float count = 0; for (int i = 0; i < ch.length; i++) { char c = ch[i]; if (!Character.isLetterOrDigit(c)) { if (!isChinese(c)) { count = count + 1; } } } float result = count / chLength; if (result > 0.2) {//乱码字符 大于原内容的20%算是乱码 return true; } else { return false; } } public static void main(String[] args) { System.out.println(isMessyCode("XYZr�������ABCDԴ")); System.out.println(isMessyCode("平台新¥%&#@*(版本发布")); System.out.println(isMessyCode("System.out.println")); } }