package com.alibaba.china.gene.test;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
/**
 * Simulates the transcoding steps that Chinese text goes through on its way from a browser to
 * the Java side of a web server, printing the intermediate result of every step to standard
 * output.
 *
 * <p>The stages printed by {@link #main(String[])} are, in order: URL-encoding of the sample
 * text with GBK and UTF-8, conversion of the strings to bytes, the bit representation of those
 * bytes, restoring bytes from the bits, decoding the bytes back to strings, URL-decoding with
 * matching and mismatching charsets, and finally viewing GBK bytes as an ISO-8859-1 string.
 */
public class UrlEncodeTest {

    /** Sample text that is pushed through every stage of the demonstration. */
    private static final String SAMPLE_TEXT = "中文";

    /**
     * Charset used for every string-to-bytes and bytes-to-string conversion of the simulated
     * request. Encoding and decoding must use the same charset, otherwise the round trip depends
     * on the platform default.
     */
    private static final String WIRE_CHARSET = "iso-8859-1";

    /**
     * Runs the whole demonstration and prints each stage to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println("模拟测试中文字符从浏览器到Web服务器Java端经过的转码过程");
        System.out.println("--------------------------------------------------");

        // Stage 1: URL-encode the sample text with each charset. Each call is independent so an
        // unsupported charset does not discard the other result.
        String str = SAMPLE_TEXT;
        String strGbk = urlEncode(str, "gbk");
        String strUtf8 = urlEncode(str, "utf8");
        System.out.print("中文原字符串:");
        System.out.println(str);
        System.out.println("浏览器会做一次编码,FireFox默认gbk、IE默认Utf-8:");
        System.out.print("中文对应gbk编码:");
        System.out.println(strGbk);
        System.out.print("中文对应utf-8编码:");
        System.out.println(strUtf8);
        System.out.println();

        // Stage 2: turn the strings into bytes (ISO-8859-1, see getInBytes).
        System.out.println("在构造Http请求头时,系统会按特定编码转成Byte流");
        System.out.print("中文原字符串转成的Bytes流:");
        byte[] bytes = getInBytes(str);
        printBytes(bytes);
        System.out.print("中文对应gbk编码转成的Bytes流:");
        byte[] bytesGbk = getInBytes(strGbk);
        printBytes(bytesGbk);
        System.out.print("中文对应utf-8编码转成的Bytes流:");
        byte[] bytesUtf8 = getInBytes(strUtf8);
        printBytes(bytesUtf8);
        System.out.println();

        // Stage 3: show the bytes as bits.
        System.out.println("在发送Http请求给服务器时,做网络传输时,系统都会转成二进制编码");
        System.out.print("中文原字符串Bytes流对应二进制:");
        String[] binary = printAndGetInBinary(bytes);
        System.out.print("中文对应gbk编码Bytes流对应二进制:");
        String[] binaryGbk = printAndGetInBinary(bytesGbk);
        System.out.print("中文对应utf-8编码Bytes流对应二进制:");
        String[] binaryUtf8 = printAndGetInBinary(bytesUtf8);
        System.out.println();

        // Stage 4: rebuild the bytes from the bit strings.
        System.out.println("服务器接收到二进制,系统都会转成Bytes流");
        System.out.print("中文原字符串对应二进制还原得到Bytes流:");
        bytes = restoreBytes(binary);
        printBytes(bytes);
        System.out.print("中文对应gbk编码对应二进制还原得到Bytes流:");
        bytesGbk = restoreBytes(binaryGbk);
        printBytes(bytesGbk);
        System.out.print("中文对应utf-8编码对应二进制还原得到Bytes流:");
        bytesUtf8 = restoreBytes(binaryUtf8);
        printBytes(bytesUtf8);
        System.out.println();

        // Stage 5: decode the bytes back into strings with the same charset that produced them,
        // instead of the platform default charset, so the result is identical on every machine.
        System.out.println("应用服务器如Tomcat,默认会默认编码还原成字符串编码");
        str = decodeWireBytes(bytes);
        strGbk = decodeWireBytes(bytesGbk);
        strUtf8 = decodeWireBytes(bytesUtf8);
        System.out.print("中文原字符串Byte流还原得到的字符串编码:");
        System.out.println(str);
        System.out.print("中文对应gbk编码Byte流还原得到的字符串编码:");
        System.out.println(strGbk);
        System.out.print("中文对应utf-8编码Byte流还原得到的字符串编码:");
        System.out.println(strUtf8);
        System.out.println();

        // Stage 6: URL-decode with the matching charset.
        System.out.println("Java应用,如Webx会按指定的编码还原字符串");
        System.out.print("中文原字符串按gbk还原后:");
        System.out.println(urlDecode(str, "gbk"));
        System.out.println("这说明如果客户端不进行编码直接发送中文给服务端,会造成信息丢失");
        System.out.print("中文对应gbk编码按gbk还原后:");
        System.out.println(urlDecode(strGbk, "gbk"));
        System.out.print("中文对应utf-8编码按utf-8还原后:");
        System.out.println(urlDecode(strUtf8, "utf-8"));
        System.out.println();

        // Stage 7: URL-decode with the wrong charset to show the resulting garbage.
        System.out.println("Webx如果与浏览器使用的编码不一致,还原出的字符串会是乱码");
        System.out.print("中文对应gbk编码按utf-8还原后:");
        System.out.println(urlDecode(strGbk, "utf-8"));
        System.out.print("中文对应utf-8编码按gbk还原后:");
        System.out.println(urlDecode(strUtf8, "gbk"));
        System.out.println();

        // Stage 8: GBK bytes reinterpreted as an ISO-8859-1 string.
        try {
            System.out.println("数据库中会转成iso-8859-1编码");
            str = SAMPLE_TEXT;
            System.out.print("中文字符串原文:");
            System.out.println(str);
            byte[] gbkBytes = str.getBytes("gbk");
            System.out.print("中文字符串对应GBK的Byte流:");
            printBytes(gbkBytes);
            System.out.print("中文字符串对应Byte流转成的iso-8859-1格式字符串:");
            System.out.println(new String(gbkBytes, "iso-8859-1"));
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        System.out.println("--------------------------------------------------");
    }

    /**
     * URL-encodes {@code text} with the named charset.
     *
     * @return the encoded text, or an empty string (after printing the stack trace) when the
     *     charset is not supported
     */
    private static String urlEncode(String text, String charsetName) {
        try {
            return URLEncoder.encode(text, charsetName);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            return "";
        }
    }

    /**
     * URL-decodes {@code text} with the named charset.
     *
     * @return the decoded text, or an empty string (after printing the stack trace) when the
     *     charset is not supported
     */
    private static String urlDecode(String text, String charsetName) {
        try {
            return URLDecoder.decode(text, charsetName);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Decodes bytes with {@link #WIRE_CHARSET}, the same charset {@link #getInBytes(String)}
     * encodes with.
     *
     * @throws IllegalStateException if the JVM does not provide the charset
     */
    private static String decodeWireBytes(byte[] bytes) {
        try {
            return new String(bytes, WIRE_CHARSET);
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("Charset not available: " + WIRE_CHARSET, e);
        }
    }

    /**
     * Parses each base-2 string back into one byte.
     *
     * @param binary bit strings, one per byte; {@code null} yields an empty array
     * @return an array with one byte per input string
     */
    private static byte[] restoreBytes(String[] binary) {
        if (binary == null) {
            return new byte[0];
        }
        byte[] bytes = new byte[binary.length];
        for (int i = 0; i < binary.length; i++) {
            // Parse as int first: "11111111" is 255, which the cast narrows to the byte -1.
            bytes[i] = (byte) Integer.parseInt(binary[i], 2);
        }
        return bytes;
    }

    /**
     * Converts every byte to its 8-character bit string, prints all of them on one line without
     * separators, and returns them.
     *
     * @param bytes bytes to convert; {@code null} yields an empty array and prints nothing
     * @return one bit string per input byte
     */
    private static String[] printAndGetInBinary(byte[] bytes) {
        if (bytes == null) {
            return new String[0];
        }
        String[] binaryStrs = new String[bytes.length];
        StringBuilder line = new StringBuilder(bytes.length * 8);
        for (int i = 0; i < bytes.length; i++) {
            binaryStrs[i] = byte2bits(bytes[i]);
            line.append(binaryStrs[i]);
        }
        System.out.println(line.toString());
        return binaryStrs;
    }

    /**
     * Returns the 8-character, zero-padded base-2 representation of the byte's bit pattern.
     *
     * @param b the byte to convert
     * @return exactly eight characters, each {@code '0'} or {@code '1'}
     */
    public static String byte2bits(byte b) {
        // Mask to the low 8 bits and set bit 8 so toBinaryString always yields 9 characters;
        // dropping the leading '1' leaves the zero-padded 8-bit pattern.
        return Integer.toBinaryString((b & 0xFF) | 0x100).substring(1);
    }

    /**
     * Prints the signed decimal value of each byte on one line, separated by single spaces so
     * that adjacent values cannot run together. Prints nothing when {@code bytes} is
     * {@code null}.
     */
    private static void printBytes(byte[] bytes) {
        if (bytes == null) {
            return;
        }
        StringBuilder line = new StringBuilder();
        for (int i = 0; i < bytes.length; i++) {
            if (i > 0) {
                line.append(' ');
            }
            line.append(bytes[i]);
        }
        System.out.println(line.toString());
    }

    /**
     * Encodes the string to bytes using ISO-8859-1.
     *
     * @param str the text to encode
     * @return the encoded bytes; {@code null} when {@code str} is {@code null} or when the
     *     charset is reported as unsupported (the stack trace is printed in that case)
     */
    protected static byte[] getInBytes(String str) {
        if (str == null) {
            return null;
        }
        try {
            return str.getBytes(WIRE_CHARSET);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            return null;
        }
    }
}
字符的编码
程序员文章站
2022-03-31 14:51:44
...
推荐阅读
-
cdr怎么调整美术字的属性和排版?
-
WiFi 6的核心技术
-
Ai怎么绘制圆形参考线 Ai创建圆形参考线的教程
-
Adobe 2021系列软件正式发布!你的电脑带得起吗
-
ps怎么制作渐变融合效果的海报? ps渐变海报的做法
-
昆明医科大学海源学院怎么样好不好?附昆明医科大学海源学院最好的专业排名及王牌专业介绍
-
总结C#处理异常的方式
-
华为nova8和华为nova8Plus的区别哪个更值得入手
-
检索 COM 类工厂中 CLSID 为 {000209FF-0000-0000-C000-000000000046} 的组件时失败,原因是出现以下错误: 80070005 拒绝访问
-
超实用的在线作图工具,效率提升100%!