欢迎您访问程序员文章站,本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页

字符的编码

程序员文章站 2022-03-31 14:51:44
...
package com.alibaba.china.gene.test;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;

/**
 * Console demo that simulates the transcoding steps Chinese characters go through on the
 * way from a browser to the Java side of a web server.
 *
 * <p>{@link #main(String[])} URL-encodes a sample string as GBK and as UTF-8, converts the
 * results to ISO-8859-1 bytes, renders those bytes as bit strings, restores them, and finally
 * URL-decodes them with matching and mismatching charsets, printing every intermediate value.
 */
public class UrlEncodeTest {

    /**
     * Runs the demo and prints each stage to standard output.
     *
     * @param args not used
     */
    public static void main(String[] args) {

        System.out.println("模拟测试中文字符从浏览器到Web服务器Java端经过的转码过程");
        System.out.println("--------------------------------------------------");

        // Stage 1: URL-encode the sample text once per charset.
        String str = "中文";
        String strGbk = "";
        String strUtf8 = "";
        try {
            strGbk = URLEncoder.encode(str, "gbk");
            strUtf8 = URLEncoder.encode(str, "utf8");
        } catch (UnsupportedEncodingException e) {
            // Best effort: the demo continues with whatever was encoded so far.
            e.printStackTrace();
        }
        System.out.print("中文原字符串:");
        System.out.println(str);
        System.out.println("浏览器会做一次编码,FireFox默认gbk、IE默认Utf-8:");
        System.out.print("中文对应gbk编码:");
        System.out.println(strGbk);
        System.out.print("中文对应utf-8编码:");
        System.out.println(strUtf8);
        System.out.println();

        // Stage 2: turn each string into ISO-8859-1 bytes (see getInBytes).
        System.out.println("在构造Http请求头时,系统会按特定编码转成Byte流");
        System.out.print("中文原字符串转成的Bytes流:");
        byte[] bytes = getInBytes(str);
        printBytes(bytes);
        System.out.print("中文对应gbk编码转成的Bytes流:");
        byte[] bytesGbk = getInBytes(strGbk);
        printBytes(bytesGbk);
        System.out.print("中文对应utf-8编码转成的Bytes流:");
        byte[] bytesUtf8 = getInBytes(strUtf8);
        printBytes(bytesUtf8);
        System.out.println();

        // Stage 3: render every byte as an 8-character bit string.
        System.out.println("在发送Http请求给服务器时,做网络传输时,系统都会转成二进制编码");
        System.out.print("中文原字符串Bytes流对应二进制:");
        String[] binary = printAndGetInBinary(bytes);
        System.out.print("中文对应gbk编码Bytes流对应二进制:");
        String[] binaryGbk = printAndGetInBinary(bytesGbk);
        System.out.print("中文对应utf-8编码Bytes流对应二进制:");
        String[] binaryUtf8 = printAndGetInBinary(bytesUtf8);
        System.out.println();

        // Stage 4: parse the bit strings back into bytes.
        System.out.println("服务器接收到二进制,系统都会转成Bytes流");
        System.out.print("中文原字符串对应二进制还原得到Bytes流:");
        bytes = restoreBytes(binary);
        printBytes(bytes);
        System.out.print("中文对应gbk编码对应二进制还原得到Bytes流:");
        bytesGbk = restoreBytes(binaryGbk);
        printBytes(bytesGbk);
        System.out.print("中文对应utf-8编码对应二进制还原得到Bytes流:");
        bytesUtf8 = restoreBytes(binaryUtf8);
        printBytes(bytesUtf8);
        System.out.println();

        // Stage 5: turn the bytes back into strings.
        System.out.println("应用服务器如Tomcat,默认会默认编码还原成字符串编码");
        try {
            // Decode with the same charset getInBytes encoded with. The no-charset String
            // constructor would use the platform default and make the result depend on the
            // machine the demo runs on.
            str = new String(bytes, "iso-8859-1");
            strGbk = new String(bytesGbk, "iso-8859-1");
            strUtf8 = new String(bytesUtf8, "iso-8859-1");
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        System.out.print("中文原字符串Byte流还原得到的字符串编码:");
        System.out.println(str);
        System.out.print("中文对应gbk编码Byte流还原得到的字符串编码:");
        System.out.println(strGbk);
        System.out.print("中文对应utf-8编码Byte流还原得到的字符串编码:");
        System.out.println(strUtf8);
        System.out.println();

        try {
            // Stage 6: URL-decode with the charset that was used for encoding.
            System.out.println("Java应用,如Webx会按指定的编码还原字符串");
            System.out.print("中文原字符串按gbk还原后:");
            System.out.println(URLDecoder.decode(str, "gbk"));
            System.out.println("这说明如果客户端不进行编码直接发送中文给服务端,会造成信息丢失");
            System.out.print("中文对应gbk编码按gbk还原后:");
            System.out.println(URLDecoder.decode(strGbk, "gbk"));
            System.out.print("中文对应utf-8编码按utf-8还原后:");
            System.out.println(URLDecoder.decode(strUtf8, "utf-8"));
            System.out.println();

            // Stage 7: URL-decode with the other charset to show the mismatch.
            System.out.println("Webx如果与浏览器使用的编码不一致,还原出的字符串会是乱码");
            System.out.print("中文对应gbk编码按utf-8还原后:");
            System.out.println(URLDecoder.decode(strGbk, "utf-8"));
            System.out.print("中文对应utf-8编码按gbk还原后:");
            System.out.println(URLDecoder.decode(strUtf8, "gbk"));
            System.out.println();
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }

        try {
            // Stage 8: reinterpret the GBK bytes of the sample text as ISO-8859-1 characters.
            System.out.println("数据库中会转成iso-8859-1编码");
            str = "中文";
            System.out.print("中文字符串原文:");
            System.out.println(str);
            byte[] gbkBytes = str.getBytes("gbk");
            System.out.print("中文字符串对应GBK的Byte流:");
            printBytes(gbkBytes);
            System.out.print("中文字符串对应Byte流转成的iso-8859-1格式字符串:");
            System.out.println(new String(gbkBytes, "iso-8859-1"));
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        System.out.println("--------------------------------------------------");
    }

    /**
     * Parses an array of base-2 strings back into bytes.
     *
     * @param binary bit strings, one per byte; may be {@code null}
     * @return one byte per input string, or an empty array when {@code binary} is {@code null}
     * @throws NumberFormatException if an element is not a valid base-2 integer
     */
    private static byte[] restoreBytes(String[] binary) {
        if (binary == null) {
            return new byte[0];
        }
        byte[] bytes = new byte[binary.length];
        for (int i = 0; i < binary.length; i++) {
            // The narrowing cast keeps the low 8 bits, so "11111111" becomes -1.
            bytes[i] = (byte) Integer.parseInt(binary[i], 2);
        }
        return bytes;
    }

    /**
     * Converts every byte to its 8-character bit string, prints the strings back to back on
     * one line, and returns them.
     *
     * @param bytes bytes to render; may be {@code null}
     * @return one bit string per byte, or an empty array (nothing printed) when {@code bytes}
     *         is {@code null}
     */
    private static String[] printAndGetInBinary(byte[] bytes) {
        if (bytes == null) {
            return new String[0];
        }
        String[] binaryStrs = new String[bytes.length];
        for (int i = 0; i < bytes.length; i++) {
            binaryStrs[i] = byte2bits(bytes[i]);
        }
        // Fixed-width 8-character groups stay unambiguous without a separator.
        for (String bits : binaryStrs) {
            System.out.print(bits);
        }
        System.out.println();
        return binaryStrs;
    }

    /**
     * Formats a byte as exactly eight binary digits, most significant bit first.
     *
     * @param b the byte to format
     * @return an 8-character string of {@code '0'} and {@code '1'}
     */
    public static String byte2bits(byte b) {
        // Mask to the low 8 bits, then set bit 8 as a guard so toBinaryString always yields
        // nine digits; dropping the leading guard digit leaves the zero-padded byte.
        return Integer.toBinaryString((b & 0xFF) | 0x100).substring(1);
    }

    /**
     * Prints the signed decimal value of every byte on one line, separated by single spaces.
     * Prints nothing when {@code bytes} is {@code null}.
     *
     * @param bytes bytes to print; may be {@code null}
     */
    private static void printBytes(byte[] bytes) {
        if (bytes == null) {
            return;
        }
        StringBuilder strBuilder = new StringBuilder();
        for (int i = 0; i < bytes.length; i++) {
            // Without a separator {1, 23} and {12, 3} would both print as "123".
            if (i > 0) {
                strBuilder.append(' ');
            }
            strBuilder.append(bytes[i]);
        }
        System.out.println(strBuilder.toString());
    }

    /**
     * Encodes a string as ISO-8859-1 bytes.
     *
     * @param str the string to encode; may be {@code null}
     * @return the encoded bytes, or {@code null} when {@code str} is {@code null} or the
     *         charset lookup fails
     */
    protected static byte[] getInBytes(String str) {
        if (str == null) {
            return null;
        }
        byte[] bytes = null;
        try {
            // Convert to a byte stream using ISO-8859-1.
            bytes = str.getBytes("iso-8859-1");
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return bytes;
    }

}