Java实现十六进制字符Unicode与中英文转换示例
关于Unicode和UTF的关系,可以简单地记忆:Unicode是一个编码组织、一个编码规范,在Java中(作为字符集名称时)指UTF-16;UTF是Unicode编码的转换格式(Unicode Transformation Format),以便于很好地在网络中传递、在存储媒介中保存,于是UTF存在多种格式,如8、16、32,再加上LE、BE(小端、大端)的区别,Unicode编码格式才会有以下过程中的10种。
/**
 * Demo entry point: dumps the hex bytes of a sample string under every
 * charset, then decodes a sample hex string under every charset.
 *
 * NOTE(review): the class name {@code StringUtil} is restored from the
 * lowercased {@code stringutil} in the article; confirm it matches the
 * enclosing class.
 *
 * @param args command-line arguments (unused)
 * @throws UnsupportedEncodingException if a charset is not available in the running JVM
 */
public static void main(String[] args) throws UnsupportedEncodingException {
    StringUtil.str2all("0 产品型号描述");
    StringUtil.str4all("30000900a74ec1548b57f753cf63f08f");
}
/**
 * Decodes a string of hexadecimal digits with each of ten Unicode charsets
 * and prints every decoded result on its own line of standard output,
 * preceded by a separator line.
 *
 * Every two hex digits form one byte. A trailing unpaired digit is ignored.
 *
 * @param ustr the hexadecimal digit string to decode, e.g. {@code "00000041"}
 * @throws UnsupportedEncodingException if a charset is not available in the running JVM
 * @throws NumberFormatException if a digit pair is not valid hexadecimal
 */
public static void str4all(String ustr) throws UnsupportedEncodingException {
    System.out.println("+++++++++++++++++++++++++++++++++++++++++++++++++++");

    // Convert the hex text into raw bytes, two digits per byte.
    byte[] bs = new byte[ustr.length() / 2];
    for (int i = 0; i < bs.length; i++) {
        bs[i] = (byte) Integer.parseInt(ustr.substring(i * 2, i * 2 + 2), 16);
    }

    // "x-utf-16be-bom" is deliberately absent: requesting it raises
    // UnsupportedEncodingException. The last entry is the big-endian BOM
    // variant of UTF-32 (previously the little-endian one was listed twice).
    String[] charsetNames = {
        "utf-8",
        "utf-16", // same as the "unicode" alias
        "utf-16le",
        "x-utf-16le-bom",
        "utf-16be",
        "utf-32",
        "utf-32le",
        "x-utf-32le-bom",
        "utf-32be",
        "x-utf-32be-bom"
    };
    for (String charsetName : charsetNames) {
        System.out.println(new String(bs, charsetName));
    }
}
/**
 * Encodes a string with each of ten Unicode charsets and prints the
 * resulting bytes as lowercase hexadecimal, one line per charset, to
 * standard output, preceded by a separator line.
 *
 * Each byte is written as exactly two hex digits (zero-padded), so the
 * output is unambiguous and can be passed back to {@code str4all}.
 *
 * @param ustr the text to encode
 * @throws UnsupportedEncodingException if a charset is not available in the running JVM
 */
public static void str2all(String ustr) throws UnsupportedEncodingException {
    System.out.println("+++++++++++++++++++++++++++++++++++++++++++++++++++");

    // "x-utf-16be-bom" is deliberately absent: requesting it raises
    // UnsupportedEncodingException. The last entry is the big-endian BOM
    // variant of UTF-32 (previously the little-endian one was listed twice).
    String[] charsetNames = {
        "utf-8",
        "utf-16",
        "utf-16le",
        "x-utf-16le-bom",
        "utf-16be",
        "utf-32",
        "utf-32le",
        "x-utf-32le-bom",
        "utf-32be",
        "x-utf-32be-bom"
    };
    for (String charsetName : charsetNames) {
        byte[] bs = ustr.getBytes(charsetName);
        StringBuilder hex = new StringBuilder(bs.length * 2);
        for (byte b : bs) {
            // Emit high and low nibble separately so bytes below 0x10 keep
            // their leading zero (Integer.toHexString would drop it).
            hex.append(Character.forDigit((b >> 4) & 0xf, 16));
            hex.append(Character.forDigit(b & 0xf, 16));
        }
        System.out.println(hex);
    }
}
编码名称收集
charset us-ascii %s
historicalname ascii
# iana aliases
alias iso-ir-6
alias ansi_x3.4-1986
alias iso_646.irv:1991
alias ascii
alias iso646-us
alias us
alias ibm367
alias cp367
alias csascii
alias default
# other aliases
alias 646 # solaris posix locale
alias iso_646.irv:1983
alias ansi_x3.4-1968 # linux posix locale (redhat)
alias ascii7
charset utf-8 utf_8
historicalname utf8
alias utf8
alias unicode-1-1-utf-8
charset utf-16 utf_16
historicalname utf-16
alias utf_16
alias utf16
alias unicode
alias unicodebig
charset utf-16be utf_16be
historicalname unicodebigunmarked
alias utf_16be
alias iso-10646-ucs-2
alias x-utf-16be
alias unicodebigunmarked
charset utf-16le utf_16le
historicalname unicodelittleunmarked
alias utf_16le
alias x-utf-16le
alias unicodelittleunmarked
charset x-utf-16le-bom utf_16le_bom
historicalname unicodelittle
alias unicodelittle
charset utf-32 utf_32
alias utf_32
alias utf32
charset utf-32le utf_32le
alias utf_32le
alias x-utf-32le
charset utf-32be utf_32be
alias utf_32be
alias x-utf-32be
charset x-utf-32le-bom utf_32le_bom
alias utf_32le_bom
alias utf-32le-bom
charset x-utf-32be-bom utf_32be_bom
alias utf_32be_bom
alias utf-32be-bom
charset iso-8859-1 %s
historicalname iso8859_1
# iana aliases
alias iso-ir-100
alias iso_8859-1
alias latin1
alias l1
alias ibm819
alias cp819
alias csisolatin1
# other aliases
alias 819
alias ibm-819
alias iso8859_1
alias iso_8859-1:1987
alias iso_8859_1
alias 8859_1
alias iso8859-1
charset iso-8859-2 %s
historicalname iso8859_2
alias iso8859_2
alias 8859_2
alias iso-ir-101
alias iso_8859-2
alias iso_8859-2:1987
alias iso8859-2
alias latin2
alias l2
alias ibm912
alias ibm-912
alias cp912
alias 912
alias csisolatin2
charset iso-8859-4 %s
historicalname iso8859_4
alias iso8859_4
alias iso8859-4
alias 8859_4
alias iso-ir-110
alias iso_8859-4
alias iso_8859-4:1988
alias latin4
alias l4
alias ibm914
alias ibm-914
alias cp914
alias 914
alias csisolatin4
charset iso-8859-5 %s
historicalname iso8859_5
alias iso8859_5
alias 8859_5
alias iso-ir-144
alias iso_8859-5
alias iso_8859-5:1988
alias iso8859-5
alias cyrillic
alias ibm915
alias ibm-915
alias cp915
alias 915
alias csisolatincyrillic
charset iso-8859-7 %s
historicalname iso8859_7
alias iso8859_7
alias 8859_7
alias iso-ir-126
alias iso_8859-7
alias iso_8859-7:1987
alias elot_928
alias ecma-118
alias greek
alias greek8
alias csisolatingreek
alias sun_eu_greek # solaris 7/8 compatibility
alias ibm813
alias ibm-813
alias 813
alias cp813
alias iso8859-7 # solaris 9 compatibility
charset iso-8859-9 %s
historicalname iso8859_9
alias iso8859_9
alias 8859_9
alias iso-ir-148
alias iso_8859-9
alias iso_8859-9:1989
alias iso8859-9
alias latin5
alias l5
alias ibm920
alias ibm-920
alias 920
alias cp920
alias csisolatin5
charset iso-8859-13 %s
historicalname iso8859_13
alias iso8859_13
alias 8859_13
alias iso_8859-13
alias iso8859-13
charset iso-8859-15 %s
historicalname iso8859_15
# iana alias
alias iso_8859-15
# other aliases
alias 8859_15
alias iso8859_15
alias iso8859-15
alias ibm923
alias ibm-923
alias cp923
alias 923
alias latin0
alias latin9
alias l9
alias csisolatin0
alias csisolatin9
alias iso8859_15_fdis
charset koi8-r %s
historicalname koi8_r
alias koi8_r
alias koi8
alias cskoi8r
charset koi8-u %s
alias koi8_u
charset windows-1250 %s
historicalname cp1250
alias cp1250
alias cp5346 # euro ibm ccsid
charset windows-1251 %s
historicalname cp1251
alias cp1251
alias cp5347 # euro ibm ccsid
alias ansi-1251 # solaris compatibility
charset windows-1252 %s
historicalname cp1252
alias cp1252
alias cp5348 # euro ibm ccsid
charset windows-1253 %s
historicalname cp1253
alias cp1253
alias cp5349 # euro ibm ccsid
charset windows-1254 %s
historicalname cp1254
alias cp1254
alias cp5350 # euro ibm ccsid
charset windows-1257 %s
historicalname cp1257
alias cp1257
alias cp5353 # euro ibm ccsid
charset ibm437 %s
historicalname cp437
alias cp437
alias ibm-437
alias 437
alias cspc8codepage437
alias windows-437
charset x-ibm737 %s
historicalname cp737
alias cp737
alias ibm737
alias ibm-737
alias 737
charset ibm775 %s
historicalname cp775
alias cp775
alias ibm-775
alias 775
charset ibm850 %s
historicalname cp850
alias cp850
alias ibm-850
alias 850
alias cspc850multilingual
charset ibm852 %s
historicalname cp852
alias cp852
alias ibm-852
alias 852
alias cspcp852
charset ibm855 %s
historicalname cp855
alias cp855
alias ibm-855
alias 855
alias cspcp855
charset ibm857 %s
historicalname cp857
alias cp857
alias ibm-857
alias 857
alias csibm857
charset ibm00858 %s
historicalname cp858
alias cp858
alias ccsid00858
alias cp00858
alias 858
charset ibm862 %s
historicalname cp862
alias cp862
alias ibm-862
alias 862
alias csibm862
alias cspc862latinhebrew
charset ibm866 %s
historicalname cp866
alias cp866
alias ibm-866
alias 866
alias csibm866
charset x-ibm874 %s
historicalname cp874
alias cp874
alias ibm874
alias ibm-874
alias 874