欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页  >  IT编程

Java实现十六进制字符Unicode与中英文转换示例

程序员文章站 2024-02-21 22:51:52
关于Unicode和UTF的关系,可以简单地记忆:Unicode是一个编码组织、一个编码规范,在Java中指UTF-16;UTF是Unicode编码的transformation...

关于Unicode和UTF的关系,可以简单地记忆:Unicode是一个编码组织、一个编码规范,在Java中指UTF-16;UTF(Unicode Transformation Format)是Unicode编码的转换格式,以便于很好地在网络中传递、在存储媒介中保存。于是UTF存在多种格式,如8、16、32,再加上LE、BE(小端、大端字节序)以及是否带BOM的区别,Unicode编码格式才会有以下过程中的10种。

复制代码 代码如下:

public static void main(string[] args) throws unsupportedencodingexception {
                 stringutil.str2all("0 产品型号描述");
  stringutil.str4all("30000900a74ec1548b57f753cf63f08f");
 }

/**
  * 尝试所有编码格式对十六进制数字字符串进行编码
  *
  * @param hexstr
  * @throws unsupportedencodingexception
  */
 public static void str4all(string ustr) throws unsupportedencodingexception{

  system.out.println("+++++++++++++++++++++++++++++++++++++++++++++++++++");

  byte[] bs = new byte[ustr.length()/2];
  for (int i = 0; i < bs.length; i++) {
   bs[i] = (byte) integer.parseint(ustr.substring(i*2, i*2+2), 16);
  }

  system.out.println(new string(bs, "utf-8"));
  // 16
  system.out.println(new string(bs, "utf-16")); // 同unicode
  system.out.println(new string(bs, "utf-16le"));
  system.out.println(new string(bs, "x-utf-16le-bom"));
  system.out.println(new string(bs, "utf-16be"));
//  system.out.println(new string(bs, "x-utf-16be-bom")); // unsupportedencodingexception
  // 32
  system.out.println(new string(bs, "utf-32"));
  system.out.println(new string(bs, "utf-32le"));
  system.out.println(new string(bs, "x-utf-32le-bom"));
  system.out.println(new string(bs, "utf-32be"));
  system.out.println(new string(bs, "x-utf-32le-bom"));
 }
/**
  * 列出所有编码对应的解码后的十六进制数字字符串
  *
  * @param ustr
  * @throws unsupportedencodingexception
  */
 public static void str2all(string ustr) throws unsupportedencodingexception{

  system.out.println("+++++++++++++++++++++++++++++++++++++++++++++++++++");

  byte[] bs = new byte[]{};

  bs = ustr.getbytes("utf-8");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
  // 16
  bs = ustr.getbytes("utf-16");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
  bs = ustr.getbytes("utf-16le");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
  bs = ustr.getbytes("x-utf-16le-bom");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
  bs = ustr.getbytes("utf-16be");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
//  bs = ustr.getbytes("x-utf-16be-bom"); // unsupportedencodingexception
  // 32
  bs = ustr.getbytes("utf-32");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
  bs = ustr.getbytes("utf-32le");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
  bs = ustr.getbytes("x-utf-32le-bom");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
  bs = ustr.getbytes("utf-32be");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
  bs = ustr.getbytes("x-utf-32le-bom");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
 }

编码名称收集

复制代码 代码如下:

charset us-ascii %s
    historicalname ascii
    # iana aliases
    alias iso-ir-6
    alias ansi_x3.4-1986
    alias iso_646.irv:1991
    alias ascii
    alias iso646-us
    alias us
    alias ibm367
    alias cp367
    alias csascii
    alias default
    # other aliases
    alias 646 # solaris posix locale
    alias iso_646.irv:1983
    alias ansi_x3.4-1968 # linux posix locale (redhat)
    alias ascii7

charset utf-8 utf_8
    historicalname utf8
    alias utf8
    alias unicode-1-1-utf-8

charset utf-16 utf_16
    historicalname utf-16
    alias utf_16
    alias utf16
    alias unicode
    alias unicodebig

charset utf-16be utf_16be
    historicalname unicodebigunmarked
    alias utf_16be
    alias iso-10646-ucs-2
    alias x-utf-16be
    alias unicodebigunmarked

charset utf-16le utf_16le
    historicalname unicodelittleunmarked
    alias utf_16le
    alias x-utf-16le
    alias unicodelittleunmarked

charset x-utf-16le-bom utf_16le_bom
    historicalname unicodelittle
    alias unicodelittle

charset utf-32 utf_32
    alias utf_32
    alias utf32

charset utf-32le utf_32le
    alias utf_32le
    alias x-utf-32le

charset utf-32be utf_32be
    alias utf_32be
    alias x-utf-32be

charset x-utf-32le-bom utf_32le_bom
    alias utf_32le_bom
    alias utf-32le-bom

charset x-utf-32be-bom utf_32be_bom
    alias utf_32be_bom
    alias utf-32be-bom

charset iso-8859-1 %s
    historicalname iso8859_1
    # iana aliases
    alias iso-ir-100
    alias iso_8859-1
    alias latin1
    alias l1
    alias ibm819
    alias cp819
    alias csisolatin1
    # other aliases
    alias 819
    alias ibm-819
    alias iso8859_1
    alias iso_8859-1:1987
    alias iso_8859_1
    alias 8859_1
    alias iso8859-1

charset iso-8859-2 %s
    historicalname iso8859_2
    alias iso8859_2
    alias 8859_2
    alias iso-ir-101
    alias iso_8859-2
    alias iso_8859-2:1987
    alias iso8859-2
    alias latin2
    alias l2
    alias ibm912
    alias ibm-912
    alias cp912
    alias 912
    alias csisolatin2

charset iso-8859-4 %s
    historicalname iso8859_4
    alias iso8859_4
    alias iso8859-4
    alias 8859_4
    alias iso-ir-110
    alias iso_8859-4
    alias iso_8859-4:1988
    alias latin4
    alias l4
    alias ibm914
    alias ibm-914
    alias cp914
    alias 914
    alias csisolatin4

charset iso-8859-5 %s
    historicalname iso8859_5
    alias iso8859_5
    alias 8859_5
    alias iso-ir-144
    alias iso_8859-5
    alias iso_8859-5:1988
    alias iso8859-5
    alias cyrillic
    alias ibm915
    alias ibm-915
    alias cp915
    alias 915
    alias csisolatincyrillic

charset iso-8859-7 %s
    historicalname iso8859_7
    alias iso8859_7
    alias 8859_7
    alias iso-ir-126
    alias iso_8859-7
    alias iso_8859-7:1987
    alias elot_928
    alias ecma-118
    alias greek
    alias greek8
    alias csisolatingreek
    alias sun_eu_greek # solaris 7/8 compatibility
    alias ibm813
    alias ibm-813
    alias 813
    alias cp813
    alias iso8859-7 # solaris 9 compatibility

charset iso-8859-9 %s
    historicalname iso8859_9
    alias iso8859_9
    alias 8859_9
    alias iso-ir-148
    alias iso_8859-9
    alias iso_8859-9:1989
    alias iso8859-9
    alias latin5
    alias l5
    alias ibm920
    alias ibm-920
    alias 920
    alias cp920
    alias csisolatin5

charset iso-8859-13 %s
    historicalname iso8859_13
    alias iso8859_13
    alias 8859_13
    alias iso_8859-13
    alias iso8859-13

charset iso-8859-15 %s
    historicalname iso8859_15
    # iana alias
    alias iso_8859-15
    # other aliases
    alias 8859_15
    alias iso8859_15
    alias iso8859-15
    alias ibm923
    alias ibm-923
    alias cp923
    alias 923
    alias latin0
    alias latin9
    alias l9
    alias csisolatin0
    alias csisolatin9
    alias iso8859_15_fdis

charset koi8-r %s
    historicalname koi8_r
    alias koi8_r
    alias koi8
    alias cskoi8r

charset koi8-u %s
    alias koi8_u

charset windows-1250 %s
    historicalname cp1250
    alias cp1250
    alias cp5346 # euro ibm ccsid

charset windows-1251 %s
    historicalname cp1251
    alias cp1251
    alias cp5347 # euro ibm ccsid
    alias ansi-1251 # solaris compatibility

charset windows-1252 %s
    historicalname cp1252
    alias cp1252
    alias cp5348 # euro ibm ccsid

charset windows-1253 %s
    historicalname cp1253
    alias cp1253
    alias cp5349 # euro ibm ccsid

charset windows-1254 %s
    historicalname cp1254
    alias cp1254
    alias cp5350 # euro ibm ccsid

charset windows-1257 %s
    historicalname cp1257
    alias cp1257
    alias cp5353 # euro ibm ccsid


charset ibm437 %s
    historicalname cp437
    alias cp437
    alias ibm-437
    alias 437
    alias cspc8codepage437
    alias windows-437

charset x-ibm737 %s
    historicalname cp737
    alias cp737
    alias ibm737
    alias ibm-737
    alias 737

charset ibm775 %s
    historicalname cp775
    alias cp775
    alias ibm-775
    alias 775

charset ibm850 %s
    historicalname cp850
    alias cp850
    alias ibm-850
    alias 850
    alias cspc850multilingual

charset ibm852 %s
    historicalname cp852
    alias cp852
    alias ibm-852
    alias 852
    alias cspcp852

charset ibm855 %s
    historicalname cp855
    alias cp855
    alias ibm-855
    alias 855
    alias cspcp855

charset ibm857 %s
    historicalname cp857
    alias cp857
    alias ibm-857
    alias 857
    alias csibm857

charset ibm00858 %s
    historicalname cp858
    alias cp858
    alias ccsid00858
    alias cp00858
    alias 858

charset ibm862 %s
    historicalname cp862
    alias cp862
    alias ibm-862
    alias 862
    alias csibm862
    alias cspc862latinhebrew

charset ibm866 %s
    historicalname cp866
    alias cp866
    alias ibm-866
    alias 866
    alias csibm866

charset x-ibm874 %s
    historicalname cp874
    alias cp874
    alias ibm874
    alias ibm-874
    alias 874