使用iconv进行内码转换(Big5->GB2312) 博客分类: C++开发工具
iconv 是一个通过 unicode 作为中间码实现各种内码间相互转换的库,它基本上囊括了世界上所有编码方式,例如:ASCII、GB2312、GBK、GB18030、BIG5、UTF-8、UCS-2、UCS-2BE、UCS-2LE、UCS-4、UCS-4BE、UCS-4LE、UTF-16、UTF-16BE、UTF-16LE、UTF-32、UTF-32BE、UTF-32LE、UTF-7 等等,除此之外,还包括泰语、日语、韩语、西欧等国家语言的编码。下面我们演示如何使用 iconv 实现 Big5 到 GB2312 的转换,当然只要简单修改一下便可实现 iconv 支持的任何编码间的转换。
下载
libiconv
是linux
版本的iconv
,可在 http://www.gnu.org/software/libiconv/
下载
iconv
的win32
版本可以在 http://gnuwin32.sourceforge.net/packages/libiconv.htm
下载
SVN
源码
另外,还有一些演示代码,需要的可以到我的SVN
下载
http://xcyber.googlecode.com/svn/trunk/Convert/
标签: libiconv , iconv , Big5 , GB2312 , 大五码 , 内码
代码片段(1)
[ 代码] [C/C++/Objective-C] 代码
001 |
/**************************************************************************** |
002 |
* Big5ToGB2312 - Convert Big5 encoding file to GB2312 encoding file |
003 |
* File: |
004 |
* Big5ToGb2312.c |
005 |
* Description: |
006 |
* Convert Big5 encoding file to GB2312 encoding file using iconv library |
007 |
* Author: |
008 |
* XCyber email:XCyber@sohu.com |
009 |
* Date: |
010 |
* August 7, 2008 |
011 |
* Other: |
012 |
* visit http://www.gnu.org/software/libiconv/ for more help of iconv |
013 |
***************************************************************************/ |
014 |
|
015 |
|
016 |
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <tchar.h>

#include "../iconv-1.9.2.win32/include/iconv.h"
021 |
|
022 |
//#pragma comment(lib, "../iconv-1.9.2.win32/lib/iconv.lib") // using iconv dynamic-link lib, iconv.dll |
023 |
#pragma comment(lib, "../iconv-1.9.2.win32/lib/iconv_a.lib") // using iconv static lib |
024 |
|
025 |
// Size, in bytes, of the fixed-size source and destination conversion buffers.
// The original author notes that BUFFER_SIZE must be >= 2
// (presumably so that one complete double-byte character fits; confirm).
#define BUFFER_SIZE 1024
026 |
|
027 |
|
028 |
/*
 * Print the program banner and the command-line synopsis to standard output.
 * Takes no arguments and returns nothing.
 */
void usage()
{
    /* Each entry is written verbatim, in order; none contains a '%'. */
    static const char *const kHelpLines[] = {
        "\nBig5ToGB2312 - Convert Big5 encoding file to GB2312 encoding file\n",
        "XCyber@sohu.com on August 7, 2008\n",
        " Usage:\n",
        " Big5ToGB2312 [Big5 file(in)] [GB2312 file(out)]\n\n"
    };
    size_t idx;

    for (idx = 0; idx < sizeof kHelpLines / sizeof kHelpLines[0]; ++idx) {
        fputs(kHelpLines[idx], stdout);
    }
}
035 |
|
036 |
|
037 |
int main(int argc, char* argv[]) |
038 |
{ |
039 |
FILE * pSrcFile = NULL; |
040 |
FILE * pDstFile = NULL; |
041 |
|
042 |
char szSrcBuf[BUFFER_SIZE]; |
043 |
char szDstBuf[BUFFER_SIZE]; |
044 |
|
045 |
size_t nSrc = 0; |
046 |
size_t nDst = 0; |
047 |
size_t nRead = 0; |
048 |
size_t nRet = 0; |
049 |
|
050 |
char *pSrcBuf = szSrcBuf; |
051 |
char *pDstBuf = szDstBuf; |
052 |
|
053 |
iconv_t icv; |
054 |
int argument = 1; |
055 |
|
056 |
//check input arguments |
057 |
if(argc != 3) |
058 |
{ |
059 |
usage(); |
060 |
return -1; |
061 |
} |
062 |
|
063 |
|
064 |
pSrcFile = fopen(argv[1],"r"); |
065 |
if(pSrcFile == NULL) |
066 |
{ |
067 |
printf("can't open source file!\n"); |
068 |
return -1; |
069 |
} |
070 |
|
071 |
pDstFile = fopen(argv[2],"w"); |
072 |
&nbs |
073 |
p;if(pSrcFile == NULL) |
074 |
{ |
075 |
printf("can't open destination file!\n"); |
076 |
return -1; |
077 |
} |
078 |
|
079 |
//initialize iconv routine, perform conversion from BIG5 to GB2312 |
080 |
//TODO: if you want to perfom other type of coversion, e.g. GB2312->BIG5, GB2312->UTF-8 ... |
081 |
//just change following two paremeters of iconv_open() |
082 |
icv = iconv_open("GB2312","BIG5"); |
083 |
if(icv == 0) |
084 |
{ |
085 |
printf("can't initalize iconv routine!\n"); |
086 |
return -1; |
087 |
} |
088 |
|
089 |
//enable "illegal sequence discard and continue" feature, so that if met illeagal sequence, |
090 |
//conversion will continue instead of being terminated |
091 |
if(iconvctl (icv ,ICONV_SET_DISCARD_ILSEQ,&argument) != 0) |
092 |
{ |
093 |
printf("can't enable \"illegal sequence discard and continue\" feature!\n"); |
094 |
return -1; |
095 |
} |
096 |
|
097 |
while(!feof(pSrcFile)) |
098 |
{ |
099 |
pSrcBuf = szSrcBuf; |
100 |
pDstBuf = szDstBuf; |
101 |
nDst = BUFFER_SIZE; |
102 |
|
103 |
// read data from source file |
104 |
nRead = fread(szSrcBuf + nSrc,sizeof(char),BUFFER_SIZE - nSrc,pSrcFile); |
105 |
if(nRead == 0) |
106 |
break; |
107 |
|
108 |
// the amount of data to be converted should include previous left data and current read data |
109 |
nSrc = nSrc + nRead; |
110 |
|
111 |
//perform conversion |
112 |
nRet = iconv(icv,(const char**)&pSrcBuf,&nSrc,&pDstBuf,&nDst); |
113 |
|
114 |
if(nRet == -1) |
115 |
{ |
116 |
// include all case of errno: E2BIG, EILSEQ, EINVAL |
117 |
// E2BIG: There is not sufficient room at *outbuf. |
118 |
// EILSEQ: An invalid multibyte sequence has been encountered in the input. |
119 |
// EINVAL: An incomplete multibyte sequence has been encountered in the input |
120 |
// move the left data to the head of szSrcBuf in other to link it with the next data block |
121 |
memmove(szSrcBuf,pSrcBuf,nSrc); |
122 |
} |
123 |
|
124 |
//wirte data to destination file |
125 |
fwrite(szDstBuf,sizeof(char),BUFFER_SIZE - nDst,pDstFile); |
126 |
|
127 |
} |
128 |
iconv_close(icv); |
129 |
fclose(pSrcFile); |
130 |
fclose(pDstFile); |
131 |
|
132 |
printf("conversion complete.\n"); |
133 |
|
134 |
return; |
135 |
} |