欢迎您访问程序员文章站,本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页

使用iconv进行内码转换(Big5->GB2312) 博客分类: C++开发工具  

程序员文章站 2024-03-24 18:07:34
...

iconv 是一个以 Unicode 作为中间码、实现各种内码间相互转换的库,它基本上囊括了世界上所有的编码方式,例如 ASCII、GB2312、GBK、GB18030、BIG5、UTF-8、UCS-2、UCS-2BE、UCS-2LE、UCS-4、UCS-4BE、UCS-4LE、UTF-16、UTF-16BE、UTF-16LE、UTF-32、UTF-32BE、UTF-32LE、UTF-7 等等,除此之外,还包括泰语、日语、韩语、西欧等国家语言的编码。下面我们演示如何使用 iconv 实现 Big5 -> GB2312 的转换,当然只要简单修改一下,便可实现 iconv 所支持的任何编码间的转换。


下载 
libiconv
linux 版本的iconv ,可在 http://www.gnu.org/software/libiconv/  下载 
iconv
win32 版本可以在 http://gnuwin32.sourceforge.net/packages/libiconv.htm  下载 

SVN
源码 
另外,还有一些演示代码,需要的可以到我的SVN 下载 
http://xcyber.googlecode.com/svn/trunk/Convert/

标签: libiconv, iconv, Big5, GB2312, 大五码, 内码

代码片段(1)

[ 代码] [C/C++/Objective-C] 代码

001

/****************************************************************************

002

 *  Big5ToGB2312 - Convert Big5 encoding file to GB2312 encoding file

003

 *  File:

004

 *    Big5ToGb2312.c

005

 *  Description:

006

 *    Convert Big5 encoding file to GB2312 encoding file using iconv library

007

 *  Author:

008

 *    XCyber   email:XCyber@sohu.com

009

 *  Date:

010

 *    August 7, 2008

011

 *  Other:

012

 *    visit http://www.gnu.org/software/libiconv/ for more help of iconv

013

 ***************************************************************************/

014

 

015

 

016

#include <errno.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <tchar.h>

#include "../iconv-1.9.2.win32/include/iconv.h"

021

 

022

//#pragma comment(lib, "../iconv-1.9.2.win32/lib/iconv.lib")  // using iconv dynamic-link lib, iconv.dll

023

#pragma comment(lib, "../iconv-1.9.2.win32/lib/iconv_a.lib")  // using iconv static lib 

024

 

025

#define BUFFER_SIZE 1024   // size in bytes of each conversion buffer (source and destination); must be >= 2

026

 

027

 

028

/*
 * Print the program banner and the command-line synopsis to standard output.
 * Takes no arguments and returns nothing.
 */
void usage()
{
    /* The help text, emitted in order exactly as listed. */
    static const char *const help_lines[] = {
        "\nBig5ToGB2312 - Convert Big5 encoding file to GB2312 encoding file\n",
        "XCyber@sohu.com on August 7, 2008\n",
        "  Usage:\n",
        "      Big5ToGB2312 [Big5 file(in)]  [GB2312 file(out)]\n\n"
    };
    size_t idx;

    for (idx = 0; idx < sizeof help_lines / sizeof help_lines[0]; idx++)
    {
        fputs(help_lines[idx], stdout);
    }
}

035

 

036

 

037

int main(int argc, char* argv[])

038

{

039

    FILE * pSrcFile = NULL;

040

    FILE * pDstFile = NULL;

041

 

042

    char szSrcBuf[BUFFER_SIZE];

043

    char szDstBuf[BUFFER_SIZE];

044

 

045

    size_t nSrc  = 0;

046

    size_t nDst  = 0;

047

    size_t nRead = 0;

048

    size_t nRet  = 0;

049

 

050

    char *pSrcBuf = szSrcBuf;

051

    char *pDstBuf = szDstBuf;

052

 

053

    iconv_t icv;

054

    int argument = 1;

055

 

056

    //check input arguments

057

    if(argc != 3)

058

    {

059

        usage();

060

        return -1;

061

    }

062

 

063

 

064

    pSrcFile = fopen(argv[1],"r");

065

    if(pSrcFile == NULL)

066

    {

067

        printf("can't open source file!\n");

068

        return -1;

069

    }

070

 

071

    pDstFile = fopen(argv[2],"w");

072

   &nbs

073

p;if(pSrcFile == NULL)

074

    {

075

        printf("can't open destination file!\n");

076

        return -1;

077

    }

078

 

079

    //initialize iconv routine, perform conversion from BIG5 to GB2312

080

    //TODO: if you want to perfom other type of coversion, e.g. GB2312->BIG5, GB2312->UTF-8 ...

081

    //just change following two paremeters of iconv_open()

082

    icv = iconv_open("GB2312","BIG5");

083

    if(icv == 0)

084

    {

085

        printf("can't initalize iconv routine!\n");

086

        return -1;

087

    }

088

 

089

    //enable "illegal sequence discard and continue" feature, so that if met illeagal sequence, 

090

    //conversion will continue instead of being terminated

091

    if(iconvctl (icv ,ICONV_SET_DISCARD_ILSEQ,&argument) != 0)

092

    {

093

        printf("can't enable \"illegal sequence discard and continue\" feature!\n");

094

        return -1;

095

    }

096

 

097

    while(!feof(pSrcFile))

098

    {

099

        pSrcBuf = szSrcBuf;

100

        pDstBuf = szDstBuf;

101

        nDst = BUFFER_SIZE;

102

 

103

        // read data from source file

104

        nRead = fread(szSrcBuf + nSrc,sizeof(char),BUFFER_SIZE - nSrc,pSrcFile);

105

        if(nRead == 0)

106

            break;

107

 

108

        // the amount of data to be converted should include previous left data and current read data

109

        nSrc = nSrc + nRead; 

110

 

111

        //perform conversion

112

        nRet = iconv(icv,(const char**)&pSrcBuf,&nSrc,&pDstBuf,&nDst);

113

 

114

        if(nRet == -1)

115

        {

116

            // include all case of errno: E2BIG, EILSEQ, EINVAL

117

            //     E2BIG: There is not sufficient room at *outbuf.

118

            //     EILSEQ: An invalid multibyte sequence has been encountered in the input.

119

            //     EINVAL: An incomplete multibyte sequence has been encountered in the input

120

            // move the left data to the head of szSrcBuf in other to link it with the next data block

121

            memmove(szSrcBuf,pSrcBuf,nSrc);

122

        }

123

 

124

        //wirte data to destination file

125

        fwrite(szDstBuf,sizeof(char),BUFFER_SIZE - nDst,pDstFile);

126

 

127

    }

128

    iconv_close(icv);

129

    fclose(pSrcFile);

130

    fclose(pDstFile);

131

 

132

    printf("conversion complete.\n");

133

 

134

    return;

135

}