libxml2中文支持
程序员文章站
2022-04-09 21:42:50
...
LibXML2 自身已经支持中文编码,只是它的所有 API 处理的数据都是 UTF-8 编码的,所以只要在读入和写入数据时进行相应的转换即可!
代码1是使用Linux下C API进行编码转换;
代码2因为libxml2已经融合了iconv,使用了libxml2的函数来进行编码转换。
/*
compile: gcc -I/usr/include/libxml2 iconv.c -lxml2
input:
test.xml
<?xml version="1.0" encoding="gb2312"?>
<parent>测试</parent>
output:
测试
*/
1) iconv
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <iconv.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
/*
 * Convert the NUL-terminated string `in` from encoding `encFrom` to
 * encoding `encTo` using iconv.
 *
 * Returns a pointer to an internal static 1024-byte buffer holding the
 * converted, terminated text, or NULL when `in` is NULL, the conversion
 * descriptor cannot be opened, or iconv() reports an error (which includes
 * output that does not fit in the buffer).
 *
 * The buffer is overwritten by the next call, so the function is not
 * reentrant and the caller must not free the result.
 */
char *Convert(char *encFrom, char *encTo, const char *in)
{
    static char bufout[1024];
    char *src;
    char *dst;
    size_t lenin;
    size_t lenout;
    size_t ret;
    iconv_t c_pt;

    if (in == NULL) {
        return NULL;
    }

    if ((c_pt = iconv_open(encTo, encFrom)) == (iconv_t)-1) {
        printf("iconv_open false: %s ==> %s\n", encFrom, encTo);
        return NULL;
    }

    /*
     * iconv() updates its byte counters through size_t pointers, so the
     * counters must really be size_t objects; casting the address of an int
     * makes iconv read and write past the variable on LP64 platforms.
     */
    lenin = strlen(in) + 1;     /* +1: convert the terminator as well */
    lenout = sizeof bufout;
    src = (char *)in;           /* iconv only reads through this pointer */
    dst = bufout;

    ret = iconv(c_pt, &src, &lenin, &dst, &lenout);

    /* Release the descriptor on every path, not only on success. */
    iconv_close(c_pt);

    if (ret == (size_t)-1) {
        return NULL;
    }
    return bufout;
}
int main(void) {
xmlDocPtr doc = NULL;
xmlNodePtr cur = NULL;
doc = xmlParseFile("test.xml");
cur = xmlDocGetRootElement(doc);
printf("%s\n", (char *)xmlNodeGetContent(cur));
//printf(%s\n", Convert("utf-8", "gb2312", (char*)xmlNodeGetContent(cur)));
}
2) xmlFindCharEncodingHandler
使用数据类型: xmlCharEncodingHandlerPtr
/*******************************************
* compile: gcc -I/usr/include/libxml2/ convert.c -lxml2 -o convert
* usage: convert utf-8 string or null
* input: ./convert 测试
* output:
[user@localhost test]$ ./convert 测试
ISO-8859-1:
虏芒脢脭
<?xml version="1.0" encoding="ISO-8859-1"?>
<root>测试</root>
**************************************/
#include <libxml/encoding.h>
/**
 * ConvertInput:
 * @in: NUL-terminated string in the encoding named by @encoding
 * @encoding: name of the encoding @in is written in
 *
 * Converts @in into UTF-8 for processing with libxml2 APIs.
 *
 * Returns the converted, NUL-terminated UTF-8 string, allocated with
 * xmlMalloc/xmlRealloc (release it with xmlFree), or NULL in case of error:
 * @in is NULL, no handler exists for @encoding, the handler offers no
 * direct input function, memory runs out, or the input is not fully
 * converted.
 **/
unsigned char *ConvertInput(const char *in, const char *encoding)
{
    unsigned char *out = NULL;
    unsigned char *resized;
    int ret;
    int size;
    int out_size;
    int temp;
    xmlCharEncodingHandlerPtr handler;

    if (in == NULL) {
        return NULL;
    }

    handler = xmlFindCharEncodingHandler(encoding);
    if (!handler) {
        printf("ConvertInput: no encoding handler found for '%s'\n",
               encoding ? encoding : "");
        return NULL;
    }

    /*
     * Handlers that libxml2 backs with iconv may leave the direct input
     * callback unset; calling it unchecked would jump through NULL.
     */
    if (handler->input == NULL) {
        printf("ConvertInput: conversion wasn't successful.\n");
        goto done;
    }

    size = (int) strlen(in) + 1;
    /* Room for every input byte expanding to two UTF-8 bytes. */
    out_size = size * 2 - 1;
    out = (unsigned char *)xmlMalloc((size_t) out_size);
    if (out == NULL) {
        printf("ConvertInput: no mem\n");
        goto done;
    }

    /* temp: in = bytes offered (without the NUL), out = bytes consumed. */
    temp = size - 1;
    ret = handler->input(out, &out_size, (const unsigned char *)in, &temp);
    if ((ret < 0) || (temp != size - 1)) {
        if (ret < 0) {
            printf("ConvertInput: conversion wasn't successful.\n");
        } else {
            printf("ConvertInput: conversion wasn't successful. converted: %i octets.\n", temp);
        }
        xmlFree(out);
        out = NULL;
        goto done;
    }

    /*
     * Resize to the exact length plus terminator. Keep the old pointer
     * until the reallocation is known to have succeeded so a failure
     * neither leaks the buffer nor dereferences NULL.
     */
    resized = (unsigned char *)xmlRealloc(out, out_size + 1);
    if (resized == NULL) {
        printf("ConvertInput: no mem\n");
        xmlFree(out);
        out = NULL;
        goto done;
    }
    out = resized;
    out[out_size] = 0; /* null terminating out */

done:
    /* Release the handler obtained from xmlFindCharEncodingHandler. */
    xmlCharEncCloseFunc(handler);
    return out;
}
/*
 * Convert the first command-line argument to UTF-8 with ConvertInput
 * (treating it as text in `encoding`), print the result, then build a
 * one-element document <root>...</root> from it and write that document
 * to standard output in `encoding`.
 *
 * Returns 0 on success and 1 on a usage error or any failure.
 */
int main(int argc, char **argv)
{
    unsigned char *out;
    xmlDocPtr doc;
    xmlNodePtr rootnode;
    const char *encoding = "ISO-8859-1"; /* utf-8, ISO-8859-1 */
    int rc = 1;

    if (argc <= 1) {
        printf("Usage: %s content\n", argv[0]);
        return 1;
    }

    out = ConvertInput(argv[1], encoding);
    if (out == NULL) {
        /* ConvertInput has already reported why the conversion failed. */
        return 1;
    }
    printf("%s:%s\n", encoding, out);

    doc = xmlNewDoc((const xmlChar *)"1.0");
    if (doc == NULL) {
        xmlFree(out);
        return 1;
    }

    rootnode = xmlNewDocNode(doc, NULL, (const xmlChar *)"root", out);
    /* The node holds its own copy of the text, so the buffer can go. */
    xmlFree(out);

    if (rootnode != NULL) {
        xmlDocSetRootElement(doc, rootnode);
        /* "-" makes libxml2 write to stdout, matching the sample output. */
        if (xmlSaveFormatFileEnc("-", doc, encoding, 1) >= 0) {
            rc = 0;
        }
    }

    xmlFreeDoc(doc);
    return rc;
}
上一篇: 利用javascript如何随机生成一定位数的密码
下一篇: linux学习 - 基本命令篇