文本的编码转换ANSI、UNICODE、UTF8

程序员文章站 2024-03-24 11:55:04

...

Windows平台下的编码转换类
ANSI、UNICODE、UTF8 三种编码之间的互相转换

Charset.h

#pragma once
#include <windows.h>       // base Win32 types (BOOL, CP_ACP, ...); must precede stringapiset.h
#include <stringapiset.h>
#include <stdlib.h>
#include <tchar.h>
#include <string>
class Charset
{
public:
    static std::wstring ANSIToUnicode(const char* ansi)
    {
        int textlen = MultiByteToWideChar(CP_ACP, 0, ansi, -1, NULL, 0);
        wchar_t * pBuffer = new wchar_t[textlen + 1];
        memset(pBuffer, 0, (textlen + 1) * sizeof(wchar_t));
        MultiByteToWideChar(CP_ACP, 0, ansi, -1, pBuffer, textlen);
        std::wstring unicode(pBuffer);
        delete[] pBuffer;
        return unicode;
    }

    static BOOL ANSIToUnicode(wchar_t* dst, const int size, const char* ansi)
    {
        BOOL bResult{ FALSE };
        int textlen = 0;
        textlen = MultiByteToWideChar(CP_ACP, 0, ansi, -1, NULL, 0);
        if (size > textlen)
        {
            // memset(dst, 0, (textlen + 1) * sizeof(wchar_t));
            if (0 != MultiByteToWideChar(CP_ACP, 0, ansi, -1, dst, textlen))
            {
                bResult = TRUE;
            }
            dst[textlen] = 0;
        }
        return bResult;
    }

    static std::string UnicodeToANSI(const wchar_t *unicode)
    {
        int textlen = WideCharToMultiByte(CP_ACP, 0, unicode, -1, NULL, 0, NULL, NULL);
        char * pBuffer = new char[textlen + 1];
        memset(pBuffer, 0, sizeof(char) * (textlen + 1));
        WideCharToMultiByte(CP_ACP, 0, unicode, -1, pBuffer, textlen, NULL, NULL);
        std::string ansi{ pBuffer };
        delete[] pBuffer;
        return ansi;
    }

    static BOOL UnicodeToANSI(char* dst, const int size, const wchar_t *unicode)
    {
        BOOL bResult{ FALSE };
        int textlen = WideCharToMultiByte(CP_ACP, 0, unicode, -1, NULL, 0, NULL, NULL);
        if (size > textlen)
        {
            //memset(dst, 0, sizeof(char) * (textlen + 1));
            if (0 != WideCharToMultiByte(CP_ACP, 0, unicode, -1, dst, textlen, NULL, NULL))
            {
                bResult = TRUE;
            }
            dst[textlen] = 0;
        }
        return bResult;
    }

    static std::wstring UTF8ToUnicode(const char* utf8)
    {
        int textlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
        wchar_t * pBuffer = new wchar_t[textlen + 1];
        memset(pBuffer, 0, (textlen + 1) * sizeof(wchar_t));
        MultiByteToWideChar(CP_UTF8, 0, utf8, -1, (LPWSTR)pBuffer, textlen);
        std::wstring unicode{ pBuffer };
        delete[]pBuffer;
        return unicode;
    }

    static std::wstring UTF8ToUnicode(const wchar_t* utf8)
    {
        std::string u = UnicodeToANSI(utf8);
        int textlen = MultiByteToWideChar(CP_UTF8, 0, u.c_str(), -1, NULL, 0);
        wchar_t * pBuffer = new wchar_t[textlen + 1];
        memset(pBuffer, 0, (textlen + 1) * sizeof(wchar_t));
        MultiByteToWideChar(CP_UTF8, 0, u.c_str(), -1, (LPWSTR)pBuffer, textlen);
        std::wstring unicode{ pBuffer };
        delete[]pBuffer;
        return unicode;
    }

    static BOOL UTF8ToUnicode(wchar_t*dst, const int size, const char* utf8)
    {
        BOOL bResult{ FALSE };
        int textlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
        if (size > textlen)
        {
            // memset(dst, 0, (textlen + 1) * sizeof(wchar_t));
            if (0 != MultiByteToWideChar(CP_UTF8, 0, utf8, -1, (LPWSTR)dst, textlen))
            {
                bResult = TRUE;
            }
            dst[textlen] = 0;
        }
        return bResult;
    }

    static std::string UnicodeToUTF8(const wchar_t *unicode)
    {
        int textlen = WideCharToMultiByte(CP_UTF8, 0, unicode, -1, NULL, 0, NULL, NULL);
        char * pBuffer = new char[textlen + 1];
        memset(pBuffer, 0, sizeof(char) * (textlen + 1));
        WideCharToMultiByte(CP_UTF8, 0, unicode, -1, pBuffer, textlen, NULL, NULL);
        std::string utf8{ pBuffer };
        delete[] pBuffer;
        return utf8;
    }

    static BOOL UnicodeToUTF8(char* dst, const int size, const wchar_t *unicode)
    {
        BOOL bResult{ FALSE };
        int textlen = WideCharToMultiByte(CP_UTF8, 0, unicode, -1, NULL, 0, NULL, NULL);
        if (size > textlen)
        {
            // memset(dst, 0, sizeof(char) * (textlen + 1));
            if (0 != WideCharToMultiByte(CP_UTF8, 0, unicode, -1, dst, textlen, NULL, NULL))
            {
                bResult = TRUE;
            }
            dst[textlen] = 0;
        }
        return bResult;
    }
    // char utf8 -> unicode -> ansi
    static std::string UTF8ToANSI(const char* utf8)
    {
        std::wstring unicode = UTF8ToUnicode(utf8);
        std::string ansi = UnicodeToANSI(unicode.c_str());
        return ansi;
    }

#ifdef _UNICODE
    // wchar utf8 -> char utf8 -> unicode
    static std::wstring UTF8ToLocal(const wchar_t* utf8)
    {
        std::wstring unicode = UTF8ToUnicode(utf8);
        return unicode;
    }
#else
    // char utf8 -> unicode -> ansi
    static std::string UTF8ToLocal(const char* utf8)
    {
        std::string ansi = UTF8ToANSI(utf8);
        return ansi;
    }
#endif
};

上一篇： Animation中的算法

下一篇： ANSI，UNICODE，UTF8之间的相互转换

文本的编码转换ANSI、UNICODE、UTF8

文本的编码转换ANSI、UNICODE、UTF8

ANSI，UNICODE，UTF8之间的相互转换

go语言进行文件编码格式转换中（GBK 转UTF8），可能出错部分文件出现乱码情况的解决。

C++中ANSI、Unicode、UTF8字符串之间的互转

PHP实现Unicode编码相互转换的方法示例

php自定义函数实现汉字转换utf8编码的方法

python实现unicode转中文及转换默认编码的方法

PHP数组编码gbk与utf8互相转换的两种方法

python实现unicode转中文及转换默认编码的方法

unicode中的‘\xa0’字符在转换成gbk编码时会出现问题，gbk无法转换'\xa0'字符。

文本的编码转换ANSI、UNICODE、UTF8

文本的编码转换ANSI、UNICODE、UTF8

ANSI，UNICODE，UTF8之间的相互转换

go语言进行文件编码格式转换中（GBK 转UTF8），可能出错部分文件出现乱码情况的解决。

C++中ANSI、Unicode、UTF8字符串之间的互转

PHP实现Unicode编码相互转换的方法示例

php自定义函数实现汉字转换utf8编码的方法

python实现unicode转中文及转换默认编码的方法

PHP数组编码gbk与utf8互相转换的两种方法

python实现unicode转中文及转换默认编码的方法

unicode中的‘\xa0’字符在转换成gbk编码时会出现问题，gbk无法转换'\xa0'字符。

go语言进行文件编码格式转换中（GBK 转UTF8），可能出错部分文件出现乱码情况的解决。