欢迎您访问程序员文章站!本站旨在为大家提供并分享程序员计算机编程知识。
您现在的位置是: 首页

c语言对utf8字符串的截取

程序员文章站 2024-03-18 08:11:22
...

UTF-8 编码的汉字一般占用 3 个字节,下文默认所有汉字都是常规的 3 字节汉字。

// Returns the byte length (1-4) of the UTF-8 sequence that starts at p.
// The length is taken from the lead byte and accepted only if every
// following byte is a continuation byte (10xxxxxx). Anything malformed
// (stray continuation byte, invalid lead, sequence cut short by the NUL
// terminator) is reported as a single byte so the caller always advances.
// The scan stops at the first non-continuation byte, so it never reads
// past the string's terminating NUL.
static size_t utf8SeqLen(const char *p)
{
    const unsigned char lead = static_cast<unsigned char>(*p);
    size_t expected = 1;
    if ((lead & 0xE0) == 0xC0)
        expected = 2;
    else if ((lead & 0xF0) == 0xE0)
        expected = 3;
    else if ((lead & 0xF8) == 0xF0)
        expected = 4;

    for (size_t k = 1; k < expected; ++k)
    {
        if ((static_cast<unsigned char>(p[k]) & 0xC0) != 0x80)
            return 1;
    }
    return expected;
}

/**
 * Copies a prefix of the UTF-8 string `s` into `store`, using at most `l`
 * bytes and never cutting a multi-byte character in half.
 *
 * Sequence lengths are decoded from the lead byte, so 1-, 2-, 3- and 4-byte
 * characters are all kept intact (not only 3-byte CJK characters).
 *
 * @param s      NUL-terminated source string; may be null (treated as empty).
 * @param store  Destination buffer. Must have room for
 *               min(strlen(s), l) + 1 bytes, and at least 1 byte.
 *               It is always NUL-terminated on return; a null `store`
 *               makes the call a no-op.
 * @param l      Maximum number of bytes to copy, excluding the terminator.
 *               Values <= 0 yield an empty string in `store`.
 *
 * The function produces no console output.
 */
void splitUtf8(const char *s, char* store, int l)
{
    if (!store)
        return;
    // Start from a valid empty result so every early exit leaves `store`
    // safe to pass to strlen/strcat/printf.
    store[0] = '\0';
    if (!s || l <= 0)
        return;

    const size_t srcLen = strlen(s);
    const size_t limit = srcLen < static_cast<size_t>(l) ? srcLen
                                                         : static_cast<size_t>(l);

    // Advance one whole character at a time; stop before the first
    // character that would not fit completely within `limit`.
    size_t copied = 0;
    while (copied < limit)
    {
        const size_t seq = utf8SeqLen(s + copied);
        if (seq > limit - copied)
            break;
        copied += seq;
    }

    memcpy(store, s, copied);
    store[copied] = '\0';
}

int main()
{
    string str = "一二三";
    //string str = "一二三四五六sss";
    char desc[64];
    memset(desc, 0, sizeof(desc));
    splitUtf8(str.c_str(), desc, 12);
    printf("desc =%s]\n", desc);
    strcat(desc, "123");
    printf("desc =%s]\n", desc);
}