c语言对utf8字符串的截取
程序员文章站
2024-03-18 08:11:22
...
UTF-8 编码的汉字一般占用 3 个字节，下面的代码默认所有汉字都是常规的 3 字节汉字。
/*
 * Return the number of bytes a UTF-8 sequence occupies, judged from its
 * lead byte alone. Bytes that cannot start a sequence (stray continuation
 * bytes 0x80-0xBF, or 0xF8 and above) are reported as length 1 so the
 * caller always makes forward progress.
 */
static size_t utf8_seq_len(unsigned char lead)
{
    if (lead < 0x80) {
        return 1;                       /* 0xxxxxxx: ASCII */
    }
    if ((lead & 0xE0) == 0xC0) {
        return 2;                       /* 110xxxxx */
    }
    if ((lead & 0xF0) == 0xE0) {
        return 3;                       /* 1110xxxx: most CJK characters */
    }
    if ((lead & 0xF8) == 0xF0) {
        return 4;                       /* 11110xxx */
    }
    return 1;
}

/*
 * Copy the longest prefix of the UTF-8 string `s` that is at most `l`
 * bytes long and does not cut a multi-byte character in half.
 *
 * s     - NUL-terminated source string (UTF-8).
 * store - destination buffer; it must have room for min(l, strlen(s)) + 1
 *         bytes, and at least 1 byte in every case.
 * l     - maximum number of bytes (not characters) to copy.
 *
 * `store` is always left NUL-terminated: it becomes the empty string when
 * `s` is NULL or empty, when `l <= 0`, or when the first character alone
 * does not fit in `l` bytes. A NULL `store` is ignored.
 *
 * The sequence length is taken from each lead byte, so 1- to 4-byte
 * characters are all handled. A sequence that is truncated or malformed
 * (fewer continuation bytes than its lead byte announces) is copied as the
 * bytes actually present; the input is not otherwise validated.
 */
void splitUtf8(const char *s, char* store, int l)
{
    size_t limit;
    size_t end = 0;     /* bytes of s accepted so far; always on a character boundary */

    if (store == NULL) {
        return;
    }
    store[0] = '\0';
    if (s == NULL || l <= 0) {
        return;
    }
    limit = (size_t)l;

    while (s[end] != '\0') {
        size_t need = utf8_seq_len((unsigned char)s[end]);
        size_t have = 1;

        /*
         * Count the continuation bytes (10xxxxxx) that really follow. The
         * scan stops at the first byte that is not one, which includes the
         * terminating NUL, so it never reads past the end of the string.
         */
        while (have < need && ((unsigned char)s[end + have] & 0xC0) == 0x80) {
            have++;
        }

        if (end + have > limit) {
            break;      /* the next character would exceed the byte budget */
        }
        end += have;
    }

    memcpy(store, s, end);
    store[end] = '\0';
}
/*
 * Demo driver: truncates a short Chinese string with splitUtf8 and prints
 * the result, then appends ASCII text and prints again to show that the
 * destination buffer is a properly terminated C string.
 */
int main()
{
    /* Three CJK characters; 9 bytes when this source file is UTF-8 encoded. */
    const char *text = "一二三";
    char desc[64] = {0};

    /* Byte budget of 12 is larger than the text, so nothing is cut off. */
    splitUtf8(text, desc, 12);
    printf("desc =%s]\n", desc);

    strcat(desc, "123");
    printf("desc =%s]\n", desc);

    return 0;
}