判断字符串是否为utf8格式,cpp实现
程序员文章站
2022-03-31 11:09:19
...
基于网上其它编程语言的实现版本,此处改写为 C/C++ 版本。
/**
 * Reports whether a NUL-terminated byte string is well-formed UTF-8 that
 * contains at least one multi-byte (non-ASCII) character.
 *
 * Validation follows the well-formed byte sequences of RFC 3629:
 *   - lead bytes 0xC2..0xDF, 0xE0..0xEF and 0xF0..0xF4 introduce 1, 2 and 3
 *     continuation bytes respectively; every other byte >= 0x80 in lead
 *     position (stray continuation bytes, 0xC0/0xC1, 0xF5..0xFF) is rejected;
 *   - overlong forms, UTF-16 surrogates (U+D800..U+DFFF) and code points above
 *     U+10FFFF are rejected by narrowing the range of the first continuation
 *     byte after 0xE0, 0xED, 0xF0 and 0xF4.
 *
 * @param str NUL-terminated input; a null pointer is treated as "not UTF-8".
 * @return true if the whole string is valid UTF-8 and at least one byte is
 *         >= 0x80; false for invalid input, for pure-ASCII input and for the
 *         empty string (text that is entirely ASCII is deliberately reported
 *         as the default encoding rather than as UTF-8).
 */
bool isUtf8Str(const char *str)
{
    if (str == nullptr)
    {
        return false;
    }

    // Work on unsigned bytes so comparisons do not depend on the signedness
    // of plain char.
    const unsigned char *cursor = reinterpret_cast<const unsigned char *>(str);
    bool sawMultiByteSequence = false;

    while (*cursor != 0)
    {
        const unsigned char lead = *cursor++;
        if (lead < 0x80)
        {
            // ASCII chars, from 0x00-0x7F
            continue;
        }

        // Number of continuation bytes expected after this lead byte, and the
        // allowed range for the FIRST of them (later ones are always 80..BF).
        int continuationCount = 0;
        unsigned char lowest = 0x80;
        unsigned char highest = 0xBF;

        if (lead >= 0xC2 && lead <= 0xDF)
        {
            continuationCount = 1;
        }
        else if (lead >= 0xE0 && lead <= 0xEF)
        {
            continuationCount = 2;
            if (lead == 0xE0)
            {
                lowest = 0xA0; // below this would be an overlong 3-byte form
            }
            else if (lead == 0xED)
            {
                highest = 0x9F; // above this would encode a UTF-16 surrogate
            }
        }
        else if (lead >= 0xF0 && lead <= 0xF4)
        {
            continuationCount = 3;
            if (lead == 0xF0)
            {
                lowest = 0x90; // below this would be an overlong 4-byte form
            }
            else if (lead == 0xF4)
            {
                highest = 0x8F; // above this would exceed U+10FFFF
            }
        }
        else
        {
            // Invalid bits structure for UTF8 encoding rule.
            return false;
        }

        for (int k = 0; k < continuationCount; ++k)
        {
            // The NUL terminator is outside every allowed range, so a
            // truncated sequence fails here before the cursor can move past
            // the end of the string.
            const unsigned char next = *cursor;
            if (next < lowest || next > highest)
            {
                return false;
            }
            ++cursor;
            lowest = 0x80;
            highest = 0xBF;
        }

        sawMultiByteSequence = true;
    }

    // Although UTF8 supports encoding for ASCII chars, an input whose
    // contents are all ASCII is regarded as the default encoding.
    return sawMultiByteSequence;
}
// Checks whether a std::string holds UTF-8 text by handing its NUL-terminated
// buffer to the C-string overload of isUtf8Str. Because the buffer is passed
// as a C string, only the bytes before the first NUL character are examined.
inline bool isUtf8Str(const std::string &str)
{
    const char *const bytes = str.c_str();
    const bool looksLikeUtf8 = isUtf8Str(bytes);
    return looksLikeUtf8;
}
上一篇: Ajax实现Loading效果
下一篇: Qt的QDebug输出信息设置