写高并发程序时慎用strncpy和sprintf
分享一下最近做程序优化的一点小心得:在写高并发交易代码时要谨慎使用strncpy和sprintf。
下面详细介绍一下这样说的原因及建议实践:
1 慎用strncpy因为它的副作用极大
我们平时使用strncpy防止字符串拷贝时溢出,常常这样写
char buf[1024] = {0}; char str[16] = "hello"; strncpy(buf, sizefo(buf), str);
这样写当然没问题,但有些人不知道的是:strncpy一行代码执行时是往buf写了sizeof(buf) = 1024个字节,而不是直观以为的strlen(str) + 1 = 6个字符。
也就是说我们为了复制6个字符却写了1024个字节,多了不少额外消耗。如果这个函数被频繁调用,会导致系统性能出现不少损失。
因为调用strncpy(dest, n, str)时,函数首先将字符从源缓冲区str逐个复制到目标缓冲区dest,直到拷贝了n碰上\0。
紧接着,strncpy函数会往buf填充\0字符直到写满n个字符。
所以我才会说上面的代码strncpy才会写了1024个字节。
可以做一个小实验:
看上面代码及输出结果,我们可以知道在执行strncpy之前dest是用'1'填充的,但在执行strncpy后,前面几个字符变成hello,后面的字符全变成\0;
我个人的解决方法是写一个宏专用于往字符数组拷贝的,与大家分享一下,抛砖引玉。
// 静态断言 从vc拷贝过来(_static_assert) 稍微修改了一下 // 原来是typedef char __static_assert_t[ (expr) ] // 现在是typedef char __static_assert_t[ (expr) - 1 ] // 原因是gcc支持0字符数组 //todo: 这里在win上编译有警告 有待优化 另外在linux宏好像不起作用 原因待查。暂时只有在win编译代码可以用 #ifndef _static_assert_rcc # ifdef __gnuc__ # define _static_assert_rcc(expr) typedef char __static_assert_t[ (expr) - 1 ] # else # define _static_assert_rcc(expr) do { typedef char __static_assert_t[ (expr) ]; } while (0) # endif #endif //将src复制到字符数组arr 保证不会越界并且末尾肯定会加\0 //_static_assert_rcc这里作用是防止有人传字符串指针进来 #define strncpy2arr(arr, src) do { \ char *dest_ = arr; \ size_t n = strnlen(src, sizeof(arr) - 1); \ _static_assert_rcc(sizeof(arr) != sizeof(char *)); \ memcpy(dest_, src, n); \ dest_[n] = '\0'; \ } while (0) #ifdef win32 int main(int argc, char *argv[]) { char dest[16]; char *src = "hello 222"; int i = 0; for (i = 0; i < sizeof(dest); ++i) { dest[i] = '1'; } printf("before strncpy\n"); for (i = 0; i < sizeof(dest); ++i) { printf("%d ", dest[i]); } printf("\n"); strncpy2arr(dest, src); printf("after strncpy\n"); for (i = 0; i < sizeof(dest); ++i) { printf("%d ", dest[i]); } printf("\n"); strncpy(dest, src, sizeof(dest)); printf("after strncpy\n"); for (i = 0; i < sizeof(dest); ++i) { printf("%d ", dest[i]); } printf("\n"); return 0; //return compressperformancetestmain(argc, argv); } #endif
2 慎用sprintf,因为它的效率比你想象的低
之前我一直没注意到sprintf效率低的问题,直到有一次使用callgrind对程序进行性能分析时,发现有相当大的资源消耗在sprintf上面,我才有所警觉。
为此,我写了一点测试代码,对常用的函数做了一下基准测试,结果如下:
测试内容 |
耗时(us) |
for循环赋值40亿次 |
13023889 |
调用简单函数40亿次 |
16967986 |
调用memset函数4亿次 (256个字节) |
6932237 |
调用strcpy函数4亿次 (12个字节) |
3239218 |
调用memcpy函数4亿次 (12个字节) |
3239201 |
调用strcmp函数4亿次 (12个字节) |
2500568 |
调用memcmp函数4亿次 (12个字节) |
2668378 |
调用strcpy函数4亿次 (74个字节) |
4951085 |
调用memcpy函数4亿次 (74个字节) |
4950890 |
调用strcmp函数4亿次 (74个字节) |
5551391 |
调用memcmp函数4亿次 (74个字节) |
3840448 |
调用sprintf函数8千万次 (约27个字节) |
21398106 |
调用scanf函数8千万次 (约27个字节) |
36158749 |
调用fwrite函数8千万次 |
5913579 |
调用fprintf函数8千万次 |
24806837 |
调用fread函数8千万次 |
3182704 |
调用fscanf函数8千万次 |
18739442 |
调用writelog函数20万次 (15个字节) |
4873746 |
调用writelog函数20万次 (47个字节) |
4846449 |
调用writelog函数20万次 (94个字节) |
4950448 |
|
|
1us = 1000ms
图示:scanf/printf系列函数耗时是其它常见字符串操作函数的10倍以上,甚至比io操作还耗时
测试代码见这里:
#define test_log_inf null, __file__, __line__ #ifdef win32 #define writelog lazy_log_output #define log_error null, __file__, __line__ #define log_key null, __file__, __line__ #define sleep(n) sleep(100 * n) int gettimeofday(struct timeval *tv, struct timezone *tz) { systemtime wtm; getlocaltime(&wtm); tv->tv_sec = (long)(wtm.wdayofweek * 24 * 3600 + wtm.whour * 3600 + wtm.wminute * 60 + wtm.wsecond); tv->tv_usec = wtm.wmilliseconds * 1000; return 0; } void initlog(const char *logname) { } #endif struct timeval begtimes = {0}, endtims = {0}; void begintimer() { gettimeofday(&begtimes, null); } int g_nsleepsec = 10; void stoptimer(char *userdata, const char *file, int fileno, int nsleepflag) { size_t totaltrantimes; gettimeofday(&endtims, null); totaltrantimes = (size_t)(endtims.tv_sec - begtimes.tv_sec) * 1000000 + (endtims.tv_usec - begtimes.tv_usec); #ifdef win32 writelog(userdata, file, fileno, "== == end == == == totaltrantimes %lu us", (unsigned long) totaltrantimes); #else writelog(2, file, fileno, "== == end == == == totaltrantimes %lu us", (unsigned long) totaltrantimes); #endif if (nsleepflag) { writelog(log_error, "sleep"); sleep(g_nsleepsec); } else { begintimer(); } } void performancetestlog(char *userdata, const char *file, int fileno, const char *log) { stoptimer(userdata, file, fileno, 1); #ifdef win32 writelog(userdata, file, fileno, "== == beg == == == %s", log); #else writelog(2, file, fileno, "== == beg == == == %s", log); #endif begintimer(); } int func(int argc, char *argv[], char *tmp) { tmp[argc] = '1'; return 0; } //基准测试 int basetest(unsigned long ntimes) { unsigned long i = 0; char tmp[256], t1[64], t2[64], t3[64]; int ntmp; const char *strwriten; ntimes *= 100000; //40亿 writelog(log_key, "basetest %lu", ntimes); begintimer(); performancetestlog(test_log_inf, "test for"); for (i = 0; i < ntimes; ++i) { i = i; } performancetestlog(test_log_inf, "test call func"); for (i = 0; i < ntimes; ++i) { func(1, null, tmp); } stoptimer(test_log_inf, 0); ntimes /= 10; //4亿 writelog(log_key, "basetest %lu", ntimes); performancetestlog(test_log_inf, "test memset"); for (i = 0; i < ntimes; ++i) { memset(tmp, 0, sizeof(tmp)); } performancetestlog(test_log_inf, "test strcpy"); for (i = 0; i < ntimes; ++i) { strcpy(tmp, "test strcpy"); } performancetestlog(test_log_inf, "test memcpy"); for (i = 0; i < ntimes; ++i) { memcpy(tmp, "test strcpy", sizeof("test strcpy")); } performancetestlog(test_log_inf, "test strcmp"); for (i = 0; i < ntimes; ++i) { if (0 == strcmp(tmp, "test strcpy")) { i = i; } } performancetestlog(test_log_inf, "test memcmp"); for (i = 0; i < ntimes; ++i) { if (0 == memcmp(tmp, "test strcpy", sizeof("test strcpy"))) { i = i; } } performancetestlog(test_log_inf, "test strcpy1"); for (i = 0; i < ntimes; ++i) { strcpy(tmp, "test strcpy test strcpy test strcpy test strcpy test strcpytest strcpy"); } performancetestlog(test_log_inf, "test memcpy1"); for (i = 0; i < ntimes; ++i) { memcpy(tmp, "test strcpy test strcpy test strcpy test strcpy test strcpytest strcpy", sizeof("test strcpy test strcpy test strcpy test strcpy test strcpytest strcpy")); } performancetestlog(test_log_inf, "test strcmp1"); for (i = 0; i < ntimes; ++i) { if (0 == strcmp(tmp, "test strcpy test strcpy test strcpy test strcpy test strcpytest strcpy")) { i = i; } } performancetestlog(test_log_inf, "test memcmp1"); for (i = 0; i < ntimes; ++i) { if (0 == memcmp(tmp, "test strcpy test strcpy test strcpy test strcpy test strcpytest strcpy", sizeof("test strcpy test strcpy test strcpy test strcpy test strcpytest strcpy"))) { i = i; } } stoptimer(test_log_inf, 0); ntimes /= 5; //8千万 writelog(log_key, "basetest %lu", ntimes); performancetestlog(test_log_inf, "test sprintf"); for (i = 0; i < ntimes; ++i) { sprintf(tmp, "thiis %s testing %d", "sprintf", i); } performancetestlog(test_log_inf, "test sscanf"); for (i = 0; i < ntimes; ++i) { sscanf(tmp, "%s %s %s %d", t1, t2, t3, &ntmp); } { file *fp; int nstr; performancetestlog(test_log_inf, "fopen"); fp = fopen("performancetest.txt", "w"); strwriten = "this is testing write\n"; nstr = strlen(strwriten); performancetestlog(test_log_inf, "test write file"); for (i = 0; i < ntimes; ++i) { fwrite(strwriten, 1, nstr, fp); } performancetestlog(test_log_inf, "fflush"); fflush(fp); performancetestlog(test_log_inf, "test fprintf file"); for (i = 0; i < ntimes; ++i) { //太过简单的fprintf好像会被自动优化成fwrite,即使没开优化选项 //例如 fprintf(fp, "%s", "strwriten"); fprintf(fp, "%s %d\n", "strwriten", i); } performancetestlog(test_log_inf, "fclose"); fclose(fp); } { file *fp; int nstr; performancetestlog(test_log_inf, "fopen 1"); fp = fopen("performancetest.txt", "r"); nstr = strlen(strwriten); performancetestlog(test_log_inf, "test read file"); for (i = 0; i < ntimes; ++i) { fread(tmp, 1, nstr, fp); tmp[nstr] = '\0'; } performancetestlog(test_log_inf, "test fscanf file"); tmp[0] = t1[0] = '\0'; for (i = 0; i < ntimes; ++i) { fscanf(fp, "%s %s", tmp, t1); } performancetestlog(test_log_inf, "fclose"); fclose(fp); } fclose(fopen("performancetest.txt", "w")); ntimes /= 400; //20万 writelog(log_key, "basetest %lu", ntimes); performancetestlog(test_log_inf, "writelog 1"); for (i = 0; i < ntimes; ++i) { writelog(log_error, "this is loging"); } performancetestlog(test_log_inf, "writelog 2"); for (i = 0; i < ntimes; ++i) { writelog(log_error, "this is loging this is loging this is loging"); } performancetestlog(test_log_inf, "writelog 3"); for (i = 0; i < ntimes; ++i) { writelog(log_error, "this is loging this is loging this is loging this is loging this is loging this is loging"); } stoptimer(test_log_inf, 0); return 0; }
从基准测试结果可以知道,sprintf系列函数效率是比较低的,是我们常见的字符串操作函数的1/10以下。
我个人的解决方案是sprintf该用还是用,但有些情况不是特别必要用的情况,用自己写一些小函数代替。例如下面这个宏是用来代替sprintf(buf, "%02d", i)的
//sprintf比较慢 这里需要写一些简单的字符串组装函数 //这个是代替%02d的(但不会添加\0结尾)顾名思义,传入的值需要保证0 <= vallue < 100 //再次提醒注意,这里为了方便调用,不会添加\0! 不会添加\0! 不会添加\0! #define itoalt100ge0(value, buff_output) do \ {\ int value_ = (int)(value);\ char *buff_output_ = (buff_output);\ if ((value_) >= 10) { int ndigit_ = value_ / 10; buff_output_[0] = '0' + ndigit_; buff_output_[1] = '0' + (value_ - ndigit_ * 10); }\ else { buff_output_[0] = '0'; buff_output_[1] = '0' + (value_); } \ } while (0)
总结一下就是:高并发交易需要慎用strncpy和sprintf,因为不恰当使用它们可能会成为程序性能瓶颈。
如果大家有啥想法,欢迎分享,我是黄词辉,一个程序员 ^_^