C基础 带你手写 redis sds
前言 - simple dynamic strings
antirez 想统一 redis,disque,hiredis 项目中 sds 代码, 因此构建了这个项目
. 更多介绍的背景知识, 可以阅读 readme.md.
sds 项目是 c 字符串数据结构在实际环境中一种取舍实现, 库本身是非线程安全的. 下面
来带大家手写相关代码, 透彻了解这个库的意图(antirez 注释的很棒).
#define sds_max_prealloc (1024*1024) /* note: sdshdr5 is never used, we just access the flags byte directly. * however is here to document the layout of type 5 sds strings. */ struct __attribute__ ((__packed__)) sdshdr5 { unsigned char flags; /* 3 lsb of type, and 5 msb of string length */ char buf[]; }; struct __attribute__ ((__packed__)) sdshdr8 { uint8_t len; /* used */ uint8_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */ char buf[]; }; struct __attribute__ ((__packed__)) sdshdr16 { uint16_t len; /* used */ uint16_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */ char buf[]; }; struct __attribute__ ((__packed__)) sdshdr32 { uint32_t len; /* used */ uint32_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */ char buf[]; }; struct __attribute__ ((__packed__)) sdshdr64 { uint64_t len; /* used */ uint64_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */ char buf[]; }; #define sds_type_5 0 #define sds_type_8 1 #define sds_type_16 2 #define sds_type_32 3 #define sds_type_64 4 #define sds_type_mask 7 #define sds_type_bits 3 #define sds_type_5_len(f) ((f)>>sds_type_bits) #define sds_hdr(t, s) ((struct sdshdr##t *)((s) - (sizeof(struct sdshdr##t)))) #define sds_hdr_var(t, s) struct sdshdr##t * sh = sds_hdr(t, s)
可以先分析 sdshdr5 sdshdr8 sdshdr16 sdshdr32 sdshdr64, 感受作者的意图.
首先这几个基本结构, 都有 len, alloc, flags, buf 4 个字段, 是不是. 有的同学会问, sdshdr5
中没有 alloc 和 len 字段呀, 这个啊 sdshdr5 结构比较特殊, 其 alloc 和 len 都隐含在 flags
中, 三者复用一个字段. 可以从函数宏 sds_type_5_len(f), 可以看出来. 因而 sdshdr5 表达的字符
串长度和字符串容量默认相同. 再考究一下 __attribute__ ((__packed__)) 意图(告诉编译器取消结
构在编译过程中的优化对齐, 按照实际占用字节数进行对齐). 对于取消结构内存编译对齐优化, 我的考究有
两点, 1 节约内存, 2 内存可移植变强.
随后多数是流水线代码, 非常好理解. 例如有段这样关系的代码 sdsalloc() = sdsavail() + sdslen()
inline size_t sdslen(const sds s) { unsigned char flags = s[-1]; switch (flags & sds_type_mask) { case sds_type_5 : return sds_type_5_len(flags); case sds_type_8 : return sds_hdr(8 , s)->len; case sds_type_16: return sds_hdr(16, s)->len; case sds_type_32: return sds_hdr(32, s)->len; case sds_type_64: return sds_hdr(64, s)->len; } return 0; } inline size_t sdsavail(const sds s) { unsigned char flags = s[-1]; switch (flags & sds_type_mask) { case sds_type_8 : { sds_hdr_var(8 , s); return sh->alloc - sh->len; } case sds_type_16: { sds_hdr_var(16, s); return sh->alloc - sh->len; } case sds_type_32: { sds_hdr_var(32, s); return sh->alloc - sh->len; } case sds_type_64: { sds_hdr_var(64, s); return sh->alloc - sh->len; } default: return 0; } } /* sdsalloc() = sdsavail() + sdslen() */ inline size_t sdsalloc(const sds s) { unsigned char flags = s[-1]; switch (flags & sds_type_mask) { case sds_type_5 : return sds_type_5_len(flags); case sds_type_8 : return sds_hdr(8 , s)->alloc; case sds_type_16: return sds_hdr(16, s)->alloc; case sds_type_32: return sds_hdr(32, s)->alloc; case sds_type_64: return sds_hdr(64, s)->alloc; } return 0; }
是不是一下就理解了 sdsalloc(), sdsavail(), sdslen() 是干什么的呢 ❤
正文 - 代码抽样讲解
1. 重复代码可以修的更好
/* helper for sdscatlonglong() doing the actual number -> string * conversion. 's' must point to a string with room for at least * sds_llstr_size bytes. * * the function returns the length of the null-terminated string * representation stored at 's'. */ #define sds_llstr_size 21 int sdsll2str(char *s, long long value) { char *p, aux; unsigned long long v; size_t l; /* generate the string representation, this method produces * an reversed string. */ v = (value < 0) ? -value : value; p = s; do { *p++ = '0'+(v%10); v /= 10; } while(v); if (value < 0) *p++ = '-'; /* compute length and add null term. */ l = p-s; *p = '\0'; /* reverse the string. */ p--; while(s < p) { aux = *s; *s = *p; *p = aux; s++; p--; } return l; } /* identical sdsll2str(), but for unsigned long long type. */ int sdsull2str(char *s, unsigned long long v) { char *p, aux; size_t l; /* generate the string representation, this method produces * an reversed string. */ p = s; do { *p++ = '0'+(v%10); v /= 10; } while(v); /* compute length and add null term. */ l = p-s; *p = '\0'; /* reverse the string. */ p--; while(s < p) { aux = *s; *s = *p; *p = aux; s++; p--; } return l; }
long long or unsigned long long convert to char * 中, 功能很简单. 函数结尾处代码
重复不少, 是可以重构复用的.
inline int sdsreverse(char * s, char * p) { /* compute length and add null term. */ size_t l = p - s; *p = '\0'; p--; while (s < p) { char aux = *s; *s = *p; *p = aux; s++; p--; } return (int)l; } /* helper for sdscatlonglong() doing the actual number -> string * conversion. 's' must point to a string with room for at least * sds_llstr_size bytes. * * the function returns the length of the null-terminated string * representation stored at 's'. */ #define sds_llstr_size 21 int sdsll2str(char * s, long long value) { char * p; unsigned long long v; /* generate the string representation, this method produces * an reversed string. */ v = (value < 0) ? -value : value; p = s; do { *p++ = '0' + (v % 10); } while ((v /= 10)); if (value < 0) *p++ = '-'; return sdsreverse(s, p); } /* identical sdsll2str(), but for unsigned long long type. */ int sdsull2str(char * s, unsigned long long v) { /* generate the string representation, this method produces * an reversed string. */ char * p = s; do { *p++ = '0' + (v % 10); } while ((v /= 10)); return sdsreverse(s, p); }
是不是显得老学究气质凸显了不少.
2. vsnprintf 用的太硬邦邦
/* like sdscatprintf() but gets va_list instead of being variadic. */ sds sdscatvprintf(sds s, const char *fmt, va_list ap) { va_list cpy; char staticbuf[1024], *buf = staticbuf, *t; size_t buflen = strlen(fmt)*2; /* we try to start using a static buffer for speed. * if not possible we revert to heap allocation. */ if (buflen > sizeof(staticbuf)) { buf = s_malloc(buflen); if (buf == null) return null; } else { buflen = sizeof(staticbuf); } /* try with buffers two times bigger every time we fail to * fit the string in the current buffer size. */ while(1) { buf[buflen-2] = '\0'; va_copy(cpy,ap); vsnprintf(buf, buflen, fmt, cpy); va_end(cpy); if (buf[buflen-2] != '\0') { if (buf != staticbuf) s_free(buf); buflen *= 2; buf = s_malloc(buflen); if (buf == null) return null; continue; } break; } /* finally concat the obtained string to the sds string and return it. */ t = sdscat(s, buf); if (buf != staticbuf) s_free(buf); return t; }
由 while vsnprintf 来扩容, 这实在太暴力. 更好操作可以观看 man vsnprintf/ 这里可以看我的提交
/* like sdscatprintf() but gets va_list instead of being variadic. */ sds sdscatvprintf(sds s, const char * fmt, va_list ap) { int size; va_list cpy; char staticbuf[1024], * buf, * t; /* determine required size */ va_copy(cpy, ap); size = vsnprintf(null, 0, fmt, cpy); va_end(cpy); if (size < 0) return null; /* for '\0' */ size++; /* we try to start using a static buffer for speed. * if not possible we revert to heap allocation. */ if (size > sizeof(staticbuf)) { buf = s_malloc(size); if (buf == null) return null; } else { buf = staticbuf; } va_copy(cpy, ap); size = vsnprintf(buf, size, fmt, cpy); va_end(ap); if (size < 0) { if (buf != staticbuf) s_free(buf); return null; } /* finally concat the obtained string to the sds string and return it. */ t = sdscat(s, buf); if (buf != staticbuf) s_free(buf); return t; }
别问, 问就是上天.
3. sdssplitargs 难以让人见色起意
/* helper function for sdssplitargs() that returns non zero if 'c' * is a valid hex digit. */ int is_hex_digit(char c) { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'a' && c <= 'f'); } /* helper function for sdssplitargs() that converts a hex digit into an * integer from 0 to 15 */ int hex_digit_to_int(char c) { switch(c) { case '0': return 0; case '1': return 1; case '2': return 2; case '3': return 3; case '4': return 4; case '5': return 5; case '6': return 6; case '7': return 7; case '8': return 8; case '9': return 9; case 'a': case 'a': return 10; case 'b': case 'b': return 11; case 'c': case 'c': return 12; case 'd': case 'd': return 13; case 'e': case 'e': return 14; case 'f': case 'f': return 15; default: return 0; } } /* split a line into arguments, where every argument can be in the * following programming-language repl-alike form: * * foo bar "newline are supported\n" and "\xff\x00otherstuff" * * the number of arguments is stored into *argc, and an array * of sds is returned. * * the caller should free the resulting array of sds strings with * sdsfreesplitres(). * * note that sdscatrepr() is able to convert back a string into * a quoted string in the same format sdssplitargs() is able to parse. * * the function returns the allocated tokens on success, even when the * input string is empty, or null if the input contains unbalanced * quotes or closed quotes followed by non space characters * as in: "foo"bar or "foo' */ sds *sdssplitargs(const char *line, int *argc) { const char *p = line; char *current = null; char **vector = null; *argc = 0; while(1) { /* skip blanks */ while(*p && isspace(*p)) p++; if (*p) { /* get a token */ int inq=0; /* set to 1 if we are in "quotes" */ int insq=0; /* set to 1 if we are in 'single quotes' */ int done=0; if (current == null) current = sdsempty(); while(!done) { if (inq) { if (*p == '\\' && *(p+1) == 'x' && is_hex_digit(*(p+2)) && is_hex_digit(*(p+3))) { unsigned char byte; byte = (hex_digit_to_int(*(p+2))*16)+ hex_digit_to_int(*(p+3)); current = sdscatlen(current,(char*)&byte,1); p += 3; } else if (*p == '\\' && *(p+1)) { char c; p++; switch(*p) { case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'b': c = '\b'; break; case 'a': c = '\a'; break; default: c = *p; break; } current = sdscatlen(current,&c,1); } else if (*p == '"') { /* closing quote must be followed by a space or * nothing at all. */ if (*(p+1) && !isspace(*(p+1))) goto err; done=1; } else if (!*p) { /* unterminated quotes */ goto err; } else { current = sdscatlen(current,p,1); } } else if (insq) { if (*p == '\\' && *(p+1) == '\'') { p++; current = sdscatlen(current,"'",1); } else if (*p == '\'') { /* closing quote must be followed by a space or * nothing at all. */ if (*(p+1) && !isspace(*(p+1))) goto err; done=1; } else if (!*p) { /* unterminated quotes */ goto err; } else { current = sdscatlen(current,p,1); } } else { switch(*p) { case ' ': case '\n': case '\r': case '\t': case '\0': done=1; break; case '"': inq=1; break; case '\'': insq=1; break; default: current = sdscatlen(current,p,1); break; } } if (*p) p++; } /* add the token to the vector */ vector = s_realloc(vector,((*argc)+1)*sizeof(char*)); vector[*argc] = current; (*argc)++; current = null; } else { /* even on empty input string return something not null. */ if (vector == null) vector = s_malloc(sizeof(void*)); return vector; } } err: while((*argc)--) sdsfree(vector[*argc]); s_free(vector); if (current) sdsfree(current); *argc = 0; return null; } /* free the result returned by sdssplitlen(), or do nothing if 'tokens' is null. */ void sdsfreesplitres(sds *tokens, int count) { if (!tokens) return; while(count--) sdsfree(tokens[count]); s_free(tokens); }
sdssplitargs 函数开始不好理解, 不过我这写了个 demo 分享给大家
#include <stdio.h> #include "sds.h" int main(int argc, char * argv[]) { int count; const char * line = " hset name \"name:filed\" \"value:field\" "; sds * tokens = sdssplitargs(line, &count); printf("line = [%s], count = [%d]\n", line, count); for (int i = 0; i < count; i++) { printf("tokens[%d] = [%s]\n", i, tokens[i]); } sdsfreesplitres(tokens, count); return 0; }
输出
line = [ hset name "name:filed" "value:field" ], count = [4] tokens[0] = [hset] tokens[1] = [name] tokens[2] = [name:filed] tokens[3] = [value:field]
是不是瞬间开悟 -> sdssplitargs 到底要闹那样!
整体写下来, 感受是 antirez sds 实现的很四平八稳, 没有什么花哨的地方.
感兴趣的朋友写写, 收获少不了 ~
后记 - 再接再厉
错误是难免, 欢迎交流吹水 ~
-
古朗月行(唐·李白) 小时不识月,呼作白玉盘。 又疑瑶台镜,飞在青云端。 仙人垂两足,桂树作团团。 白兔捣药成,问言与谁餐。 蟾蜍蚀圆影,大明夜已残。 羿昔落九乌,天人清且安。 阴精此沦惑,去去不足观。 忧来其如何,凄怆摧心肝。