C基础带你手写 redis sds

程序员文章站 2022-03-25 21:25:21

前言 - Simple Dynamic Strings antirez 想统一 Redis，Disque，Hiredis 项目中 SDS 代码, 因此构建了这个项目 https://github.com/antirez/sds . 更多介绍的背景知识, 可以阅读 README.md. sds 项目是 ......

前言 - simple dynamic strings

　　 antirez 想统一 redis，disque，hiredis 项目中 sds 代码, 因此构建了这个项目

. 更多介绍的背景知识, 可以阅读 readme.md.

　　sds 项目是 c 字符串数据结构在实际环境中一种取舍实现, 库本身是非线程安全的. 下面

来带大家手写相关代码, 透彻了解这个库的意图(antirez 注释的很棒).

#define sds_max_prealloc  (1024*1024)

/* note: sdshdr5 is never used, we just access the flags byte directly.
 * however is here to document the layout of type 5 sds strings. */
struct __attribute__ ((__packed__)) sdshdr5 {
    unsigned char flags;    /* 3 lsb of type, and 5 msb of string length */
    char buf[];
};

struct __attribute__ ((__packed__)) sdshdr8 {
    uint8_t len;            /* used */
    uint8_t alloc;          /* excluding the header and null terminator */
    unsigned char flags;    /* 3 lsb of type, 5 unused bits */
    char buf[];
};

struct __attribute__ ((__packed__)) sdshdr16 {
    uint16_t len;           /* used */
    uint16_t alloc;         /* excluding the header and null terminator */
    unsigned char flags;    /* 3 lsb of type, 5 unused bits */
    char buf[];
};

struct __attribute__ ((__packed__)) sdshdr32 {
    uint32_t len;           /* used */
    uint32_t alloc;         /* excluding the header and null terminator */
    unsigned char flags;    /* 3 lsb of type, 5 unused bits */
    char buf[];
};

struct __attribute__ ((__packed__)) sdshdr64 {
    uint64_t len;           /* used */
    uint64_t alloc;         /* excluding the header and null terminator */
    unsigned char flags;    /* 3 lsb of type, 5 unused bits */
    char buf[]; 
};

#define sds_type_5    0
#define sds_type_8    1
#define sds_type_16   2
#define sds_type_32   3
#define sds_type_64   4
#define sds_type_mask 7
#define sds_type_bits 3
#define sds_type_5_len(f) ((f)>>sds_type_bits)
#define sds_hdr(t, s)     ((struct sdshdr##t *)((s) - (sizeof(struct sdshdr##t))))
#define sds_hdr_var(t, s) struct sdshdr##t * sh = sds_hdr(t, s)

可以先分析 sdshdr5 sdshdr8 sdshdr16 sdshdr32 sdshdr64, 感受作者的意图.

首先这几个基本结构, 都有 len, alloc, flags, buf 4 个字段, 是不是. 有的同学会问, sdshdr5

中没有 alloc 和 len 字段呀, 这个啊 sdshdr5 结构比较特殊, 其 alloc 和 len 都隐含在 flags

中, 三者复用一个字段. 可以从函数宏 sds_type_5_len(f), 可以看出来. 因而 sdshdr5 表达的字符

串长度和字符串容量默认相同. 再考究一下 __attribute__ ((__packed__)) 意图(告诉编译器取消结

构在编译过程中的优化对齐, 按照实际占用字节数进行对齐). 对于取消结构内存编译对齐优化, 我的考究有

两点, 1 节约内存, 2 内存可移植变强.

随后多数是流水线代码, 非常好理解. 例如有段这样关系的代码 sdsalloc() = sdsavail() + sdslen()

inline size_t sdslen(const sds s) {
    unsigned char flags = s[-1];
    switch (flags & sds_type_mask) {
    case sds_type_5 :
        return sds_type_5_len(flags);
    case sds_type_8 :
        return sds_hdr(8 , s)->len;
    case sds_type_16:
        return sds_hdr(16, s)->len;
    case sds_type_32:
        return sds_hdr(32, s)->len;
    case sds_type_64:
        return sds_hdr(64, s)->len;
    }
    return 0;
}

inline size_t sdsavail(const sds s) {
    unsigned char flags = s[-1];
    switch (flags & sds_type_mask) {
    case sds_type_8 : {
        sds_hdr_var(8 , s);
        return sh->alloc - sh->len;       
    }
    case sds_type_16: {
        sds_hdr_var(16, s);
        return sh->alloc - sh->len;       
    }
    case sds_type_32: {
        sds_hdr_var(32, s);
        return sh->alloc - sh->len;       
    }
    case sds_type_64: {
        sds_hdr_var(64, s);
        return sh->alloc - sh->len;       
    }
    default:
        return 0;
    }  
}

/* sdsalloc() = sdsavail() + sdslen() */
inline size_t sdsalloc(const sds s) {
    unsigned char flags = s[-1];
    switch (flags & sds_type_mask) {
    case sds_type_5 :
        return sds_type_5_len(flags);
    case sds_type_8 :
        return sds_hdr(8 , s)->alloc;
    case sds_type_16:
        return sds_hdr(16, s)->alloc;
    case sds_type_32:
        return sds_hdr(32, s)->alloc;
    case sds_type_64:
        return sds_hdr(64, s)->alloc;
    }
    return 0;
}

是不是一下就理解了 sdsalloc(), sdsavail(), sdslen() 是干什么的呢 ❤

正文 - 代码抽样讲解

1. 重复代码可以修的更好

/* helper for sdscatlonglong() doing the actual number -> string
 * conversion. 's' must point to a string with room for at least
 * sds_llstr_size bytes.
 *
 * the function returns the length of the null-terminated string
 * representation stored at 's'. */
#define sds_llstr_size 21
int sdsll2str(char *s, long long value) {
    char *p, aux;
    unsigned long long v;
    size_t l;

    /* generate the string representation, this method produces
     * an reversed string. */
    v = (value < 0) ? -value : value;
    p = s;
    do {
        *p++ = '0'+(v%10);
        v /= 10;
    } while(v);
    if (value < 0) *p++ = '-';

    /* compute length and add null term. */
    l = p-s;
    *p = '\0';

    /* reverse the string. */
    p--;
    while(s < p) {
        aux = *s;
        *s = *p;
        *p = aux;
        s++;
        p--;
    }
    return l;
}

/* identical sdsll2str(), but for unsigned long long type. */
int sdsull2str(char *s, unsigned long long v) {
    char *p, aux;
    size_t l;

    /* generate the string representation, this method produces
     * an reversed string. */
    p = s;
    do {
        *p++ = '0'+(v%10);
        v /= 10;
    } while(v);

    /* compute length and add null term. */
    l = p-s;
    *p = '\0';

    /* reverse the string. */
    p--;
    while(s < p) {
        aux = *s;
        *s = *p;
        *p = aux;
        s++;
        p--;
    }
    return l;
}

long long or unsigned long long convert to char * 中, 功能很简单. 函数结尾处代码

重复不少, 是可以重构复用的.

inline int sdsreverse(char * s, char * p) {
    /* compute length and add null term. */
    size_t l = p - s;
    *p = '\0';

    p--;
    while (s < p) {
        char aux = *s;
        *s = *p;
        *p = aux;

        s++;
        p--;
    }

    return (int)l;
}

/* helper for sdscatlonglong() doing the actual number -> string
 * conversion. 's' must point to a string with room for at least
 * sds_llstr_size bytes.
 *
 * the function returns the length of the null-terminated string
 * representation stored at 's'. */
#define sds_llstr_size 21
int sdsll2str(char * s, long long value) {
    char * p;
    unsigned long long v;

    /* generate the string representation, this method produces
     * an reversed string. */
    v = (value < 0) ? -value : value;
    p = s;
    do {
        *p++ = '0' + (v % 10);
    } while ((v /= 10));
    if (value < 0) *p++ = '-';

    return sdsreverse(s, p);
}

/* identical sdsll2str(), but for unsigned long long type. */
int sdsull2str(char * s, unsigned long long v) {
    /* generate the string representation, this method produces
     * an reversed string. */
    char * p = s;
    do {
        *p++ = '0' + (v % 10);
    } while ((v /= 10));

    return sdsreverse(s, p);
}

是不是显得老学究气质凸显了不少.

2. vsnprintf 用的太硬邦邦

/* like sdscatprintf() but gets va_list instead of being variadic. */
sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
    va_list cpy;
    char staticbuf[1024], *buf = staticbuf, *t;
    size_t buflen = strlen(fmt)*2;

    /* we try to start using a static buffer for speed.
     * if not possible we revert to heap allocation. */
    if (buflen > sizeof(staticbuf)) {
        buf = s_malloc(buflen);
        if (buf == null) return null;
    } else {
        buflen = sizeof(staticbuf);
    }

    /* try with buffers two times bigger every time we fail to
     * fit the string in the current buffer size. */
    while(1) {
        buf[buflen-2] = '\0';
        va_copy(cpy,ap);
        vsnprintf(buf, buflen, fmt, cpy);
        va_end(cpy);
        if (buf[buflen-2] != '\0') {
            if (buf != staticbuf) s_free(buf);
            buflen *= 2;
            buf = s_malloc(buflen);
            if (buf == null) return null;
            continue;
        }
        break;
    }

    /* finally concat the obtained string to the sds string and return it. */
    t = sdscat(s, buf);
    if (buf != staticbuf) s_free(buf);
    return t;
}

由 while vsnprintf 来扩容, 这实在太暴力. 更好操作可以观看 man vsnprintf/ 这里可以看我的提交

/* like sdscatprintf() but gets va_list instead of being variadic. */
sds sdscatvprintf(sds s, const char * fmt, va_list ap) {
    int size;
    va_list cpy;
    char staticbuf[1024], * buf, * t;

    /* determine required size */
    va_copy(cpy, ap);
    size = vsnprintf(null, 0, fmt, cpy);
    va_end(cpy);

    if (size < 0) return null;

    /* for '\0' */
    size++;

    /* we try to start using a static buffer for speed.
     * if not possible we revert to heap allocation. */
    if (size > sizeof(staticbuf)) {
        buf = s_malloc(size);
        if (buf == null) return null;        
    } else {
        buf = staticbuf;
    }

    va_copy(cpy, ap);
    size = vsnprintf(buf, size, fmt, cpy);
    va_end(ap);

    if (size < 0) {
        if (buf != staticbuf) s_free(buf);
        return null;
    }

    /* finally concat the obtained string to the sds string and return it. */
    t = sdscat(s, buf);
    if (buf != staticbuf) s_free(buf);
    return t;
}

别问, 问就是上天.

3. sdssplitargs 难以让人见色起意

/* helper function for sdssplitargs() that returns non zero if 'c'
 * is a valid hex digit. */
int is_hex_digit(char c) {
    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
           (c >= 'a' && c <= 'f');
}

/* helper function for sdssplitargs() that converts a hex digit into an
 * integer from 0 to 15 */
int hex_digit_to_int(char c) {
    switch(c) {
    case '0': return 0;
    case '1': return 1;
    case '2': return 2;
    case '3': return 3;
    case '4': return 4;
    case '5': return 5;
    case '6': return 6;
    case '7': return 7;
    case '8': return 8;
    case '9': return 9;
    case 'a': case 'a': return 10;
    case 'b': case 'b': return 11;
    case 'c': case 'c': return 12;
    case 'd': case 'd': return 13;
    case 'e': case 'e': return 14;
    case 'f': case 'f': return 15;
    default: return 0;
    }
}

/* split a line into arguments, where every argument can be in the
 * following programming-language repl-alike form:
 *
 * foo bar "newline are supported\n" and "\xff\x00otherstuff"
 *
 * the number of arguments is stored into *argc, and an array
 * of sds is returned.
 *
 * the caller should free the resulting array of sds strings with
 * sdsfreesplitres().
 *
 * note that sdscatrepr() is able to convert back a string into
 * a quoted string in the same format sdssplitargs() is able to parse.
 *
 * the function returns the allocated tokens on success, even when the
 * input string is empty, or null if the input contains unbalanced
 * quotes or closed quotes followed by non space characters
 * as in: "foo"bar or "foo'
 */
sds *sdssplitargs(const char *line, int *argc) {
    const char *p = line;
    char *current = null;
    char **vector = null;

    *argc = 0;
    while(1) {
        /* skip blanks */
        while(*p && isspace(*p)) p++;
        if (*p) {
            /* get a token */
            int inq=0;  /* set to 1 if we are in "quotes" */
            int insq=0; /* set to 1 if we are in 'single quotes' */
            int done=0;

            if (current == null) current = sdsempty();
            while(!done) {
                if (inq) {
                    if (*p == '\\' && *(p+1) == 'x' &&
                                             is_hex_digit(*(p+2)) &&
                                             is_hex_digit(*(p+3)))
                    {
                        unsigned char byte;

                        byte = (hex_digit_to_int(*(p+2))*16)+
                                hex_digit_to_int(*(p+3));
                        current = sdscatlen(current,(char*)&byte,1);
                        p += 3;
                    } else if (*p == '\\' && *(p+1)) {
                        char c;

                        p++;
                        switch(*p) {
                        case 'n': c = '\n'; break;
                        case 'r': c = '\r'; break;
                        case 't': c = '\t'; break;
                        case 'b': c = '\b'; break;
                        case 'a': c = '\a'; break;
                        default: c = *p; break;
                        }
                        current = sdscatlen(current,&c,1);
                    } else if (*p == '"') {
                        /* closing quote must be followed by a space or
                         * nothing at all. */
                        if (*(p+1) && !isspace(*(p+1))) goto err;
                        done=1;
                    } else if (!*p) {
                        /* unterminated quotes */
                        goto err;
                    } else {
                        current = sdscatlen(current,p,1);
                    }
                } else if (insq) {
                    if (*p == '\\' && *(p+1) == '\'') {
                        p++;
                        current = sdscatlen(current,"'",1);
                    } else if (*p == '\'') {
                        /* closing quote must be followed by a space or
                         * nothing at all. */
                        if (*(p+1) && !isspace(*(p+1))) goto err;
                        done=1;
                    } else if (!*p) {
                        /* unterminated quotes */
                        goto err;
                    } else {
                        current = sdscatlen(current,p,1);
                    }
                } else {
                    switch(*p) {
                    case ' ':
                    case '\n':
                    case '\r':
                    case '\t':
                    case '\0':
                        done=1;
                        break;
                    case '"':
                        inq=1;
                        break;
                    case '\'':
                        insq=1;
                        break;
                    default:
                        current = sdscatlen(current,p,1);
                        break;
                    }
                }
                if (*p) p++;
            }
            /* add the token to the vector */
            vector = s_realloc(vector,((*argc)+1)*sizeof(char*));
            vector[*argc] = current;
            (*argc)++;
            current = null;
        } else {
            /* even on empty input string return something not null. */
            if (vector == null) vector = s_malloc(sizeof(void*));
            return vector;
        }
    }

err:
    while((*argc)--)
        sdsfree(vector[*argc]);
    s_free(vector);
    if (current) sdsfree(current);
    *argc = 0;
    return null;
}

/* free the result returned by sdssplitlen(), or do nothing if 'tokens' is null. */
void sdsfreesplitres(sds *tokens, int count) {
    if (!tokens) return;
    while(count--)
        sdsfree(tokens[count]);
    s_free(tokens);
}

sdssplitargs 函数开始不好理解, 不过我这写了个 demo 分享给大家

#include <stdio.h>

#include "sds.h"

int main(int argc, char * argv[]) {
    int count;
    const char * line = " hset name \"name:filed\" \"value:field\" ";
    sds * tokens = sdssplitargs(line, &count);

    printf("line = [%s], count = [%d]\n", line, count);
    for (int i = 0; i < count; i++) {
        printf("tokens[%d] = [%s]\n", i, tokens[i]);
    }

    sdsfreesplitres(tokens, count);

    return 0;
}

输出

line = [ hset name "name:filed" "value:field" ], count = [4]
tokens[0] = [hset]
tokens[1] = [name]
tokens[2] = [name:filed]
tokens[3] = [value:field]

是不是瞬间开悟 -> sdssplitargs 到底要闹那样!

整体写下来, 感受是 antirez sds 实现的很四平八稳, 没有什么花哨的地方.

感兴趣的朋友写写, 收获少不了 ~

后记 - 再接再厉

错误是难免, 欢迎交流吹水 ~

古朗月行（唐·李白）
小时不识月，呼作白玉盘。
又疑瑶台镜，飞在青云端。
仙人垂两足，桂树作团团。
白兔捣药成，问言与谁餐。
蟾蜍蚀圆影，大明夜已残。
羿昔落九乌，天人清且安。
阴精此沦惑，去去不足观。
忧来其如何，凄怆摧心肝。

上一篇： Java变量常量声明和定义

下一篇： Uncaught TypeError: xx.split is not a function

C基础带你手写 redis sds

C基础带你手写 redis adlist 双向链表

Redis 字符串实现：简单动态字符串-SDS与C字符串的区别

一篇文章带你入门Redis（小白的Redis搭建与基础命令、知识）

C基础带你手写 redis adlist 双向链表

Redis 字符串实现：简单动态字符串-SDS与C字符串的区别