redis0.1源码解析之字典

程序员文章站 2022-05-20 22:13:39

...

字典也叫哈希表。看一下redis中的实现。下面是数据结构关系图。
redis0.1源码解析之字典
redis中，哈希表的设计思想是，申请一个指针数组，然后每个元素指向一个链表用来存储数据（即链地址法）。

1 创建一个字典

// 申请一个表示字典的数据结构
dict *dictCreate(dictType *type,
        void *privDataPtr)
{
    dict *ht = _dictAlloc(sizeof(*ht));

    _dictInit(ht,type,privDataPtr);
    return ht;
} 

// 初始化字典数据结构
int _dictInit(dict *ht, dictType *type,
        void *privDataPtr)
{
    _dictReset(ht);
    ht->type = type;
    ht->privdata = privDataPtr;
    return DICT_OK;
}

// 扩容
int dictResize(dict *ht)
{
    int minimal = ht->used;

    if (minimal < DICT_HT_INITIAL_SIZE)
        minimal = DICT_HT_INITIAL_SIZE;
    return dictExpand(ht, minimal);
}

首先调用申请一个dict结构体用来表示一个字典。然后初始化他的相关字段，最后调用dictExpand开辟保存数据的内存。

int dictExpand(dict *ht, unsigned long size)
{
    dict n; 
    // size是数组元素的个数，需要是2的倍数
    unsigned long realsize = _dictNextPower(size), i;

    _dictInit(&n, ht->type, ht->privdata);
    // 哈希表数组的长度
    n.size = realsize;
    // 索引掩码，用于计算索引
    n.sizemask = realsize-1;
    // 用作哈希表的指针数组
    n.table = _dictAlloc(realsize*sizeof(dictEntry*));
    // 初始化内存为0
    memset(n.table, 0, realsize*sizeof(dictEntry*));
    // 之前字典已经使用的项数，复制过来，used是字典里节点个数
    n.used = ht->used;
    for (i = 0; i < ht->size && ht->used > 0; i++) {
        dictEntry *he, *nextHe;
        // 空项，不需要复制
        if (ht->table[i] == NULL) continue;
        he = ht->table[i];
        while(he) {
            unsigned int h;
            // 保存下一个节点的地址，table的每一个项都是一个链表
            nextHe = he->next;  
            /* Get the new element index */
            // 重新计算he的索引
            h = dictHashKey(ht, he->key) & n.sizemask;
            // 插入索引为h的字典项中，头插法
            he->next = n.table[h];
            n.table[h] = he;
            // 使用数减一
            ht->used--;
            he = nextHe;
        }
    }
    assert(ht->used == 0);
    // 释放旧的内存
    _dictFree(ht->table);
    // 覆盖之前的信息
    *ht = n;
    return DICT_OK;
}

这样就形成了一个和文章开始那个图一样的结构。

2 往字典加入一个元素

int dictAdd(dict *ht, void *key, void *val)
{
    int index;
    dictEntry *entry;
    // 计算key是否已经存在，不存在则返回key对应的索引
    if ((index = _dictKeyIndex(ht, key)) == -1)
        return DICT_ERR;

    // 申请一个字典项
    entry = _dictAlloc(sizeof(*entry));
    // 头插法插入第index项对应的链表中
    entry->next = ht->table[index];
    ht->table[index] = entry;

    // 设置键值
    dictSetHashKey(ht, entry, key);
    dictSetHashVal(ht, entry, val);
    // 节点数加一
    ht->used++;
    return DICT_OK;
}

1 首先判断key是否已经存在，存在的话就无法添加。
2 根据key和哈希函数，计算对应的索引。
3 头插法插入对应索引的链表。

3 修改某个key对应的值

int dictReplace(dict *ht, void *key, void *val)
{
    dictEntry *entry;
    // 尝试新增，成功说明之前还没有这个key，否则说明存在，下面再替换
    if (dictAdd(ht, key, val) == DICT_OK)
        return DICT_OK;
    // 找到key对应的索引
    entry = dictFind(ht, key);
    // 释放当前的value
    dictFreeEntryVal(ht, entry);
    // 重新设置新值
    dictSetHashVal(ht, entry, val);
    return DICT_OK;
}

4 删除一个元素

static int dictGenericDelete(dict *ht, const void *key, int nofree)
{
    unsigned int h;
    dictEntry *he, *prevHe;
    // 空字典
    if (ht->size == 0)
        return DICT_ERR;
    // 算出索引
    h = dictHashKey(ht, key) & ht->sizemask;
    // 拿到索引对应的链表
    he = ht->table[h];

    prevHe = NULL;
    while(he) {
        // 比较key，找到返回true
        if (dictCompareHashKeys(ht, key, he->key)) {
            /* Unlink the element from the list */
            // 上一个不匹配节点的next指针指向待删除节点的下一个节点
            if (prevHe)
                prevHe->next = he->next;
            else
                // prevHe为空，说明待删除的节点是链表的第一个节点，则更新头指针指向待删除节点的下一个
                ht->table[h] = he->next;
            // 
            if (!nofree) {
                dictFreeEntryKey(ht, he);
                dictFreeEntryVal(ht, he);
            }
            // 删除这个节点
            _dictFree(he);
            // 已使用个数减一
            ht->used--;
            return DICT_OK;
        }
        // 保存上一个不匹配的
        prevHe = he;
        he = he->next;
    }
    return DICT_ERR; /* not found */
}

首先根据key找到索引的对应的链表，然后遍历链表找到key一样的节点，最后就是从链表中删除一个节点。
新增和删除等操作都涉及到查找元素，我们看看怎么查找。

5 查找

dictEntry *dictFind(dict *ht, const void *key)
{
    dictEntry *he;
    unsigned int h;

    if (ht->size == 0) return NULL;
    // 自定义的哈希函数钩子
    h = dictHashKey(ht, key) & ht->sizemask;
    he = ht->table[h];
    while(he) {
    	// 自定义的比较函数钩子
        if (dictCompareHashKeys(ht, key, he->key))
            return he;
        he = he->next;
    }
    return NULL;
}

我们可以看一下某个钩子的实现。

// 哈希函数
unsigned int dictGenHashFunction(const unsigned char *buf, int len) {
    unsigned int hash = 5381;

    while (len--)
        hash = ((hash << 5) + hash) + (*buf++); /* hash * 33 + c */
    return hash;
}
// 比较函数
static int _dictStringCopyHTKeyCompare(void *privdata, const void *key1,
        const void *key2)
{
    return strcmp(key1, key2) == 0;
}

总来的流程就是根据key产生一个索引，然后根据索引到哈希表中取得对应的一个链表，然后操作这个链表（新增，查找，删除）。

6 销毁字典

// 销毁整个字典
int _dictClear(dict *ht)
{
    unsigned long i;

    /* Free all the elements */
    for (i = 0; i < ht->size && ht->used > 0; i++) {
        dictEntry *he, *nextHe;
        // 空闲项，跳过
        if ((he = ht->table[i]) == NULL) continue;
        // 
        while(he) {
            // 先保存下一个节点地址，不然下面内存被释放就找不到了
            nextHe = he->next;
            // 释放键内存
            dictFreeEntryKey(ht, he);
            // 释放值内存
            dictFreeEntryVal(ht, he);
            // 释放节点内存
            _dictFree(he);
            // 使用数减一
            ht->used--;
            he = nextHe;
        }
    }
    /* Free the table and the allocated cache structure */
    // 释放字典结构体
    _dictFree(ht->table);
    /* Re-initialize the table */
    // 重置字段
    _dictReset(ht);
    return DICT_OK; /* never fails */
}

该函数只是释放字典里存储数据的内存，但是没有释放字典本身。下面函数可以释放字典本身的内存。

// 释放字典里的数据和字典本身
void dictRelease(dict *ht)
{
    _dictClear(ht);
    _dictFree(ht);
}

7 迭代字典
redis中实现了各种迭代器对某种数据结构进行迭代，比如字典。

// 申请字典迭代器
dictIterator *dictGetIterator(dict *ht)
{
    dictIterator *iter = _dictAlloc(sizeof(*iter));

    iter->ht = ht;
    iter->index = -1;
    iter->entry = NULL;
    iter->nextEntry = NULL;
    return iter;
}
// 迭代字典的元素，每次调用返回一个节点
dictEntry *dictNext(dictIterator *iter)
{
    while (1) {
        // 初始化时entry为NULL
        if (iter->entry == NULL) {
            iter->index++;
            // 到底了才break，而不是entry为空
            if (iter->index >=
                    (signed)iter->ht->size) break;
            // 返回索引为0对应的链表
            iter->entry = iter->ht->table[iter->index];
        } else {
            iter->entry = iter->nextEntry;
        }
        // 还有节点，则同时记录下一个节，返回entry节点
        if (iter->entry) {
            iter->nextEntry = iter->entry->next;
            return iter->entry;
        }
    }
    return NULL;
}

基本的逻辑就是，按照哈希表数组的索引，从小到大，每一个索引，先去遍历他对应的链表，遍历完后，继续下一个索引，直到最后一个索引结束。

总结，redis的字典里还有其他的一些方法，没有一一列举。有兴趣可以点这。

redis0.1源码解析之字典

1 创建一个字典

2 往字典加入一个元素

3 修改某个key对应的值

4 删除一个元素

5 查找

6 销毁字典

Spring源码解析之ConfigurableApplicationContext

angularjs 源码解析之injector

angularjs 源码解析之scope

Java并发之ReentrantLock类源码解析

Android图片加载利器之Picasso源码解析

Vue源码解析之Template转化为AST的实现方法

死磕 java同步系列之Phaser源码解析

asp.net abp模块化开发之通用树2：设计思路及源码解析

Java中的容器（集合）之ArrayList源码解析

死磕 java同步系列之CyclicBarrier源码解析——有图有真相