FRR - 基础数据结构篇 - 数据结构 hash.c
程序员文章站
2024-02-13 20:46:10
...
frr底层中hash表的不同之处
这里用到的hash表也是传统的数组桶链方式,有几点不同,同样也很精妙的点:
1:数组每个元素不是单纯一个指针,也是一个桶元素。并且只有数组上的桶元素会记录该桶所在的链上所有桶的个数。为了描述方便,直接将数组元素上的桶联合挂在该桶上的所有桶统称为桶链。
2:hash表主体结构维护有一些很有价值的数据,例如,
2.1 max_size,hash表数组的最大长度,避免表无上限扩容;
2.2 count,已分配内存的桶个数,也是hash表中挂载数据的个数,也便于分析相关内存分配的问题;
2.3 hashstats中的数据,尤其是ssq,每次hash节点变动都会修改,记录了整个表的过程动态变化,从一个过程的角度来评定hash函数的好坏
数据结构
//hash主体的结构
struct hash {
/* Hash bucket. */
struct hash_bucket **index; //桶链的hash表
/* Hash table size. Must be power of 2 */ //2的幂能够保证hash_key%size == hash_key&(size-1),提高计算效率
unsigned int size; //hash表数组的长度
/* If max_size is 0 there is no limit */
unsigned int max_size; //hash数组长度的限制,扩容时会用
/* Key make function. */
unsigned int (*hash_key)(const void *); //hash函数
/* Data compare function. */
bool (*hash_cmp)(const void *, const void *); //查找和跟新桶里数据会用到的比较回调函数
/* Backet alloc. */
unsigned long count; //已分配内存的桶个数
struct hashstats stats;
/* hash name */
char *name;
};
//用于计算负载,慢负载和标准差等一系列评定hash函数的参数
struct hashstats {
/* number of empty hash buckets */
atomic_uint_fast32_t empty; //空数组元素的个数
/* sum of squares of bucket length */
atomic_uint_fast32_t ssq; //记录hash表每次跟新桶的过程负载因子
//用于评定hash函数的优劣
};
struct hash_bucket {
/*
* if this bucket is the head of the linked listed, len denotes the
* number of elements in the list
*/
//只有链首的桶中记录非0值,也是该链上所有桶的总个数,以此值区分链首与链上其他桶节点
int len; //记录的桶个数
/* Linked list. */
struct hash_bucket *next;
/* Hash key. */
unsigned int key; //经hash函数计算过多的key值
/* Data. */
void *data; //挂在桶上的数据
};
函数
1:hash扩容
/* Expand hash if the chain length exceeds the threshold. */
static void hash_expand(struct hash *hash)
{
unsigned int i, new_size;
struct hash_bucket *hb, *hbnext, **new_index;
new_size = hash->size * 2;
//max_size 为0则不作最大长度限制
if (hash->max_size && new_size > hash->max_size)
return;
new_index = XCALLOC(MTYPE_HASH_INDEX,
sizeof(struct hash_bucket *) * new_size);
hash->stats.empty = new_size;
//遍历数组上每个桶链
for (i = 0; i < hash->size; i++)
//遍历每个链表
for (hb = hash->index[i]; hb; hb = hbnext) {
unsigned int h = hb->key & (new_size - 1); //要处理的桶元素
hbnext = hb->next; //记录下一个桶元素
hb->next = new_index[h]; //旧表中每个元素插在新表新桶链的首元素位置上
int oldlen = hb->next ? hb->next->len : 0; //将元素插入前,新表新桶链是否为空
int newlen = oldlen + 1;
if (newlen == 1)
hash->stats.empty--; //插入前非空桶链,新表空桶链少了一个
else
hb->next->len = 0;
hb->len = newlen; //记录新长度
hash_update_ssq(hash, oldlen, newlen); //跟新ssq值
new_index[h] = hb;
}
/* Switch to new table */
XFREE(MTYPE_HASH_INDEX, hash->index);
hash->size = new_size;
hash->index = new_index;
}
//关于ssq跟新的宏
#define hash_update_ssq(hz, old, new) \
atomic_fetch_add_explicit(&hz->stats.ssq, (new + old) * (new - old), \
memory_order_relaxed);
#define atomic_fetch_add_explicit(ptr, val, mem) \
({ \
__sync_synchronize(); \
typeof(*ptr) rval = __sync_fetch_and_add((ptr), (val)); \
__sync_synchronize(); \
rval; \
})
其中__sync_synchronize()和__sync_fetch_and_add的理解可参考博文__sync_*系列原子操作函数等
2:字符串的hash函数
unsigned int string_hash_make(const char *str)
{
unsigned int hash = 0;
while (*str)
hash = (hash * 33) ^ (unsigned int)*str++;
return hash;
}
3:hash表释放某数据所在的桶
void *hash_release(struct hash *hash, void *data)
{
void *ret;
unsigned int key;
unsigned int index;
struct hash_bucket *bucket;
struct hash_bucket *pp;
key = (*hash->hash_key)(data);
index = key & (hash->size - 1);
//pp初始化和bucket指向桶链首元素,之后pp指向bucket的上一个桶元素
for (bucket = pp = hash->index[index]; bucket; bucket = bucket->next) {
if (bucket->key == key
&& (*hash->hash_cmp)(bucket->data, data)) { //找到数据
int oldlen = hash->index[index]->len; //首元素存的桶链长度
int newlen = oldlen - 1;
if (bucket == pp) //要删除的节点是桶链首节点
hash->index[index] = bucket->next; //直接删除首节点
else
pp->next = bucket->next; //删除非首元素的链表节点
if (hash->index[index])
hash->index[index]->len = newlen; //给桶链首元素赋值该链新长度
else
hash->stats.empty++; //删除完,成了空桶链
hash_update_ssq(hash, oldlen, newlen);
ret = bucket->data; //返回同上的数据
XFREE(MTYPE_HASH_BACKET, bucket); //删除桶所占的内存
hash->count--; //分配节点的个数少了一个
return ret;
}
pp = bucket;
}
return NULL;
}
4:hash元素迭代
//hash迭代
void hash_iterate(struct hash *hash, void (*func)(struct hash_bucket *, void *),
void *arg)
{
unsigned int i;
struct hash_bucket *hb;
struct hash_bucket *hbnext;
for (i = 0; i < hash->size; i++)
for (hb = hash->index[i]; hb; hb = hbnext) {
/* get pointer to next hash bucket here, in case (*func)
* decides to delete hb by calling hash_release
*/
hbnext = hb->next; //记录下一个节点,保证此次迭代链不断
(*func)(hb, arg); //对当前节点进行删除等操作处理
}
}
5:hash表中各种数据的计算和输出
//一个输出hash参数的命令,只需要关注函数体中各参数的计算即可,中间的英文备注可帮助解释
DEFUN_NOSH(show_hash_stats,
show_hash_stats_cmd,
"show debugging hashtable [statistics]",
SHOW_STR
DEBUG_STR
"Statistics about hash tables\n"
"Statistics about hash tables\n")
{
struct hash *h;
struct listnode *ln;
struct ttable *tt = ttable_new(&ttable_styles[TTSTYLE_BLANK]);
ttable_add_row(tt, "Hash table|Buckets|Entries|Empty|LF|SD|FLF|SD");
tt->style.cell.lpad = 2;
tt->style.cell.rpad = 1;
tt->style.corner = '+';
ttable_restyle(tt);
ttable_rowseps(tt, 0, BOTTOM, true, '-');
/* Summary statistics calculated are:
*
* - Load factor: This is the number of elements in the table divided
* by the number of buckets. Since this hash table implementation
* uses chaining, this value can be greater than 1.
* This number provides information on how 'full' the table is, but
* does not provide information on how evenly distributed the
* elements are.
* Notably, a load factor >= 1 does not imply that every bucket has
* an element; with a pathological hash function, all elements could
* be in a single bucket.
*
* - Full load factor: this is the number of elements in the table
* divided by the number of buckets that have some elements in them.
*
* - Std. Dev.: This is the standard deviation calculated from the
* relevant load factor. If the load factor is the mean of number of
* elements per bucket, the standard deviation measures how much any
* particular bucket is likely to deviate from the mean.
* As a rule of thumb this number should be less than 2, and ideally
* <= 1 for optimal performance. A number larger than 3 generally
* indicates a poor hash function.
*/
double lf; // load factor
double flf; // full load factor
double var; // overall variance
double fvar; // full variance
double stdv; // overall stddev
double fstdv; // full stddev
long double x2; // h->count ^ 2
long double ldc; // (long double) h->count
long double full; // h->size - h->stats.empty
long double ssq; // ssq casted to long double
pthread_mutex_lock(&_hashes_mtx);
if (!_hashes) {
pthread_mutex_unlock(&_hashes_mtx);
ttable_del(tt);
vty_out(vty, "No hash tables in use.\n");
return CMD_SUCCESS;
}
for (ALL_LIST_ELEMENTS_RO(_hashes, ln, h)) {
if (!h->name)
continue;
ssq = (long double)h->stats.ssq;
x2 = h->count * h->count;
ldc = (long double)h->count;
full = h->size - h->stats.empty;
lf = h->count / (double)h->size; //LF,负载系数
flf = full ? h->count / (double)(full) : 0; //FLF,满负载
var = ldc ? (1.0 / ldc) * (ssq - x2 / ldc) : 0;
fvar = full ? (1.0 / full) * (ssq - x2 / full) : 0;
var = (var < .0001) ? 0 : var;
fvar = (fvar < .0001) ? 0 : fvar;
stdv = sqrt(var);
fstdv = sqrt(fvar); //标准差,<=1最好,<2符合标准,>3则hash效果很差
ttable_add_row(tt, "%s|%d|%ld|%.0f%%|%.2lf|%.2lf|%.2lf|%.2lf",
h->name, h->size, h->count,
(h->stats.empty / (double)h->size) * 100, lf,
stdv, flf, fstdv);
}
pthread_mutex_unlock(&_hashes_mtx);
/* display header */
char header[] = "Showing hash table statistics for ";
char underln[sizeof(header) + strlen(frr_protonameinst)];
memset(underln, '-', sizeof(underln));
underln[sizeof(underln) - 1] = '\0';
vty_out(vty, "%s%s\n", header, frr_protonameinst);
vty_out(vty, "%s\n", underln);
vty_out(vty, "# allocated: %d\n", _hashes->count);
vty_out(vty, "# named: %d\n\n", tt->nrows - 1);
if (tt->nrows > 1) {
ttable_colseps(tt, 0, RIGHT, true, '|');
char *table = ttable_dump(tt, "\n");
vty_out(vty, "%s\n", table);
XFREE(MTYPE_TMP, table);
} else
vty_out(vty, "No named hash tables to display.\n");
ttable_del(tt);
return CMD_SUCCESS;
}
6:hash表释放
void hash_free(struct hash *hash)
{
frr_with_mutex(&_hashes_mtx) {
if (_hashes) {
listnode_delete(_hashes, hash); //从list里找到hash所在的节点,然后删除这个节点
if (_hashes->count == 0) { //若链表为空,删除链表主体结构
list_delete(&_hashes);
}
}
}
XFREE(MTYPE_HASH, hash->name); //表名
XFREE(MTYPE_HASH_INDEX, hash->index); //桶链结构
XFREE(MTYPE_HASH, hash); //表结构
}
//_hashes结构是一个个hash表用双向指针连起来的总hash链表
static struct list *_hashes;