FRR - 基础数据结构篇 - 数据结构 hash.c

程序员文章站 2024-02-13 20:46:10

...

frr底层中hash表的不同之处

这里用到的hash表也是传统的数组桶链方式，有几点不同，同样也很精妙的点：

1：数组每个元素不是单纯一个指针，也是一个桶元素。并且只有数组上的桶元素会记录该桶所在的链上所有桶的个数。为了描述方便，直接将数组元素上的桶联合挂在该桶上的所有桶统称为桶链。

2：hash表主体结构维护有一些很有价值的数据，例如，

2.1 max_size，hash表数组的最大长度，避免表无上限扩容；

2.2 count，已分配内存的桶个数，也是hash表中挂载数据的个数，也便于分析相关内存分配的问题；

2.3 hashstats中的数据，尤其是ssq，每次hash节点变动都会修改，记录了整个表的过程动态变化，从一个过程的角度来评定hash函数的好坏

数据结构

//hash主体的结构
struct hash {
	/* Hash bucket. */
	struct hash_bucket **index;                    //桶链的hash表

	/* Hash table size. Must be power of 2 */      //2的幂能够保证hash_key%size == hash_key&(size-1)，提高计算效率
	unsigned int size;                             //hash表数组的长度

	/* If max_size is 0 there is no limit */
	unsigned int max_size;                         //hash数组长度的限制，扩容时会用

	/* Key make function. */
	unsigned int (*hash_key)(const void *);        //hash函数

	/* Data compare function. */
	bool (*hash_cmp)(const void *, const void *);  //查找和跟新桶里数据会用到的比较回调函数

	/* Backet alloc. */
	unsigned long count;                           //已分配内存的桶个数

	struct hashstats stats;

	/* hash name */
	char *name;
};

//用于计算负载，慢负载和标准差等一系列评定hash函数的参数
struct hashstats {
	/* number of empty hash buckets */
	atomic_uint_fast32_t empty;                   //空数组元素的个数
	/* sum of squares of bucket length */
	atomic_uint_fast32_t ssq;                     //记录hash表每次跟新桶的过程负载因子
                                                     //用于评定hash函数的优劣
};

struct hash_bucket {
	/*
	 * if this bucket is the head of the linked listed, len denotes the
	 * number of elements in the list
	 */
        //只有链首的桶中记录非0值，也是该链上所有桶的总个数，以此值区分链首与链上其他桶节点
	int len;                                     //记录的桶个数

	/* Linked list.  */
	struct hash_bucket *next;

	/* Hash key. */
	unsigned int key;                           //经hash函数计算过多的key值

	/* Data.  */
	void *data;                                 //挂在桶上的数据
};

函数

1：hash扩容

/* Expand hash if the chain length exceeds the threshold. */
static void hash_expand(struct hash *hash)
{
	unsigned int i, new_size;
	struct hash_bucket *hb, *hbnext, **new_index;

	new_size = hash->size * 2;

    //max_size 为0则不作最大长度限制
	if (hash->max_size && new_size > hash->max_size)
		return;

	new_index = XCALLOC(MTYPE_HASH_INDEX,
			    sizeof(struct hash_bucket *) * new_size);

	hash->stats.empty = new_size;

    //遍历数组上每个桶链
	for (i = 0; i < hash->size; i++)
        //遍历每个链表
		for (hb = hash->index[i]; hb; hb = hbnext) {
			unsigned int h = hb->key & (new_size - 1);  //要处理的桶元素

			hbnext = hb->next;                          //记录下一个桶元素
			hb->next = new_index[h];                    //旧表中每个元素插在新表新桶链的首元素位置上

			int oldlen = hb->next ? hb->next->len : 0;  //将元素插入前，新表新桶链是否为空
			int newlen = oldlen + 1;

			if (newlen == 1)
				hash->stats.empty--;                    //插入前非空桶链，新表空桶链少了一个
			else
				hb->next->len = 0;

			hb->len = newlen;                           //记录新长度

			hash_update_ssq(hash, oldlen, newlen);      //跟新ssq值
 
			new_index[h] = hb;
		}

	/* Switch to new table */
	XFREE(MTYPE_HASH_INDEX, hash->index);
	hash->size = new_size;
	hash->index = new_index;
}


//关于ssq跟新的宏
#define hash_update_ssq(hz, old, new)                                          \
	atomic_fetch_add_explicit(&hz->stats.ssq, (new + old) * (new - old),   \
				  memory_order_relaxed);

#define atomic_fetch_add_explicit(ptr, val, mem)                               \
	({                                                                     \
		__sync_synchronize();                                          \
		typeof(*ptr) rval = __sync_fetch_and_add((ptr), (val));        \
		__sync_synchronize();                                          \
		rval;                                                          \
	})

其中__sync_synchronize()和__sync_fetch_and_add的理解可参考博文__sync_*系列原子操作函数等

2：字符串的hash函数

unsigned int string_hash_make(const char *str)
{
	unsigned int hash = 0;

	while (*str)
		hash = (hash * 33) ^ (unsigned int)*str++;

	return hash;
}

3：hash表释放某数据所在的桶

void *hash_release(struct hash *hash, void *data)
{
	void *ret;
	unsigned int key;
	unsigned int index;
	struct hash_bucket *bucket;
	struct hash_bucket *pp;

	key = (*hash->hash_key)(data);
	index = key & (hash->size - 1);

    //pp初始化和bucket指向桶链首元素，之后pp指向bucket的上一个桶元素
	for (bucket = pp = hash->index[index]; bucket; bucket = bucket->next) {
		if (bucket->key == key
		    && (*hash->hash_cmp)(bucket->data, data)) {   //找到数据
			int oldlen = hash->index[index]->len;         //首元素存的桶链长度
			int newlen = oldlen - 1;

			if (bucket == pp)                             //要删除的节点是桶链首节点
				hash->index[index] = bucket->next;        //直接删除首节点
			else
				pp->next = bucket->next;                  //删除非首元素的链表节点

			if (hash->index[index])
				hash->index[index]->len = newlen;         //给桶链首元素赋值该链新长度
			else
				hash->stats.empty++;                      //删除完，成了空桶链

			hash_update_ssq(hash, oldlen, newlen);

			ret = bucket->data;                           //返回同上的数据
			XFREE(MTYPE_HASH_BACKET, bucket);             //删除桶所占的内存
			hash->count--;                                //分配节点的个数少了一个
			return ret;
		}
		pp = bucket;
	}
	return NULL;
}

4：hash元素迭代

//hash迭代
void hash_iterate(struct hash *hash, void (*func)(struct hash_bucket *, void *),
		  void *arg)
{
	unsigned int i;
	struct hash_bucket *hb;
	struct hash_bucket *hbnext;

	for (i = 0; i < hash->size; i++)
		for (hb = hash->index[i]; hb; hb = hbnext) {
			/* get pointer to next hash bucket here, in case (*func)
			 * decides to delete hb by calling hash_release
			 */
			hbnext = hb->next;                              //记录下一个节点，保证此次迭代链不断
			(*func)(hb, arg);                               //对当前节点进行删除等操作处理
		} 
}

5：hash表中各种数据的计算和输出

//一个输出hash参数的命令，只需要关注函数体中各参数的计算即可，中间的英文备注可帮助解释
DEFUN_NOSH(show_hash_stats,
           show_hash_stats_cmd,
           "show debugging hashtable [statistics]",
           SHOW_STR
           DEBUG_STR
           "Statistics about hash tables\n"
           "Statistics about hash tables\n")
{
	struct hash *h;
	struct listnode *ln;
	struct ttable *tt = ttable_new(&ttable_styles[TTSTYLE_BLANK]);

	ttable_add_row(tt, "Hash table|Buckets|Entries|Empty|LF|SD|FLF|SD");
	tt->style.cell.lpad = 2;
	tt->style.cell.rpad = 1;
	tt->style.corner = '+';
	ttable_restyle(tt);
	ttable_rowseps(tt, 0, BOTTOM, true, '-');

	/* Summary statistics calculated are:
	 *
	 * - Load factor: This is the number of elements in the table divided
	 *   by the number of buckets. Since this hash table implementation
	 *   uses chaining, this value can be greater than 1.
	 *   This number provides information on how 'full' the table is, but
	 *   does not provide information on how evenly distributed the
	 *   elements are.
	 *   Notably, a load factor >= 1 does not imply that every bucket has
	 *   an element; with a pathological hash function, all elements could
	 *   be in a single bucket.
	 *
	 * - Full load factor: this is the number of elements in the table
	 *   divided by the number of buckets that have some elements in them.
	 *
	 * - Std. Dev.: This is the standard deviation calculated from the
	 *   relevant load factor. If the load factor is the mean of number of
	 *   elements per bucket, the standard deviation measures how much any
	 *   particular bucket is likely to deviate from the mean.
	 *   As a rule of thumb this number should be less than 2, and ideally
	 *   <= 1 for optimal performance. A number larger than 3 generally
	 *   indicates a poor hash function.
	 */

	double lf;    // load factor
	double flf;   // full load factor
	double var;   // overall variance
	double fvar;  // full variance
	double stdv;  // overall stddev
	double fstdv; // full stddev

	long double x2;   // h->count ^ 2
	long double ldc;  // (long double) h->count
	long double full; // h->size - h->stats.empty
	long double ssq;  // ssq casted to long double

	pthread_mutex_lock(&_hashes_mtx);
	if (!_hashes) {
		pthread_mutex_unlock(&_hashes_mtx);
		ttable_del(tt);
		vty_out(vty, "No hash tables in use.\n");
		return CMD_SUCCESS;
	}

	for (ALL_LIST_ELEMENTS_RO(_hashes, ln, h)) {
		if (!h->name)
			continue;

		ssq = (long double)h->stats.ssq;
		x2 = h->count * h->count;
		ldc = (long double)h->count;
		full = h->size - h->stats.empty;
		lf = h->count / (double)h->size;                              //LF,负载系数
		flf = full ? h->count / (double)(full) : 0;                   //FLF,满负载
		var = ldc ? (1.0 / ldc) * (ssq - x2 / ldc) : 0;
		fvar = full ? (1.0 / full) * (ssq - x2 / full) : 0;
		var = (var < .0001) ? 0 : var;
		fvar = (fvar < .0001) ? 0 : fvar;
		stdv = sqrt(var);
		fstdv = sqrt(fvar);                                           //标准差，<=1最好，<2符合标准，>3则hash效果很差

		ttable_add_row(tt, "%s|%d|%ld|%.0f%%|%.2lf|%.2lf|%.2lf|%.2lf",
			       h->name, h->size, h->count,
			       (h->stats.empty / (double)h->size) * 100, lf,
			       stdv, flf, fstdv);
	}
	pthread_mutex_unlock(&_hashes_mtx);

	/* display header */
	char header[] = "Showing hash table statistics for ";
	char underln[sizeof(header) + strlen(frr_protonameinst)];
	memset(underln, '-', sizeof(underln));
	underln[sizeof(underln) - 1] = '\0';
	vty_out(vty, "%s%s\n", header, frr_protonameinst);
	vty_out(vty, "%s\n", underln);

	vty_out(vty, "# allocated: %d\n", _hashes->count);
	vty_out(vty, "# named:     %d\n\n", tt->nrows - 1);

	if (tt->nrows > 1) {
		ttable_colseps(tt, 0, RIGHT, true, '|');
		char *table = ttable_dump(tt, "\n");
		vty_out(vty, "%s\n", table);
		XFREE(MTYPE_TMP, table);
	} else
		vty_out(vty, "No named hash tables to display.\n");

	ttable_del(tt);

	return CMD_SUCCESS;
}

6：hash表释放

void hash_free(struct hash *hash)
{
	frr_with_mutex(&_hashes_mtx) {
		if (_hashes) {
			listnode_delete(_hashes, hash); //从list里找到hash所在的节点，然后删除这个节点
			if (_hashes->count == 0) {      //若链表为空，删除链表主体结构
				list_delete(&_hashes);
			}
		}
	}

	XFREE(MTYPE_HASH, hash->name);         //表名

	XFREE(MTYPE_HASH_INDEX, hash->index);  //桶链结构
	XFREE(MTYPE_HASH, hash);               //表结构
}

//_hashes结构是一个个hash表用双向指针连起来的总hash链表
static struct list *_hashes;

FRR - 基础数据结构篇 - 数据结构 hash.c

frr底层中hash表的不同之处

数据结构

函数

1：hash扩容

2：字符串的hash函数

3：hash表释放某数据所在的桶

4：hash元素迭代

5：hash表中各种数据的计算和输出

6：hash表释放

FRR - 基础数据结构篇 - 数据结构 hash.c

FRR - 基础数据结构篇 - 数据同步atomlist.c

Java基础数据结构及其实现原理（一）

Python基础数据结构

【Python基础语法04】数据结构之字典

数据结构与算法－栈篇(js实现)

数据结构与算法－队列篇(js实现)

数据结构与算法－队列篇(js实现)

数据结构与算法－栈篇(js实现)

C_数据结构与算法（一）：C语言基础 - Rudolph_Browne