欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

FRR - 基础数据结构篇 - 数据结构 hash.c

程序员文章站 2024-02-13 20:46:10
...

frr底层中hash表的不同之处

        这里用到的hash表也是传统的数组桶链方式,有几点不同,同样也很精妙的点:

1:数组每个元素不是单纯一个指针,也是一个桶元素。并且只有数组上的桶元素会记录该桶所在的链上所有桶的个数。为了描述方便,直接将数组元素上的桶联合挂在该桶上的所有桶统称为桶链。

2:hash表主体结构维护有一些很有价值的数据,例如,

      2.1 max_size,hash表数组的最大长度,避免表无上限扩容;

      2.2 count,已分配内存的桶个数,也是hash表中挂载数据的个数,也便于分析相关内存分配的问题;

      2.3 hashstats中的数据,尤其是ssq,每次hash节点变动都会修改,记录了整个表的过程动态变化,从一个过程的角度来评定hash函数的好坏

数据结构

//hash主体的结构
struct hash {
	/* Hash bucket. */
	struct hash_bucket **index;                    //桶链的hash表

	/* Hash table size. Must be power of 2 */      //2的幂能够保证hash_key%size == hash_key&(size-1),提高计算效率
	unsigned int size;                             //hash表数组的长度

	/* If max_size is 0 there is no limit */
	unsigned int max_size;                         //hash数组长度的限制,扩容时会用

	/* Key make function. */
	unsigned int (*hash_key)(const void *);        //hash函数

	/* Data compare function. */
	bool (*hash_cmp)(const void *, const void *);  //查找和跟新桶里数据会用到的比较回调函数

	/* Backet alloc. */
	unsigned long count;                           //已分配内存的桶个数

	struct hashstats stats;

	/* hash name */
	char *name;
};

//用于计算负载,慢负载和标准差等一系列评定hash函数的参数
struct hashstats {
	/* number of empty hash buckets */
	atomic_uint_fast32_t empty;                   //空数组元素的个数
	/* sum of squares of bucket length */
	atomic_uint_fast32_t ssq;                     //记录hash表每次跟新桶的过程负载因子
                                                     //用于评定hash函数的优劣
};

struct hash_bucket {
	/*
	 * if this bucket is the head of the linked listed, len denotes the
	 * number of elements in the list
	 */
        //只有链首的桶中记录非0值,也是该链上所有桶的总个数,以此值区分链首与链上其他桶节点
	int len;                                     //记录的桶个数

	/* Linked list.  */
	struct hash_bucket *next;

	/* Hash key. */
	unsigned int key;                           //经hash函数计算过多的key值

	/* Data.  */
	void *data;                                 //挂在桶上的数据
};

函数

     1:hash扩容

/* Expand hash if the chain length exceeds the threshold. */
static void hash_expand(struct hash *hash)
{
	unsigned int i, new_size;
	struct hash_bucket *hb, *hbnext, **new_index;

	new_size = hash->size * 2;

    //max_size 为0则不作最大长度限制
	if (hash->max_size && new_size > hash->max_size)
		return;

	new_index = XCALLOC(MTYPE_HASH_INDEX,
			    sizeof(struct hash_bucket *) * new_size);

	hash->stats.empty = new_size;

    //遍历数组上每个桶链
	for (i = 0; i < hash->size; i++)
        //遍历每个链表
		for (hb = hash->index[i]; hb; hb = hbnext) {
			unsigned int h = hb->key & (new_size - 1);  //要处理的桶元素

			hbnext = hb->next;                          //记录下一个桶元素
			hb->next = new_index[h];                    //旧表中每个元素插在新表新桶链的首元素位置上

			int oldlen = hb->next ? hb->next->len : 0;  //将元素插入前,新表新桶链是否为空
			int newlen = oldlen + 1;

			if (newlen == 1)
				hash->stats.empty--;                    //插入前非空桶链,新表空桶链少了一个
			else
				hb->next->len = 0;

			hb->len = newlen;                           //记录新长度

			hash_update_ssq(hash, oldlen, newlen);      //跟新ssq值
 
			new_index[h] = hb;
		}

	/* Switch to new table */
	XFREE(MTYPE_HASH_INDEX, hash->index);
	hash->size = new_size;
	hash->index = new_index;
}


//关于ssq跟新的宏
#define hash_update_ssq(hz, old, new)                                          \
	atomic_fetch_add_explicit(&hz->stats.ssq, (new + old) * (new - old),   \
				  memory_order_relaxed);

#define atomic_fetch_add_explicit(ptr, val, mem)                               \
	({                                                                     \
		__sync_synchronize();                                          \
		typeof(*ptr) rval = __sync_fetch_and_add((ptr), (val));        \
		__sync_synchronize();                                          \
		rval;                                                          \
	})

      其中__sync_synchronize()和__sync_fetch_and_add的理解可参考博文__sync_*系列原子操作函数等 

2:字符串的hash函数

unsigned int string_hash_make(const char *str)
{
	unsigned int hash = 0;

	while (*str)
		hash = (hash * 33) ^ (unsigned int)*str++;

	return hash;
}

 3:hash表释放某数据所在的桶

void *hash_release(struct hash *hash, void *data)
{
	void *ret;
	unsigned int key;
	unsigned int index;
	struct hash_bucket *bucket;
	struct hash_bucket *pp;

	key = (*hash->hash_key)(data);
	index = key & (hash->size - 1);

    //pp初始化和bucket指向桶链首元素,之后pp指向bucket的上一个桶元素
	for (bucket = pp = hash->index[index]; bucket; bucket = bucket->next) {
		if (bucket->key == key
		    && (*hash->hash_cmp)(bucket->data, data)) {   //找到数据
			int oldlen = hash->index[index]->len;         //首元素存的桶链长度
			int newlen = oldlen - 1;

			if (bucket == pp)                             //要删除的节点是桶链首节点
				hash->index[index] = bucket->next;        //直接删除首节点
			else
				pp->next = bucket->next;                  //删除非首元素的链表节点

			if (hash->index[index])
				hash->index[index]->len = newlen;         //给桶链首元素赋值该链新长度
			else
				hash->stats.empty++;                      //删除完,成了空桶链

			hash_update_ssq(hash, oldlen, newlen);

			ret = bucket->data;                           //返回同上的数据
			XFREE(MTYPE_HASH_BACKET, bucket);             //删除桶所占的内存
			hash->count--;                                //分配节点的个数少了一个
			return ret;
		}
		pp = bucket;
	}
	return NULL;
}

4:hash元素迭代

//hash迭代
void hash_iterate(struct hash *hash, void (*func)(struct hash_bucket *, void *),
		  void *arg)
{
	unsigned int i;
	struct hash_bucket *hb;
	struct hash_bucket *hbnext;

	for (i = 0; i < hash->size; i++)
		for (hb = hash->index[i]; hb; hb = hbnext) {
			/* get pointer to next hash bucket here, in case (*func)
			 * decides to delete hb by calling hash_release
			 */
			hbnext = hb->next;                              //记录下一个节点,保证此次迭代链不断
			(*func)(hb, arg);                               //对当前节点进行删除等操作处理
		} 
}

5:hash表中各种数据的计算和输出

//一个输出hash参数的命令,只需要关注函数体中各参数的计算即可,中间的英文备注可帮助解释
DEFUN_NOSH(show_hash_stats,
           show_hash_stats_cmd,
           "show debugging hashtable [statistics]",
           SHOW_STR
           DEBUG_STR
           "Statistics about hash tables\n"
           "Statistics about hash tables\n")
{
	struct hash *h;
	struct listnode *ln;
	struct ttable *tt = ttable_new(&ttable_styles[TTSTYLE_BLANK]);

	ttable_add_row(tt, "Hash table|Buckets|Entries|Empty|LF|SD|FLF|SD");
	tt->style.cell.lpad = 2;
	tt->style.cell.rpad = 1;
	tt->style.corner = '+';
	ttable_restyle(tt);
	ttable_rowseps(tt, 0, BOTTOM, true, '-');

	/* Summary statistics calculated are:
	 *
	 * - Load factor: This is the number of elements in the table divided
	 *   by the number of buckets. Since this hash table implementation
	 *   uses chaining, this value can be greater than 1.
	 *   This number provides information on how 'full' the table is, but
	 *   does not provide information on how evenly distributed the
	 *   elements are.
	 *   Notably, a load factor >= 1 does not imply that every bucket has
	 *   an element; with a pathological hash function, all elements could
	 *   be in a single bucket.
	 *
	 * - Full load factor: this is the number of elements in the table
	 *   divided by the number of buckets that have some elements in them.
	 *
	 * - Std. Dev.: This is the standard deviation calculated from the
	 *   relevant load factor. If the load factor is the mean of number of
	 *   elements per bucket, the standard deviation measures how much any
	 *   particular bucket is likely to deviate from the mean.
	 *   As a rule of thumb this number should be less than 2, and ideally
	 *   <= 1 for optimal performance. A number larger than 3 generally
	 *   indicates a poor hash function.
	 */

	double lf;    // load factor
	double flf;   // full load factor
	double var;   // overall variance
	double fvar;  // full variance
	double stdv;  // overall stddev
	double fstdv; // full stddev

	long double x2;   // h->count ^ 2
	long double ldc;  // (long double) h->count
	long double full; // h->size - h->stats.empty
	long double ssq;  // ssq casted to long double

	pthread_mutex_lock(&_hashes_mtx);
	if (!_hashes) {
		pthread_mutex_unlock(&_hashes_mtx);
		ttable_del(tt);
		vty_out(vty, "No hash tables in use.\n");
		return CMD_SUCCESS;
	}

	for (ALL_LIST_ELEMENTS_RO(_hashes, ln, h)) {
		if (!h->name)
			continue;

		ssq = (long double)h->stats.ssq;
		x2 = h->count * h->count;
		ldc = (long double)h->count;
		full = h->size - h->stats.empty;
		lf = h->count / (double)h->size;                              //LF,负载系数
		flf = full ? h->count / (double)(full) : 0;                   //FLF,满负载
		var = ldc ? (1.0 / ldc) * (ssq - x2 / ldc) : 0;
		fvar = full ? (1.0 / full) * (ssq - x2 / full) : 0;
		var = (var < .0001) ? 0 : var;
		fvar = (fvar < .0001) ? 0 : fvar;
		stdv = sqrt(var);
		fstdv = sqrt(fvar);                                           //标准差,<=1最好,<2符合标准,>3则hash效果很差

		ttable_add_row(tt, "%s|%d|%ld|%.0f%%|%.2lf|%.2lf|%.2lf|%.2lf",
			       h->name, h->size, h->count,
			       (h->stats.empty / (double)h->size) * 100, lf,
			       stdv, flf, fstdv);
	}
	pthread_mutex_unlock(&_hashes_mtx);

	/* display header */
	char header[] = "Showing hash table statistics for ";
	char underln[sizeof(header) + strlen(frr_protonameinst)];
	memset(underln, '-', sizeof(underln));
	underln[sizeof(underln) - 1] = '\0';
	vty_out(vty, "%s%s\n", header, frr_protonameinst);
	vty_out(vty, "%s\n", underln);

	vty_out(vty, "# allocated: %d\n", _hashes->count);
	vty_out(vty, "# named:     %d\n\n", tt->nrows - 1);

	if (tt->nrows > 1) {
		ttable_colseps(tt, 0, RIGHT, true, '|');
		char *table = ttable_dump(tt, "\n");
		vty_out(vty, "%s\n", table);
		XFREE(MTYPE_TMP, table);
	} else
		vty_out(vty, "No named hash tables to display.\n");

	ttable_del(tt);

	return CMD_SUCCESS;
}

6:hash表释放

void hash_free(struct hash *hash)
{
	frr_with_mutex(&_hashes_mtx) {
		if (_hashes) {
			listnode_delete(_hashes, hash); //从list里找到hash所在的节点,然后删除这个节点
			if (_hashes->count == 0) {      //若链表为空,删除链表主体结构
				list_delete(&_hashes);
			}
		}
	}

	XFREE(MTYPE_HASH, hash->name);         //表名

	XFREE(MTYPE_HASH_INDEX, hash->index);  //桶链结构
	XFREE(MTYPE_HASH, hash);               //表结构
}

//_hashes结构是一个个hash表用双向指针连起来的总hash链表
static struct list *_hashes;