Basic Implementations of Closed Hashing and Open Hashing
The concept of hashing
In sequential search and in binary search trees, there is no direct relationship between where an element is stored and its key, so finding an element requires multiple comparisons of keys. The efficiency of the search therefore depends on how many comparisons are made.
The ideal search method retrieves the target element from the table directly, without any comparisons. If we can build a storage structure in which some function (hashFunc) establishes a one-to-one mapping between an element's storage location and its key, then during a search that function leads us to the element very quickly.
This approach is the hashing (scatter storage) method.
Two common hash functions are given below:
Direct addressing method
Take a linear function of the key as the hash address: Hash(Key) = A * Key + B
Advantages: simple and uniform
Disadvantages: the distribution of the keys must be known in advance
Suitable for cases where the keys are fairly small and continuous (a small sketch follows below)
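As a minimal sketch of this idea (the function name DirectAddrHash and the constants A and B below are my own illustrative choices, not part of the original text):
long DirectAddrHash(long key)
{
	// Hash(Key) = A * Key + B; A and B are assumed constants chosen for the key range
	const long A = 1;
	const long B = 0;
	// the table must be large enough to cover every address this can produce,
	// which is why the key distribution has to be known in advance
	return A * key + B;
}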
Division-remainder method
Let m be the number of addresses allowed in the hash table. Choose a prime p that is not greater than m but as close to m as possible as the divisor, and convert a key into a hash address with the hash function Hash(key) = key % p (p <= m). A small sketch follows below.
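As a minimal sketch (the function name DivisionHash is my own; in the code further down, the prime is taken from a table by GetNextPrime):
long DivisionHash(long key, long p)
{
	// p is assumed to be a prime no greater than the table size m
	return key % p;	// hash address in [0, p)
}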
When applying hashing to a problem, hash collisions also have to be taken into account.
That is, for two data elements with keys Ki and Kj (i != j), we have Ki != Kj, yet HashFun(Ki) == HashFun(Kj).
In other words, different keys are mapped by the same hash function to the same hash address; this is called a hash collision (hash clash). Data elements that have different keys but the same hash address are called "synonyms".
There are two common ways to resolve hash collisions:
Closed hashing: also called open addressing. When a collision occurs, as long as the hash table is not full there must still be empty slots, so the key can be placed in the "next" empty slot of the table.
Open hashing: also called separate chaining (the linked-list method). First compute a hash address for every key with the hash function; keys with the same address belong to the same subset, each subset is called a bucket, the elements of each bucket are linked together in a singly linked list, and the head pointers of these lists are stored in the hash table. A short worked example of both follows.
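As a quick illustration (the key values here are my own example, not taken from the code below): with a table of size 10 and Hash(key) = key % 10, inserting 5, 25 and 15 under closed hashing with linear probing goes like this: 5 lands in slot 5; 25 also hashes to 5, which is occupied, so it probes forward and lands in slot 6; 15 hashes to 5, probes slots 6 and 7, and settles in slot 7. Under open hashing, 5, 25 and 15 would all be chained into the linked list hanging off bucket 5 instead.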
That is the general picture of hashing. If you still have questions about closed or open hashing, you can look them up on Baidu; I will not go into more detail here. The code implementations follow:
Closed hashing
#pragma once
#include<assert.h>
#include<malloc.h>
#include<stdio.h>
#include<string.h>// for strcmp: HTDataType below is char*, so keys are compared by content
//typedef int HTDataType;
#define MAX_SIZE 10
#define _PrimeSize 28
typedef char* HTDataType;
typedef enum {EMPTY, EXIST, DELETE}State;
typedef long(*_DataToInt)(HTDataType data);// converts a key to an integer; returns long to match StrToInt/DefToInt
typedef struct HTElem
{
HTDataType _data;
State _state;
}HTElem;
typedef struct HashTable
{
HTElem *_ht;
int _size;     // number of valid elements in the hash table
int _capacity; // capacity of the hash table
int _total;    // number of slots whose state is EXIST or DELETE (used for the load factor)
_DataToInt PDInt;
}HashTable, HT;
void InitHashTable(HT* pHT, int capacity, _DataToInt PDInt);
int InsertHashTable(HT* pHT, HTDataType data);
int DeleteHashTable(HT* pHT, HTDataType data);
int FindHashTable(HT* pHT, HTDataType data);
int EmptyHashTable(HT* pHT);
int SizeHashTable(HT* pHT);
//int HashFunc(HTDataType data);
int HashFunc(HashTable *pHT, HTDataType data);// compute the hash address
unsigned long GetNextPrime(unsigned long prime);// get the next prime from the prime table
long StrToInt(const char * str);// convert a string key to an integer (BKDR-style hash)
long DefToInt(long data);// identity conversion for integer keys
#include"hash.h"
long StrToInt(const char * str)
{
unsigned int seed = 131; // 31 131 1313 13131 131313
unsigned int hash = 0;
while (*str)
{
hash = hash * seed + (*str++);
}
return (hash & 0x7FFFFFFF);
}
long DefToInt(long data)
{
return data;
}
unsigned long GetNextPrime(unsigned long prime)
{
int i = 0;
static const unsigned long _PrimeList[_PrimeSize] =
{ 53ul, 97ul, 193ul, 389ul, 769ul,
1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
1610612741ul, 3221225473ul, 4294967291ul
};
for (; i < _PrimeSize; i++)
{
if (_PrimeList[i] > prime)
return _PrimeList[i];
}
return -1;
}
int HashFunc(HashTable *pHT,HTDataType data)// compute the hash address
{
assert(pHT);
return pHT->PDInt(data) % pHT->_capacity;
}
void InitHashTable(HT* pHT, int capacity, _DataToInt PDInt)// initialization
{
assert(pHT);
int i = 0;
//capacity = GetNextPrime(capacity);
pHT->_capacity = capacity;
pHT->_ht = (HTElem*)malloc(sizeof(HTElem)*capacity);
if (NULL == pHT->_ht)
{
assert(0);
return;
}
for (i = 0; i < capacity; ++i)
pHT->_ht[i]._state = EMPTY;
pHT->_size = 0;
pHT->_total = 0;
pHT->PDInt = PDInt;
}
// grow the table when the load factor exceeds 0.7
void _CheckCapacity(HashTable* pHT)
{
int hashAddr = 0;
assert(pHT);
if (pHT->_total * 10 / pHT->_capacity >= 7)
{
int i = 0;
int step = 0;// probe step counter for the secondary (quadratic-style) probing branch
// 1. allocate the new table
int oldCapacity = pHT->_capacity;
int newCapacity = GetNextPrime(pHT->_capacity * 2);
//int newCapacity = pHT->_capacity * 2;
HTElem *NewSpace = (HTElem*)malloc(sizeof(HTElem)*(newCapacity));
if (NULL == NewSpace)
{
assert(0);
return;
}
pHT->_capacity = newCapacity;
// 2. rehash the data from the old table into the new one
for (; i <newCapacity; ++i)
{
NewSpace[i]._state = EMPTY;
}
for (i = 0; i < oldCapacity; i++)
{
if (EXIST != pHT->_ht[i]._state)
continue;
hashAddr = HashFunc(pHT, pHT->_ht[i]._data);// rehash with the updated (new) capacity
step = 0;
while (EMPTY != NewSpace[hashAddr]._state)// the slot is not empty
{
// state is EXIST or DELETE
#if 1
// linear probing: look for an empty slot, wrapping around at the new capacity
hashAddr++;
if (hashAddr == newCapacity)
hashAddr = 0;
#else
// secondary (quadratic-style) probing, using its own counter instead of the loop index
step++;
hashAddr = (hashAddr + 2 * step + 1) % newCapacity;
#endif
}
NewSpace[hashAddr]._data = pHT->_ht[i]._data;
NewSpace[hashAddr]._state = EXIST;
}
// 3. free the old table
free(pHT->_ht);
pHT->_ht = NewSpace;
pHT->_total = pHT->_size;
}
}
int InsertHashTable(HT* pHT, HTDataType data)// insert
{
int hashAddr;
int i = 0;
assert(pHT);
_CheckCapacity(pHT);
// 1. compute the hash address
hashAddr = HashFunc(pHT,data);
// 2. find a slot to store the element
while (EMPTY != pHT->_ht[hashAddr]._state)// the slot is not empty
{
// state is EXIST or DELETE
if (EXIST == pHT->_ht[hashAddr]._state &&
0 == strcmp(data, pHT->_ht[hashAddr]._data))// the element already exists, so do not insert it again (keys are strings, compare by content)
{
return 0;
}
#if 1
// linear probing: look for an empty slot, wrapping around at the table capacity
hashAddr++;
if (hashAddr == pHT->_capacity)
hashAddr = 0;
#else
// secondary (quadratic-style) probing
i++;
hashAddr = hashAddr + 2 * i + 1;
hashAddr %= pHT->_capacity;
#endif
}
// 3. insert the element
pHT->_ht[hashAddr]._data = data;
pHT->_ht[hashAddr]._state = EXIST;
pHT->_size++;
pHT->_total++;
return 1;// insertion succeeded
}
int DeleteHashTable(HT* pHT, HTDataType data)// delete
{
int hashAddr = FindHashTable(pHT, data);
if (-1 != hashAddr)
{
pHT->_ht[hashAddr]._state = DELETE;
pHT->_size -= 1;
return 1;
}
return 0;
}
int FindHashTable(HT* pHT, HTDataType data)// find: return the hash address, or -1 if not present
{
int hashAddr;
assert(pHT);
hashAddr = HashFunc(pHT, data);
while (EMPTY != pHT->_ht[hashAddr]._state)
{
if (EXIST == pHT->_ht[hashAddr]._state &&
0 == strcmp(data, pHT->_ht[hashAddr]._data))// keys are strings, compare by content
return hashAddr;
// linear probing, consistent with the probing used in InsertHashTable
hashAddr++;
if (hashAddr == pHT->_capacity)
hashAddr = 0;
}
return -1;
}
int EmptyHashTable(HT* pHT)// check whether the table is empty
{
assert(pHT);
return 0 == pHT->_size;
}
int SizeHashTable(HT* pHT)// number of valid elements
{
assert(pHT);
return pHT->_size;
}
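Here is a small usage sketch of the closed-hash table above (the main function and the sample keys are my own illustration, not part of the original code):
#include"hash.h"
int main()
{
	HT ht;
	InitHashTable(&ht, 10, (_DataToInt)StrToInt);// string keys; the cast is needed because StrToInt takes const char*
	InsertHashTable(&ht, "apple");
	InsertHashTable(&ht, "banana");
	printf("size = %d\n", SizeHashTable(&ht));// expected: 2
	printf("find apple -> address %d\n", FindHashTable(&ht, "apple"));// -1 would mean "not found"
	DeleteHashTable(&ht, "apple");
	printf("size = %d\n", SizeHashTable(&ht));// expected: 1
	free(ht._ht);// no destroy function is provided for the closed hash, so release the table directly
	return 0;
}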
Open hashing
#pragma once
#include<assert.h>
#include<stdlib.h>
#include<stdio.h>
#include<string.h>// for memset
#include<malloc.h>
typedef int HBDataType;
#define _PrimeSize 28
typedef struct HashBucketNode
{
struct HashBucketNode* _pNext;
HBDataType _data;
}Node,*PNode;
typedef struct HashBucket
{
PNode* _table;
int _capacity;
int _size;
}HashBucket;
void InitHashBucket(HashBucket* pHB, int capacity);
void InsertHashBucket(HashBucket* pHB, HBDataType data);
int DeleteHashBucket(HashBucket* pHB, HBDataType data);
int FindHashBucket(HashBucket* pHB, HBDataType data);
int EmptyHashBucket(HashBucket* pHB);
int SizeHashBucket(HashBucket* pHB);
void DestroyHashBucket(HashBucket* pHB);
void _CheckCapacity(HashBucket* pHB);
int HashFunc(HashBucket *pHB, HBDataType data);// compute the hash address
unsigned long GetNextPrime(unsigned long prime);
#include"OpenHash.h"
unsigned long GetNextPrime(unsigned long prime)
{
int i = 0;
static const unsigned long _PrimeList[_PrimeSize] =
{ 53ul, 97ul, 193ul, 389ul, 769ul,
1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
1610612741ul, 3221225473ul, 4294967291ul
};
for (; i < _PrimeSize; i++)
{
if (_PrimeList[i] > prime)
return _PrimeList[i];
}
return -1;
}
void InitHashBucket(HashBucket* pHB, int capacity)
{
assert(pHB);
pHB->_table = (PNode*)malloc(sizeof(PNode)*capacity);
if (NULL == pHB->_table)
{
assert(0);
return;
}
memset(pHB->_table, 0, sizeof(PNode)*capacity);
pHB->_size = 0;
pHB->_capacity = capacity;
}
int HashFunc(HashBucket *pHB, HBDataType data)// compute the hash address
{
assert(pHB);
return data%pHB->_capacity;
}
PNode BuyNode(HBDataType data)
{
PNode NewNode = (PNode)malloc(sizeof(Node));
if (NULL == NewNode)
{
assert(0);
return NULL;
}
NewNode->_data = data;
NewNode->_pNext = NULL;
return NewNode;
}
void _CheckCapacity(HashBucket* pHB)
{
assert(pHB);
// grow when the number of elements equals the number of buckets (ideal state: one node per bucket)
if (pHB->_size == pHB->_capacity)
{
int newCapacity = GetNextPrime(pHB->_capacity);
int i = 0;
int hashAddr = 0;
int oldCapacity = pHB->_capacity;
// allocate the new bucket array
PNode* pNewTable = (PNode*)malloc(sizeof(PNode)*newCapacity);
if (NULL == pNewTable)
{
assert(0);
return;
}
memset(pNewTable, 0, sizeof(PNode)*newCapacity);
pHB->_capacity = newCapacity;
// rehash the elements into the new table
for (; i < oldCapacity; i++)
{
// rehash every node in bucket i
PNode pCur = pHB->_table[i];
while (pCur)
{
hashAddr = HashFunc(pHB, pCur->_data);
// unlink pCur from the old list
pHB->_table[i] = pCur->_pNext;
// push pCur onto the front of its new bucket
pCur->_pNext = pNewTable[hashAddr];
pNewTable[hashAddr] = pCur;
// move to the next node of the old list
pCur = pHB->_table[i];
}
}
free(pHB->_table);
pHB->_table = pNewTable;
}
}
void InsertHashBucket(HashBucket* pHB, HBDataType data)
{
//PNode pCur = NULL;
PNode NewNode = NULL;
int hashAddr = 0;
assert(pHB);
_CheckCapacity(pHB);// grow if the buckets are already "full" (one node per bucket on average)
hashAddr = HashFunc(pHB, data);
//pCur = pHB->_table[hashAddr];
NewNode = BuyNode(data);
NewNode->_pNext = pHB->_table[hashAddr];
pHB->_table[hashAddr] = NewNode;
pHB->_size++;
}
int DeleteHashBucket(HashBucket* pHB, HBDataType data)
{
PNode* pPre = NULL;// address of the link that points to pCur
PNode pCur = NULL;
int hashAddr = 0;
assert(pHB);
hashAddr = HashFunc(pHB, data);
pPre = &pHB->_table[hashAddr];
pCur = pHB->_table[hashAddr];
while (pCur)
{
if (pCur->_data == data)
{
*pPre = pCur->_pNext;// bypass the node being removed
free(pCur);
pHB->_size--;
return 1;
}
pPre = &pCur->_pNext;// advance pPre so it always holds the link that points to pCur
pCur = pCur->_pNext;
}
return 0;
}
int FindHashBucket(HashBucket* pHB, HBDataType data)
{
PNode pCur = NULL;
int hashAddr = 0;
assert(pHB);
hashAddr = HashFunc(pHB, data);
pCur = pHB->_table[hashAddr];
while (pCur)
{
if (pCur->_data == data)
return 1;// found
pCur = pCur->_pNext;
}
return 0;// not found
}
int EmptyHashBucket(HashBucket* pHB)
{
assert(pHB);
return !(pHB->_size);
}
int SizeHashBucket(HashBucket* pHB)
{
assert(pHB);
return pHB->_size;
}
void DestroyHashBucket(HashBucket* pHB)
{
int i = 0;
assert(pHB);
for(; i<pHB->_capacity ;i++)
{
PNode pCur = pHB->_table[i];
while (pCur)
{
pHB->_table[i] = pCur->_pNext;
free(pCur);
pCur = pHB->_table[i];
}
}
free(pHB->_table);// also release the bucket array itself
pHB->_table = NULL;
pHB->_size = 0;
pHB->_capacity = 0;
}
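Here is a small usage sketch of the hash bucket above (the main function and the sample keys are my own illustration):
#include"OpenHash.h"
int main()
{
	HashBucket hb;
	InitHashBucket(&hb, 10);
	InsertHashBucket(&hb, 5);
	InsertHashBucket(&hb, 15);// 15 % 10 == 5, so it is chained in front of 5 in bucket 5
	InsertHashBucket(&hb, 26);
	printf("size = %d\n", SizeHashBucket(&hb));// expected: 3
	printf("find 15: %d\n", FindHashBucket(&hb, 15));// expected: 1
	DeleteHashBucket(&hb, 5);
	printf("find 5: %d\n", FindHashBucket(&hb, 5));// expected: 0
	DestroyHashBucket(&hb);
	return 0;
}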
The implementation ideas of closed hashing and open hashing are roughly the same; the difference is that closed hashing stores the elements directly in a dynamic array, while open hashing uses a dynamic array that holds the head pointers of the bucket linked lists.