欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

哈希闭散列和开散列的基本实现

程序员文章站 2022-03-15 19:44:57
...

哈希概念
顺序搜索以及二叉树搜索树中,元素存储位置和元素各关键码之间没有对应的关系,因此在查找一个元素时,必须要经过关键码的 多次比较。搜索的效率取决于搜索过程中元素的比较次数。
理想的搜索方法:可以不经过任何比较,一次直接从表中得到要搜索的元素。 如果构造一种存储结构,通过某种函数(hashFunc)使元素的存储位置与它的关键码之间能够建立一一映射的关系,那么在查找时通过该函数可以很快找到该元素
而这种方法就是哈希(散列)方法
下面给出两种常见的哈希函数:
直接定址法
取关键字的某个线性函数为散列地址:Hash(Key)= A*Key + B
优点:简单、均匀
缺点:需要事先知道关键字的分布情况
适合查找比较小且连续的情况
除留余数法
设散列表中允许的地址数为m,取一个不大于m,但接近或者等于m的质数p作为除数,按照哈希函数:Hash(key) = key % p(p<=m),将关键码转换成哈希地址
在应用哈希解决问题时还需要考虑到哈希冲突
也就是对于两个数据元素的关键字 Ki 和 Kj (i != j),有 Ki != Kj,但有: HashFun(Ki) == HashFun(Kj)
即不同关键字通过相同哈希函数计算出相同的哈希地址,该种现象称为哈希冲突或哈希碰撞。把具有不同关键码而具有相同哈希地 址的数据元素称为“同义词”。
解决哈希冲突有两种方法:
闭散列:也叫开放地址法,当发生哈希冲突时,如果哈希表未被装满,说明在哈希表中必然还有空位置,那么可以把key存放到表中 “下一个” 空位中去
开散列:开散列法又叫链地址法(开链法)。 开散列法:首先对关键码集合用散列函数计算散列地址,具有相同地址的关键码归于同一子集合,每一个子集合称为一个桶,各个 桶中的元素通过一个单链表链接起来,各链表的头结点存储在哈希表中。
以上就是哈希的大致分布,如果对于闭散列和开散列还有什么疑问,可以去度娘一下,这里就不详细介绍了,下面请看代码的实现:

闭散列

#pragma once
#include<assert.h>
#include<malloc.h>
#include<stdio.h>

//typedef int HTDataType;
#define MAX_SIZE 10
#define _PrimeSize 28

typedef char* HTDataType;

/* Slot state for closed hashing: EMPTY = never used, EXIST = live element,
 * DELETE = lazily removed (probe chains must skip it, not stop). */
typedef enum {EMPTY, EXIST, DELETE}State;
/* Key-to-integer conversion callback used by HashFunc.
 * FIX: the original typedef omitted the return type (implicit int),
 * which is invalid since C99; the converters below return long. */
typedef long (*_DataToInt)(HTDataType data);
typedef struct HTElem
{
    HTDataType _data;  /* stored key */
    State _state;      /* slot state, see State above */
}HTElem;

/* Closed (open-addressing) hash table. */
typedef struct HashTable
{
    HTElem *_ht;       /* slot array of _capacity entries */
    int _size;         /* number of EXIST elements */
    int _capacity;     /* total slot count */
    int _total;        /* EXIST + DELETE slots; drives the load factor */
    _DataToInt PDInt;  /* key -> integer converter */
}HashTable, HT;

void InitHashTable(HT* pHT, int capacity, _DataToInt PDInt);

int InsertHashTable(HT* pHT, HTDataType data);
int DeleteHashTable(HT* pHT, HTDataType data);
int FindHashTable(HT* pHT, HTDataType data);
int EmptyHashTable(HT* pHT);
int SizeHashTable(HT* pHT);
//int HashFunc(HTDataType data);
int HashFunc(HashTable *pHT, HTDataType data);/* compute hash address */

unsigned long GetNextPrime(unsigned long prime);/* next prime table size */
 long StrToInt(const char * str);/* string key -> integer (BKDR-style hash) */
 /* NOTE(review): DefToInt(long) does not match _DataToInt's HTDataType
  * (char*) parameter when used as PDInt — confirm intended usage. */
 long DefToInt(long data);/* identity converter for integer keys */
#include"hash.h"

 long StrToInt(const char * str)
{
    unsigned int seed = 131; // 31 131 1313 13131 131313
    unsigned int hash = 0;
    while (*str)
    {
        hash = hash * seed + (*str++);
    }
    return (hash & 0x7FFFFFFF);
}

 long DefToInt(long data)
{
    return data;
}

/* Return the smallest prime in the growth table strictly greater than
 * `prime`, or (unsigned long)-1 when the table is exhausted.
 *
 * FIX: the original loop condition compared the counter against the array
 * itself (`i < _PrimeList`, an int-vs-pointer comparison) instead of the
 * element count; the bound is now derived from the array with sizeof. */
unsigned long GetNextPrime(unsigned long prime)
{
    static const unsigned long _PrimeList[] =
    { 53ul,     97ul,        193ul,       389ul,       769ul,
        1543ul,       3079ul,       6151ul,      12289ul,     24593ul,
        49157ul,      98317ul,      196613ul,    393241ul,    786433ul,
        1572869ul,    3145739ul,    6291469ul,   12582917ul,  25165843ul,
        50331653ul,   100663319ul,  201326611ul, 402653189ul, 805306457ul,
        1610612741ul, 3221225473ul, 4294967291ul
    };
    size_t i;
    for (i = 0; i < sizeof(_PrimeList) / sizeof(_PrimeList[0]); ++i)
    {
        if (_PrimeList[i] > prime)
            return _PrimeList[i];
    }
    return (unsigned long)-1; /* no larger prime available */
}

/* Map a key to a slot index by running the table's converter over the key
 * and taking the remainder by the current capacity. */
int HashFunc(HashTable *pHT,HTDataType data)
{
    long key;
    assert(pHT);
    key = pHT->PDInt(data);
    return key % pHT->_capacity;
}


/* Initialize a closed hash table with `capacity` slots, all marked EMPTY,
 * and record the key-to-integer converter.  Aborts (assert) when the slot
 * array cannot be allocated. */
void InitHashTable(HT* pHT, int capacity, _DataToInt PDInt)
{
    int slot;
    assert(pHT);
    //capacity = GetNextPrime(capacity);
    pHT->_capacity = capacity;
    pHT->_ht = (HTElem*)malloc(sizeof(HTElem) * capacity);
    if (pHT->_ht == NULL)
    {
        assert(0);
        return;
    }
    for (slot = 0; slot < capacity; slot++)
    {
        pHT->_ht[slot]._state = EMPTY;
    }
    pHT->_size = 0;
    pHT->_total = 0;
    pHT->PDInt = PDInt;
}

//哈希负载因子超过0.7扩充容量
/* Grow the table when the load factor (EXIST + DELETE slots over capacity)
 * reaches 0.7: allocate a larger prime-sized slot array, re-hash every
 * EXIST element into it with linear probing, and drop DELETE slots.
 *
 * FIX: removed the disabled "#else" quadratic-probing branch — it modified
 * the outer loop counter `i` and wrapped with MAX_SIZE instead of the new
 * capacity, so it would corrupt the rehash if ever enabled.  The active
 * linear-probing path is unchanged. */
void _CheckCapacity(HashTable* pHT)
{
    int hashAddr = 0;
    assert(pHT);
    if (pHT->_total * 10 / pHT->_capacity >= 7)
    {
        int i = 0;

        /* 1. allocate the new slot array */
        int oldCapacity = pHT->_capacity;
        int newCapacity = GetNextPrime(pHT->_capacity * 2);
        HTElem *NewSpace = (HTElem*)malloc(sizeof(HTElem)*(newCapacity));
        if (NULL == NewSpace)
        {
            assert(0);
            return;
        }
        /* _capacity must be updated before HashFunc is called below */
        pHT->_capacity = newCapacity;
        for (; i < newCapacity; ++i)
        {
            NewSpace[i]._state = EMPTY;
        }

        /* 2. re-hash every live element into the new array */
        for (i = 0; i < oldCapacity; i++)
        {
            if (EXIST != pHT->_ht[i]._state)
                continue;
            hashAddr = HashFunc(pHT, pHT->_ht[i]._data);
            /* linear probing: scan for the next EMPTY slot, wrapping */
            while (EMPTY != NewSpace[hashAddr]._state)
            {
                hashAddr++;
                if (hashAddr == newCapacity)
                    hashAddr = 0;
            }
            NewSpace[hashAddr]._data = pHT->_ht[i]._data;
            NewSpace[hashAddr]._state = EXIST;
        }

        /* 3. release the old array */
        free(pHT->_ht);
        pHT->_ht = NewSpace;
        pHT->_total = pHT->_size; /* DELETE slots are gone after rehash */
    }
}


/* Insert `data` into the table using linear probing.
 * Returns 1 on success, 0 if an equal key already EXISTs.
 *
 * FIX: the function is declared int but fell off the end without a return
 * on the success path (undefined behavior when the caller uses the value);
 * it now returns 1.  The disabled quadratic-probe branch (which clobbered
 * a local counter and wrapped with MAX_SIZE) was removed. */
int InsertHashTable(HT* pHT, HTDataType data)
{
    int hashAddr;
    assert(pHT);

    _CheckCapacity(pHT); /* grow first so a free slot is guaranteed */

    /* 1. compute the home address */
    hashAddr = HashFunc(pHT,data);

    /* 2. probe for a free slot */
    while (EMPTY != pHT->_ht[hashAddr]._state)
    {
        /* NOTE(review): keys are compared as pointers, not with strcmp —
         * two distinct buffers holding equal strings count as different
         * keys.  Confirm this is intended for char* keys. */
        if (EXIST == pHT->_ht[hashAddr]._state &&
            data == pHT->_ht[hashAddr]._data)
        {
            return 0; /* already present, do not insert */
        }

        /* linear probing with wrap-around */
        hashAddr++;
        if (hashAddr == pHT->_capacity)
            hashAddr = 0;
    }

    /* 3. place the element */
    pHT->_ht[hashAddr]._data = data;
    pHT->_ht[hashAddr]._state = EXIST;
    pHT->_size++;
    pHT->_total++;
    return 1;
}

/* Remove `data` by marking its slot DELETE (lazy deletion keeps probe
 * chains intact for later lookups).
 * Returns 1 if the key was found and removed, 0 otherwise. */
int DeleteHashTable(HT* pHT, HTDataType data)
{
    int slot = FindHashTable(pHT, data);
    if (slot == -1)
        return 0;
    pHT->_ht[slot]._state = DELETE;
    pHT->_size -= 1;
    return 1;
}

/* Look up `data`; return its slot index, or -1 when absent.
 *
 * FIX: the active probe sequence here was quadratic and wrapped with the
 * MAX_SIZE macro (10), while insertion probes linearly over the real
 * capacity — so lookups missed elements whenever capacity != 10 and could
 * index out of bounds.  The probe now mirrors InsertHashTable exactly. */
int FindHashTable(HT* pHT, HTDataType data)
{
    int hashAddr;
    assert(pHT);
    hashAddr = HashFunc(pHT,data);
    while (EMPTY != pHT->_ht[hashAddr]._state)
    {
        /* NOTE(review): pointer comparison — same caveat as insertion */
        if (EXIST == pHT->_ht[hashAddr]._state &&
            data == pHT->_ht[hashAddr]._data)
            return hashAddr;

        /* linear probing with wrap-around, matching InsertHashTable */
        hashAddr++;
        if (hashAddr == pHT->_capacity)
            hashAddr = 0;
    }
    return -1;
}

/* Return non-zero when the table holds no live elements. */
int EmptyHashTable(HT* pHT)
{
    assert(pHT);
    return pHT->_size == 0;
}
/* Return the number of live (EXIST) elements.
 * FIX: a stray "#endif" with no matching "#if" followed this function
 * (the header already uses #pragma once); it was removed. */
int SizeHashTable(HT* pHT)
{
    assert(pHT);
    return pHT->_size;
}

开散列

#pragma once
#include<assert.h>
#include<stdlib.h>
#include<stdio.h>
#include<malloc.h>
/* FIX: the implementation calls memset but <string.h> was never included,
 * producing an implicit function declaration. */
#include<string.h>

typedef int HBDataType;
#define _PrimeSize 28

/* Singly-linked node of one bucket chain. */
typedef struct HashBucketNode
{
    struct HashBucketNode* _pNext;
    HBDataType _data;
}Node,*PNode;

/* Open hashing (separate chaining): _table holds _capacity bucket-head
 * pointers; _size counts nodes across all buckets. */
typedef struct HashBucket
{
    PNode* _table;
    int _capacity;
    int _size;
}HashBucket;

void InitHashBucket(HashBucket* pHB, int capacity);

void InsertHashBucket(HashBucket* pHB, HBDataType data);
int DeleteHashBucket(HashBucket* pHB, HBDataType data);
int FindHashBucket(HashBucket* pHB, HBDataType data);
int EmptyHashBucket(HashBucket* pHB);
int SizeHashBucket(HashBucket* pHB);
void DestroyHashBucket(HashBucket* pHB);
void _CheckCapacity(HashBucket* pHB);
int HashFunc(HashBucket *pHB, HBDataType data);/* compute bucket index */

unsigned long GetNextPrime(unsigned long prime);
#include"OpenHash.h"



/* Return the smallest prime in the growth table strictly greater than
 * `prime`, or (unsigned long)-1 when the table is exhausted.
 *
 * FIX: the original loop condition compared the counter against the array
 * itself (`i < _PrimeList`, an int-vs-pointer comparison) instead of the
 * element count; the bound is now derived from the array with sizeof. */
unsigned long GetNextPrime(unsigned long prime)
{
    static const unsigned long _PrimeList[] =
    { 53ul,     97ul,        193ul,       389ul,       769ul,
        1543ul,       3079ul,       6151ul,      12289ul,     24593ul,
        49157ul,      98317ul,      196613ul,    393241ul,    786433ul,
        1572869ul,    3145739ul,    6291469ul,   12582917ul,  25165843ul,
        50331653ul,   100663319ul,  201326611ul, 402653189ul, 805306457ul,
        1610612741ul, 3221225473ul, 4294967291ul
    };
    size_t i;
    for (i = 0; i < sizeof(_PrimeList) / sizeof(_PrimeList[0]); ++i)
    {
        if (_PrimeList[i] > prime)
            return _PrimeList[i];
    }
    return (unsigned long)-1; /* no larger prime available */
}

/* Initialize the bucket table with `capacity` empty (NULL) chains.
 *
 * FIX: the head pointers were cleared with memset without <string.h>
 * being included, and all-bits-zero is not a portable null-pointer
 * representation; the table is now cleared with an explicit loop. */
void InitHashBucket(HashBucket* pHB, int capacity)
{
    int i;
    assert(pHB);
    pHB->_table = (PNode*)malloc(sizeof(PNode)*capacity);
    if (NULL == pHB->_table)
    {
        assert(0);
        return;
    }
    for (i = 0; i < capacity; ++i)
        pHB->_table[i] = NULL;

    pHB->_size = 0;
    pHB->_capacity = capacity;
}

/* Map a key to a bucket index in [0, _capacity).
 *
 * FIX: plain `data % _capacity` yields a negative result for negative
 * keys, which would index the bucket array out of bounds; the remainder
 * is now normalized into range. */
int HashFunc(HashBucket *pHB, HBDataType data)
{
    int addr;
    assert(pHB);
    addr = data % pHB->_capacity;
    if (addr < 0)
        addr += pHB->_capacity; /* normalize negative remainder */
    return addr;
}

/* Allocate and initialize a detached chain node holding `data`.
 * Returns NULL (after the debug assert fires) on allocation failure. */
PNode BuyNode(HBDataType data)
{
    PNode fresh = (PNode)malloc(sizeof(Node));
    if (fresh == NULL)
    {
        assert(0);
        return NULL;
    }
    fresh->_pNext = NULL;
    fresh->_data = data;
    return fresh;
}

/* Grow the bucket table once the node count reaches the bucket count
 * (target state: about one node per bucket) and re-hash every chain node.
 *
 * FIX: the new table was allocated as malloc(sizeof(PNode)) — room for a
 * single pointer — then written up to newCapacity entries, a heap buffer
 * overflow; it is now sized correctly.  The memset was also replaced with
 * an explicit NULL loop (<string.h> was not included, and all-bits-zero is
 * not a portable null-pointer representation). */
void _CheckCapacity(HashBucket* pHB)
{
    assert(pHB);
    if (pHB->_size == pHB->_capacity)
    {
        int newCapacity = GetNextPrime(pHB->_capacity);
        int i = 0;
        int hashAddr = 0;
        int oldCapacity = pHB->_capacity;

        /* allocate and clear the new bucket array */
        PNode* pNewTable = (PNode*)malloc(sizeof(PNode) * newCapacity);
        if (NULL == pNewTable)
        {
            assert(0);
            return;
        }
        for (i = 0; i < newCapacity; ++i)
            pNewTable[i] = NULL;

        /* _capacity must be updated before HashFunc is called below */
        pHB->_capacity = newCapacity;

        /* move every node of every old chain into the new table */
        for (i = 0; i < oldCapacity; i++)
        {
            PNode pCur = pHB->_table[i];
            while (pCur)
            {
                hashAddr = HashFunc(pHB, pCur->_data);

                /* unlink pCur from the old chain ... */
                pHB->_table[i] = pCur->_pNext;

                /* ... and push it onto the head of the new chain */
                pCur->_pNext = pNewTable[hashAddr];
                pNewTable[hashAddr] = pCur;

                /* next node of the old chain */
                pCur = pHB->_table[i];
            }
        }

        free(pHB->_table);
        pHB->_table = pNewTable;
    }
}

/* Insert `data` at the head of its bucket chain (duplicates allowed).
 *
 * FIX: BuyNode returns NULL on allocation failure (in release builds
 * where assert is compiled out); the result is now checked before being
 * dereferenced. */
void InsertHashBucket(HashBucket* pHB, HBDataType data)
{
    PNode NewNode = NULL;
    int hashAddr = 0;
    assert(pHB);

    _CheckCapacity(pHB); /* grow when the buckets are "full" on average */

    hashAddr = HashFunc(pHB, data);

    NewNode = BuyNode(data);
    if (NULL == NewNode)
        return; /* allocation failed; table left unchanged */
    NewNode->_pNext = pHB->_table[hashAddr];
    pHB->_table[hashAddr] = NewNode;
    pHB->_size++;
}

/* Remove the first node holding `data` from its bucket chain and free it.
 * Returns 1 if a node was removed, 0 when the key is absent.
 *
 * FIX: the original kept a PNode* "previous" cursor but never advanced it —
 * it always pointed at the bucket head and performed a bogus `*pPre = pCur;`
 * write on every step, so deleting any non-head node cut off and leaked the
 * nodes in front of it.  Rewritten as a correctly advancing
 * pointer-to-pointer walk. */
int DeleteHashBucket(HashBucket* pHB, HBDataType data)
{
    PNode* ppLink;
    assert(pHB);
    ppLink = &pHB->_table[HashFunc(pHB, data)];
    while (*ppLink)
    {
        PNode pCur = *ppLink;
        if (pCur->_data == data)
        {
            *ppLink = pCur->_pNext; /* bypass the matched node */
            free(pCur);
            pHB->_size--;
            return 1;
        }
        ppLink = &pCur->_pNext; /* advance the link being examined */
    }
    return 0;
}

/* Return 1 when `data` is present in its bucket chain, 0 otherwise.
 * FIX: removed the `pPre` cursor, which was assigned but never read. */
int FindHashBucket(HashBucket* pHB, HBDataType data)
{
    PNode pCur;
    assert(pHB);
    pCur = pHB->_table[HashFunc(pHB, data)];
    while (pCur)
    {
        if (pCur->_data == data)
            return 1;
        pCur = pCur->_pNext;
    }
    return 0;
}

/* Return non-zero when the bucket table holds no nodes. */
int EmptyHashBucket(HashBucket* pHB)
{
    assert(pHB);
    return pHB->_size == 0;
}
/* Return the total node count across every bucket. */
int SizeHashBucket(HashBucket* pHB)
{
    int count;
    assert(pHB);
    count = pHB->_size;
    return count;
}

/* Free every chain node and the bucket array itself, leaving the bucket
 * in a zeroed state that InitHashBucket can safely reuse.
 *
 * FIX: the original freed all chain nodes but leaked the _table pointer
 * array; it is now freed and set to NULL. */
void DestroyHashBucket(HashBucket* pHB)
{
    int i = 0;
    assert(pHB);

    for(; i<pHB->_capacity ;i++)
    {
        /* pop-and-free the chain head until the bucket is empty */
        PNode pCur = pHB->_table[i];
        while (pCur)
        {
            pHB->_table[i] = pCur->_pNext;
            free(pCur);
            pCur = pHB->_table[i];
        }
    }
    free(pHB->_table);
    pHB->_table = NULL;
    pHB->_size = 0;
    pHB->_capacity = 0;
}

开散列和闭散列的实现思路都差不多,只是闭散列是用了动态数组,而开散列是用来存储指向链表的地址的动态数组来实现