欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

Huffman Encode 哈夫曼编码 c++实现

程序员文章站 2022-04-30 22:41:44
...

基本原理

相关文章很多,不再赘述
参考:哈夫曼(赫夫曼,哈弗曼)编码算法(带源码+解析)

实现思路

设计一个Huffman类实现编码操作更为直观
注意:以下类的实现包含了头文件string,map,vector,algorithm等

树节点

struct treeNode
{
    treeNode *left;
    treeNode *right;
    char ch;//此节点储存的待编码字符
    int weight; //字符所占权重(频数)
    treeNode() { left = right = nullptr; ch = '\0';weight = 0;}
    treeNode(treeNode* l,treeNode* r,char c,int w):left(l),right(r),ch(c),weight(w){}
    bool isLeaf() { return !left && !right; }
};

Huffman树

struct HuffmanTree
{
    treeNode* head;
    vector<treeNode> nodeSet;//储存待生成的树节点
    HuffmanTree() {}
    HuffmanTree(map<char,int> weightMap)//以存有字符频数的vector初始化nodeSet
    {
        for(auto & i:weightMap)
            nodeSet.push_back(treeNode(nullptr,nullptr,i.first, i.second));
    }
    void HtreeTraverse(treeNode* root, string& code)//遍历H树(对输出code的操作并不美观hh)
    {
        string tempCodeL = code;
        string tempCodeR = code;
        tempCodeL += '0';
        tempCodeR += '1';
        if (root->isLeaf())
            cout << root->ch << "  " << code<<endl;
        else
        {
            HtreeTraverse(root->left,tempCodeL);
            HtreeTraverse(root->right,tempCodeR);
        }
    }
    static bool treeNodeCmp(treeNode t1, treeNode t2) { return t1.weight > t2.weight; }//用于sort算法
};

编码函数

void HuffmanEncode(string raw_str)
{
    //统计字符
    map<char, int> count;
    for(auto & i:raw_str)   
        count[i]++;
    //构造Huffman tree
    HuffmanTree Htree(count);
    //当待生成的节点大于1时
    while (Htree.nodeSet.size() > 1)
    {
        treeNode* tempLeft = new treeNode((Htree.nodeSet.end()-1)->left,(Htree.nodeSet.end()-1)->right,(Htree.nodeSet.end()-1)->ch,(Htree.nodeSet.end()-1)->weight);
        treeNode* tempRight = new treeNode((Htree.nodeSet.end()-2)->left,(Htree.nodeSet.end()-2)->right,(Htree.nodeSet.end()-2)->ch,(Htree.nodeSet.end()-2)->weight);
        //更新head
        Htree.head = new treeNode(tempLeft,tempRight,'\0',(Htree.nodeSet.end()-1)->weight+(Htree.nodeSet.end()-2)->weight);
        Htree.nodeSet.pop_back();//弹出已编码的节点
        Htree.nodeSet.pop_back();
        Htree.nodeSet.push_back(*Htree.head);//推入新生成节点
        sort(Htree.nodeSet.begin(), Htree.nodeSet.end(), HuffmanTree::treeNodeCmp);
    }
    //遍历树并输出结果
    string str = " ";
    Htree.HtreeTraverse(Htree.head,str);
}

测试(汇总)

#include <iostream>
#include <algorithm>
#include <map>
#include <string>
#include <vector>
using namespace std;
struct treeNode
{
    treeNode *left;
    treeNode *right;
    char ch;    //此节点储存的待编码字符
    int weight; //字符所占权重(频数)
    treeNode()
    {
        left = right = nullptr;
        ch = '\0';
        weight = 0;
    }
    treeNode(treeNode *l, treeNode *r, char c, int w) : left(l), right(r), ch(c), weight(w) {}
    bool isLeaf() { return !left && !right; }
};

struct HuffmanTree
{
    treeNode* head;
    vector<treeNode> nodeSet;//储存待生成的树节点
    HuffmanTree() {}
    HuffmanTree(map<char,int> weightMap)//以存有字符频数的vector初始化nodeSet
    {
        for(auto & i:weightMap)
            nodeSet.push_back(treeNode(nullptr,nullptr,i.first, i.second));
    }
    void HtreeTraverse(treeNode* root, string& code)//遍历H树(对输出code的操作并不美观hh)
    {
        string tempCodeL = code;
        string tempCodeR = code;
        tempCodeL += '0';
        tempCodeR += '1';
        if (root->isLeaf())
            cout << root->ch << "  " << code<<endl;
        else
        {
            HtreeTraverse(root->left,tempCodeL);
            HtreeTraverse(root->right,tempCodeR);
        }
        
    }
    static bool treeNodeCmp(treeNode t1, treeNode t2) { return t1.weight > t2.weight; }//用于sort算法
};

void HuffmanEncode(string raw_str)
{
    //统计字符
    map<char, int> count;
    for (auto &i : raw_str)
        count[i]++;
    //构造Huffman tree
    HuffmanTree Htree(count);
    //当待生成的节点大于1时
    while (Htree.nodeSet.size() > 1)
    {
        treeNode *tempLeft = new treeNode((Htree.nodeSet.end() - 1)->left, (Htree.nodeSet.end() - 1)->right, (Htree.nodeSet.end() - 1)->ch, (Htree.nodeSet.end() - 1)->weight);
        treeNode *tempRight = new treeNode((Htree.nodeSet.end() - 2)->left, (Htree.nodeSet.end() - 2)->right, (Htree.nodeSet.end() - 2)->ch, (Htree.nodeSet.end() - 2)->weight);
        //更新head
        Htree.head = new treeNode(tempLeft, tempRight, '\0', (Htree.nodeSet.end() - 1)->weight + (Htree.nodeSet.end() - 2)->weight);
        Htree.nodeSet.pop_back(); //弹出已编码的节点
        Htree.nodeSet.pop_back();
        Htree.nodeSet.push_back(*Htree.head); //推入新生成节点
        sort(Htree.nodeSet.begin(), Htree.nodeSet.end(), HuffmanTree::treeNodeCmp);
    }
    //遍历树并输出结果
    string str = " ";
    Htree.HtreeTraverse(Htree.head, str);
}

int main()
{
    string raw_str;
    cout << "Raw string without space: ";//cin不能直接取得带空格的字符串
    cin >> raw_str;
    HuffmanEncode(raw_str);
    system("pause");
    return 0;
}

结果:
Huffman Encode 哈夫曼编码 c++实现

几个要点(遇到的错误)

  • treeNodeCmp()的参数不能为指针,因为sort()函数的要求,具体原因待了解
  • 构造tempLeft注意指针(并且少重载构造函数,血泪啊)
  • 生成树的while循环中,注意终止条件(开始我居然令它不为零时,发生了奇奇怪怪的错误)
  • 编程过程都尽量实现“高内聚低耦合”吧,虽然是个小程序,但遵循这个原则会使得改代码时非常省事

zinkt