欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

Huffman Tree——文件压缩

程序员文章站 2022-07-02 20:30:39
...

1 Huffman树的压缩和解压缩原理

Huffman Tree——文件压缩
2 文件压缩(Huffman树的实战应用)
* 代码实现*

//Heap.h
#pragma once
#include <iostream>
#include<string>
#include<vector>
#include"assert.h"
using namespace std;

template<class T, class Compare>
class Heap
{
public:
    Heap()
        :_a(0)
    {   }

    //建堆  
    Heap(T* a, int sz)
    {
        _a.resize(sz);
        int i = 0;
        for (i = 0; i < sz; i++)
        {
            _a[i] = a[i];
        }
        for (i = (_a.size() - 2) / 2; i >= 0; i--)//外层循环,每次传入根结点下标。(_a.size()-2)/2即为最后一个非叶子结点下标。  
        {
            AdjustDown(i);
        }
    }

    void AdjustDown(int root)//向下调整
    {
        Compare com;
        int parent = root;
        int child = parent * 2 + 1;
        while (child < _a.size())
        {
            if (child + 1 < _a.size()
                && com(_a[child + 1], _a[child]))//比较孩子
            {
                ++child;
            }
            if (com(_a[child], _a[parent]))
            {
                swap(_a[child], _a[parent]);
                parent = child;
                child = parent * 2 + 1;
            }
            else   //表示父亲大于孩子
            {
                break;
            }
        }
    }
    void AdjustUp(int child)//向上调整
    {
        Compare com;
        int parent = (child - 1) / 2;
        while (child > 0)//这里的parent不可能为负数
        {
            //if (_a[child]>_a[parent])
            if (com(_a[child], _a[parent]))
            {
                swap(_a[child], _a[parent]);
                child = parent;
                parent = (child - 1) / 2;
            }
            else
            {
                break;
            }
        }
    }
    void Push(const T&x)
    {
        _a.push_back(x);
        AdjustUp(_a.size() - 1);
    }
    void Pop()
    {
        assert(!_a.empty());
        swap(_a[0], _a[_a.size() - 1]);
        _a.pop_back();
        AdjustDown(0);
    }

    const T&Top()
    {
        return _a[0];
    }

    size_t Size()
    {
        return _a.size();
    }
private:
    vector<T> _a;//层序存储
};
template<class T>
struct Less
{
    bool operator()(const T& l, const T& r)
    {
        return l < r;
    }
};
template<class T>
struct Greater
{
    bool operator()(const T& l, const T& r)
    {
        return l > r;
    }
};

void Testheap()
{
    int array[8] = { 8, 1, 14, 3, 21, 5, 7, 10 };
    Heap<int, Less<int>> h(array, 8);
    //int array[] = { 0, 1, 6, 2, 3, 9, 4, 5, 8, 7 };
    //Heap<int> h(array, 10);
    cout << h.Top() << endl;
    h.Push(6);
    h.Pop();
    cout << h.Size() << endl;
}
//HuffmanTree.h
#pragma once
//包堆的头文件
#include"Heap.h"

template<class W>//W存权值
struct HuffmanTreeNode
{
    HuffmanTreeNode<W>* _left;
    HuffmanTreeNode<W>* _right;
    HuffmanTreeNode<W>* _parent;

    W _w;

    HuffmanTreeNode(const W& w)
        :_w(w)
        , _left(NULL)
        , _right(NULL)
        , _parent(NULL)
    {}
};

template<class W>
class HuffmanTree//用堆来构建哈夫曼树
{
    typedef HuffmanTreeNode<W> Node;
public:
    HuffmanTree()
        :_root(NULL)
    {}

    HuffmanTree(W *a, size_t n, const W& invalid)//只比较出现的字符,部分比较,所以使用非法值invalid
    {
        struct NodeCompare
        {
            bool operator()(Node* l, Node* r)//按权值比较
            {
                return l->_w < r->_w;
            }
        };
        Heap<Node*, NodeCompare> minHeap;//根据W中的权值比较而不是指针比较

        for (size_t i = 0; i < n; ++i)
        {
            if (a[i] != invalid)//限定比较的字符
            {
                minHeap.Push(new Node(a[i]));
            }
        }

        while (minHeap.Size()>1)//小堆
        {
            Node* left = minHeap.Top();
            minHeap.Pop();

            Node* right = minHeap.Top();
            minHeap.Pop();

            Node* parent = new Node(left->_w + right->_w);
            parent->_left = left;
            parent->_right = right;

            left->_parent = parent;
            right->_parent = parent;

            minHeap.Push(parent);
        }
        _root = minHeap.Top();
    }
    Node* GetRoot()
    {
        return _root;
    }

protected:
    Node* _root;
};

void TestHuffmanTree()
{
    int a[] = { 1, 2, 3, 4 };
    HuffmanTree<int> t(a, sizeof(a) / sizeof(a[0]), 0);
}
//FileCompress.h
#pragma once 
#include <iostream>
#include<string>
#include<algorithm>
#include"stdio.h""
#include"assert.h"
#include"HuffmanTree.h" 

using namespace std;

struct CharInfo
{

    char _ch; // 字符 
    long long _count; // 文件中出现的次数 
    string _code; // huffman code 

    bool operator!=(const CharInfo &info)
    {
        return _count != info._count;
    }
    CharInfo operator+(const CharInfo &info)
    {
        CharInfo ret;
        ret._count = _count + info._count;
        return ret;
    }
    bool operator<(const CharInfo &info)
    {
        return _count < info._count;
    }
};

class FileCompress
{
    typedef HuffmanTreeNode<CharInfo> Node;
    struct ConfigInfo//解压缩配置
    {
        char _ch;
        size_t _count;
    };
public:
    FileCompress()
    {
        for (size_t i = 0; i < 256; ++i)
        {
            _infos[i]._ch = i;
            _infos[i]._count = 0;
            _infos[i]._code = "";
        }
    }

    void GetHuffmanCode(Node* root)//方法1
    {
        if (root == NULL)
        {
            return;
        }
        GetHuffmanCode(root->_left);
        GetHuffmanCode(root->_right);
        if (root->_left == NULL&&root->_right == NULL)
        {
            string &code = _infos[root->_w._ch]._code;
            Node* cur = root;
            Node* parent = cur->_parent;
            while (parent)
            {
                if (cur == parent->_left)
                {
                    code.push_back('0');
                }
                else
                {
                    code.push_back('1');
                }
                cur = parent;
                parent = cur->_parent;
            }
            reverse(code.begin(), code.end());//逆置,借助sting容器的迭代器
            return;
        }
    }

    //方法2
    void GetHuffmanCode(Node* root, string code)
    {
        if (root == NULL)
        {
            return;
        }
        if (root->_left == NULL&&root->_right == NULL)
        {
            //叶子
            _infos[(unsigned char)root->_w._ch]._code = code;
            return;
        }
        GetHuffmanCode(root->_left, code + '0');
        GetHuffmanCode(root->_right, code + '1');
    }
    // Input.txt->Input.txt.huffman 
    void Compress(const char* file)//文件压缩
    {
        //1.统计字符出现次数
        FILE*fout = fopen(file, "r");
        assert(fout!=NULL);
        char ch = fgetc(fout);
        while (ch != EOF)
        {
            _infos[ch]._count++;
            ch = fgetc(fout);
        }

        //2.构建Huffman树
        CharInfo invalid;
        invalid._count = 0;
        HuffmanTree<CharInfo> tree(_infos, 256, invalid);

        //3.生成编码(存成字符串)
        //方法一:倒着走,三叉链
        //GetHuffmanCode(tree.GetRoot());

        //方法二:
        string code = "";
        GetHuffmanCode(tree.GetRoot(), code);

        //4.压缩
        string compressFile = file;
        long long count = 0;
        compressFile += ".Huffman";
        FILE* fin = fopen(compressFile.c_str(), "w");
        assert(fin);
        //写字符出现的次数到压缩文件---解压缩时候需要重建Huffman tree

        ConfigInfo info;
        for (size_t i = 0; i < 256; ++i)
        {
            if (_infos[i]._count >0)
            {
                //二进制形式写字符串

                info._ch = _infos[i]._ch;
                info._count = _infos[i]._count;
                fwrite(&info, sizeof(ConfigInfo), 1, fin);//
            }
        }
        info._count = 0;//在最后加上一个0,用来分隔
        fwrite(&info, sizeof(ConfigInfo), 1, fin);

        fseek(fout, 0, SEEK_SET);
        ch = fgetc(fout);
        char value = 0;
        int pos = 0;
        //while (ch != EOF)
        while (!feof(fout))
        {
            string &code = _infos[(unsigned char)ch]._code;
            for (size_t i = 0; i < code.size(); i++)
            {
                if (code[i] == '0')
                {
                    value &= ~(1 << pos);
                }
                else if (code[i] == '1')
                {
                    value |= (1 << pos);
                }
                else
                {
                    assert(false);
                }
                ++pos;
                if (pos == 8)
                {
                    count++;
                    fputc(value, fin);
                    value = 0;
                    pos = 0;
                }
            }
            ch = fgetc(fout);
        }
        cout << "压缩字符数:" << count << endl;
        //补齐剩余位
        if (pos > 0)
        {
            fputc(value, fin);
        }
        fclose(fout);
        fclose(fin);
    }



    // Input.txt.huffman -> Input.txt.unhuffman 
    void Uncompress(const char* file)//解压
    {
        assert(file);
        string uncompressFile = file;
        size_t pos = uncompressFile.rfind('.');
        assert(pos != string::npos);
        uncompressFile.erase(pos, uncompressFile.size()-pos);
//#ifdef _DEBUG_
        uncompressFile += ".UnHuffman";
//#endif
        FILE *fin = fopen(uncompressFile.c_str(), "w");
        assert(fin);
        FILE* fout = fopen(file, "r");
        assert(fout);
        ////先读入配置信息---字符出现的次数
        ConfigInfo info;
        while (1)
        {
            fread(&info, sizeof(ConfigInfo), 1, fout);
            if (info._count == 0)
            {
                break;
            }
            else
            {
                _infos[(unsigned char)info._ch]._ch = info._ch;
                _infos[(unsigned char)info._ch]._count = info._count;
            }
        }
        //1.重建Huffman树
        CharInfo invalid;
        invalid._count = 0;
        HuffmanTree<CharInfo> tree(_infos, 256, invalid);
        Node* root = tree.GetRoot();
        Node* cur = root;
        long long filesize = root->_w._count;
        //2.解压缩
        char value = fgetc(fout);
        long long count = 0;
        while (!feof(fout))
        {
            count++;
            for (int pos = 0; pos < 8; ++pos)
            {
                if (value&(1 << pos))//pos位为1
                {
                    cur = cur->_right;
                }
                else
                {
                    cur = cur->_left;
                }
                if (cur->_left == NULL&&cur->_right == NULL)
                {
                    fputc(cur->_w._ch, fin);
                    cur = root;
                }
                if (--filesize == 0)
                {
                    break;
                }
            }
            value = fgetc(fout);
        }
        cout << "解压缩字符数:" << count << endl;
        fclose(fout);
        fclose(fin);
    }
protected:
    CharInfo _infos[256];
};

void TestFileCompress()
{
    FileCompress fc;
    /*fc.Compress("C:\\Users\\Administrator\\Desktop\\CNN.png");
    fc.Uncompress("C:\\Users\\Administrator\\Desktop\\CNN.png.huffman");*/
    //fc.Compress("C:\\Users\\Administrator\\Desktop\\1.jpg");
    fc.Uncompress("C:\\Users\\Administrator\\Desktop\\1.jpg"); 
}
//test.cpp
#define _CRT_SECURE_NO_WARNINGS 1
#include <iostream>
#include<string>
#include<vector>
#include"assert.h"

#include"HuffmanTree.h"
#include"FileCompress.h"
#include"Heap.h" 

int main()
{
    Testheap();
    TestHuffman();
    TestFileCompress();

    system("pause");
    return 0;
}

不足:
能够对文本文件进行压缩,对于图片和音频上述程序还不能正确解压缩