Huffman Tree——文件压缩
程序员文章站
2022-07-02 20:30:39
...
1 Huffman树的压缩和解压缩原理
2 文件压缩(Huffman树的实战应用)
* 代码实现*
//Heap.h
#pragma once
#include <iostream>
#include<string>
#include<vector>
#include"assert.h"
using namespace std;
template<class T, class Compare>
class Heap
{
public:
Heap()
:_a(0)
{ }
//建堆
Heap(T* a, int sz)
{
_a.resize(sz);
int i = 0;
for (i = 0; i < sz; i++)
{
_a[i] = a[i];
}
for (i = (_a.size() - 2) / 2; i >= 0; i--)//外层循环,每次传入根结点下标。(_a.size()-2)/2即为最后一个非叶子结点下标。
{
AdjustDown(i);
}
}
void AdjustDown(int root)//向下调整
{
Compare com;
int parent = root;
int child = parent * 2 + 1;
while (child < _a.size())
{
if (child + 1 < _a.size()
&& com(_a[child + 1], _a[child]))//比较孩子
{
++child;
}
if (com(_a[child], _a[parent]))
{
swap(_a[child], _a[parent]);
parent = child;
child = parent * 2 + 1;
}
else //表示父亲大于孩子
{
break;
}
}
}
void AdjustUp(int child)//向上调整
{
Compare com;
int parent = (child - 1) / 2;
while (child > 0)//这里的parent不可能为负数
{
//if (_a[child]>_a[parent])
if (com(_a[child], _a[parent]))
{
swap(_a[child], _a[parent]);
child = parent;
parent = (child - 1) / 2;
}
else
{
break;
}
}
}
void Push(const T&x)
{
_a.push_back(x);
AdjustUp(_a.size() - 1);
}
void Pop()
{
assert(!_a.empty());
swap(_a[0], _a[_a.size() - 1]);
_a.pop_back();
AdjustDown(0);
}
const T&Top()
{
return _a[0];
}
size_t Size()
{
return _a.size();
}
private:
vector<T> _a;//层序存储
};
template<class T>
struct Less
{
bool operator()(const T& l, const T& r)
{
return l < r;
}
};
template<class T>
struct Greater
{
bool operator()(const T& l, const T& r)
{
return l > r;
}
};
void Testheap()
{
int array[8] = { 8, 1, 14, 3, 21, 5, 7, 10 };
Heap<int, Less<int>> h(array, 8);
//int array[] = { 0, 1, 6, 2, 3, 9, 4, 5, 8, 7 };
//Heap<int> h(array, 10);
cout << h.Top() << endl;
h.Push(6);
h.Pop();
cout << h.Size() << endl;
}
//HuffmanTree.h
#pragma once
//包堆的头文件
#include"Heap.h"
template<class W>//W存权值
struct HuffmanTreeNode
{
HuffmanTreeNode<W>* _left;
HuffmanTreeNode<W>* _right;
HuffmanTreeNode<W>* _parent;
W _w;
HuffmanTreeNode(const W& w)
:_w(w)
, _left(NULL)
, _right(NULL)
, _parent(NULL)
{}
};
template<class W>
class HuffmanTree//用堆来构建哈夫曼树
{
typedef HuffmanTreeNode<W> Node;
public:
HuffmanTree()
:_root(NULL)
{}
HuffmanTree(W *a, size_t n, const W& invalid)//只比较出现的字符,部分比较,所以使用非法值invalid
{
struct NodeCompare
{
bool operator()(Node* l, Node* r)//按权值比较
{
return l->_w < r->_w;
}
};
Heap<Node*, NodeCompare> minHeap;//根据W中的权值比较而不是指针比较
for (size_t i = 0; i < n; ++i)
{
if (a[i] != invalid)//限定比较的字符
{
minHeap.Push(new Node(a[i]));
}
}
while (minHeap.Size()>1)//小堆
{
Node* left = minHeap.Top();
minHeap.Pop();
Node* right = minHeap.Top();
minHeap.Pop();
Node* parent = new Node(left->_w + right->_w);
parent->_left = left;
parent->_right = right;
left->_parent = parent;
right->_parent = parent;
minHeap.Push(parent);
}
_root = minHeap.Top();
}
Node* GetRoot()
{
return _root;
}
protected:
Node* _root;
};
void TestHuffmanTree()
{
int a[] = { 1, 2, 3, 4 };
HuffmanTree<int> t(a, sizeof(a) / sizeof(a[0]), 0);
}
//FileCompress.h
#pragma once
#include <iostream>
#include<string>
#include<algorithm>
#include"stdio.h""
#include"assert.h"
#include"HuffmanTree.h"
using namespace std;
struct CharInfo
{
char _ch; // 字符
long long _count; // 文件中出现的次数
string _code; // huffman code
bool operator!=(const CharInfo &info)
{
return _count != info._count;
}
CharInfo operator+(const CharInfo &info)
{
CharInfo ret;
ret._count = _count + info._count;
return ret;
}
bool operator<(const CharInfo &info)
{
return _count < info._count;
}
};
class FileCompress
{
typedef HuffmanTreeNode<CharInfo> Node;
struct ConfigInfo//解压缩配置
{
char _ch;
size_t _count;
};
public:
FileCompress()
{
for (size_t i = 0; i < 256; ++i)
{
_infos[i]._ch = i;
_infos[i]._count = 0;
_infos[i]._code = "";
}
}
void GetHuffmanCode(Node* root)//方法1
{
if (root == NULL)
{
return;
}
GetHuffmanCode(root->_left);
GetHuffmanCode(root->_right);
if (root->_left == NULL&&root->_right == NULL)
{
string &code = _infos[root->_w._ch]._code;
Node* cur = root;
Node* parent = cur->_parent;
while (parent)
{
if (cur == parent->_left)
{
code.push_back('0');
}
else
{
code.push_back('1');
}
cur = parent;
parent = cur->_parent;
}
reverse(code.begin(), code.end());//逆置,借助sting容器的迭代器
return;
}
}
//方法2
void GetHuffmanCode(Node* root, string code)
{
if (root == NULL)
{
return;
}
if (root->_left == NULL&&root->_right == NULL)
{
//叶子
_infos[(unsigned char)root->_w._ch]._code = code;
return;
}
GetHuffmanCode(root->_left, code + '0');
GetHuffmanCode(root->_right, code + '1');
}
// Input.txt->Input.txt.huffman
void Compress(const char* file)//文件压缩
{
//1.统计字符出现次数
FILE*fout = fopen(file, "r");
assert(fout!=NULL);
char ch = fgetc(fout);
while (ch != EOF)
{
_infos[ch]._count++;
ch = fgetc(fout);
}
//2.构建Huffman树
CharInfo invalid;
invalid._count = 0;
HuffmanTree<CharInfo> tree(_infos, 256, invalid);
//3.生成编码(存成字符串)
//方法一:倒着走,三叉链
//GetHuffmanCode(tree.GetRoot());
//方法二:
string code = "";
GetHuffmanCode(tree.GetRoot(), code);
//4.压缩
string compressFile = file;
long long count = 0;
compressFile += ".Huffman";
FILE* fin = fopen(compressFile.c_str(), "w");
assert(fin);
//写字符出现的次数到压缩文件---解压缩时候需要重建Huffman tree
ConfigInfo info;
for (size_t i = 0; i < 256; ++i)
{
if (_infos[i]._count >0)
{
//二进制形式写字符串
info._ch = _infos[i]._ch;
info._count = _infos[i]._count;
fwrite(&info, sizeof(ConfigInfo), 1, fin);//
}
}
info._count = 0;//在最后加上一个0,用来分隔
fwrite(&info, sizeof(ConfigInfo), 1, fin);
fseek(fout, 0, SEEK_SET);
ch = fgetc(fout);
char value = 0;
int pos = 0;
//while (ch != EOF)
while (!feof(fout))
{
string &code = _infos[(unsigned char)ch]._code;
for (size_t i = 0; i < code.size(); i++)
{
if (code[i] == '0')
{
value &= ~(1 << pos);
}
else if (code[i] == '1')
{
value |= (1 << pos);
}
else
{
assert(false);
}
++pos;
if (pos == 8)
{
count++;
fputc(value, fin);
value = 0;
pos = 0;
}
}
ch = fgetc(fout);
}
cout << "压缩字符数:" << count << endl;
//补齐剩余位
if (pos > 0)
{
fputc(value, fin);
}
fclose(fout);
fclose(fin);
}
// Input.txt.huffman -> Input.txt.unhuffman
void Uncompress(const char* file)//解压
{
assert(file);
string uncompressFile = file;
size_t pos = uncompressFile.rfind('.');
assert(pos != string::npos);
uncompressFile.erase(pos, uncompressFile.size()-pos);
//#ifdef _DEBUG_
uncompressFile += ".UnHuffman";
//#endif
FILE *fin = fopen(uncompressFile.c_str(), "w");
assert(fin);
FILE* fout = fopen(file, "r");
assert(fout);
////先读入配置信息---字符出现的次数
ConfigInfo info;
while (1)
{
fread(&info, sizeof(ConfigInfo), 1, fout);
if (info._count == 0)
{
break;
}
else
{
_infos[(unsigned char)info._ch]._ch = info._ch;
_infos[(unsigned char)info._ch]._count = info._count;
}
}
//1.重建Huffman树
CharInfo invalid;
invalid._count = 0;
HuffmanTree<CharInfo> tree(_infos, 256, invalid);
Node* root = tree.GetRoot();
Node* cur = root;
long long filesize = root->_w._count;
//2.解压缩
char value = fgetc(fout);
long long count = 0;
while (!feof(fout))
{
count++;
for (int pos = 0; pos < 8; ++pos)
{
if (value&(1 << pos))//pos位为1
{
cur = cur->_right;
}
else
{
cur = cur->_left;
}
if (cur->_left == NULL&&cur->_right == NULL)
{
fputc(cur->_w._ch, fin);
cur = root;
}
if (--filesize == 0)
{
break;
}
}
value = fgetc(fout);
}
cout << "解压缩字符数:" << count << endl;
fclose(fout);
fclose(fin);
}
protected:
CharInfo _infos[256];
};
void TestFileCompress()
{
FileCompress fc;
/*fc.Compress("C:\\Users\\Administrator\\Desktop\\CNN.png");
fc.Uncompress("C:\\Users\\Administrator\\Desktop\\CNN.png.huffman");*/
//fc.Compress("C:\\Users\\Administrator\\Desktop\\1.jpg");
fc.Uncompress("C:\\Users\\Administrator\\Desktop\\1.jpg");
}
//test.cpp
#define _CRT_SECURE_NO_WARNINGS 1
#include <iostream>
#include<string>
#include<vector>
#include"assert.h"
#include"HuffmanTree.h"
#include"FileCompress.h"
#include"Heap.h"
int main()
{
Testheap();
TestHuffman();
TestFileCompress();
system("pause");
return 0;
}
不足:
能够对文本文件进行压缩,对于图片和音频上述程序还不能正确解压缩