欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

基于Huffman编码的压缩和解压缩小软件(附C++源码)

程序员文章站 2022-07-02 20:46:26
...

基于Huffman编码的压缩和解压缩小软件(附C++源码)

压缩前:

基于Huffman编码的压缩和解压缩小软件(附C++源码)

将pic.png拖到.exe文件上,可得到.zLzip压缩文件:

基于Huffman编码的压缩和解压缩小软件(附C++源码)

编码过程:

基于Huffman编码的压缩和解压缩小软件(附C++源码)

压缩过程:

基于Huffman编码的压缩和解压缩小软件(附C++源码)

将.zLzip压缩文件拖回可解压缩得到原文件:

基于Huffman编码的压缩和解压缩小软件(附C++源码)
基于Huffman编码的压缩和解压缩小软件(附C++源码)
基于Huffman编码的压缩和解压缩小软件(附C++源码)
基于Huffman编码的压缩和解压缩小软件(附C++源码)

顺便一提,当原文件内字符分布均衡时,其信息熵很低,压缩效果不太好。

代码如下(编译器是TDM-GCC 4.9.2 64-bit Release, C++11标准)

main.cpp文件:

#include "HuffmanEncoderCompress.h"

void test();
void test2(int argc, char** argv);

int main(int argc, char** argv) {
	

//	test();					// 本地测试
	
	test2(argc, argv);		// 界面拖动 
	
	return 0;
}


// 本地测试 
void test() {
	
	HuffmanEncoderCompress* hecp = new HuffmanEncoderCompress("test\\pic.png");
	
	hecp->run();
//	hecp->printInfo();

	int n = 0;
	for (int i = 0; i < n; ++i) {
		HuffmanEncoderCompress* hecp2 = new HuffmanEncoderCompress(hecp->getOutputFileName(), true);
		hecp2->run();
		delete hecp;
		hecp = hecp2;
	}
	
	delete hecp;
}


// 读取输入(可在界面中拖动) 
void test2(int argc, char** argv) {
	if (argc < 2) exit(-1);
	
	HuffmanEncoderCompress* hecp = new HuffmanEncoderCompress(argv[1]);

	hecp->run();
	
	delete hecp;
	getchar();		// 防止运行界面闪退 
}

HuffmanEncoderCompress.h头文件:

#ifndef __HUFFMAN_ENCODER_COMPRESS_H__
#define __HUFFMAN_ENCODER_COMPRESS_H__

#include <string>
using std::string;


constexpr int _CODE_NUM = 256;			// 码数 
constexpr int _ZIP_NAME_LEN = 16;		// 压缩识别符长度 
constexpr int _FILE_NAME_LEN = 256;		//  文件名长度 
constexpr int _FILE_SIZE_LEN = 8; 		// 文件大小值长度 
constexpr int _CODE_FREQUENCY_LEN = 8;	//  码频值长度 
constexpr int _ZLZIP_HEAD_LEN = _ZIP_NAME_LEN + _FILE_NAME_LEN + _FILE_SIZE_LEN 	// 头文件信息长度 
		+ _CODE_FREQUENCY_LEN * _CODE_NUM;	
static const char _ZIP_NAME[_ZIP_NAME_LEN] = "zLimbo zLzip";	// 压缩 识别符 
		
		
// 编码类 
struct Code {
	unsigned char oldCode;				// 旧码 
	unsigned long long frequency;		// 频率 
	unsigned long long newCode;			// 新码 
	string newCodeStr;					// 新码字符串表示 debug 
	int length;							// 新码长度 
	
	Code(): oldCode(0), frequency(0), newCode(0), length(0) { }
};


// Huffman 节点 
struct HuffmanTreeNode {
	
	unsigned long long weight;			// 权重 
	Code* codePtr; 						// 指向码(叶子节点才有指向) 
	HuffmanTreeNode* left;				// 左分支 
	HuffmanTreeNode* right;				// 右分支 

	
	HuffmanTreeNode(unsigned long long w = 0, Code* cp = nullptr, 
			HuffmanTreeNode* l = nullptr, HuffmanTreeNode* r = nullptr):
		weight(w), codePtr(cp), left(l), right(r)
	{ }
};


// 自定义比较器,用于优先队列 
class CmparatorOfHuffmanTreeNode {
	public:
		 bool operator() (HuffmanTreeNode*& lhs, HuffmanTreeNode*& rhs) const {
	        return lhs->weight > rhs->weight;
	    }
};


class HuffmanEncoderCompress {
	
	private:
		
		Code _codes[_CODE_NUM];		// 码 
		
		bool _isCompress; 				// 是否压缩(用于多次压缩 ) 
		
		string _inputFileName;			// 输入文件名 
		string _outputFileName;			// 输出文件名 
		
		unsigned long long _inputFileSize;		// 输入文件大小 
		unsigned long long _outputFileSize;		// 输出文件大小 
		
		HuffmanTreeNode* _huffmanTreeRoot;		// Huffman树根节点 
	
	
	private:
		// 释放节点 
		freeNode(HuffmanTreeNode* np);
	
	public:
		// 构造 
		HuffmanEncoderCompress(const string& inputFileName, bool isCompress = false);
		
		// 析构 
		~HuffmanEncoderCompress();
		
		// 无法拷贝构造 
		HuffmanEncoderCompress(const HuffmanEncoderCompress& hec) = delete;
		
		// 运行 
		void run();
		
		// 统计频率 
		void statisticalFrequency();
		
		// 构建 Huffman 树 
		HuffmanTreeNode* buildHuffmanTree();
		
		// 获得 Huffman 编码 
		void getNewCodes(HuffmanTreeNode* np, unsigned long long newCode, string newCodeStr, int length);
		
		// 压缩文件 
		void compress();
		
		// 搜索节点,辅助于解码 
		bool findNode(HuffmanTreeNode*& np, unsigned char inputByte, int& pos);
		
		// 解压缩 
		void uncompress();
		
		// 打印哈夫曼编码信息 
		void printHuffmanEncodeInfo();
		
		// 打印压缩或解压缩信息 
		void printInfo(const char* type);
		
		// 获取输出文件名 
		string getOutputFileName() const { return _outputFileName; }
		
		// 判断两个文件是否相等 
		bool compare2File(const string& fileName1, const string& fileName2);
};


#endif

HuffmanEncoderCompress.cpp 文件

#include "HuffmanEncoderCompress.h"

#include <cstdio>
#include <cstring>
#include <queue>
using namespace std;


HuffmanEncoderCompress::HuffmanEncoderCompress(const string& inputFileName, bool isCompress):
		_isCompress(isCompress),
		_inputFileName(inputFileName), _outputFileName(inputFileName+".zLzip"), 
		_inputFileSize(0), _outputFileSize(0), _huffmanTreeRoot(nullptr)
{	
	for (int i = 0; i < _CODE_NUM; ++i) _codes[i].oldCode = i;
}


// 释放节点及其子树 
HuffmanEncoderCompress::freeNode(HuffmanTreeNode* np) {
	if (np) {
		freeNode(np->left);
		freeNode(np->right);
		delete np; 
	}
} 
	

// 析构 
HuffmanEncoderCompress::~HuffmanEncoderCompress() {
	freeNode(_huffmanTreeRoot); 
}


// 运行 
void HuffmanEncoderCompress::run() {
	
	FILE *inputFp = NULL;
	if ((inputFp = fopen(_inputFileName.c_str(), "rb")) == NULL) {
		printf("open file %s failed!\n", _inputFileName.c_str());
		exit(-1);
	}
	
	char zipName[_ZIP_NAME_LEN];
	fread(zipName, _ZIP_NAME_LEN, 1, inputFp);

	if (_isCompress || strcmp(zipName, _ZIP_NAME)) {		// 无识别符,非压缩文件 
		
		fclose(inputFp); 
		printf("开始压缩文件%s......\n", _inputFileName.c_str()); 
		printf("正在统计频率......\n");
		statisticalFrequency();			// 统计频率 
		printf("正在构建哈夫曼树......\n");
		_huffmanTreeRoot = buildHuffmanTree();		//  构建哈夫曼树 
		printf("正在产生新编码......\n");
		getNewCodes(_huffmanTreeRoot, 0, string(), 0);	// 获得新编码 
		printHuffmanEncodeInfo();
		printf("正在压缩......\n");
		compress();		// 压缩 
		printInfo("压缩"); 
		printf("压缩成功\n");
		
	} else {		// 有识别符,是压缩文件 
		
		printf("开始解压缩文件%s......\n", _inputFileName.c_str());
		printf("读取原始文件信息......\n"); 
		char outputFileName[_FILE_NAME_LEN];
		fread(outputFileName, _FILE_NAME_LEN, 1, inputFp);	// 读入原文件名 
		printf("原始文件名为%s\n", outputFileName); 
		_outputFileName = string(outputFileName); 
		fread(&_outputFileSize, _FILE_SIZE_LEN, 1, inputFp); 	// 读入原文件大小
		
		for (int i = 0; i < _CODE_NUM; ++i) 		// 读入字符频率表 
			fread(&_codes[i].frequency, _CODE_FREQUENCY_LEN, 1, inputFp);
			
		fclose(inputFp);
		printf("正在构建哈夫曼树......\n");
		_huffmanTreeRoot = buildHuffmanTree();		//  构建哈夫曼树 
		printf("正在产生新编码......\n");
		getNewCodes(_huffmanTreeRoot, 0, string(), 0);	// 获得新编码
		printHuffmanEncodeInfo();
		printf("正在解压缩......\n");
		uncompress();	// 解压缩 
		printInfo("解压缩");
		printf("解压成功\n");
	} 
}

// 统计频率 
void HuffmanEncoderCompress::statisticalFrequency() {
	// 打开文件 
	FILE *inputFp = NULL;
	if ((inputFp = fopen(_inputFileName.c_str(), "rb")) == NULL) {
		printf("open file %s failed!\n", _inputFileName.c_str());
		exit(-1);
	}
	// 统计频率 
	while (!feof(inputFp)) {
		unsigned char inputByte;
		fread(&inputByte, 1, 1, inputFp);
		if (feof(inputFp)) break;
		++_codes[inputByte].frequency;	
		++_inputFileSize;
	}
	fclose(inputFp);	
}


// 构建 Huffman 树 
HuffmanTreeNode* HuffmanEncoderCompress::buildHuffmanTree() {
	// 使用优先队列,自定义比较器 
	priority_queue<HuffmanTreeNode*, vector<HuffmanTreeNode*>, CmparatorOfHuffmanTreeNode> nps;
	// 初始化叶子节点 
	for (int i = 0; i < _CODE_NUM; ++i)
		nps.push(new HuffmanTreeNode(_codes[i].frequency, &_codes[i]));
	// 取最小两个节点合并 
	while (true) {
		HuffmanTreeNode* np1 = nps.top(); nps.pop();
		HuffmanTreeNode* np2 = nps.top(); nps.pop();
		HuffmanTreeNode* np3 = new HuffmanTreeNode(np1->weight + np2->weight, nullptr, np1, np2);
		if (!nps.empty()) nps.push(np3);	
		else return np3;	// 优先队列为空则构成 Huffman 树 
	}
}


// 获得 Huffman 编码 
void HuffmanEncoderCompress::getNewCodes(HuffmanTreeNode* np, unsigned long long newCode, string newCodeStr, int length) {
	// 是叶子节点则结束,记录编码 
	if (np->codePtr) {
		np->codePtr->newCode = newCode;
		np->codePtr->newCodeStr = newCodeStr;
		np->codePtr->length = length;
		return;
	}
	// 否则向左右分支探索 
	newCode <<= 1;
	++length; 
	if (np->left) getNewCodes(np->left, newCode, newCodeStr+"0", length);
	if (np->right) getNewCodes(np->right, newCode+1, newCodeStr+"1", length); 	
} 


// 压缩文件 
void HuffmanEncoderCompress::compress() {
	
	FILE *inputFp = NULL;
	if ((inputFp = fopen(_inputFileName.c_str(), "rb")) == NULL) {
		printf("open file %s failed!\n", _inputFileName.c_str());
		exit(-1);
	}
	
	FILE *outputFp = NULL;
	if ((outputFp = fopen(_outputFileName.c_str(), "wb")) == NULL) {
		printf("open file %s failed!\n", _outputFileName.c_str());
		exit(-1);
	}
	
	// 写入压缩文件头信息 
	fwrite(_ZIP_NAME, _ZIP_NAME_LEN, 1, outputFp);				// 识别符		 
	fwrite(_inputFileName.c_str(), _FILE_NAME_LEN, 1, outputFp);	// 文件名 
	fwrite(&_inputFileSize, _FILE_SIZE_LEN, 1, outputFp); 			// 文件大小 
	for (int i = 0; i < _CODE_NUM; ++i) 				// 字符频率,用以构建Huffman树 
		fwrite(&_codes[i].frequency, _CODE_FREQUENCY_LEN, 1, outputFp);
	
	// 压缩所需临时辅助变量 
	unsigned char inputByte;
	unsigned char outputByte = 0;
	unsigned long long newCode;
	int length;
	int cnt = 0;
	unsigned long long currentInputSize = 0;
	unsigned long long currentOutputSize = 0;
	double currRate = 0.0;
//	string s;		//debug
	
	while (!feof(inputFp)) {
		
		fread(&inputByte, 1, 1, inputFp);
		if (feof(inputFp)) break;
		
		// 记录速率 
		double rate = (double)(++currentInputSize) / _inputFileSize * 100;
		if (rate - currRate >= 10) {
			currRate = rate;
		//	system("cls");
			printf("已压缩:%.1f%%\t压缩率:%.2f%%\n", currRate, (double)_outputFileSize/currentInputSize*100);
		} 
		
		newCode = _codes[(int)inputByte].newCode;
		length = _codes[(int)inputByte].length;
//		printf("# %d(%s) ", newCode, _codes[(int)inputByte].newCodeStr.c_str());	//debug
		
		while (length--) {
			
			outputByte <<= 1;
			outputByte += (newCode>>length) & 1;
			
//			s += (newCode >> length) & 1 ? "1" : "0"; 		//debug
			if (++cnt == 8) {
				
//				printf("\t %d(%s)\n", (int)outputByte, s.c_str());								//debug
//				s.clear();																		//debug
//				system("pause");																//debug
//				if (length > 0) printf("# %s ", _codes[(int)inputByte].newCodeStr.c_str());		//debug
				fwrite(&outputByte, 1, 1, outputFp);
				outputByte = 0;
				cnt = 0;
				++_outputFileSize;
			}
		} 
	}
	
	if (cnt < 8) {	 // 最后一个不足8比特填充 0 
		outputByte <<= 8-cnt;
		fwrite(&outputByte, 1, 1, outputFp);
		++_outputFileSize;
	}
	
	//system("cls");
	// 打印压缩信息 
	printf("已压缩:%.1f%%\t压缩率:%.2f%%\n", 100.0, (double)_outputFileSize/currentInputSize*100);	
	
	fclose(inputFp);
	fclose(outputFp);
}


// 搜索节点,辅助于解码 
bool HuffmanEncoderCompress::findNode(HuffmanTreeNode*& np, unsigned char inputByte, int& pos) {
	// 叶子节点则搜索成功 
	if (np->codePtr) return true;
	if (pos < 0) return false;
	int val = (inputByte >> pos) & 1;
	--pos;
	if (val == 0) { 
		np = np->left;
		return findNode(np, inputByte, pos);
	} else {
		np = np->right;
		return findNode(np, inputByte, pos);
	}
}


// 解压缩 
void HuffmanEncoderCompress::uncompress() {
	
	FILE *inputFp = NULL;
	if ((inputFp = fopen(_inputFileName.c_str(), "rb")) == NULL) {
		printf("open file %s failed!\n", _inputFileName.c_str());
		exit(-1);
	}
	
	FILE *outputFp = NULL;
	if ((outputFp = fopen(_outputFileName.c_str(), "wb")) == NULL) {
		printf("open file %s failed!\n", _outputFileName.c_str());
		exit(-1);
	}
	
	fseek(inputFp, _ZLZIP_HEAD_LEN, SEEK_SET);
	_inputFileSize = _ZLZIP_HEAD_LEN;
	
	// 解压缩所需临时变量 
	unsigned char inputByte;
	unsigned char outputByte;
	HuffmanTreeNode* np = _huffmanTreeRoot;
	unsigned long long currentOutputSize = 0;
	int pos;
	double currRate = 0.0;
	
	while (!feof(inputFp)) {
		
		fread(&inputByte, 1, 1, inputFp);
		if (feof(inputFp)) break;
		++_inputFileSize; 
		pos = 7;
		
//		printf("%d(", (int)inputByte);				//debug
//		for (int i = 7; i >= 0; --i) {				//debug
//			printf("%d", (inputByte >> i) & 1);		//debug
//		}											//debug
//		printf(") ");								//debug
		
		while (findNode(np, inputByte, pos)) {
			
//			if (!np) printf("np = nullptr\n");						//debug
//			if (!np->codePtr) printf("np->codePtr = nullptr\n");	//debug
			
			outputByte = np->codePtr->oldCode;
//			printf("\tfind: %d(%s) ", (int)outputByte, np->codePtr->newCodeStr.c_str());	//debug
				
			fwrite(&outputByte, 1, 1, outputFp);
			
			// 记录速率 
			double rate = (double)(++currentOutputSize) / _outputFileSize * 100;
			if (rate - currRate >= 10) {
				currRate = rate;
			//	system("cls");
				printf("已解压缩:%.1f%%\t解压缩率:%.2f%%\n", currRate, (double)currentOutputSize/_inputFileSize*100);
			} 
			
			if (currentOutputSize == _outputFileSize) {
				printf("已解压缩:%.1f%%\t解压缩率:%.2f%%\n", 100.0, (double)currentOutputSize/_inputFileSize*100);
				break;
			}
			
			np = _huffmanTreeRoot;
		} 
		
//		if (!findNode(np, inputByte, pos)) {		//debug
//			printf("\n");							//debug
//			system("pause");						//debug
//		}
	}
	fclose(inputFp);
	fclose(outputFp);
}


// 打印哈夫曼编码信息 
void HuffmanEncoderCompress::printHuffmanEncodeInfo() {
	//printf("file size: %d B, %f KB, %f MB\n", _inputFileSize, _inputFileSize/1024.0, _inputFileSize/1024.0/1024.0); 
	printf("%-10s %-10s %-20s %-5s %-10s\n", "原码", "频率", "哈夫曼编码", "长度", "十进制");
	
	for (int i = 0; i < _CODE_NUM; ++i) {
		Code &code = _codes[i];
		printf("%-10d %-10llu %-20s %-5d %-10llu\n", (int)code.oldCode, code.frequency, 
				code.newCodeStr.c_str(), code.length, code.newCode);
	}
}


// 打印压缩或解压缩信息 
void HuffmanEncoderCompress::printInfo(const char* type) {
	
	double compressRate = (double)_outputFileSize / _inputFileSize * 100;
	printf("%s率:%.2f%%\n", type, compressRate); 
	double inputFileSize = _inputFileSize, outputFileSize = _outputFileSize;
	if (inputFileSize < 1024) {
		printf("输入文件大小:%.2fB, 输出文件大小:%.2fB\n", inputFileSize, outputFileSize);
		return;
	} 
	
	inputFileSize /= 1024; outputFileSize /= 1024;
	if (inputFileSize < 1024) {
		printf("输入文件大小:%.2fKB, 输出文件大小:%.2fKB\n", inputFileSize, outputFileSize);
		return;
	} 
	
	inputFileSize /= 1024; outputFileSize /= 1024;
	if (inputFileSize < 1024) {
		printf("输入文件大小:%.2fMB, 输出文件大小:%.2fMB\n", inputFileSize, outputFileSize);
		return;
	} 
	
	inputFileSize /= 1024; outputFileSize /= 1024;
	if (inputFileSize < 1024) {
		printf("输入文件大小:%.2fGB, 输出文件大小:%.2fGB\n", inputFileSize, outputFileSize);
		return;
	} 
}


// 判断两个文件是否相等 
bool HuffmanEncoderCompress::compare2File(const string& fileName1, const string& fileName2) {
	
	FILE *fp1 = NULL;
	if ((fp1 = fopen(fileName1.c_str(), "rb")) == NULL) {
		printf("open file %s failed!\n", fileName1.c_str());
		exit(-1);
	}
	
	FILE *fp2 = NULL;
	if ((fp2 = fopen(fileName2.c_str(), "rb")) == NULL) {
		printf("open file %s failed!\n", fileName2.c_str());
		exit(-1);
	}
	
	while (!feof(fp1) && !feof(fp2)) {
		unsigned char uch1, uch2;
		
		fread(&uch1, 1, 1, fp1);
		fread(&uch2, 1, 1, fp2);
		
		if (uch1 != uch2) return false;
	}
	
	if (!feof(fp1) || !feof(fp2)) return false;
	
	return true;
}