【数据压缩】Huffman原理与代码实现
huffman算法也是一种无损压缩算法,但与上篇文章lzw压缩算法不同,huffman需要得到每种字符出现概率的先验知识。通过计算字符序列中每种字符出现的频率,为每种字符进行唯一的编码设计,使得频率高的字符占的位数短,而频率低的字符长,来达到压缩的目的。通常可以节省20%~90%的空间,很大程度上依赖数据的特性!huffman编码是变长编码,即每种字符对应的编码长度不唯一。
前缀码:任何一个字符的编码都不是同一字符集中另一种字符编码的前缀。huffman编码为最优前缀码,即压缩后数据量最小。
---------------------------------------------------------------------------------------------------------------
huffman算法:
1.统计字符序列的每种字符的频率,并为每种字符建立一个节点,节点权重为其频率;
2.初始化最小优先队列中,把上述的结点全部插入到队列中;
3.取出优先队列的前两种符号节点,并从优先队列中删除;
4.新建一个父节点,并把上述两个节点作为其左右孩子节点,父节点的权值为左右节点之和;
5.如果此时优先队列为空,则退出并返回父节点的指针!否则把父节点插入到优先队列中,重复步骤3;
----------------------------------------------------------------------------------------------------------------
通过上述建造的huffman树,可以看到,每种字符结点都是叶子结点,编码方法:从根节点开始向左定义编码'0',向右定义为'1',遍历到叶子结点所得到的二值码串,即为此种字符的编码值。由于字符码字为前缀码,在译码过程中,每种字符可以参照huffman树被唯一的译码出,但是前缀码的缺点是,错误具有传播功能,当有1位码字错误,此后的译码过程很可能都不正确;
代码实现:
/* csdn 勿在浮沙筑高台 https://blog.csdn.net/luoshixian099 数据压缩--huffman编码 2015年12月21日 */ #include #include #include "compress.h" using namespace std; void showcode(pnode root, vector &code); int main() { char a[] = "xxznxznnvvccncvzzbzzvxxczbzvmnzvnnz";//原始数据 uint length = sizeof(a)-1; priority_q queue(a, length); //建立优先队列 //输出每组字符的频率 for (uint i = 0; i <= queue.heap_size;i++) { cout << (char)(queue.table[i]->key) << " frequency: " << queue.table[i]->frequency << endl; } cout << "--------------------" << endl; pnode root = build_huffman_tree(queue);//构建huffman树 vector code; showcode(root, code); //显示编码数据 return 0; } void showcode(pnode root,vector &code) { if (root!=null) { if (root->_left == null && root->_right == null) //叶子结点 { cout << (char)(root->key) << " code : " ; for (uint i = 0; i < code.size() ; i++) { cout << (int)code[i]; } cout << endl; return; } code.push_back(0); showcode(root->_left,code); code[code.size()-1] = 1; showcode(root->_right,code); code.resize(code.size()-1); } }
/* compress.cpp */ #include "compress.h" priority_q::priority_q(char *a,int length) //统计各种字符的频率 { for (int i = 0; i < 256; i++) { table[i] = new node; } heap_size = 0; for (int i = 0; i < length; i++) //统计字符频率 { bool flag = true; for (int j = 0; j < heap_size; j++) { if ( table[j]->key == *(a+i) ) { table[j]->frequency = table[j]->frequency + 1; flag = false; break; } } if (flag) //加入新的字符 { table[heap_size]->key = *(a + i); table[heap_size]->frequency = table[heap_size]->frequency + 1; heap_size++; } } heap_size--; build_min_heap(heap_size); //建立优先队列 } void priority_q::build_min_heap(uint length) { for (int i = (int)(length / 2); i >= 0; i--) { min_heapify(i); } } void priority_q::min_heapify(uint i) { uint smaller = i; uint left = 2 * i + 1; uint right = 2 * i + 2; if (left <= heap_size && table[left]->frequency < table[i]->frequency) //判断是否小于其孩子的值 { smaller = left; } if (right <= heap_size && table[right]->frequency < table[smaller]->frequency) { smaller = right; } if (smaller != i) //如果小于,就与其中最大的孩子调换位置 { swap(i, smaller); min_heapify(smaller); } } void priority_q::swap(int x, int y) //交换两个元素的数据 { pnode temp = table[x]; table[x] = table[y]; table[y] = temp; } pnode copynode(pnode _src, pnode _dst)//拷贝数据 { _dst->frequency = _src->frequency; _dst->key = _src->key; _dst->_left = _src->_left; _dst->_right = _src->_right; return _dst; } pnode priority_q::extract_min() //输出队列最前结点 { if (heap_size == empty) return null; if (heap_size == 0) { heap_size = empty; return table[0]; } if (heap_size >= 0) { swap(heap_size, 0); heap_size--; min_heapify(0); } return table[heap_size+1]; } void priority_q::insert(pnode pnode)//优先队列的插入 { heap_size++; copynode(pnode, table[heap_size]); delete pnode; uint i = heap_size; while ( i > 0 && table[parent(i)]->frequency > table[i]->frequency ) { swap(i, parent(i)); i = parent(i); } } pnode build_huffman_tree(priority_q &queue) //建立huffman树 { pnode parent=null,left=null,right=null; while (queue.heap_size != empty) { left = new node; right = new node; parent = new node; copynode(queue.extract_min(), left); //取出两个元素 copynode(queue.extract_min(), right); //复制左右节点数据 parent->frequency = right->frequency + left->frequency;//建立父节点 parent->_left = left; parent->_right = right; if (queue.heap_size == empty) break; queue.insert(parent); //再插入回优先队列 } return parent; }
/* compress.h */ #ifndef compress #define compress #include #define uint unsigned int #define uchar unsigned char #define empty 0xffffffff #define parent(i) (uint)(((i) - 1) / 2) typedef struct node //结点 { node::node():key(empty), frequency(0),_left(null),_right(null){} uint key; uint frequency; struct node * _left; struct node * _right; }node,*pnode; class priority_q //优先队列 { public: priority_q(char *a, int length); void insert(pnode pnode); //插入 pnode extract_min(); //取出元素 uint heap_size; //队列的长度 pnode table[256]; //建立256种结点 private: void build_min_heap(uint length); //建立队列 void swap(int x, int y); //交换两个元素 void min_heapify(uint i); //维护优先队列的性质 }; pnode build_huffman_tree(priority_q &queue);//构建优先队列 #endif // compress参考:
https://wenku.baidu.com/view/04a8a13b580216fc700afd2e.html
上一篇: 小米8的透明版背壳 是一块高通的广告牌
下一篇: 怎么买特价机票 这个方法很实用喔