LZW压缩算法 C#源码
程序员文章站
2022-05-28 18:43:27
using system;
using system.io;
namespace gif.components
{
public class lzwe...
using system; using system.io; namespace gif.components { public class lzwencoder { private static readonly int eof = -1; private int imgw, imgh; private byte[] pixary; private int initcodesize; private int remaining; private int curpixel; // gifcompr.c - gif image compression routines // // lempel-ziv compression based on 'compress'. gif modifications by // david rowley (mgardi@watdcsu.waterloo.edu) // general defines static readonly int bits = 12; static readonly int hsize = 5003; // 80% occupancy // gif image compression - modified 'compress' // // based on: compress.c - file compression ala ieee computer, june 1984. // // by authors: spencer w. thomas (decvax!harpo!utah-cs!utah-gr!thomas) // jim mckie (decvax!mcvax!jim) // steve davies (decvax!vax135!petsd!peora!srd) // ken turkowski (decvax!decwrl!turtlevax!ken) // james a. woods (decvax!ihnp4!ames!jaw) // joe orost (decvax!vax135!petsd!joe) int n_bits; // number of bits/code int maxbits = bits; // user settable max # bits/code int maxcode; // maximum code, given n_bits int maxmaxcode = 1 << bits; // should never generate this code int[] htab = new int[hsize];//这个是放hash的筒子,在这里面可以很快的找到1个key int[] codetab = new int[hsize]; int hsize = hsize; // for dynamic table sizing int free_ent = 0; // first unused entry // block compression parameters -- after all codes are used up, // and compression rate changes, start over. bool clear_flg = false; // algorithm: use open addressing double hashing (no chaining) on the // prefix code / next character combination. we do a variant of knuth's // algorithm d (vol. 3, sec. 6.4) along with g. knott's relatively-prime // secondary probe. here, the modular division first probe is gives way // to a faster exclusive-or manipulation. also do block compression with // an adaptive reset, whereby the code table is cleared when the compression // ratio decreases, but after the table fills. the variable-length output // codes are re-sized at this point, and a special clear code is generated // for the decompressor. late addition: construct the table according to // file size for noticeable speed improvement on small files. please direct // questions about this implementation to ames!jaw. int g_init_bits; int clearcode; int eofcode; // output // // output the given code. // inputs: // code: a n_bits-bit integer. if == -1, then eof. this assumes // that n_bits =< wordsize - 1. // outputs: // outputs code to the file. // assumptions: // chars are 8 bits long. // algorithm: // maintain a bits character long buffer (so that 8 codes will // fit in it exactly). use the vax insv instruction to insert each // code in turn. when the buffer fills up empty it and start over. int cur_accum = 0; int cur_bits = 0; int [] masks = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff }; // number of characters so far in this 'packet' int a_count; // define the storage for the packet accumulator byte[] accum = new byte[256]; //---------------------------------------------------------------------------- public lzwencoder(int width, int height, byte[] pixels, int color_depth) { imgw = width; imgh = height; pixary = pixels; initcodesize = math.max(2, color_depth); } // add a character to the end of the current packet, and if it is 254 // characters, flush the packet to disk. void add(byte c, stream outs) { accum[a_count++] = c; if (a_count >= 254) flush(outs); } // clear out the hash table // table clear for block compress void cleartable(stream outs) { resetcodetable(hsize); free_ent = clearcode + 2; clear_flg = true; output(clearcode, outs); } // reset code table // 全部初始化为-1 void resetcodetable(int hsize) { for (int i = 0; i < hsize; ++i) htab[i] = -1; } void compress(int init_bits, stream outs) { int fcode; int i /* = 0 */; int c; int ent; int disp; int hsize_reg; int hshift; // set up the globals: g_init_bits - initial number of bits //原始数据的字长,在gif文件中,原始数据的字长可以为1(单色图),4(16色),和8(256色) //开始的时候先加上1 //但是当原始数据长度为1的时候,开始为3 //因此原始长度1->3,4->5,8->9 //?为何原始数据字长为1的时候,开始长度为3呢?? //如果+1=2,只能表示四种状态,加上clearcode和endcode就用完了。所以必须扩展到3 g_init_bits = init_bits; // set up the necessary values //是否需要加清除标志 //gif为了提高压缩率,采用的是变长的字长(vcl)。比如说原始数据是8位,那么开始先加上1位(8+1=9) //当标号到2^9=512的时候,超过了当前长度9所能表现的最大值,此时后面的标号就必须用10位来表示 //以此类推,当标号到2^12的时候,因为最大为12,不能继续扩展了,需要在2^12=4096的位置上插入一个clearcode,表示从这往后,从9位重新再来了 clear_flg = false; n_bits = g_init_bits; //获得n位数能表述的最大值(gif图像中开始一般为3,5,9,故maxcode一般为7,31,511) maxcode = maxcode(n_bits); //表示从这里我重新开始构造字典字典了,以前的所有标记作废, //开始使用新的标记。这个标号集的大小多少比较合适呢?据说理论上是越大压缩率越高(我个人感觉太大了也不见得就好), //不过处理的开销也呈指数增长 //gif规定,clearcode的值为原始数据最大字长所能表达的数值+1;比如原始数据长度为8,则clearcode=1<<(9-1)=256 clearcode = 1 << (init_bits - 1); //结束标志为clearcode+1 eofcode = clearcode + 1; //这个是解除结束的 free_ent = clearcode + 2; //清楚数量 a_count = 0; // clear packet //从图像中获得下一个像素 ent = nextpixel(); hshift = 0; for (fcode = hsize; fcode < 65536; fcode *= 2) ++hshift; //设置hash码范围 hshift = 8 - hshift; // set hash code range bound hsize_reg = hsize; //清除固定大小的hash表,用于存储标记,这个相当于字典 resetcodetable(hsize_reg); // clear hash table output(clearcode, outs); outer_loop : while ((c = nextpixel()) != eof) { fcode = (c << maxbits) + ent; i = (c << hshift) ^ ent; // xor hashing //嘿嘿,小样,又来了,我认识你 if (htab[i] == fcode) { ent = codetab[i]; continue; } //这小子,新来的 else if (htab[i] >= 0) // non-empty slot { disp = hsize_reg - i; // secondary hash (after g. knott) if (i == 0) disp = 1; do { if ((i -= disp) < 0) i += hsize_reg; if (htab[i] == fcode) { ent = codetab[i]; goto outer_loop; } } while (htab[i] >= 0); } output(ent, outs); //从这里可以看出,ent就是前缀(prefix),而当前正在处理的字符标志就是后缀(suffix) ent = c; //判断终止结束符是否超过当前位数所能表述的范围 if (free_ent < maxmaxcode) { //如果没有超 codetab[i] = free_ent++; // code -> hashtable //hash表里面建立相应索引 htab[i] = fcode; } else //说明超过了当前所能表述的范围,清空字典,重新再来 cleartable(outs); } // put out the final code. output(ent, outs); output(eofcode, outs); } //---------------------------------------------------------------------------- public void encode( stream os) { os.writebyte( convert.tobyte( initcodesize) ); // write "initial code size" byte //这个图像包含多少个像素 remaining = imgw * imgh; // reset navigation variables //当前处理的像素索引 curpixel = 0; compress(initcodesize + 1, os); // compress and write the pixel data os.writebyte(0); // write block terminator } // flush the packet to disk, and reset the accumulator void flush(stream outs) { if (a_count > 0) { outs.writebyte( convert.tobyte( a_count )); outs.write(accum, 0, a_count); a_count = 0; } } /// <summary> /// 获得n位数所能表达的最大数值 /// </summary> /// <param name="n_bits">位数,一般情况下n_bits = 9</param> /// <returns>最大值,例如n_bits=8,则返回值就为2^8-1=255</returns> int maxcode(int n_bits) { return (1 << n_bits) - 1; } //---------------------------------------------------------------------------- // return the next pixel from the image //---------------------------------------------------------------------------- /// <summary> /// 从图像中获得下一个像素 /// </summary> /// <returns></returns> private int nextpixel() { //还剩多少个像素没有处理 //如果没有了,返回结束标志 if (remaining == 0) return eof; //否则处理下一个,并将未处理像素数目-1 --remaining; //当前处理的像素 int temp = curpixel + 1; //如果当前处理像素在像素范围之内 if ( temp < pixary.getupperbound( 0 )) { //下一个像素 byte pix = pixary[curpixel++]; return pix & 0xff; } return 0xff; } /// <summary> /// 输出字到输出流 /// </summary> /// <param name="code">要输出的字</param> /// <param name="outs">输出流</param> void output(int code, stream outs) { //得到当前标志位所能表示的最大标志值 cur_accum &= masks[cur_bits]; if (cur_bits > 0) cur_accum |= (code << cur_bits); else //如果标志位为0,就将当前标号为输入流 cur_accum = code; //当前能标志的最大字长度(9-10-11-12-9-10。。。。。。。) cur_bits += n_bits; //如果当前最大长度大于8 while (cur_bits >= 8) { //向流中输出一个字节 add((byte) (cur_accum & 0xff), outs); //将当前标号右移8位 cur_accum >>= 8; cur_bits -= 8; } // if the next entry is going to be too big for the code size, // then increase it, if possible. if (free_ent > maxcode || clear_flg) { if (clear_flg) { maxcode = maxcode(n_bits = g_init_bits); clear_flg = false; } else { ++n_bits; if (n_bits == maxbits) maxcode = maxmaxcode; else maxcode = maxcode(n_bits); } } if (code == eofcode) { // at eof, write the rest of the buffer. while (cur_bits > 0) { add((byte) (cur_accum & 0xff), outs); cur_accum >>= 8; cur_bits -= 8; } flush(outs); } } } }
以上就是本文的全部内容,希望能给大家一个参考,也希望大家多多支持。
上一篇: C#判断一个String是否为数字类型
下一篇: ubutun 搭建scrapy环境