C#实现获取文本文件的编码的一个类(区分GB2312和UTF8)
程序员文章站
2023-12-16 22:37:34
以下是获取文件编码的一个类:
using system;
using system.io;
using system.text;
/// <...
以下是获取文件编码的一个类:
using System;
using System.IO;
using System.Text;

/// <summary>
/// Helpers for detecting the text encoding of a file.
/// </summary>
namespace fileencoding
{
    /// <summary>
    /// Guesses the text encoding of a file from its byte-order mark (BOM) or, when
    /// there is no BOM, from whether its bytes form well-formed UTF-8.
    /// </summary>
    /// <remarks>
    /// The namespace, class and method names keep the all-lowercase spelling that
    /// existing callers use. Anything that is neither BOM-marked nor well-formed
    /// UTF-8 is reported as <see cref="Encoding.Default"/>; on .NET Framework that is
    /// the system ANSI code page (e.g. GB2312 on a Simplified Chinese Windows), while
    /// on .NET Core / .NET 5+ <see cref="Encoding.Default"/> is itself UTF-8.
    /// </remarks>
    public class encodingtype
    {
        /// <summary>
        /// Opens the file at <paramref name="file_name"/> read-only and detects its encoding.
        /// </summary>
        /// <param name="file_name">Path of the file to inspect.</param>
        /// <returns>The detected encoding.</returns>
        public static System.Text.Encoding gettype(string file_name)
        {
            // "using" guarantees the handle is released even if reading throws.
            using (FileStream fs = new FileStream(file_name, FileMode.Open, FileAccess.Read))
            {
                return gettype(fs);
            }
        }

        /// <summary>
        /// Detects the encoding of the data readable from <paramref name="fs"/>.
        /// </summary>
        /// <param name="fs">
        /// Stream to inspect. It is read from its current position and, as in earlier
        /// versions of this method, it is closed before the method returns.
        /// </param>
        /// <returns>
        /// <see cref="Encoding.UTF8"/>, <see cref="Encoding.BigEndianUnicode"/>,
        /// <see cref="Encoding.Unicode"/> or <see cref="Encoding.Default"/>.
        /// </returns>
        public static System.Text.Encoding gettype(FileStream fs)
        {
            byte[] data;

            // Disposing the reader also closes the underlying stream.
            using (BinaryReader reader = new BinaryReader(fs, Encoding.Default))
            {
                // A file too large for a single byte[] is not scanned; as before,
                // it is then treated like empty input.
                long length = fs.Length;
                int count = length > int.MaxValue ? 0 : (int)length;
                data = reader.ReadBytes(count);
            }

            // A BOM is identified by its signature bytes alone; the bytes that follow
            // it are ordinary text and must not take part in the comparison.
            if (HasPrefix(data, 0xEF, 0xBB, 0xBF))
            {
                return Encoding.UTF8;
            }

            if (HasPrefix(data, 0xFE, 0xFF))
            {
                return Encoding.BigEndianUnicode;
            }

            if (HasPrefix(data, 0xFF, 0xFE))
            {
                return Encoding.Unicode;
            }

            // No BOM: well-formed UTF-8 (including pure ASCII and empty input) is
            // reported as UTF-8, everything else as the default encoding.
            return isutf8bytes(data) ? Encoding.UTF8 : Encoding.Default;
        }

        /// <summary>
        /// Tells whether <paramref name="data"/> begins with the given byte sequence.
        /// </summary>
        /// <param name="data">Bytes to examine.</param>
        /// <param name="prefix">Expected leading bytes.</param>
        /// <returns>False when <paramref name="data"/> is shorter than the prefix or differs from it.</returns>
        private static bool HasPrefix(byte[] data, params byte[] prefix)
        {
            if (data.Length < prefix.Length)
            {
                return false;
            }

            for (int i = 0; i < prefix.Length; i++)
            {
                if (data[i] != prefix[i])
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Checks whether the bytes are structurally valid UTF-8 (no BOM required).
        /// </summary>
        /// <param name="data">Bytes to examine.</param>
        /// <returns>
        /// True when every lead byte announces a 2- to 4-byte sequence and is followed
        /// by the right number of continuation bytes; false otherwise, including when
        /// the data ends in the middle of a sequence.
        /// </returns>
        private static bool isutf8bytes(byte[] data)
        {
            // Continuation bytes still owed by the sequence currently being read.
            int pending = 0;

            foreach (byte current in data)
            {
                if (pending > 0)
                {
                    // Inside a sequence every byte must look like 10xxxxxx.
                    if ((current & 0xC0) != 0x80)
                    {
                        return false;
                    }

                    pending--;
                    continue;
                }

                if (current < 0x80)
                {
                    continue; // plain ASCII
                }

                // The number of leading 1 bits in a lead byte is the sequence length.
                int leadingOnes = 0;
                for (int mask = 0x80; (current & mask) != 0; mask >>= 1)
                {
                    leadingOnes++;
                }

                // One leading 1 is a stray continuation byte; more than four would be
                // a 5- or 6-byte form, which UTF-8 no longer permits.
                if (leadingOnes == 1 || leadingOnes > 4)
                {
                    return false;
                }

                pending = leadingOnes - 1;
            }

            // A sequence cut off by the end of the data is not valid UTF-8.
            return pending == 0;
        }
    }
}
以下是使用示例:
#region Open button
/// <summary>
/// Handles the "Open" menu item: lets the user pick a .txt file, detects its
/// encoding and loads its text into <c>txtbox</c>.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void txtmenuopen_click(object sender, EventArgs e)
{
    // The dialog holds native resources, so dispose it deterministically.
    using (OpenFileDialog dialog = new OpenFileDialog())
    {
        // NOTE(review): the original comment on this line lost its backslashes during
        // extraction; it appears to say that a literal path must escape them
        // ("C:\\" rather than "C:\") — confirm against the original article.
        dialog.InitialDirectory = "";
        dialog.Filter = "文本文档|*.txt";
        dialog.RestoreDirectory = true;
        dialog.FilterIndex = 1;

        if (dialog.ShowDialog() != DialogResult.OK)
        {
            return; // the user cancelled
        }

        string fname = dialog.FileName;

        // Decode the file with the detected encoding instead of a fixed one.
        // NOTE(review): txtbox is declared outside this snippet; presumably a TextBox — confirm.
        txtbox.Text = System.IO.File.ReadAllText(fname, fileencoding.encodingtype.gettype(fname));
    }
}
#endregion