欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识。
您现在的位置是: 首页  >  IT编程

C#实现获取文本文件的编码的一个类(区分GB2312和UTF8)

程序员文章站 2023-12-16 22:37:34
以下是获取文件编码的一个类: using system; using system.io; using system.text; /// <...

以下是获取文件编码的一个类:

using System;
using System.IO;
using System.Text;
 
// Helpers for guessing the text encoding of a file.
namespace fileencoding
{
    /// <summary>
    /// Guesses the text encoding of a file. A byte-order mark (BOM) wins when present;
    /// otherwise the content is reported as UTF-8 if it is well-formed UTF-8, and as
    /// <see cref="System.Text.Encoding.Default"/> (e.g. GB2312 on a Chinese system) if not.
    /// </summary>
    public class encodingtype
    {
        /// <summary>
        /// Opens the file at the given path read-only and detects its encoding.
        /// </summary>
        /// <param name="file_name">Path of the file to inspect.</param>
        /// <returns>The detected encoding.</returns>
        public static System.Text.Encoding gettype(string file_name)
        {
            // "using" guarantees the handle is released even if detection throws.
            using (FileStream fs = new FileStream(file_name, FileMode.Open, FileAccess.Read))
            {
                return gettype(fs);
            }
        }

        /// <summary>
        /// Detects the encoding of the data in an already opened file stream.
        /// The stream is read from its current position and is closed before returning.
        /// </summary>
        /// <param name="fs">File stream to inspect; closed by this call.</param>
        /// <returns>
        /// <see cref="System.Text.Encoding.UTF8"/>, <see cref="System.Text.Encoding.Unicode"/>,
        /// <see cref="System.Text.Encoding.BigEndianUnicode"/>, or
        /// <see cref="System.Text.Encoding.Default"/> when nothing else matches.
        /// </returns>
        public static System.Text.Encoding gettype(FileStream fs)
        {
            // Disposing the reader also closes fs, which callers of this overload already rely on.
            using (BinaryReader reader = new BinaryReader(fs))
            {
                // A file too large for a single array is not read at all; detection then
                // proceeds as for an empty file (same outcome as before, now explicit).
                int length = fs.Length > int.MaxValue ? 0 : (int)fs.Length;
                byte[] data = reader.ReadBytes(length);

                // UTF-8 BOM: EF BB BF.
                if (data.Length >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF)
                {
                    return Encoding.UTF8;
                }

                // UTF-16 BOMs are exactly two bytes; the byte after them is already text
                // and must not take part in the comparison.
                if (data.Length >= 2 && data[0] == 0xFE && data[1] == 0xFF)
                {
                    return Encoding.BigEndianUnicode;
                }
                if (data.Length >= 2 && data[0] == 0xFF && data[1] == 0xFE)
                {
                    return Encoding.Unicode;
                }

                // No BOM: well-formed UTF-8 (including pure ASCII and empty input) is UTF-8.
                return isutf8bytes(data) ? Encoding.UTF8 : Encoding.Default;
            }
        }

        /// <summary>
        /// Checks whether the bytes form structurally valid UTF-8 (no BOM required).
        /// Lead bytes announcing up to 6 bytes per character are accepted, matching the
        /// legacy UTF-8 layout 110xxxxx ... 1111110x.
        /// </summary>
        /// <param name="data">Raw bytes to validate.</param>
        /// <returns>
        /// <c>true</c> when every multi-byte sequence is complete and well-formed;
        /// <c>false</c> otherwise, including when the data ends in the middle of a sequence.
        /// </returns>
        private static bool isutf8bytes(byte[] data)
        {
            int pending = 0; // continuation bytes still expected for the current character

            foreach (byte current in data)
            {
                if (pending > 0)
                {
                    // Continuation bytes must look like 10xxxxxx.
                    if ((current & 0xC0) != 0x80)
                    {
                        return false;
                    }
                    pending--;
                    continue;
                }

                if (current < 0x80)
                {
                    continue; // single-byte (ASCII) character
                }

                // The number of leading 1 bits in a lead byte is the length of the sequence.
                int leadingOnes = 0;
                for (int mask = 0x80; (current & mask) != 0; mask >>= 1)
                {
                    leadingOnes++;
                }

                // Exactly one leading 1 is a stray continuation byte; more than six is invalid.
                if (leadingOnes == 1 || leadingOnes > 6)
                {
                    return false;
                }
                pending = leadingOnes - 1;
            }

            // Data that stops in the middle of a character is simply not UTF-8.
            return pending == 0;
        }
    }
}

以下是使用示例:

#region 打开按钮
/// <summary>
/// Click handler for the "open" menu item: lets the user pick a .txt file and shows
/// its content, decoded with the encoding detected by
/// <c>fileencoding.encodingtype.gettype</c>.
/// </summary>
/// <param name="sender">Control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void txtmenuopen_click(object sender, EventArgs e)
{
    // The dialog holds native resources, so dispose it deterministically.
    using (OpenFileDialog openFileDialog = new OpenFileDialog())
    {
        // NOTE(review): the original remark here lost its backslashes; it presumably meant
        // that a hard-coded path needs escaped backslashes ("c:\\") or a verbatim string.
        openFileDialog.InitialDirectory = "";
        openFileDialog.Filter = "文本文档|*.txt";
        openFileDialog.RestoreDirectory = true;
        openFileDialog.FilterIndex = 1;

        if (openFileDialog.ShowDialog() != DialogResult.OK)
        {
            return; // user cancelled
        }

        string fname = openFileDialog.FileName;

        // txtbox is declared elsewhere on the form; assumed to be a text control.
        txtbox.Text = System.IO.File.ReadAllText(fname,
            fileencoding.encodingtype.gettype(fname));
    }
}
#endregion

上一篇:

下一篇: