C#实现的字符串相似度对比类
程序员文章站
2024-02-08 20:36:40
本类适用于比较两个字符串的相似度,代码如下:
using system;
using system.collections.generic;
using sys...
本类适用于比较两个字符串的相似度,代码如下:
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;

/// <summary>
/// Measures how similar two strings are using the Levenshtein edit distance
/// (unit-cost insertions, deletions and substitutions), computed with a full
/// dynamic-programming matrix. Characters are compared as individual
/// <see cref="char"/> values.
/// </summary>
public class stringcompute
{
    #region Private fields

    /// <summary>Characters of the first string.</summary>
    private char[] _arrchar1;

    /// <summary>Characters of the second string.</summary>
    private char[] _arrchar2;

    /// <summary>Outcome of the most recent computation.</summary>
    private result _result;

    /// <summary>Number of matrix cells evaluated by the most recent computation.</summary>
    private int _computetimes;

    /// <summary>Edit-distance matrix; stays null until two strings have been supplied.</summary>
    private int[,] _matrix;

    /// <summary>Number of matrix columns (length of the second string + 1).</summary>
    private int _column;

    /// <summary>Number of matrix rows (length of the first string + 1).</summary>
    private int _row;

    #endregion

    #region Properties

    /// <summary>Result of the most recent computation.</summary>
    public result computeresult
    {
        get { return _result; }
    }

    #endregion

    #region Constructors

    /// <summary>
    /// Creates a comparer for the two given strings.
    /// </summary>
    /// <param name="str1">First string.</param>
    /// <param name="str2">Second string.</param>
    /// <exception cref="ArgumentNullException">Either string is null.</exception>
    public stringcompute(string str1, string str2)
    {
        this.stringcomputeinit(str1, str2);
    }

    /// <summary>
    /// Creates a comparer with no strings attached. Supply them through
    /// <c>compute(str1, str2)</c> or <c>speedycompute(str1, str2)</c>.
    /// </summary>
    public stringcompute()
    {
    }

    #endregion

    #region Algorithm

    /// <summary>
    /// Stores both strings as character arrays, clears the previous result and
    /// allocates a matrix of (length1 + 1) x (length2 + 1) cells.
    /// </summary>
    /// <param name="str1">First string.</param>
    /// <param name="str2">Second string.</param>
    /// <exception cref="ArgumentNullException">Either string is null.</exception>
    private void stringcomputeinit(string str1, string str2)
    {
        if (str1 == null)
        {
            throw new ArgumentNullException("str1");
        }

        if (str2 == null)
        {
            throw new ArgumentNullException("str2");
        }

        _arrchar1 = str1.ToCharArray();
        _arrchar2 = str2.ToCharArray();
        _result = new result();
        _computetimes = 0;
        _row = _arrchar1.Length + 1;
        _column = _arrchar2.Length + 1;
        _matrix = new int[_row, _column];
    }

    /// <summary>
    /// Computes the similarity of the stored strings and records the elapsed
    /// time in <c>usetime</c>.
    /// </summary>
    /// <exception cref="InvalidOperationException">No strings have been supplied yet.</exception>
    public void compute()
    {
        this.ensureinitialized();

        // Stopwatch is monotonic and high-resolution, unlike wall-clock timestamps.
        Stopwatch timer = Stopwatch.StartNew();
        this.filldistancematrix();
        timer.Stop();

        this.publishresult();
        _result.usetime = timer.Elapsed.ToString();
    }

    /// <summary>
    /// Computes the similarity of the stored strings without measuring the
    /// elapsed time; <c>usetime</c> is left untouched.
    /// </summary>
    /// <exception cref="InvalidOperationException">No strings have been supplied yet.</exception>
    public void speedycompute()
    {
        this.ensureinitialized();
        this.filldistancematrix();
        this.publishresult();
    }

    /// <summary>
    /// Computes the similarity of the given strings and records the elapsed time.
    /// </summary>
    /// <param name="str1">First string.</param>
    /// <param name="str2">Second string.</param>
    /// <exception cref="ArgumentNullException">Either string is null.</exception>
    public void compute(string str1, string str2)
    {
        this.stringcomputeinit(str1, str2);
        this.compute();
    }

    /// <summary>
    /// Computes the similarity of the given strings without measuring the elapsed time.
    /// </summary>
    /// <param name="str1">First string.</param>
    /// <param name="str2">Second string.</param>
    /// <exception cref="ArgumentNullException">Either string is null.</exception>
    public void speedycompute(string str1, string str2)
    {
        this.stringcomputeinit(str1, str2);
        this.speedycompute();
    }

    /// <summary>
    /// Fails with a descriptive error when a computation is requested before
    /// any strings were supplied (the matrix is still null in that case).
    /// </summary>
    private void ensureinitialized()
    {
        if (_matrix == null)
        {
            throw new InvalidOperationException(
                "No strings to compare: use the two-string constructor or pass the strings to compute/speedycompute.");
        }
    }

    /// <summary>
    /// Fills the matrix so that cell [i, j] holds the edit distance between the
    /// first i characters of string 1 and the first j characters of string 2.
    /// </summary>
    private void filldistancematrix()
    {
        // Restart the counter so repeated runs on the same instance do not accumulate.
        _computetimes = 0;
        this.initmatrix();

        for (int i = 1; i < _row; i++)
        {
            for (int j = 1; j < _column; j++)
            {
                int substitutioncost = _arrchar1[i - 1] == _arrchar2[j - 1] ? 0 : 1;

                int viaabove = _matrix[i - 1, j] + 1;
                int vialeft = _matrix[i, j - 1] + 1;
                int viadiagonal = _matrix[i - 1, j - 1] + substitutioncost;

                // Key step: each cell takes the cheapest of its three predecessors.
                _matrix[i, j] = Math.Min(Math.Min(viaabove, vialeft), viadiagonal);
                _computetimes++;
            }
        }
    }

    /// <summary>
    /// Copies the distance (bottom-right matrix cell), the derived similarity
    /// rate and the cell count into the result.
    /// </summary>
    private void publishresult()
    {
        int distance = _matrix[_row - 1, _column - 1];

        // Normalize by the longer string's length (not the matrix dimension, which
        // is one larger) so completely different strings score 0.
        int longest = Math.Max(_arrchar1.Length, _arrchar2.Length);

        // Two empty strings are identical; this also avoids dividing by zero.
        _result.rate = longest == 0 ? 1m : 1 - (decimal)distance / longest;
        _result.computetimes = _computetimes.ToString();
        _result.difference = distance;
    }

    /// <summary>
    /// Seeds the first row and first column with 0, 1, 2, ... (the distance
    /// from the empty prefix).
    /// </summary>
    private void initmatrix()
    {
        for (int i = 0; i < _column; i++)
        {
            _matrix[0, i] = i;
        }

        for (int i = 0; i < _row; i++)
        {
            _matrix[i, 0] = i;
        }
    }

    #endregion
}

/// <summary>
/// Outcome of a string comparison.
/// </summary>
public struct result
{
    /// <summary>
    /// Similarity: 1 minus the edit distance divided by the longer string's
    /// length. 1 means the strings are identical.
    /// </summary>
    public decimal rate;

    /// <summary>Number of matrix cells evaluated, as text.</summary>
    public string computetimes;

    /// <summary>Elapsed time as text; only set by the timed compute methods.</summary>
    public string usetime;

    /// <summary>Edit distance between the two strings.</summary>
    public int difference;
}
调用方法:
// Option 1: compute the similarity without recording the elapsed time
stringcompute stringcompute1 = new stringcompute();
stringcompute1.speedycompute("对比字符一", "对比字符二");
decimal rate = stringcompute1.computeresult.rate; // similarity ratio; identical strings yield 1

// Option 2: compute the similarity and record the elapsed time
stringcompute stringcompute2 = new stringcompute();
// The default constructor stores no strings, so they must be passed here;
// calling the parameterless compute() on this instance would fail.
stringcompute2.compute("对比字符一", "对比字符二");
string usetime = stringcompute2.computeresult.usetime; // elapsed time of the comparison