序列比对算法-计算生物学
1.序列比对指将两个或多个序列排列在一起,标明其相似之处。序列中可以插入间隔(通常用短横线“-”表示)。对应的相同或相似的符号(在核酸中是A, T(或U), C, G,在蛋白质中是氨基酸残基的单字母表示)排列在同一列上。这一方法常用于研究由共同祖先进化而来的序列,特别是如蛋白质序列或DNA序列等生物序列。进行相似度分析。
2.分类
- 全局比对:将两个序列中的所有字符都进行依次比对,由于各方面的缺陷,可用性不强。
- 局部比对(local alignment):通过动态规划的方式,改动最少来匹配两个序列最相似的部分。
- 双序列比对:只需要比对两个序列
- 多序列比对:基于双序列比对,这样,主要是用来提取多个不同序列中,具有的共同特征信息。
3.双序列局部比对。
- 最大子序列问题,对于两个字符串,通过比较找到两个字符串中相同的子序列,不考虑间隔。
/**
1. Design an algorithm to find the Longest Common Subsequence,
Longest Common Substring between two sequences.
* Created by zoutai on 2017/10/1.
* 从后向前遍历查找,迭代/遍历+动态规划
* 但是递归的方式有一个坏处,就是内部往往那个存在很多重复计算,最大时间复杂度2^n
* 所以我们采用空间换时间的办法:直接将所有的L[i][j]遍历出来,时间复杂度即为(mn)
*/
public class LongestCommonSubsequence {
public static void main(String[] args) {
String s1 = "AGGTAB";
String s2 = "GXTXAYB";
int m = s1.length(),n=s2.length();
String result = getLongestSubStr(s1,s2,m,n);
System.out.println(result);
}
// 从后向前遍历
public static String getLongestSubStr(String s1,String s2,int m,int n) {
if (m == 0 || n == 0)
return "";
if (s1.charAt(m - 1) == s2.charAt(n - 1))
return getLongestSubStr(s1, s2, m - 1, n - 1)+s1.charAt(m - 1);
else
return max(getLongestSubStr(s1, s2, m, n - 1), getLongestSubStr(s1, s2, m - 1, n));
}
// 比较字符串长度
public static String max(String a, String b) {
if(a==null||b==null){
return "";
}
return (a.length() > b.length()) ? a : b;
}
}
- Smith-Waterman算法:
算法主要思想:
对于A,B两个序列,n,m分别表示他们的长度,构造相应的矩阵和罚分;通过罚分矩阵,得到分数最大的一条路径,即为做好的匹配路径。
参考链接:wiki
实现方式:- 普通空白函数:
import java.util.LinkedList;
import java.util.List;
/**
* Created by zoutai on 2017/10/2.
* 2.Tandem repeats. Let P be a pattern of length n and T a text of length m.
* Let Pm be the concatenation of P with itself m times, so Pm has length mn.
* We want to compute a local alignment between Pm and T .
*/
public class Alignment02 {
public static void main(String[] args) {
List<String> list = new LinkedList<String>();
/* 样例1:
输出为:
G-CTAGCT
GACTA-CT
*/
String P = new String("AGCT");
String T = new String("GACTACT");
String Pm = new String();
Pm = pToPm(P,T);
list = localAlig(Pm,T);
/* (不复制)样例2:
输出为:
GTT-AC
GTTGAC
*/
// String t1 = new String("TGTTACGG");
// String t2 = new String("GGTTGACTA");
// list = localAlig(t1, t2);
for (String str : list) {
System.out.println(str.toString());
}
}
private static List<String> localAlig(String A, String B) {
List<String> twoList = new LinkedList<String>();
int allMax = 0;
int col = A.length();
int row = B.length();
int[][] mat = new int[row + 1][col + 1];
for (int i = 0; i < col + 1; i++) {
mat[0][i] = 0;
}
for (int j = 0; j < row + 1; j++) {
mat[j][0] = 0;
}
for (int i = 1; i < col+1; i++) {
for (int j = 1; j < row+1; j++) {
int Match = mat[j - 1][i - 1] + (B.charAt(j-1)==A.charAt(i-1)?3:-3);
int Delete = mat[j - 1][i] + (0 - 2);
int Insert = mat[j][i - 1] + (0 - 2);
mat[j][i] = Math.max(Math.max(Match,0), Math.max(Insert, Delete));
}
}
// 再次遍历矩阵,获取矩阵最大值,用于查找序列初始点
int tempMax = 0;
int temprow =0,tempcol = 0;
for (int i =0;i<row+1;i++) {
for (int j =0;j<col+1;j++) {
System.out.print(mat[i][j]+".");
if (tempMax < mat[i][j]) {
tempMax = mat[i][j];
temprow = i;
tempcol = j;
}
}
System.out.println();
}
String AlignmentA = "";
String AlignmentB = "";
int i = tempcol;
int j = temprow;
while (i > 0 && j > 0) {
if (i > 0 && j > 0 && mat[j][i] == mat[j-1][i-1]
+ (B.charAt(j-1)==A.charAt(i-1)?3:-3)) {
AlignmentA = A.charAt(i - 1) + AlignmentA;
AlignmentB = B.charAt(j - 1) + AlignmentB;
i = i - 1;
j = j - 1;
} else if (i == 1 || j == 1) {
break;
} else if (j > 0 && mat[j][i] == mat[j - 1][i] + (0 - 2)) {
AlignmentA = "-" + AlignmentA;
AlignmentB = B.charAt(j - 1) + AlignmentB;
j = j - 1;
} else {
AlignmentA = A.charAt(i - 1) + AlignmentA;
AlignmentB = "-" + AlignmentB;
i = i - 1;
}
}
twoList.add(AlignmentA);
twoList.add(AlignmentB);
return twoList;
}
// 将p字符串转为pm
private static String pToPm(String p, String t) {
// TODO Auto-generated method stub
int n = p.length();
int m = t.length();
StringBuffer sb = new StringBuffer();
for (int i=0;i<m;i++) {
sb.append(p);
}
return sb.toString();
}
}
* 线性空白函数:
import java.util.LinkedList;
import java.util.List;
/**
* Created by zoutai on 2017/10/2.
* Implementation of simple local alignment algorithm
*
* 线性罚分,Ws=2;Wd=0;
*/
public class Alignment0301 {
public static final int[][] PAM250 = {
{4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0},
{-1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3},
{-2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3},
{-2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3},
{0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1},
{-1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2},
{-1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, -2},
{0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3},
{-2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3},
{-1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3},
{-1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1},
{-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2},
{-1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1},
{-2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1},
{-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2},
{1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2},
{0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0},
{-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3},
{-2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1},
{0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4}};
public static void main(String[] args) {
String t1 = new String("TGTTACGG");
String t2 = new String("GGTTGACTA");
List<String> list = new LinkedList<String>();
list = localAlig(t1, t2);
for (String str : list) {
System.out.println(str.toString());
}
}
private static List<String> localAlig(String A, String B) {
List<String> twoList = new LinkedList<String>();
int allMax = 0;
int col = A.length();
int row = B.length();
int[][] mat = new int[row + 1][col + 1];
for (int i = 0; i < col + 1; i++) {
mat[0][i] = 0;
}
for (int j = 0; j < row + 1; j++) {
mat[j][0] = 0;
}
for (int i = 1; i < col+1; i++) {
for (int j = 1; j < row+1; j++) {
int Match = mat[j - 1][i - 1] + (B.charAt(j-1)==A.charAt(i-1)?3:-3);
int Delete = mat[j - 1][i] + (0 - 2);
int Insert = mat[j][i - 1] + (0 - 2);
mat[j][i] = Math.max(Math.max(Match,0), Math.max(Insert, Delete));
}
}
// 再次遍历矩阵,获取矩阵最大值,用于查找序列初始点
int tempMax = 0;
int temprow =0,tempcol = 0;
for (int i =0;i<row+1;i++) {
for (int j =0;j<col+1;j++) {
System.out.print(mat[i][j]+".");
if (tempMax < mat[i][j]) {
tempMax = mat[i][j];
temprow = i;
tempcol = j;
}
}
System.out.println();
}
String AlignmentA = "";
String AlignmentB = "";
int i = tempcol;
int j = temprow;
while (i > 1 && j > 1) {
if (i > 1 && j > 1 && mat[j][i] == mat[j-1][i-1]
+ (B.charAt(j-1)==A.charAt(i-1)?3:-3)) {
AlignmentA = A.charAt(i - 1) + AlignmentA;
AlignmentB = B.charAt(j - 1) + AlignmentB;
i = i - 1;
j = j - 1;
} else if (i == 1 || j == 1) {
break;
} else if (j > 1 && mat[j][i] == mat[j - 1][i] + (0 - 2)) {
AlignmentA = "-" + AlignmentA;
AlignmentB = B.charAt(j - 1) + AlignmentB;
j = j - 1;
} else {
AlignmentA = A.charAt(i - 1) + AlignmentA;
AlignmentB = "-" + AlignmentB;
i = i - 1;
}
}
twoList.add(AlignmentA);
twoList.add(AlignmentB);
return twoList;
}
}
* Affine空白函数:
import java.util.LinkedList;
import java.util.List;
/**
* Created by zoutai on 2017/10/2.
* Implementation the classical affine-gap local alignment algorithm
*
* Affine罚分,Ws=2;Wd=10;
*/
public class Alignment0302 {
public static final int[][] PAM250 = {
{4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0},
{-1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3},
{-2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3},
{-2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3},
{0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1},
{-1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2},
{-1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, -2},
{0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3},
{-2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3},
{-1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3},
{-1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1},
{-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2},
{-1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1},
{-2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1},
{-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2},
{1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2},
{0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0},
{-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3},
{-2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1},
{0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4}};
public static void main(String[] args) {
String t1 = new String("TGTTACGG");
String t2 = new String("GGTTGACTA");
List<String> list = new LinkedList<String>();
list = localAlig(t1, t2);
for (String str : list) {
System.out.println(str.toString());
}
}
private static List<String> localAlig(String A, String B) {
List<String> twoList = new LinkedList<String>();
int allMax = 0;
int col = A.length();
int row = B.length();
int[][] mat = new int[row + 1][col + 1];
for (int i = 0; i < col + 1; i++) {
mat[0][i] = -10;
}
for (int j = 0; j < row + 1; j++) {
mat[j][0] = -10;
}
for (int i = 1; i < col+1; i++) {
for (int j = 1; j < row+1; j++) {
int Match = mat[j - 1][i - 1] + Scoring.getScore(PAM250, A.charAt(i - 1), B.charAt(j - 1));
int Delete = mat[j - 1][i] + (0 - 2);
int Insert = mat[j][i - 1] + (0 - 2);
mat[j][i] = Math.max(Math.max(Match,0), Math.max(Insert, Delete));
}
}
// 再次遍历矩阵,获取矩阵最大值,用于查找序列初始点
int tempMax = 0;
int temprow =0,tempcol = 0;
for (int i =0;i<row+1;i++) {
for (int j =0;j<col+1;j++) {
System.out.print(mat[i][j]+".");
if (tempMax < mat[i][j]) {
tempMax = mat[i][j];
temprow = i;
tempcol = j;
}
}
System.out.println();
}
String AlignmentA = "";
String AlignmentB = "";
int i = tempcol;
int j = temprow;
while (i > 1 && j > 1) {
if (i > 1 && j > 1 && mat[j][i] == mat[j-1][i-1]
+ Scoring.getScore(PAM250, A.charAt(i - 1), B.charAt(j - 1))) {
AlignmentA = A.charAt(i - 1) + AlignmentA;
AlignmentB = B.charAt(j - 1) + AlignmentB;
i = i - 1;
j = j - 1;
} else if (i == 1 || j == 1) {
break;
} else if (j > 1 && mat[j][i] == mat[j - 1][i] + (0 - 2)) {
AlignmentA = "-" + AlignmentA;
AlignmentB = B.charAt(j - 1) + AlignmentB;
j = j - 1;
} else {
AlignmentA = A.charAt(i - 1) + AlignmentA;
AlignmentB = "-" + AlignmentB;
i = i - 1;
}
}
twoList.add(AlignmentA);
twoList.add(AlignmentB);
return twoList;
}
}
* 其中给定罚分矩阵:
import java.util.HashMap;
//This stores scoring resources, including matrices
public class Scoring {
//matrix contents are from rosalind:
public static final int[][] BLOSUM62 = {
{ 4, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -1, -1, -1, 1, 0, 0, -3, -2},
{ 0, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2},
{-2, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, -1, -3, -4, -3},
{-1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, -1, -2, -3, -2},
{-2, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, 3},
{ 0, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -2, -2, -2, 0, -2, -3, -2, -3},
{-2, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, -2, -3, -2, 2},
{-1, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -1, 3, -3, -1},
{-1, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, -1, -2, -3, -2},
{-1, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -1, 1, -2, -1},
{-1, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, 1, -1, -1},
{-2, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -2},
{-1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, -2, -1, -1, -2, -4, -3},
{-1, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, -1, -2, -2, -1},
{-1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -2},
{ 1, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, -2},
{ 0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -2, -2},
{ 0, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -1},
{-3, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, 2},
{-2, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, 7}};
//This following matrix is copied from rosalind:
public static final int[][] PAM250 = {
{4,-1,-2,-2,0,-1,-1,0,-2,-1,-1,-1,-1,-2,-1,1,0,-3,-2,0},
{-1,5,0,-2,-3,1,0,-2,0,-3,-2,2,-1,-3,-2,-1,-1,-3,-2,-3},
{-2,0,6,1,-3,0,0,0,1,-3,-3,0,-2,-3,-2,1,0,-4,-2,-3},
{-2,-2,1,6,-3,0,2,-1,-1,-3,-4,-1,-3,-3,-1,0,-1,-4,-3,-3},
{0,-3,-3,-3,9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1},
{-1,1,0,0,-3,5,2,-2,0,-3,-2,1,0,-3,-1,0,-1,-2,-1,-2},
{-1,0,0,2,-4,2,5,-2,0,-3,-3,1,-2,-3,-1,0,-1,-3,-2,-2},
{0,-2,0,-1,-3,-2,-2,6,-2,-4,-4,-2,-3,-3,-2,0,-2,-2,-3,-3},
{-2,0,1,-1,-3,0,0,-2,8,-3,-3,-1,-2,-1,-2,-1,-2,-2,2,-3},
{-1,-3,-3,-3,-1,-3,-3,-4,-3,4,2,-3,1,0,-3,-2,-1,-3,-1,3},
{-1,-2,-3,-4,-1,-2,-3,-4,-3,2,4,-2,2,0,-3,-2,-1,-2,-1,1},
{-1,2,0,-1,-3,1,1,-2,-1,-3,-2,5,-1,-3,-1,0,-1,-3,-2,-2},
{-1,-1,-2,-3,-1,0,-2,-3,-2,1,2,-1,5,0,-2,-1,-1,-1,-1,1},
{-2,-3,-3,-3,-2,-3,-3,-3,-1,0,0,-3,0,6,-4,-2,-2,1,3,-1},
{-1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4,7,-1,-1,-4,-3,-2},
{1,-1,1,0,-1,0,0,0,-1,-2,-2,0,-1,-2,-1,4,1,-3,-2,-2},
{0,-1,0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1,1,5,-2,-2,0},
{-3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1,1,-4,-3,-2,11,2,-3},
{-2,-2,-2,-3,-2,-1,-2,-3,2,-1,-1,-2,-1,3,-3,-2,-2,2,7,-1},
{0,-3,-3,-3,-1,-2,-2,-3,-3,3,1,-2,1,-1,-2,-2,0,-3,-1,4}};
public static HashMap<Character, Integer> matrixIndex = new HashMap<>();
//initializing values for the matrices, as they are arranged here:
static {
matrixIndex.put('A', 0);
matrixIndex.put('C', 1);
matrixIndex.put('D', 2);
matrixIndex.put('E', 3);
matrixIndex.put('F', 4);
matrixIndex.put('G', 5);
matrixIndex.put('H', 6);
matrixIndex.put('I', 7);
matrixIndex.put('K', 8);
matrixIndex.put('L', 9);
matrixIndex.put('M', 10);
matrixIndex.put('N', 11);
matrixIndex.put('P', 12);
matrixIndex.put('Q', 13);
matrixIndex.put('R', 14);
matrixIndex.put('S', 15);
matrixIndex.put('T', 16);
matrixIndex.put('V', 17);
matrixIndex.put('W', 18);
matrixIndex.put('Y', 19);
}
// returns the score of a match between the two provided amino acids, as scored by the specified matrix:
public static int getScore(int[][] matrix, char reference, char search) {
reference = Character.toUpperCase(reference);
search = Character.toUpperCase(search);
return matrix[matrixIndex.get(reference)][matrixIndex.get(search)];
}
}
* 对文本进行处理测试:
import java.io.*;
import java.util.LinkedList;
import java.util.List;
/**
* Created by zoutai on 2017/10/2.
*/
public class TestAlignment03 {
public static final int[][] PAM250 = {
{4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0},
{-1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3},
{-2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3},
{-2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3},
{0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1},
{-1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2},
{-1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, -2},
{0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3},
{-2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3},
{-1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3},
{-1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1},
{-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2},
{-1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1},
{-2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1},
{-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2},
{1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2},
{0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0},
{-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3},
{-2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1},
{0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4}};
public static void main(String[] args) throws FileNotFoundException,IOException{
File txtFile = new File("data_hm.txt");
BufferedReader br = new BufferedReader(new FileReader(txtFile));
int lineNumber = 0;
String line = null;
StringBuffer t1 = new StringBuffer();
StringBuffer t2 = new StringBuffer();
List strArr1 = new LinkedList();
List strArr2 = new LinkedList();
int[] resultArr = new int[25];
Boolean first = true;
Boolean flag = true;
int isget = 1;
while ((line = br.readLine())!=null) {
if(first) {
first = false;
continue;
} else if(line.equals("")) {
first = true;
if(isget==2) {
strArr1.add(t1);
strArr2.add(t2);
t1 = new StringBuffer();
t2 = new StringBuffer();
isget--;
continue;
}
isget++;
flag = !flag;
} else {
if(flag) {
t1.append(line);
} else {
t2.append(line);
}
}
}
List<String> list = new LinkedList<String>();
System.out.println(strArr1.size());
for (int i =0;i<25;i++) {
System.out.println(strArr1.get(i).toString());
System.out.println(strArr2.get(i).toString());
System.out.println();
resultArr[i] = localAlig(strArr1.get(i).toString(), strArr2.get(i).toString());
}
System.out.println(resultArr);
for (int score: resultArr) {
System.out.println(score);
}
}
private static int localAlig(String A, String B) {
List<String> twoList = new LinkedList<String>();
int allMax = 0;
int col = A.length();
int row = B.length();
int[][] mat = new int[row + 1][col + 1];
for (int i = 0; i < col + 1; i++) {
mat[0][i] = -10;
}
for (int j = 0; j < row + 1; j++) {
mat[j][0] = -10;
}
for (int i = 1; i < col+1; i++) {
for (int j = 1; j < row+1; j++) {
int Match = mat[j - 1][i - 1] + Scoring.getScore(PAM250, A.charAt(i - 1), B.charAt(j - 1));
int Delete = mat[j - 1][i] + (0 - 2);
int Insert = mat[j][i - 1] + (0 - 2);
mat[j][i] = Math.max(Math.max(Match,0), Math.max(Insert, Delete));
}
}
// 再次遍历矩阵,获取矩阵最大值,用于查找序列初始点
int tempMax = 0;
int temprow =0,tempcol = 0;
for (int i =0;i<row+1;i++) {
for (int j =0;j<col+1;j++) {
System.out.print(mat[i][j]+".");
if (tempMax < mat[i][j]) {
tempMax = mat[i][j];
temprow = i;
tempcol = j;
}
}
System.out.println();
}
System.out.println(tempMax);
String AlignmentA = "";
String AlignmentB = "";
int i = tempcol;
int j = temprow;
while (i > 1 && j > 1) {
if (i > 1 && j > 1 && mat[j][i] == mat[j-1][i-1]
+ Scoring.getScore(PAM250, A.charAt(i - 1), B.charAt(j - 1))) {
AlignmentA = A.charAt(i - 1) + AlignmentA;
AlignmentB = B.charAt(j - 1) + AlignmentB;
i = i - 1;
j = j - 1;
} else if (i == 1 || j == 1) {
break;
} else if (j > 1 && mat[j][i] == mat[j - 1][i] + (0 - 2)) {
AlignmentA = "-" + AlignmentA;
AlignmentB = B.charAt(j - 1) + AlignmentB;
j = j - 1;
} else {
AlignmentA = A.charAt(i - 1) + AlignmentA;
AlignmentB = "-" + AlignmentB;
i = i - 1;
}
}
twoList.add(AlignmentA);
twoList.add(AlignmentB);
//return twoList;
return tempMax;
}
}
第一题:查找最长子序列
第二题:序列比较算法,使用最普通的方式,Ws = 2, Wd = 0, 无罚分矩阵,默认(相等:不相等)=(+3,-1)
第三题:相对于第二题,添加了罚分矩阵PAM250,(20*20);同时simple local alignment algorithm使用线性罚分,Wd=0,Ws=2;
affine-gap local alignment algorithm使用affine罚分,Wd=10,Ws=2。
一题:求两个序列的最长公共子序列或子串(动态规划)
参考链接:http://www.geeksforgeeks.org/longest-common-subsequence/
第二题:
参考:https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm
https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm#Example
第三题:
参考github用户:https://github.com/hswaffield/Linear-Space-Local-Alignment
http://www.cs.utexas.edu/~mobios/cs329e/rosetta/src/Blosum.java
https://en.wikipedia.org/wiki/Gap_penalty#Affine
https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm#Gap_penalty
相关资料连接链接:http://pan.baidu.com/s/1nvaENdn 密码:gmyf
上一篇: [模板] 可持久化数组
下一篇: 利用PLINK进行GWAS分析
推荐阅读