读书笔记 Bioinformatics Algorithms Chapter5
程序员文章站
2022-05-15 11:27:00
Chapter5 HOW DO WE COMPARE DNA SEQUENCES Bioinformatics Algorithms-An_Active Learning Approach http://bioinformaticsalgorithms.com/ 一、 1983年,Russell D ......
chapter5 how do we compare dna sequences
bioinformatics algorithms-an_active learning approach
一、
1983年,russell doolitte 将血小板源生长因子[platelet derived growth factor(pdgf),一种刺激细胞增殖的物质]和其它已知基因比对,发现它的序列和原癌基因(oncogene)的序列有很高的相似度。这让科学家猜测某些癌症是因为基因在不合适的时机作用所致(scientists hypothesized that some forms of cancer might be caused by a good gene doing the right thing at the wrong time.)。
二、提出问题 序列比对:动态规划法
1.全局比对:
1 ''' 2 code challenge: solve the global alignment problem. 3 input: two protein strings written in the single-letter amino acid alphabet. 4 output: the maximum alignment score of these strings followed by an alignment achieving this maximum score. use the 5 blosum62 scoring matrix for matches and mismatches as well as the indel penalty σ = 5. 6 ---------- 7 sample input: 8 pleasantly 9 meanly 10 ---------- 11 sample output: 12 8 13 pleasantly 14 -mea--n-ly 15 ---------- 16 @ lo kowngho 2018.9 17 ''' 18 import numpy 19 from os.path import dirname 20 21 def grade(symb1,symb2): 22 indx1 = symbollist[symb1] 23 indx2 = symbollist[symb2] 24 return matrix[indx1][indx2] 25 26 def init_graph_global(l1,l2): 27 graph = numpy.zeros([l2,l1]) 28 for x in range(1,l2): 29 graph[x][0] = graph[x-1][0]-5 30 for y in range(1,l1): 31 graph[0][y] = graph[0][y-1]-5 32 return graph 33 34 def init_path(l1,l2): 35 path = numpy.zeros([l2,l1]) 36 for x in range(1,l2): 37 path[x][0] = 1 38 for y in range(1,l1): 39 path[0][y] = 2 40 return path 41 42 def buildglobalalignmentgraph(text1,text2): 43 44 l1 = len(text1) 45 l2 = len(text2) 46 graph = init_graph_global(l1, l2) 47 path = init_path(l1, l2) 48 49 for x in range(1,l2): 50 for y in range(1,l1): 51 graph[x][y] = max(graph[x-1][y]-5, graph[x][y-1]-5, graph[x-1][y-1]+grade(text1[y],text2[x])) 52 if graph[x-1][y]-5==graph[x][y]: 53 path[x][y]=1 54 elif graph[x][y-1]-5==graph[x][y]: 55 path[x][y]=2 56 else: 57 path[x][y]=3 58 return [graph,path] 59 60 61 def outputglobalaligement(path,graph,text1,text2): 62 outt1 = '' 63 outt2 = '' 64 l1 = len(text1) 65 l2 = len(text2) 66 x = l2-1 67 y = l1-1 68 while(x!=0 or y!=0): 69 if path[x][y]==1: 70 outt1 += '-' 71 outt2 += text2[x] 72 x -= 1 73 elif path[x][y]==2: 74 outt1 += text1[y] 75 outt2 += '-' 76 y -= 1 77 else: 78 outt1 += text1[y] 79 outt2 += text2[x] 80 x -= 1 81 y -= 1 82 return [outt1[::-1],outt2[::-1]] 83 84 def importscorematrix(): 85 dataset = open(dirname(__file__)+'blosum62.txt').read().strip().split('\n') 86 symbollist = dataset[0].split() 87 for i in range(len(symbollist)): 88 symbollist[i]=[symbollist[i],i] 89 symbollist = dict(symbollist) 90 print(symbollist) 91 matrix = [] 92 for i in range(1,len(dataset)): 93 matrix.append(dataset[i].split()[1:]) 94 for l in range(len(matrix)): 95 for i in range(len(matrix[l])): 96 matrix[l][i]=int(matrix[l][i]) 97 return [matrix,symbollist] 98 99 if __name__ == '__main__': 100 101 [matrix,symbollist] = importscorematrix() 102 103 dataset = open(dirname(__file__)+'dataset.txt').read().strip().split() 104 text1 = '0'+dataset[0] 105 text2 = '0'+dataset[1] 106 107 [graph,path] = buildglobalalignmentgraph(text1, text2) 108 109 [outt1,outt2] = outputglobalaligement(path,graph,text1,text2) 110 111 print(int(graph[-1][-1])) 112 print(outt1) 113 print(outt2)
2. 局部比对
可以把局部比对想象成下面的free taxi场景,在开始和结尾都不受罚分约束,只在中间的某一过程受罚分约束。
在全局比对的基础上,状态转移方程在加上一个0,表示每一个点,既可以由→、↓、↘经过罚分得到,也可以直接由起点,不经罚分得到(free taxi)。
1 ''' 2 code challenge: solve the local alignment problem. 3 input: two protein strings written in the single-letter amino acid alphabet. 4 output: the maximum score of a local alignment of the strings, followed by a local alignment of these strings achieving the maximum 5 score. use the pam250 scoring matrix for matches and mismatches as well as the indel penalty σ = 5. 6 --------------- 7 sample input: 8 meanly 9 penalty 10 --------------- 11 sample output: 12 15 13 eanl-y 14 enalty 15 --------------- 16 lo kwongho 2018.9 17 ''' 18 import numpy 19 from os.path import dirname 20 21 def grade(symb1,symb2): 22 indx1 = symbollist[symb1] 23 indx2 = symbollist[symb2] 24 return matrix[indx1][indx2] 25 26 def init_graph_local(l1,l2): 27 graph = numpy.zeros([l1,l2]) 28 return graph 29 30 def init_path(l1,l2): 31 path = numpy.ones([l1,l2])*-1 32 for x in range(1,l1): 33 path[x][0] = 1 34 for y in range(1,l2): 35 path[0][y] = 2 36 return path 37 38 def buildlocalalignmentgraph(text1,text2): 39 l1 = len(text1) 40 l2 = len(text2) 41 graph = init_graph_local(l1, l2) 42 path = init_path(l1, l2) 43 44 for x in range(1,l1): 45 for y in range(1,l2): 46 graph[x][y] = max(graph[x-1][y]-5, graph[x][y-1]-5, graph[x-1][y-1]+grade(text1[x],text2[y]),0) 47 if graph[x-1][y]-5 == graph[x][y]: 48 path[x][y] = 1 49 elif graph[x][y-1]-5==graph[x][y]: 50 path[x][y] = 2 51 elif graph[x][y] == 0: 52 path[x][y] = 0 53 else: 54 path[x][y] = 3 55 maxval = 0 56 maxindx = [-1,-1] 57 for x in range(1,l1): 58 for y in range(1,l2): 59 if graph[x][y]>maxval: 60 maxval=graph[x][y] 61 maxindx=[x,y] 62 63 return [graph,path,maxindx] 64 65 def outputlocalaligement(path,graph,text1,text2,maxindx): 66 outt1 = '' 67 outt2 = '' 68 l1 = len(text1) 69 l2 = len(text2) 70 x = maxindx[0] 71 y = maxindx[1] 72 while(x!=0 or y!=0): 73 if path[x][y]==1: 74 outt1 += text1[x] 75 outt2 += '-' 76 x -= 1 77 elif path[x][y]==2: 78 outt1 += '-' 79 outt2 += text2[y] 80 y -= 1 81 elif path[x][y]==3: 82 outt1 += text1[x] 83 outt2 += text2[y] 84 x -= 1 85 y -= 1 86 else: 87 x=0 88 y=0 89 return [outt1[::-1],outt2[::-1]] 90 91 92 def importscorematrix(): 93 dataset = open(dirname(__file__)+'pam250.txt').read().strip().split('\n') 94 symbollist = dataset[0].split() 95 for i in range(len(symbollist)): 96 symbollist[i]=[symbollist[i],i] 97 symbollist = dict(symbollist) 98 matrix = [] 99 for i in range(1,len(dataset)): 100 matrix.append(dataset[i].split()[1:]) 101 for l in range(len(matrix)): 102 for i in range(len(matrix[l])): 103 matrix[l][i]=int(matrix[l][i]) 104 return [matrix,symbollist] 105 106 if __name__ == '__main__': 107 [matrix,symbollist] = importscorematrix() 108 109 dataset = open(dirname(__file__)+'dataset.txt').read().strip().split() 110 text1 = '0'+dataset[0] 111 text2 = '0'+dataset[1] 112 113 [graph,path,maxindx] = buildlocalalignmentgraph(text1,text2) 114 115 [outt1,outt2]=outputlocalaligement(path,graph,text1,text2,maxindx) 116 print(int(graph[maxindx[0]][maxindx[1]])) 117 print(outt1) 118 print(outt2)
3. overlarp alignment
1 ''' 2 code challenge: solve the overlap alignment problem. 3 >>input: two strings v and w, each of length at most 1000. 4 >>output: the score of an optimal overlap alignment of v and w, followed by an alignment of a suffix v' of v and a prefix w' of w. 5 achieving this maximum score. use an alignment score in which matches count +1 and both the mismatch and indel penalties are 2. 6 ------------------- 7 sample input: 8 pawheae 9 heagawghee 10 ------------------- 11 sample output: 12 1 13 heae 14 heag 15 ------------------- 16 coder: lo kwongho 17 ''' 18 19 import numpy 20 from os.path import dirname 21 22 def init_graph_overlap(l1,l2): 23 graph = numpy.zeros([l1,l2]) 24 for y in range(1,l2): 25 graph[0][y] = graph[0][y-1]-1 26 return graph 27 28 def init_path(l1,l2): 29 path = numpy.ones([l1,l2])*-1 30 for x in range(1,l1): 31 path[x][0] = 1 32 for y in range(1,l2): 33 path[0][y] = 2 34 return path 35 36 def buildoverlapalignmentgraph(text1,text2): 37 l1 = len(text1) 38 l2 = len(text2) 39 graph = init_graph_overlap(l1, l2) 40 path = init_path(l1,l2) 41 for x in range(1,l1): 42 for y in range(1,l2): 43 if text1[x]==text2[y]: 44 graph[x][y]=max(graph[x-1][y-1]+1,graph[x-1][y]-2,graph[x][y-1]-2) 45 else: 46 graph[x][y]=max(graph[x-1][y-1]-2,graph[x-1][y]-2,graph[x][y-1]-2) 47 if graph[x][y]==graph[x-1][y]-2: 48 path[x][y]=1 49 elif graph[x][y]==graph[x][y-1]-2: 50 path[x][y]=2 51 else: 52 path[x][y]=3 53 54 maxval = 0 55 maxindx = -1 56 for i in range(l2): 57 if graph[l1-1][i]>maxval: 58 maxval=graph[l1-1][i] 59 maxindx=i 60 61 return [graph,path,maxindx,maxval] 62 63 def outputoverlapaligement(path,graph,text1,text2,maxindx): 64 outt1 = '' 65 outt2 = '' 66 l1 = len(text1) 67 l2 = len(text2) 68 x = l1-1 69 y = maxindx 70 while(y!=0): 71 if path[x][y]==1: 72 outt1 += text1[x] 73 outt2 += '-' 74 x -= 1 75 elif path[x][y]==2: 76 outt1 += '-' 77 outt2 += text2[y] 78 y -= 1 79 elif path[x][y]==3: 80 outt1 += text1[x] 81 outt2 += text2[y] 82 x -= 1 83 y -= 1 84 else: 85 x=0 86 y=0 87 return [outt1[::-1],outt2[::-1]] 88 89 90 if __name__ == '__main__': 91 dataset = open(dirname(__file__)+'dataset.txt').read().strip().split() 92 text1 = '0'+dataset[0] 93 text2 = '0'+dataset[1] 94 l1 = len(text1) 95 l2 = len(text2) 96 [graph,path,maxindx,maxval] = buildoverlapalignmentgraph(text1,text2) 97 #print(graph) 98 99 [outtext1,outtext2]=outputoverlapaligement(path, graph, text1, text2, maxindx) 100 101 print(int(maxval)) 102 print(outtext1) 103 print(outtext2)
4.fitting alignment
1 ''' 2 fitting alignment problem: construct a highest-scoring fitting alignment between two strings. 3 >>input: strings v and w as well as a matrix score. 4 >>output: a highest-scoring fitting alignment of v and w as defined by the scoring matrix score. 5 ------------------- 6 sample input: 7 gtaggcttaaggtta 8 tagata 9 ------------------- 10 sample output: 11 2 12 taggctta 13 taga--ta 14 ------------------- 15 coder: lo kwongho 16 ''' 17 18 import numpy 19 from os.path import dirname 20 21 def init_graph_fiting(l1,l2): 22 graph = numpy.zeros([l1,l2]) 23 for y in range(1,l2): 24 graph[0][y] = graph[0][y-1]-1 25 return graph 26 27 def init_path(l1,l2): 28 path = numpy.ones([l1,l2])*-1 29 for x in range(1,l1): 30 path[x][0] = 1 31 for y in range(1,l2): 32 path[0][y] = 2 33 return path 34 35 def buildfittingalignmentgraph(text1,text2): 36 l1 = len(text1) 37 l2 = len(text2) 38 graph = init_graph_fiting(l1, l2) 39 path = init_path(l1,l2) 40 for x in range(1,l1): 41 for y in range(1,l2): 42 if text1[x]==text2[y]: 43 graph[x][y]=max(graph[x-1][y-1]+1,graph[x-1][y]-1,graph[x][y-1]-1) 44 else: 45 graph[x][y]=max(graph[x-1][y-1]-1,graph[x-1][y]-1,graph[x][y-1]-1) 46 if graph[x][y]==graph[x-1][y]-1: 47 path[x][y]=1 48 elif graph[x][y]==graph[x][y-1]-1: 49 path[x][y]=2 50 else: 51 path[x][y]=3 52 53 maxval = 0 54 maxindx = -1 55 for i in range(l1): 56 if graph[i][l2-1]>maxval: 57 maxval=graph[i][l2-1] 58 maxindx=i 59 60 return [graph,path,maxindx,maxval] 61 62 def outputfittingaligement(path,graph,text1,text2,maxindx): 63 outt1 = '' 64 outt2 = '' 65 l1 = len(text1) 66 l2 = len(text2) 67 x = maxindx 68 y = l2-1 69 while(y!=0): 70 if path[x][y]==1: 71 outt1 += text1[x] 72 outt2 += '-' 73 x -= 1 74 elif path[x][y]==2: 75 outt1 += '-' 76 outt2 += text2[y] 77 y -= 1 78 elif path[x][y]==3: 79 outt1 += text1[x] 80 outt2 += text2[y] 81 x -= 1 82 y -= 1 83 else: 84 x=0 85 y=0 86 return [outt1[::-1],outt2[::-1]] 87 88 89 if __name__ == '__main__': 90 dataset = open(dirname(__file__)+'dataset.txt').read().strip().split() 91 text1 = '0'+dataset[0] 92 text2 = '0'+dataset[1] 93 l1 = len(text1) 94 l2 = len(text2) 95 [graph,path,maxindx,maxval] = buildfittingalignmentgraph(text1,text2) 96 97 [outtext1,outtext2]=outputfittingaligement(path, graph, text1, text2, maxindx) 98 #print(graph) 99 print(int(maxval)) 100 print(outtext1) 101 print(outtext2)
这四种比对的关系如图:
全局比对 局部比对
overlarp alignment fitting alignment
5、基因的插入和删除,通常都是连续的一段,故在比对出现的连续空位,应该把它们当作一个整体看待。在比对的空位罚分中,生物学家认为,在每一条链上新开一个空位,应罚重分,而空位的延续,罚分应较少:
解决问题的方法是:开三个矩阵,每个矩阵代表一种方向。在→、↓方向上行走,代表产生空位。故每当从↘转移到→、↓,或者→、↓间转移,代表在某链上产生新空位,重罚,而在→、↓内转移,代表空位延续,轻罚。
1 ''' 2 code challenge: solve the alignment with affine gap penalties problem. 3 >>input: two amino acid strings v and w (each of length at most 100). 4 >>output: the maximum alignment score between v and w, followed by an alignment of v and w achieving this maximum score. use the 5 blosum62 scoring matrix, a gap opening penalty of 11, and a gap extension penalty of 1. 6 --------------------- 7 sample input: 8 prteins 9 prtwpsein 10 --------------------- 11 sample output: 12 8 13 prt---eins 14 prtwpsein- 15 --------------------- 16 coder: lo kwongho 17 ''' 18 import numpy 19 from os.path import dirname 20 neginfinity = -999 21 #penalties 22 gs = -10 #gap_start 23 ge = -1 #gap_extend 24 # 25 def grade(symb1,symb2): 26 indx1 = symbollist[symb1] 27 indx2 = symbollist[symb2] 28 return matrix[indx1][indx2] 29 30 def initgraph(l1,l2): 31 graph = [numpy.zeros([l1,l2] ,dtype=int) for i in range(3)] 32 33 graph[1][0][0] = neginfinity 34 graph[2][0][0] = neginfinity 35 for x in range(1,l1): 36 graph[0][x][0]=neginfinity 37 if x==1: 38 graph[1][x][0]=ge+gs 39 else: 40 graph[1][x][0]=graph[1][x-1][0]+ge 41 graph[2][x][0]=neginfinity 42 for y in range(1,l2): 43 graph[0][0][y]=neginfinity 44 if y ==1: 45 graph[2][0][y]=ge+gs 46 else: 47 graph[2][0][y]=graph[2][0][y-1]+ge 48 graph[1][0][y]=neginfinity 49 return graph 50 51 def init_path(l1,l2): 52 path = [numpy.ones([l1,l2])*-1 for i in range(3)] 53 '''for x in range(1,l1): 54 path[x][0] = 1 55 for y in range(1,l2): 56 path[0][y] = 2''' 57 return path 58 59 def buildalignmentgraph(text1,text2,l1,l2): 60 61 graph = initgraph(l1,l2) 62 #path = #init_path(l1,l2) 63 for x in range(1,l1): 64 for y in range(1,l2): 65 # x ###### 66 graph[1][x][y]=max(gs+ge+graph[0][x-1][y],gs+ge+graph[2][x-1][y],ge+graph[1][x-1][y]) 67 68 69 # y ###### 70 graph[2][x][y]=max(gs+ge+graph[0][x][y-1],gs+ge+graph[1][x][y-1],ge+graph[2][x][y-1]) 71 72 # m ###### 73 graph[0][x][y]=grade(text1[x], text2[y])+max(graph[0][x-1][y-1],graph[1][x-1][y-1],graph[2][x-1][y-1]) 74 75 maxval = 0 76 maxindx = -1 77 for i in range(3): 78 if graph[i][l1-1][l2-1]>maxval: 79 maxval=graph[i][l1-1][l2-1] 80 maxindx=i 81 return [graph,maxindx,maxval] 82 83 def trackback(graph,maxindx,text1,text2): 84 x = len(text1)-1 85 y = len(text2)-1 86 otext1 = '' 87 otext2 = '' 88 indx = maxindx 89 while(1): 90 if indx==0: 91 otext1 += text1[x] 92 otext2 += text2[y] 93 if x ==1: 94 break 95 if graph[0][x][y]==graph[1][x-1][y-1]+grade(text1[x], text2[y]): 96 indx = 1 97 elif graph[0][x][y]==graph[2][x-1][y-1]+grade(text1[x], text2[y]): 98 indx = 2 99 else: 100 indx = 0 101 x -= 1 102 y -= 1 103 elif indx==1: 104 otext1 += text1[x] 105 otext2 += '-' 106 if x == 1: 107 break 108 if graph[1][x][y]==graph[0][x-1][y]+ge+gs: 109 indx = 0 110 elif graph[1][x][y]==graph[2][x-1][y]+ge+gs: 111 indx = 2 112 else: 113 indx = 1 114 x -= 1 115 else: 116 otext1 += '-' 117 otext2 += text2[y] 118 if y == 1: 119 break 120 if graph[2][x][y]==graph[0][x][y-1]+ge+gs: 121 indx = 0 122 elif graph[2][x][y]==graph[1][x][y-1]+ge+gs: 123 indx = 1 124 else: 125 indx = 2 126 y -= 1 127 128 return [otext1[::-1],otext2[::-1]] 129 130 def importscorematrix(): 131 dataset = open(dirname(__file__)+'blosum62.txt').read().strip().split('\n') 132 symbollist = dataset[0].split() 133 for i in range(len(symbollist)): 134 symbollist[i]=[symbollist[i],i] 135 symbollist = dict(symbollist) 136 matrix = [] 137 for i in range(1,len(dataset)): 138 matrix.append(dataset[i].split()[1:]) 139 for l in range(len(matrix)): 140 for i in range(len(matrix[l])): 141 matrix[l][i]=int(matrix[l][i]) 142 return [matrix,symbollist] 143 144 145 if __name__ == '__main__': 146 [matrix,symbollist] = importscorematrix() # 打分矩阵 147 148 dataset = open(dirname(__file__)+'dataset.txt').read().strip().split() 149 text1 = '0'+dataset[0] 150 text2 = '0'+dataset[1] 151 l1 = len(text1) 152 l2 = len(text2) 153 [graph,maxindx,maxval] = buildalignmentgraph(text1, text2, l1, l2) 154 #print(graph) 155 156 [output_text1,output_text2] = trackback(graph,maxindx,text1,text2) 157 print(maxval) 158 print(output_text1) 159 print(output_text2) 160
6 * 一种线性空间的比对方法 space-efficient sequence alignment(分治+动态规划)
https://www.cs.rit.edu/~rlaz/algorithms20082/slides/spaceefficientalignment.pdf
1 ''' 2 code challenge: implement linearspacealignment to solve the global alignment problem for a large dataset. 3 >>>input: two long (10000 amino acid) protein strings written in the single-letter amino acid alphabet. 4 >>>output: the maximum alignment score of these strings, followed by an alignment achieving this maximum score. use the 5 6 blosum62 scoring matrix and indel penalty σ = 5. 7 ------------ 8 sample input: 9 pleasantly 10 meanly 11 ------------ 12 sample output: 13 8 14 pleasantly 15 -mea--n-ly 16 ------------ 17 coder: lo kwongho in 2018.9 18 ''' 19 from os.path import dirname 20 import numpy 21 # 22 indel = -5 23 neginf = -9999 24 # 25 # 26 def grade(symb1,symb2): 27 indx1 = symbollist[symb1] 28 indx2 = symbollist[symb2] 29 return matrix[indx1][indx2] 30 31 def importscorematrix(): 32 dataset = open(dirname(__file__)+'blosum62.txt').read().strip().split('\n') 33 symbollist = dataset[0].split() 34 for i in range(len(symbollist)): 35 symbollist[i]=[symbollist[i],i] 36 symbollist = dict(symbollist) 37 matrix = [] 38 for i in range(1,len(dataset)): 39 matrix.append(dataset[i].split()[1:]) 40 for l in range(len(matrix)): 41 for i in range(len(matrix[l])): 42 matrix[l][i]=int(matrix[l][i]) 43 return [matrix,symbollist] 44 # 45 def half_alignment(v,w): 46 nv = len(v) 47 mw = len(w) 48 s = numpy.zeros(shape=(nv+1,2),dtype=int) 49 for i in range(nv+1): 50 s[i,0] = indel*i 51 if mw==0: 52 return s[:,0] # 53 for j in range(mw): 54 s[0,1]=s[0,0]+indel 55 for i in range(nv): 56 s[i+1,1]=max(s[i,1]+indel,s[i+1,0]+indel,s[i,0]+grade(w[j],v[i])) 57 s[:,0]=s[:,1] 58 return s[:,1] 59 60 def midedge(v,w): 61 nv = len(v) 62 mw = len(w) 63 mid = int((mw-1)/2) 64 wl = w[:mid] 65 wr = w[mid+1:] 66 pre = half_alignment(v,wl) 67 suf = half_alignment(v[::-1],wr[::-1])[::-1] 68 hs = [pre[i]+suf[i]+indel for i in range(nv+1)] 69 ds = [pre[i]+suf[i+1]+grade(w[mid],v[i]) for i in range(nv)] 70 maxhs = max(hs) 71 maxds = max(ds) 72 if maxhs>maxds: 73 return ( (hs.index(maxhs),mid) ,(hs.index(maxhs),mid+1) ) 74 else: 75 return ( (ds.index(maxds),mid) ,(ds.index(maxds)+1,mid+1) ) 76 77 def build_alignment_track(v,w): 78 vn = len(v) 79 wm = len(w) 80 if vn==0 and wm==0: 81 return [] 82 elif vn==0: 83 return ['-']*wm 84 elif wm==0: 85 return ['|']*vn 86 ((x1,y1),(x2,y2)) = midedge(v,w) 87 if x1==x2: 88 edge = ['-'] 89 else: 90 edge = ['\\'] 91 wleft = w[:y1] 92 wright = w[y2:] 93 vupper = v[:x1] 94 vbotm = v[x2:] 95 96 upper_left_track = build_alignment_track(vupper,wleft) 97 bottom_right_track = build_alignment_track(vbotm,wright) 98 return upper_left_track+edge+bottom_right_track 99 100 def tracktostring(v,w,track): 101 vi = 0 102 wj = 0 103 outv = '' 104 outw = '' 105 score = 0 106 for i in track: 107 if i == '|': 108 outv += v[vi] 109 outw += '-' 110 score += indel 111 vi += 1 112 elif i == '-': 113 outv += '-' 114 outw += w[wj] 115 score += indel 116 wj += 1 117 else: 118 outv += v[vi] 119 outw += w[wj] 120 score += grade(v[vi], w[wj]) 121 vi += 1 122 wj += 1 123 124 return [score,outv,outw] 125 126 def linearspacealignment(v,w): 127 track = build_alignment_track(v,w) 128 [score,outv, outw] = tracktostring(v,w,track) 129 print(score) 130 print(outv) 131 print(outw) 132 133 if __name__ == '__main__': 134 dataset = open(dirname(__file__)+'dataset.txt').read().strip().split() 135 [matrix,symbollist] = importscorematrix() 136 v = dataset[0] 137 w = dataset[1] 138 linearspacealignment(v,w)