# 大豆SNP位点查找V2.0
# 程序员文章站
# 2022-05-29 16:49:36
# ...
# 大豆SNP位点查找V2.0
from time import perf_counter
import csv
import time
# Start the timer before loading so the reported duration covers reading the
# sequence file as well as the search itself.
start = perf_counter()

# Read the whole sequence file; the context manager closes the handle even if
# reading fails.
with open("GM18.txt", "r") as f:
    txt = f.read()

# Remove the leading ">Gm18" header tag. str.strip(">Gm18") treats its argument
# as a *set of characters* and strips both ends, so it could also eat trailing
# 'G' bases of the sequence; remove the literal prefix instead.
header_tag = ">Gm18"
if txt.startswith(header_tag):
    txt = txt[len(header_tag):]

# Join the wrapped lines into one continuous string so that an index into
# `txt` is a 0-based offset into the sequence (the first base is at index 0).
txt = txt.replace("\n", "").replace("\r", "")

# Ask for the query sequence. The time spent waiting for the user is added to
# `start` so it does not count towards the reported search duration.
prompt_began = perf_counter()
aimseq = input("请输入你想查找的DNA序列文件(回车结束):")
start += perf_counter() - prompt_began
def indexstr(str1, str2):
    """Return the start offsets of all non-overlapping occurrences of str2 in str1.

    The search runs left to right; after a hit it resumes immediately past
    the end of that hit, so overlapping occurrences are not reported
    (``indexstr("AAAA", "AA")`` gives ``[0, 2]``).

    Args:
        str1: The string to search in.
        str2: The substring to look for.

    Returns:
        A list of 0-based offsets into ``str1``, in ascending order. The list
        is empty when ``str2`` does not occur or when ``str2`` is empty.
    """
    # An empty needle matches at every offset without ever advancing the
    # cursor, which would loop forever; treat it as "no matches".
    if not str2:
        return []

    step = len(str2)
    positions = []
    # str.find with a start offset scans in place, avoiding a fresh copy of
    # the remaining text on every iteration.
    hit = str1.find(str2)
    while hit != -1:
        positions.append(hit)
        hit = str1.find(str2, hit + step)
    return positions
def flanking_sequences(sequence, position, match_len, flank=300):
    """Return the text immediately before and after a match.

    Args:
        sequence: The full string that was searched.
        position: 0-based offset of the match within ``sequence``.
        match_len: Length of the matched substring.
        flank: Maximum number of characters to return on each side.

    Returns:
        A ``(before, after)`` tuple. Either part is shorter than ``flank``
        when the match lies closer than ``flank`` characters to that end of
        ``sequence``.
    """
    # Clamp at 0: a negative slice start would count from the end of the
    # string and yield a wrong (usually empty) result.
    before = sequence[max(0, position - flank):position]
    after_start = position + match_len
    after = sequence[after_start:after_start + flank]
    return before, after


if __name__ == '__main__':
    # Search once and reuse the result for both the console and the CSV.
    pos = indexstr(txt, aimseq)
    print(pos)

    num = len(aimseq)
    rows = []
    for number in pos:
        # Works for any query length, not only 45 or 50.
        sequence1, sequence2 = flanking_sequences(txt, number, num)
        print("由以上条件我们可以得知前300个碱基为 {} ,后300个碱基为 {} ".format(sequence1, sequence2))
        # One row per match keeps each position next to its own flanks and
        # lines up with the four header columns below.
        rows.append([aimseq, sequence1, sequence2, number])

    print("此次查找花费的时间是:{:.5f} 秒".format(perf_counter() - start))

    # newline="" is what the csv module expects; without it every row is
    # followed by a blank line on Windows. The context manager closes the
    # file even if writing fails.
    with open("SNP相关数据.csv", "w", newline="") as c:
        writer = csv.writer(c)
        writer.writerow(["SNP", "前300个碱基", "后300个碱基", "所在位置"])
        writer.writerows(rows)