欢迎您访问程序员文章站,本站旨在为大家提供并分享程序员计算机编程知识!
您现在的位置是: 首页

大豆SNP位点查找V2.0

程序员文章站 2022-05-29 16:49:36
...

大豆SNP位点查找V2.0

from time import perf_counter
import csv
import time


# Wall-clock reference used by the main block to report total run time.
start = perf_counter()

# Read the whole FASTA file. The context manager guarantees the handle is
# closed even if reading fails.
with open("GM18.txt", "r") as f:
    raw = f.read()

# Build one contiguous sequence string:
#   * lines starting with ">" are FASTA headers and are dropped entirely
#     (str.strip(">Gm18") would strip a *set of characters* from both ends,
#     which can eat real trailing bases such as "G" and leaves any extra
#     header text inside the sequence);
#   * splitlines() removes "\n", "\r" and "\r\n" line endings.
# Offsets into txt are 0-based, i.e. the first base is txt[0].
txt = "".join(line for line in raw.splitlines() if not line.startswith(">"))

aimseq = input("请输入你想查找的DNA序列文件(回车结束):")
def indexstr(str1,str2):
    """Return the start offsets of all non-overlapping occurrences of str2 in str1.

    The search runs left to right; after a hit at offset ``p`` it resumes at
    ``p + len(str2)``, so overlapping occurrences are not reported.

    Args:
        str1: The text to search in.
        str2: The substring to look for.

    Returns:
        A list of 0-based offsets in ascending order. The list is empty when
        str2 does not occur or when str2 is the empty string.
    """
    # An empty pattern "matches" at every offset without ever advancing the
    # cursor, which would loop forever; treat it as having no occurrences.
    if not str2:
        return []

    step = len(str2)
    positions = []
    # str.find with a start offset scans in place; slicing str1[i:] on every
    # iteration would copy the remaining text each time.
    found = str1.find(str2)
    while found != -1:
        positions.append(found)
        found = str1.find(str2, found + step)
    return positions



if __name__ == '__main__':
    # Number of bases reported on each side of a match.
    FLANK = 300

    # Search once and reuse the result; scanning a whole chromosome twice
    # just to print the list doubles the run time.
    pos = indexstr(txt, aimseq)
    print(pos)

    # Length of the query; the downstream flank starts right after it.
    # Using the real length supports any probe size, not only 45 or 50.
    num = len(aimseq)

    # One CSV row per hit: query, upstream flank, downstream flank, offset.
    rows = []
    for number in pos:
        # Clamp at 0: a negative slice start would wrap around to the end of
        # txt and return the wrong (usually empty) upstream flank.
        sequence1 = txt[max(0, number - FLANK) : number]
        sequence2 = txt[number + num : number + num + FLANK]
        print("由以上条件我们可以得知前300个碱基为 {} ,后300个碱基为 {} ".format(sequence1,sequence2))
        rows.append([aimseq, sequence1, sequence2, number])

    print("此次查找花费的时间是:{:.5f} 秒".format(perf_counter() - start))

    # newline="" lets the csv module control line endings (avoids blank
    # rows on Windows); the context manager closes the file on any exit.
    # When there are no hits only the header row is written.
    with open("SNP相关数据.csv", "w", newline="") as c:
        writer = csv.writer(c)
        writer.writerow(["SNP","前300个碱基","后300个碱基","所在位置"])
        writer.writerows(rows)