欢迎您访问程序员文章站！本站旨在为大家提供、分享程序员计算机编程知识！
您现在的位置是: 首页

mzml数据处理及质谱图像绘制

程序员文章站 2022-03-11 21:37:13
...

数据处理

from pyteomics import mzml
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import json
import csv
import os
import math
def change_mzML(fd, mz_bins=(114, 115, 116, 117)):
    """Export spectra from an mzML file to CSV files grouped by base peak m/z.

    Each spectrum whose ``'base peak m/z'`` value falls in ``[low, low + 1)``
    for some ``low`` in ``mz_bins`` has its ``'m/z array'`` appended to
    ``fd + str(low) + "m_z.csv"`` and its ``'intensity array'`` appended to
    ``fd + str(low) + "intensity.csv"``.

    Args:
        fd: Path of the mzML file to read; also used as the prefix of the
            output CSV file names.
        mz_bins: Lower bounds of the one-unit-wide base peak m/z windows to
            export. A spectrum is written to the first window it matches.

    Returns:
        None. The results are written to CSV files next to the input file.
    """
    # One pass over the file serves every window; the context manager closes
    # the underlying file once iteration is done.
    with mzml.read(fd) as reader:
        for spectrum in reader:
            base_peak = spectrum.get('base peak m/z')
            if base_peak is None:
                # Without a base peak the spectrum cannot be assigned to a
                # window (and comparing None with a number raises TypeError).
                continue
            for low in mz_bins:
                if low <= base_peak < low + 1:
                    prefix = fd + str(low)
                    dfmz = pd.DataFrame(spectrum.get('m/z array'))
                    dfin = pd.DataFrame(spectrum.get('intensity array'))
                    # Files are opened in append mode, so every spectrum adds
                    # its own header row followed by its values.
                    dfmz.to_csv(prefix + "m_z.csv", mode='a+',
                                index=False, header=True)
                    dfin.to_csv(prefix + "intensity.csv", mode='a+',
                                index=False, header=True)
                    break
# Convert every .mzML file found directly inside the data directory.
for fn in os.listdir('C:/hqh/data/breast/'):
    if not fn.endswith('.mzML'):
        continue  # ignore anything that is not an mzML file
    fd = os.path.join('C:/hqh/data/breast/', fn)
    change_mzML(fd)

质谱图绘制

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import json
import csv
import os
import math
# Get one column of a CSV file.
def getColumn(filename, column):
    """Return the values of one column of a comma-separated file.

    Args:
        filename: Path of the CSV file to read.
        column: Zero-based index of the column to extract.

    Returns:
        A list with the (string) field at index ``column`` of every
        non-blank row, in file order.

    Raises:
        IndexError: If a non-blank row has no field at index ``column``.
    """
    # newline="" lets the csv module handle line endings itself, and the
    # context manager guarantees the file handle is closed.
    with open(filename, newline="") as handle:
        rows = csv.reader(handle, delimiter=",")
        # Blank lines are parsed as empty rows; skip them instead of
        # failing with IndexError.
        return [row[column] for row in rows if row]
# Convert to floating-point numbers.
def getFloat(result):
    """Return a list holding ``float(item)`` for every item of ``result``."""
    return [float(item) for item in result]
# Get the largest value in the sequence.
def getMax(res_float):
    """Return the largest value of ``res_float``, never less than 0.

    The search starts from 0, so an empty input or an input without any
    value greater than 0 yields 0.
    """
    # Seeding the candidates with 0 reproduces the "start at 0, replace only
    # when strictly greater" scan.
    return max([0, *res_float])
# Read column 0 (x values) and column 1 (y values) of the CSV file.
x, y = (getColumn("C:/hqh/1.csv", col) for col in (0, 1))
x_float, y_float = getFloat(x), getFloat(y)

# Rescale every y value to a percentage of the largest one (relative intensity).
max_y = getMax(y_float)
y_float = np.array(y_float) * 100 / max_y

# Draw the bar chart on a fixed 0-2000 x axis and save it as a PNG.
plt.xlim(0, 2000)
plt.bar(x_float, y_float, width=5)
plt.savefig("C:/hqh/examples.png")
相关标签: 生物信息学