{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Assessing spectral quality" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The DreaMS project uses a set of mass spectral quality metrics to filter out low-quality or large-molecule spectra mined from public repositories (Figure 2b,c in the paper). This tutorial demonstrates how to apply MS/MS single-spectrum quality metrics to a custom dataset of MS/MS spectra. If you’re interested in using LC-MS/MS dataset-level metrics (e.g., instrument accuracy estimation), please refer to the utils/lcms.py module within the DreaMS package." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from pathlib import Path\n", "from dreams.utils.dformats import DataFormatA\n", "from dreams.utils.data import MSData\n", "from dreams.utils.io import append_to_stem" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Load example dataset in the `.mzML` format" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loading dataset G73954_1x_BC8_01_17287 into memory (1930 spectra)...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/builder/jenkins/ws/enms_ntly_pyoms_whl_Release3.0.0/OpenMS/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp(130): While loading '../data/MSV00008490/G73954_1x_BC8_01_17287.mzML': Required attribute 'softwareRef' not present!\n", "Warning: Parsing error, \"processingMethod\" is missing the required attribute \"softwareRef\".\n", "The software tool which generated this mzML should be fixed. 
Please notify the maintainers.\n" ] } ], "source": [ "in_pth = Path('../data/MSV00008490/G73954_1x_BC8_01_17287.mzML')\n", "msdata = MSData.from_mzml(in_pth)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Subject spectra to quality control checks" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/roman/DreaMS/dreams/utils/spectra.py:258: RuntimeWarning: divide by zero encountered in scalar divide\n", " return max(peak_list[1]) / min(peak_list[1])\n" ] }, { "data": { "text/plain": [ "All checks passed 1039\n", "Number of high intensity peaks >= 3 863\n", "m/z range <= 1000.0 17\n", "Precursor m/z <= 1000.0 7\n", "Intensity amplitude >= 20.0 4\n", "Name: count, dtype: int64" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Get spectra (m/z and intensity arrays) and precursor m/z values from the input dataset\n", "spectra = msdata['spectrum']\n", "prec_mzs = msdata['precursor_mz']\n", "\n", "# Subject each spectrum to spectral quality checks\n", "dformat = DataFormatA()\n", "quality_lvls = [dformat.val_spec(s, p, return_problems=True) for s, p in zip(spectra, prec_mzs)]\n", "\n", "# Check how many spectra passed all filters (`All checks passed`) and how many spectra did not pass some of the filters\n", "pd.Series(quality_lvls).value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create new dataset with only high-quality spectra" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "MSData(pth=../data/MSV00008490/G73954_1x_BC8_01_17287_high_quality.hdf5, in_mem=False) with 1,039 spectra." 
] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Define path for output high-quality file\n", "hq_pth = append_to_stem(in_pth, 'high_quality').with_suffix('.hdf5')\n", "\n", "# Pick only high-quality spectra and save them to `hq_pth`\n", "msdata.form_subset(\n", " idx=np.where(np.array(quality_lvls) == 'All checks passed')[0],\n", " out_pth=hq_pth\n", ")\n", "\n", "# Try reading the new file\n", "msdata_hq = MSData.load(hq_pth)\n", "msdata_hq" ] } ], "metadata": { "kernelspec": { "display_name": "dreams", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" } }, "nbformat": 4, "nbformat_minor": 2 }