Source code for matchms.filtering.metadata_processing.interpret_pepmass
import logging
import re
import numpy as np
from .make_charge_int import _convert_charge_to_int
# Logger obtained under the name "matchms"; used for all messages in this module.
logger = logging.getLogger("matchms")
# Types that _convert_mz_or_intensity accepts for an mz or intensity entry.
_accepted_types = (float, str, int)
# String values that _convert_mz_or_intensity treats as a missing entry.
_accepted_missing_entries = ["", "N/A", "NA", "n/a"]
def interpret_pepmass(spectrum_in):
    """Read the pepmass field (if present) and add its values to the correct field(s).

    The field "pepmass" or "PEPMASS" is often used to describe the precursor ion.
    This function interprets its value as a (mz, intensity, charge) tuple. The
    parts that are present are split up and added to the fields "precursor_mz",
    "precursor_intensity", and "charge".

    Parameters
    ----------
    spectrum_in
        Input spectrum, or None.

    Returns
    -------
    The object returned by ``spectrum_in.clone()`` with its metadata replaced by
    the interpreted metadata, or None if ``spectrum_in`` is None.
    """
    if spectrum_in is None:
        return None

    # All changes are applied to the object returned by clone(), not to
    # spectrum_in itself.
    spectrum = spectrum_in.clone()
    metadata_updated = _interpret_pepmass_metadata(spectrum.metadata)
    spectrum.metadata = metadata_updated
    return spectrum
def _interpret_pepmass_metadata(metadata):
pepmass = metadata.get("pepmass")
if pepmass is None:
return metadata
mz, intensity, charge = _get_mz_intensity_charge(pepmass)
mz = _convert_mz_or_intensity(mz)
intensity = _convert_mz_or_intensity(intensity)
charge = _convert_charge_to_int(charge)
if mz is not None:
if metadata.get("precursor_mz") is not None\
and _substantial_difference(metadata.get("precursor_mz"), mz, atol=0.001):
logger.warning("Overwriting existing precursor_mz %s with new one: %s",
metadata.get("precursor_mz"), str(mz))
metadata["precursor_mz"] = mz
logger.info("Added precursor_mz entry based on field 'pepmass'.")
if intensity is not None:
if metadata.get("precursor_intensity") is not None:
logger.warning("Overwriting existing precursor_intensity %s with new one: %s",
metadata.get("precursor_intensity"), str(intensity))
metadata["precursor_intensity"] = intensity
logger.info("Added precursor_intensity entry based on field 'pepmass'.")
if charge is not None:
if metadata.get("charge") is not None:
logger.warning("Overwriting existing charge %s with new one: %s",
metadata.get("charge"), str(charge))
metadata["charge"] = charge
logger.info("Added charge entry based on field 'pepmass'.")
del metadata["pepmass"]
logger.info("Removed pepmass, since the information was added to other fields")
return metadata
def _get_mz_intensity_charge(pepmass):
try:
if isinstance(pepmass, str):
matches = re.findall(r'\(([^)]+)\)', pepmass)
if len(matches) > 1:
raise ValueError("Found more than one tuple in pepmass field.")
if len(matches) == 1:
pepmass = matches[0].split(",")
if len(matches) == 0:
try:
pepmass = float(pepmass)
except ValueError:
return None, None, None
length = len(pepmass)
values = [None, None, None]
for i in range(length):
values[i] = pepmass[i]
return values[0], values[1], values[2]
except TypeError:
if pepmass is not None:
return pepmass, None, None
return None, None, None
def _convert_mz_or_intensity(entry):
"""Convert mz or intensity to number if possible. Otherwise return None."""
if entry is None:
return None
if isinstance(entry, str) and entry in _accepted_missing_entries:
return None
if not isinstance(entry, _accepted_types):
logger.warning("Found undefined type.")
return None
if isinstance(entry, str):
try:
return float(entry.strip())
except ValueError:
logger.warning("%s can't be converted to float.", entry)
return None
return entry
def _substantial_difference(mz_now, mz_new, atol=0.001):
"""Returns True if mz_now and mz_new differ by more than atol."""
if mz_now is None:
return True
try:
mz_now_float = float(mz_now)
except ValueError:
return True
if np.abs(mz_now_float - mz_new) > atol:
return True
return False