Source code for matchms.filtering.metadata_processing.repair_adduct_based_on_smiles

import logging
from matchms import Spectrum
from matchms.filtering.filter_utils.get_neutral_mass_from_smiles import \
    get_monoisotopic_neutral_mass
from ..filter_utils.load_known_adducts import load_known_adducts
from .repair_parent_mass_is_mol_wt import repair_parent_mass_is_mol_wt


logger = logging.getLogger("matchms")


[docs]def repair_adduct_based_on_smiles(spectrum_in: Spectrum, mass_tolerance: float, accept_parent_mass_is_mol_wt: bool = True): """ Corrects the adduct of a spectrum based on its SMILES representation and the precursor m/z. Given a spectrum, this function tries to match the spectrum's parent mass, derived from its precursor m/z and known adducts, to the neutral monoisotopic mass of the molecule derived from its SMILES representation. If a match is found within a given mass tolerance, the adduct and parent mass of the spectrum are updated. Parameters: ---------- spectrum_in : Spectrum The input spectrum whose adduct needs to be repaired. mass_tolerance : float Maximum allowed mass difference between the calculated parent mass and the neutral monoisotopic mass derived from the SMILES. accept_parent_mass_is_mol_wt : bool, optional (default=True) Allows the function to attempt repairing the spectrum's parent mass by assuming it represents the molecule's weight. If True, further checks and corrections are made using `repair_parent_mass_is_mol_wt`. """ if spectrum_in is None: return None changed_spectrum = spectrum_in.clone() precursor_mz = changed_spectrum.get("precursor_mz") ion_mode = changed_spectrum.get("ionmode") if ion_mode not in ("positive", "negative"): logger.warning("Ionmode: %s not positive or negative, first run derive_ionmode", ion_mode) return changed_spectrum if precursor_mz is None: logger.warning("Precursor_mz is None, first run add_precursor_mz") return changed_spectrum adducts_df = load_known_adducts() smiles_mass = get_monoisotopic_neutral_mass(changed_spectrum.get("smiles")) if smiles_mass is None: return changed_spectrum parent_masses = (precursor_mz - adducts_df["correction_mass"]) / adducts_df["mass_multiplier"] mass_differences = abs(parent_masses-smiles_mass) # Select the lowest value smalles_mass_index = mass_differences.idxmin() parent_mass = parent_masses[smalles_mass_index] adduct = adducts_df.iloc[smalles_mass_index]["adduct"] # Change spectrum. This spectrum will only be returned if the mass difference is smaller than mass tolerance changed_spectrum.set("parent_mass", parent_mass) changed_spectrum.set("adduct", adduct) if mass_differences[smalles_mass_index] < mass_tolerance: logger.info("Adduct was set from %s to %s", spectrum_in.get('adduct'), adduct) return changed_spectrum if accept_parent_mass_is_mol_wt: changed_spectrum = repair_parent_mass_is_mol_wt(changed_spectrum, mass_tolerance) if abs(changed_spectrum.get("parent_mass") - smiles_mass) < mass_tolerance: logger.info("Adduct was set from %s to %s", spectrum_in.get('adduct'), adduct) return changed_spectrum return spectrum_in