Source code for matchms.filtering.metadata_processing.repair_smiles_of_salts

import itertools
import logging
from matchms.filtering.filter_utils.get_neutral_mass_from_smiles import \
    get_monoisotopic_neutral_mass
from matchms.filtering.filter_utils.smile_inchi_inchikey_conversions import \
    is_valid_smiles


logger = logging.getLogger("matchms")


def repair_smiles_of_salts(spectrum_in, mass_tolerance):
    """Repair the SMILES of a salt so that it matches the parent mass.

    The SMILES is split on "." into its parts and every combination of those
    parts is tried in turn. The first combination whose mass (as returned by
    ``get_monoisotopic_neutral_mass``) differs from the parent mass by less
    than ``mass_tolerance`` replaces the original SMILES; the parts that were
    left out are stored under the ``salt_ions`` metadata key.
    E.g. ``C1=NC2=NC=NC(=C2N1)N.Cl`` is converted to ``C1=NC2=NC=NC(=C2N1)N``
    if this matches the parent mass.

    Parameters
    ----------
    spectrum_in
        Input spectrum, or None. It is never modified; all changes are made
        on a clone.
    mass_tolerance
        Upper bound (exclusive) on the absolute difference between the parent
        mass and the mass of a candidate ion combination.

    Returns
    -------
    None if ``spectrum_in`` is None, otherwise a clone of the input. The clone
    is returned unchanged when the SMILES or parent mass is missing, the
    SMILES is invalid, the SMILES contains no ".", or no combination matches.
    """
    if spectrum_in is None:
        return None
    spectrum = spectrum_in.clone()

    smiles = spectrum.get("smiles")
    if smiles is None:
        return spectrum

    parent_mass = spectrum.get("parent_mass")
    if parent_mass is None:
        # Without a parent mass there is nothing to compare the ion masses
        # against; subtracting from None below would raise a TypeError.
        return spectrum

    if not is_valid_smiles(smiles):
        return spectrum

    possible_ion_combinations = _create_possible_ions(smiles)
    if not possible_ion_combinations:
        # It is not a salt
        return spectrum

    for ion, not_used_ions in possible_ion_combinations:
        ion_mass = get_monoisotopic_neutral_mass(ion)
        if ion_mass is None:
            continue
        mass_diff = abs(parent_mass - ion_mass)
        # Additionally testing whether the parent mass is actually the
        # molecular weight only repaired a single spectrum, so that option
        # was deliberately left out for simplicity.
        if mass_diff < mass_tolerance:
            # `spectrum` is already a private copy of the input, so it can be
            # updated in place without cloning a second time.
            spectrum.set("smiles", ion)
            spectrum.set("salt_ions", not_used_ions)
            logger.info("Removed salt ions: %s from %s to match parent mass",
                        not_used_ions, smiles)
            return spectrum

    logger.warning("None of the parts of the smile %s match the parent mass: %s",
                   smiles, parent_mass)
    return spectrum


def _create_possible_ions(smiles): """Selects all possible ion combinations of a salt""" results = [] if "." in smiles: single_ions = smiles.split(".") for r in range(1, len(single_ions) + 1): combinations = itertools.combinations(single_ions, r) for combination in combinations: combined_ion = ".".join(combination) removed_ions = single_ions.copy() for used_ion in combination: removed_ions.remove(used_ion) removed_ions = ".".join(removed_ions) results.append((combined_ion, removed_ions)) return results