Source code for matchms.filtering.filter_utils.get_neutral_mass_from_smiles

import logging
from typing import Optional
from matchms.constants import PROTON_MASS


logger = logging.getLogger("matchms")


try:  # rdkit is not included in pip package
    from rdkit import Chem
    from rdkit.Chem import Descriptors
except ImportError:
    _has_rdkit = False
    from collections import UserString

    class ChemMock(UserString):
        def __call__(self, *args, **kwargs):
            return self

        def __getattr__(self, key):
            return self

    Chem = AllChem = ChemMock("")
else:
    _has_rdkit = True
rdkit_missing_message = "Conda package 'rdkit' is required for this functionality."


[docs]def get_monoisotopic_neutral_mass(smiles: str) -> float: """Get the monoisotopic neutral mass from a SMILES string chemical identifier for the described molecule. Args: smiles (str): SMILES string defining the structure of the molecule. Raises: ImportError: If RDkit chem is not installed. Returns: float: Computed monoisotopic mass. """ return _get_neutral_mass(smiles, True)
[docs]def get_molecular_weight_neutral_mass(smiles: str) -> float: """Get the (average) molecular weight for the isotopic distribution of the SMILES code. Args: smiles (str): SMILES string describing the structure of the molecule. Raises: ImportError: Raises import error if RDKitChem is not found. Returns: float: Average molecular mass. """ return _get_neutral_mass(smiles, False)
def _get_neutral_mass(smiles: str, monoisotopic: bool) -> Optional[float]: """Get neutral mass of molecule, either average or most abundant monoisotopic mass. Args: smiles (str): SMILES describing the molecule. monoisotopic (bool): whether to compute the monoisotopic mass or average. Raises: ImportError: If RDKit Chem is not found Returns: float: Neutral mass. """ if not _has_rdkit: raise ImportError(rdkit_missing_message) if smiles is None: return None mol = Chem.MolFromSmiles(smiles) if mol is None: logger.warning("No mass could be calculated for smiles: %s, since it is not a valid smiles.", smiles) return None mass = Descriptors.ExactMolWt(mol) if monoisotopic else Descriptors.MolWt(mol) charge = sum(atom.GetFormalCharge() for atom in mol.GetAtoms()) neutral_mass = mass + -charge * PROTON_MASS return neutral_mass