# Source code for matchms.filtering.filter_utils.smile_inchi_inchikey_conversions

import re
from typing import Optional


# Error text raised by the functions in this module that cannot run without rdkit.
rdkit_missing_message = "Conda package 'rdkit' is required for this functionality."

# rdkit is not part of the pip package, so its import is allowed to fail.
_has_rdkit = True
try:
    from rdkit import Chem
except ImportError:
    from collections import UserString

    _has_rdkit = False

    class ChemMock(UserString):
        """Placeholder bound to ``Chem`` when rdkit cannot be imported.

        Calling the object, or reading any attribute it does not otherwise
        define, hands back the object itself, so expressions such as
        ``Chem.MolFromSmiles`` still resolve without raising.
        """

        def __getattr__(self, key):
            # Only reached for names that normal attribute lookup did not find.
            return self

        def __call__(self, *args, **kwargs):
            return self

    Chem = ChemMock("")


def convert_smiles_to_inchi(smiles: str) -> Optional[str]:
    """Translate a SMILES string into InChI with rdkit.

    Delegates to ``mol_converter`` with input type "smiles" and output
    type "inchi" and returns its result unchanged.
    """
    return mol_converter(smiles, input_type="smiles", output_type="inchi")
def convert_inchi_to_smiles(inchi: str) -> Optional[str]:
    """Translate an InChI string into SMILES with rdkit.

    Delegates to ``mol_converter`` with input type "inchi" and output
    type "smiles" and returns its result unchanged.
    """
    return mol_converter(inchi, input_type="inchi", output_type="smiles")
def convert_inchi_to_inchikey(inchi: str) -> Optional[str]:
    """Derive the InChIKey that belongs to an InChI string with rdkit.

    Delegates to ``mol_converter`` with input type "inchi" and output
    type "inchikey" and returns its result unchanged.
    """
    return mol_converter(inchi, input_type="inchi", output_type="inchikey")
def mol_converter(mol_input: str, input_type: str, output_type: str) -> Optional[str]:
    """Convert between molecular string representations using rdkit.

    Reads a "smiles" or "inchi" string and writes it back out as "inchi",
    "smiles", or "inchikey". Requires conda package *rdkit* to be installed.

    Parameters
    ----------
    mol_input
        Input data in "inchi" or "smiles" molecular representation.
    input_type
        Name of the input representation: "smiles" or "inchi".
    output_type
        Name of the requested representation: "smiles", "inchi", or "inchikey".

    Returns
    -------
    The molecule as a string in the requested representation, or None when
    the input cannot be parsed or the conversion yields an empty result.

    Raises
    ------
    ImportError
        When rdkit could not be imported.
    KeyError
        When input_type or output_type is not one of the names listed above.
    """
    if not _has_rdkit:
        raise ImportError(rdkit_missing_message)

    readers = {
        "inchi": Chem.MolFromInchi,
        "smiles": Chem.MolFromSmiles,
    }
    writers = {
        "inchi": Chem.MolToInchi,
        "smiles": Chem.MolToSmiles,
        "inchikey": Chem.MolToInchiKey,
    }

    parse = readers[input_type]
    # Double quotes wrapped around the input are removed before parsing.
    molecule = parse(mol_input.strip('"'))
    if molecule is None:
        return None

    converted = writers[output_type](molecule)
    # An empty conversion result is reported as None as well.
    return converted if converted else None
def is_valid_inchi(inchi: str) -> bool:
    """Tell whether the given string is a valid InChI.

    The string counts as valid when it passes a quick pattern check and
    rdkit can subsequently read it as InChI. Requires conda package *rdkit*
    to be installed.

    Parameters
    ----------
    inchi
        Input string to test if it has format of InChI. None yields False.

    Raises
    ------
    ImportError
        When rdkit could not be imported.
    """
    if not _has_rdkit:
        raise ImportError(rdkit_missing_message)

    if inchi is None:
        return False

    candidate = inchi.strip('"')

    # Cheap pattern screen first, to avoid excess in-depth testing.
    if re.search(r"(InChI=1|1)(S\/|\/)[0-9a-zA-Z\.]{2,}", candidate) is None:
        return False

    # Proper chemical test: valid only if rdkit produces a molecule.
    return bool(Chem.MolFromInchi(candidate))
def is_valid_smiles(smiles: str) -> bool:
    """Tell whether the given string is a valid SMILES.

    The string counts as valid when it passes a character-level pattern
    check and rdkit can subsequently read it as SMILES. Requires conda
    package *rdkit* to be installed.

    Parameters
    ----------
    smiles
        Input string to test if it can be imported as smiles. None yields
        False.

    Raises
    ------
    ImportError
        When rdkit could not be imported.
    """
    if not _has_rdkit:
        raise ImportError(rdkit_missing_message)

    if smiles is None:
        return False

    # Character screen before involving rdkit.
    allowed = r"^([^J][0-9ABCOHNMSPIFKiergalcons@+\-\[\]\(\)\\\/%=#$,.~&!]*)$"
    if re.match(allowed, smiles) is None:
        return False

    # Valid only if rdkit produces a molecule from the string.
    return bool(Chem.MolFromSmiles(smiles))
def is_valid_inchikey(inchikey: str) -> bool:
    """Tell whether the given string has the format of an InChIKey.

    The whole string must consist of 14 uppercase letters, a hyphen,
    10 uppercase letters, a hyphen, and one final uppercase letter.

    Parameters
    ----------
    inchikey
        Input string to test for the InChIKey format. None yields False.
    """
    if inchikey is None:
        return False

    # fullmatch: no extra characters are tolerated before or after the key.
    return re.fullmatch(r"[A-Z]{14}-[A-Z]{10}-[A-Z]", inchikey) is not None