# Source code for matchms.filtering.metadata_processing.harmonize_undefined_smiles

from typing import List
from matchms.typing import SpectrumType


def harmonize_undefined_smiles(spectrum_in: SpectrumType, undefined: str = "",
                               aliases: List[str] = None) -> SpectrumType:
    """Replace all aliases for empty/undefined smiles entries by ``undefined``.

    Parameters
    ----------
    spectrum_in:
        Input spectrum. It is not modified; the function works on the object
        returned by ``spectrum_in.clone()``. If None, None is returned.
    undefined:
        Give desired entry for undefined smiles fields. Default is "".
    aliases:
        Enter list of strings that are expected to represent undefined entries.
        Default is ["", "N/A", "NA", "n/a", "no data"].

    Returns
    -------
    The cloned spectrum, with its "smiles" entry set to ``undefined`` when the
    entry was missing or equal to one of ``aliases``; otherwise the entry is
    left as it was. None when ``spectrum_in`` is None.
    """
    if spectrum_in is None:
        return None

    spectrum = spectrum_in.clone()

    if aliases is None:
        # Built per call so that no mutable default is shared between calls.
        aliases = ["", "N/A", "NA", "n/a", "no data"]

    smiles = spectrum.get("smiles")
    # A missing "smiles" key (get() yields None) is handled exactly like an
    # alias for "undefined": both end up as the harmonized ``undefined`` value.
    if smiles is None or smiles in aliases:
        spectrum.set("smiles", undefined)

    return spectrum