# Source code for matchms.filtering.metadata_processing.add_retention

import logging
import re
from typing import Any, List, Optional
from matchms.typing import SpectrumType
from matchms.utils import filter_none, get_common_keys


logger = logging.getLogger("matchms")


_retention_time_keys = ["retention_time", "retentiontime", "rt", "scan_start_time",
                        "rt_query", "rtinseconds"]
_retention_index_keys = ["retention_index", "retentionindex", "ri"]


def _safe_store_value(metadata: dict, value: Any, target_key: str) -> dict:
    """Helper function to safely store a value in the target key without throwing an exception, but storing 'None' instead.

    Parameters
    ----------
    spectrum
        Spectrum to which to add 'value' in 'target_key'.
    value
        Value to parse into 'target_key'.
    target_key
        Name of the key in which to store the value.

    Returns
    -------
    Spectrum with added key.
    """
    if value is not None:   # one of accepted keys is present
        value = _safe_convert_to_float(value)
    metadata[target_key] = value
    return metadata


def _safe_convert_to_float(retention_time: Any) -> Optional[float]:
    """Safely convert value to float. Return 'None' on failure.

    Parameters
    ----------
    value
        Object to convert to float.

    Returns
    -------
    Converted float value or 'None' if conversion is not possible.
    """
    if isinstance(retention_time, list):
        if len(retention_time) == 1:
            retention_time = retention_time[0]
        else:
            return None

    # logic to read MoNA msp files which specify rt as string with "min" in it
    if isinstance(retention_time, str):
        retention_time = retention_time.strip()
        pattern = r'^([+-]?\d*\.?\d+)\s*(min|s|h|ms|sec)$'
        conversion = {"min": 60, "s": 1, "h": 3600, "ms": 1e-3, "sec": 1}
        match = re.search(pattern, retention_time)

        if match and len(match.groups()) == 2:
            value = match.group(1)
            unit = match.group(2)
            return float(value) * conversion[unit]
    try:
        retention_time = float(retention_time)
        rt = retention_time if retention_time >= 0 else None  # discard negative RT values
    except (ValueError, TypeError):
        logger.warning("%s can't be converted to float.", str(retention_time))
        rt = None
    return rt


def _add_retention(metadata: dict, target_key: str, accepted_keys: List[str]) -> dict:
    """Store the first usable value found under the accepted keys in the target key.

    Every non-'None' value found under a key shared by 'metadata' and
    'accepted_keys' is converted to float; the first conversion that succeeds
    is stored. If none succeeds, 'None' is stored.

    Parameters
    ----------
    metadata
        Metadata dictionary from which to read the values. It is modified in place.
    target_key
        Key under which to store the value.
    accepted_keys
        List of accepted keys from which a value will be read (in order).

    Returns
    -------
    The same metadata dictionary with 'target_key' set.
    """
    present_keys = get_common_keys(metadata.keys(), accepted_keys)
    raw_values = filter_none([metadata[key] for key in present_keys])
    # Convert every candidate eagerly so that each failed conversion is logged.
    converted = [_safe_convert_to_float(raw) for raw in raw_values]
    first_valid = next(filter_none(converted), None)

    return _safe_store_value(metadata, first_valid, target_key)


def add_retention_time(spectrum_in: SpectrumType) -> SpectrumType:
    """Add retention time information to the 'retention_time' key as float.

    Negative values and those not convertible to a float result in
    'retention_time' being 'None'.

    Parameters
    ----------
    spectrum_in
        Spectrum with retention time information. 'None' is passed through.

    Returns
    -------
    Clone of the input spectrum with harmonized retention time information,
    or 'None' if no spectrum was given.
    """
    if spectrum_in is None:
        return None

    # Work on a clone so the input spectrum itself is not reassigned.
    spectrum = spectrum_in.clone()

    target_key = "retention_time"
    spectrum.metadata = _add_retention(spectrum.metadata, target_key, _retention_time_keys)
    return spectrum
def add_retention_index(spectrum_in: SpectrumType) -> SpectrumType:
    """Add retention index into 'retention_index' key if present.

    Parameters
    ----------
    spectrum_in
        Spectrum with RI information. 'None' is passed through.

    Returns
    -------
    Clone of the input spectrum with RI info stored under 'retention_index',
    or 'None' if no spectrum was given.
    """
    if spectrum_in is None:
        return None

    # Work on a clone so the input spectrum itself is not reassigned.
    spectrum = spectrum_in.clone()

    target_key = "retention_index"
    spectrum.metadata = _add_retention(spectrum.metadata, target_key, _retention_index_keys)
    return spectrum