Source code for matchms.importing.load_from_json
import ast
import json
import logging
from typing import List, Union
import numpy as np
from matchms.importing.parsing_utils import sort_by_mz
from matchms.Spectrum import Spectrum
logger = logging.getLogger("matchms")
[docs]def load_from_json(filename: str,
metadata_harmonization: bool = True) -> List[Spectrum]:
"""Load spectrum(s) from json file.
JSON document formatted like the `GNPS Spectra library <https://gnps-external.ucsd.edu/gnpslibrary>`_.
Spectrums with zero peaks will be skipped.
Example:
.. code-block:: python
from matchms.importing import load_from_json
file_json = "gnps_testdata.json"
spectrums = load_from_json(file_json)
Parameters
----------
filename
Provide filename for json file containing spectrum(s).
metadata_harmonization : bool, optional
Set to False if metadata harmonization to default keys is not desired.
The default is True.
"""
with open(filename, 'rb') as fin:
spectrums = []
for spectrum_dict in json.load(fin):
spectrum = as_spectrum(spectrum_dict, metadata_harmonization=metadata_harmonization)
if spectrum is not None:
spectrums.append(spectrum)
return spectrums
[docs]def as_spectrum(dct: dict,
metadata_harmonization: bool = True) -> Union[dict, Spectrum, None]:
"""A :py:func:`json.load` object_hook to convert dictionary shaped like
spectrum into :py:class:`~matchms.Spectrum.Spectrum` object.
Parameters
----------
dct
Dictionary shaped like spectrum
Returns
-------
A Spectrum or None when no peaks where found.
"""
# Recognize Spectrum by peaks_json key
if 'peaks_json' in dct:
return dict2spectrum(dct, metadata_harmonization=metadata_harmonization)
return None
[docs]def dict2spectrum(spectrum_dict: dict,
metadata_harmonization: bool) -> Union[Spectrum, None]:
"""Convert dictionary to a :py:class:`~matchms.Spectrum.Spectrum` object.
Parameters
----------
spectrum_dict
Dictionary shaped like a single JSON object from the
`GNPS Spectra library <https://gnps-external.ucsd.edu/gnpslibrary>`_
Returns
-------
A Spectrum or None when no peaks where found.
"""
not_metadata_fields = ["peaks_json"]
parse_fieldnames = {
"inchi_aux":"inchiaux",
"ion_mode":"ionmode"
}
def get_peaks_list(spectrum_dict, fieldname):
peaks_list = spectrum_dict.get(fieldname)
if isinstance(peaks_list, list):
return peaks_list
# Handle peaks list when stored as string
if isinstance(peaks_list, str):
return ast.literal_eval(peaks_list)
return []
def parse_fieldname(key):
"""Add options to read GNPS style json files."""
key_parsed = key.lower()
key_parsed = parse_fieldnames.get(key_parsed, key_parsed)
return key_parsed
metadata_dict = {parse_fieldname(key): spectrum_dict[key]
for key in spectrum_dict if key not in not_metadata_fields}
peaks_list = get_peaks_list(spectrum_dict, "peaks_json")
if len(peaks_list) > 0 and metadata_dict:
mz = np.array(peaks_list)[:, 0]
intensities = np.array(peaks_list)[:, 1]
mz, intensities = sort_by_mz(mz=mz, intensities=intensities)
return Spectrum(mz=mz,
intensities=intensities,
metadata=metadata_dict,
metadata_harmonization=metadata_harmonization)
logger.info("Empty spectrum found (no peaks in 'peaks_json'). Will not be imported.")
return None
[docs]def scores_json_decoder(dct):
"""
Object_hook function to convert JSON dictionary with :py:class:`~matchms.Score.Score` object into a python dictionary.
"""
if "__Scores__" not in dct and "__Similarity__" not in dct:
return dict2spectrum(dct, metadata_harmonization=False)
return dct