Source code for matchms.exporting.metadata_export
import csv
import json
from typing import Any, Dict, List, Optional, Set, Tuple
import numpy as np
from ..Spectrum import Spectrum
def _get_metadata_dict(spectrum: Spectrum, include_fields: Optional[List[str]] = None) -> Optional[Dict[str, Any]]:
    """Extract keys from spectrum metadata. Keys that are not found are silently skipped.

    Args:
        spectrum (Spectrum): Spectrum with metadata to extract.
        include_fields (List[str] | str | None): ``None``, the string ``"all"`` or a
            list whose first entry is ``"all"`` selects the complete metadata. Any
            other list names the metadata keys to extract.

    Returns:
        dict[str, Any] | None: The complete metadata, or a dictionary with the
        requested keys that are present (in the order of ``include_fields``).
        ``None`` is returned, after printing a message, when ``include_fields``
        is neither ``"all"`` nor a list.
    """
    # Compare against the whole string: "all"[0] is "a", so an index-based
    # check can never recognise the documented "all" shortcut.
    if include_fields is None or include_fields == "all":
        return spectrum.metadata
    if not isinstance(include_fields, list):
        print("'Include_fields' must be 'all' or list of keys.")
        return None
    # Slicing (instead of indexing) keeps an empty list from raising IndexError.
    if include_fields[:1] == ["all"]:
        return spectrum.metadata

    # Read the metadata once and walk the requested keys in the caller's order,
    # so the resulting dictionary has a predictable key order.
    metadata = spectrum.metadata
    return {key: metadata[key] for key in include_fields if key in metadata}
def export_metadata_as_json(spectrums: List[Spectrum], filename: str,
                            include_fields: Optional[List[str]] = None):
    """Export metadata to json file.

    Parameters
    ----------
    spectrums:
        Expected input is a list of :py:class:`~matchms.Spectrum.Spectrum` objects.
    filename:
        Provide filename to save metadata of spectrum(s) as json file.
    include_fields:
        Selection of metadata keys, handed unchanged to ``_get_metadata_dict``
        for every spectrum. Default is None.
    """
    extracted = (_get_metadata_dict(spectrum, include_fields) for spectrum in spectrums)
    # Falsy results (None or an empty dictionary) are left out of the export.
    exported = [entry for entry in extracted if entry]

    # The file is only opened (and truncated) after all metadata was collected.
    with open(filename, mode='w', encoding="utf-8") as json_file:
        json.dump(exported, json_file)
def export_metadata_as_csv(spectra: List[Spectrum], filename: str,
                           include_fields: Optional[List[str]] = None):
    """Export metadata to csv file.

    Parameters
    ----------
    spectra:
        Expected input is a list of :py:class:`~matchms.Spectrum.Spectrum` objects.
    filename:
        Provide filename to save metadata of spectrum(s) as csv file. The file is
        opened in append mode: if it already exists, a header row and the records
        are added below its current content.
    include_fields:
        Optional list of metadata columns to export. With the default None, every
        column returned by ``get_metadata_as_array`` is written.
    """
    metadata, columns = get_metadata_as_array(spectra)
    if include_fields is not None:
        metadata, columns = _subset_metadata(include_fields, metadata, columns)

    # newline="" hands line-ending control to the csv writer; without it,
    # platforms that translate "\n" on write end up with "\r\r\n" row endings.
    with open(filename, 'a', encoding="utf-8", newline="") as csvfile:  # TODO: assert if file exists
        writer = csv.writer(csvfile)
        writer.writerow(columns)
        writer.writerows(metadata)
def _subset_metadata(include_fields: List[str], metadata: np.array, columns: Set[str]) -> Tuple[np.array, List[str]]:
    """Subset metadata to 'include_fields' and return the matching columns.

    Parameters
    ----------
    include_fields:
        Columns to include. Entries that are not in ``columns`` are skipped and
        repeated entries are used once.
    metadata:
        Data to subset. It is indexed with a list of field names, so it has to
        support that kind of indexing (as a numpy structured array does).
    columns:
        Set of columns present in data

    Returns:
        Tuple[np.array, list[str]]: Subset data and the selected columns. The
        columns are returned as a list in the order of ``include_fields``, which
        is the same field order used to index the data, so a header written from
        them lines up with the data columns.
    """
    # dict.fromkeys removes duplicates while keeping the caller's order; the
    # membership test restricts the selection to columns that actually exist.
    selected = [field for field in dict.fromkeys(include_fields) if field in columns]
    return metadata[selected], selected
def get_metadata_as_array(spectra: List[Spectrum]) -> Tuple[np.array, Set[str]]:
    """Extract union of all metadata as numpy array from all spectra.

    Parameters
    ----------
    spectra:
        Spectra from which to collect metadata. May be empty.

    Returns:
        Tuple[np.array, Set[str]]: Metadata and union of all columns detected in all
        spectra. The metadata is a structured array with one record per spectrum
        and one object-typed field per column; a column missing from a spectrum
        holds whatever ``spectrum.get(column)`` returns for it. The columns are
        returned as a set, and iterating it yields the same order as the fields
        of the array. That order is not guaranteed to be the same between
        interpreter runs.
    """
    if len(spectra) == 0:
        # Nothing to collect: no records and no columns.
        return np.empty(0, dtype=[]), set()

    # Build the union explicitly as a set instead of relying on ``|=``
    # rebinding a dict keys view to a new set.
    keys = set()
    for spectrum in spectra:
        keys.update(spectrum.metadata.keys())

    # The set is not modified from here on, so every iteration over it (the
    # records, the dtype and the caller's header) sees the same column order.
    records = [tuple(spectrum.get(key) for key in keys) for spectrum in spectra]

    # Metadata values can be of any type, hence the generic object dtype.
    values_array = np.array(records, dtype=[(key, object) for key in keys])
    return values_array, keys