import os
import re
from typing import List, TextIO
from matchms.Spectrum import Spectrum
# Spectrum metadata keys that are written into the <Analyte=1> section,
# mapped to the term that precedes '=' on the output line.
ANALYTE_ATTRIBUTES = {
    'formula': 'MS:1000866|molecular formula',
    'smiles': 'MS:1000868|SMILES formula',
    'inchi': 'MS:1003403|InChI',
    'inchikey': 'MS:1002894|InChIKey',
}

# Spectrum metadata keys that are written as plain 'term=value' lines at the
# start of each spectrum entry. The insertion order is the output order.
SPECTRUM_ATTRIBUTES = {
    'compound_name': 'MS:1003061|library spectrum name',
    'precursor_mz': 'MS:1003208|experimental precursor monoisotopic m/z',
    'scans': 'MS:1003057|scan number',
    'charge': 'MS:1000041|charge state',
    'ionmode': 'MS:1000465|scan polarity',
}

# For the terms listed here the raw metadata value is not written verbatim;
# it is translated through the nested mapping first. Values without a
# translation are dropped by the writer.
MAPPED_SPECTRUM_ATTRIBUTES = {
    'MS:1000465|scan polarity': {
        'positive': 'MS:1000130|positive scan',
        'negative': 'MS:1000129|negative scan',
    },
}

# Metadata keys that are written as a grouped attribute with a unit.
# Each entry is [attribute term, unit term].
STANDARDIZED_SPECTRUM_ATTRIBUTES = {
    'collision_energy': [
        'MS:1000045|collision energy',
        'UO:0000266|electronvolt',
    ],
}
# Public API: mzSpecLib export entry point.
def save_as_mzspeclib(spectra: List[Spectrum], filename: str) -> None:
    """
    Export a list of spectra to a text file in mzSpecLib format.

    The library header is written first, followed by one entry per spectrum
    in list order.

    Parameters:
        spectra (List[Spectrum]): Spectrum objects to export.
        filename (str): Path of the output file. It is opened in write mode
            with UTF-8 encoding, and its base name is used as library name.
    """
    with open(filename, mode='w', encoding='UTF-8') as library_file:
        _write_header(filename, library_file)
        # Positions are passed zero-based; the entry writer derives the
        # number shown in the output from them.
        for position, entry in enumerate(spectra):
            _write_spectrum(file=library_file, idx=position, spectrum=entry)
def _write_spectrum(file: TextIO, idx: int, spectrum: Spectrum) -> None:
    """
    Emit the complete library entry for one spectrum.

    An entry consists of a ``<Spectrum=N>`` line, the spectrum attributes,
    an analyte section (only when analyte metadata is present), the peak
    list and a terminating blank line.

    Parameters:
        file (TextIO): Open text file that receives the entry.
        idx (int): Zero-based position of the spectrum; the entry is numbered idx + 1.
        spectrum (Spectrum): The spectrum to serialise.
    """
    entry_number = idx + 1
    print(f'<Spectrum={entry_number}>', file=file)

    _write_spectrum_attributes(file, spectrum)

    analyte_present = _has_analyte(spectrum)
    if analyte_present:
        _write_analyte(file, spectrum)

    _write_peaks(file, spectrum)

    # A blank line separates this entry from the next one.
    print(file=file)
def _write_analyte(file: TextIO, spectrum: Spectrum) -> None:
    """
    Emit the ``<Analyte=1>`` section of a spectrum entry.

    Every key of ANALYTE_ATTRIBUTES is looked up on the spectrum; keys whose
    value is None are skipped, all others are written as ``term=value``.

    Parameters:
        file (TextIO): Open text file that receives the section.
        spectrum (Spectrum): The spectrum whose analyte metadata is written.
    """
    print('<Analyte=1>', file=file)
    for metadata_key in ANALYTE_ATTRIBUTES:
        entry = spectrum.get(metadata_key)
        if entry is None:
            continue
        term = ANALYTE_ATTRIBUTES[metadata_key]
        print(f'{term}={entry}', file=file)
def _write_spectrum_attributes(file: TextIO, spectrum: Spectrum) -> None:
    """
    Emit all attribute lines of a spectrum entry.

    The fixed attributes from SPECTRUM_ATTRIBUTES come first. Every remaining
    metadata key is then written as a numbered attribute group: keys listed in
    STANDARDIZED_SPECTRUM_ATTRIBUTES get a value/unit pair, keys that belong
    to SPECTRUM_ATTRIBUTES or ANALYTE_ATTRIBUTES are skipped here, and anything
    else becomes an "other attribute" name/value pair. The number of peaks is
    written last.

    Parameters:
        file (TextIO): Open text file that receives the lines.
        spectrum (Spectrum): The spectrum whose metadata is written.
    """
    _write_defined_spectrum_attributes(file, spectrum)

    # Group numbers start at 1 and only advance when a group is written.
    group_number = 1
    for metadata_key in spectrum.metadata.keys():
        if metadata_key in STANDARDIZED_SPECTRUM_ATTRIBUTES:
            writer = _write_spectrum_attribute_with_unit
        elif metadata_key in SPECTRUM_ATTRIBUTES or metadata_key in ANALYTE_ATTRIBUTES:
            # Handled by the fixed-attribute or analyte writers.
            continue
        else:
            writer = _write_other_spectrum_attribute
        writer(file, spectrum, group_number, metadata_key)
        group_number += 1

    peak_total = len(spectrum.peaks)
    print(f'MS:1003059|number of peaks={peak_total}', file=file)
def _write_other_spectrum_attribute(file: TextIO, spectrum: Spectrum, attr_counter: int, attr: str) -> None:
    """
    Emit a free-form metadata entry as an "other attribute" group.

    Two lines sharing the ``[attr_counter]`` prefix are written: one carrying
    the attribute name and one carrying the value returned by ``spectrum.get``.

    Parameters:
        file (TextIO): Open text file that receives the lines.
        spectrum (Spectrum): The spectrum that holds the metadata value.
        attr_counter (int): Group number used as the ``[n]`` prefix.
        attr (str): Metadata key to write.
    """
    name_line = f'[{attr_counter}]MS:1003275|other attribute name={attr}'
    value_line = f'[{attr_counter}]MS:1003276|other attribute value={spectrum.get(attr)}'
    for line in (name_line, value_line):
        print(line, file=file)
def _write_spectrum_attribute_with_unit(file: TextIO, spectrum: Spectrum, attr_counter: int, attr: str) -> None:
    """
    Emit a metadata entry that carries a unit as a numbered attribute group.

    The term and unit are taken from STANDARDIZED_SPECTRUM_ATTRIBUTES. Two
    lines sharing the ``[attr_counter]`` prefix are written: the term with the
    value produced by ``_extract_numeric_value``, and the unit.

    Parameters:
        file (TextIO): Open text file that receives the lines.
        spectrum (Spectrum): The spectrum that holds the metadata value.
        attr_counter (int): Group number used as the ``[n]`` prefix.
        attr (str): Metadata key; expected to be a key of STANDARDIZED_SPECTRUM_ATTRIBUTES.
    """
    term_and_unit = STANDARDIZED_SPECTRUM_ATTRIBUTES.get(attr)
    term, unit = term_and_unit

    # The stored value may carry a textual unit suffix; only the number is kept.
    raw_value = spectrum.get(attr)
    number = _extract_numeric_value(raw_value)

    print(f'[{attr_counter}]{term}={number}', file=file)
    print(f'[{attr_counter}]UO:0000000|unit={unit}', file=file)
def _extract_numeric_value(value: str) -> str:
    """
    Extract the first numeric token from a metadata value.

    The input is converted with ``str()`` before matching, so numeric input
    such as a float is accepted as well (``re`` raises TypeError when given a
    non-string). If the text contains no digit at all, it is returned
    unchanged instead of failing with an IndexError.

    Parameters:
        value (str): The value that contains a number, e.g. '35 eV'.

    Returns:
        str: The first run of digits, optionally containing '.' or ','
            separators, or the stringified input when nothing numeric is found.
    """
    text = str(value)
    # Alternatives, tried in order at each position: digits followed by more
    # digits/separators ('35.5', '1,5'), a decimal with optional leading
    # digits ('.5'), or a plain run of digits ('7').
    match = re.search(r'\d+[.,\d]+|\d*\.\d+|\d+', text)
    if match is None:
        return text
    return match.group(0)
def _write_defined_spectrum_attributes(file: TextIO, spectrum: Spectrum) -> None:
    """
    Emit the fixed spectrum attributes as ``term=value`` lines.

    Each key of SPECTRUM_ATTRIBUTES is looked up on the spectrum. When the
    corresponding term has an entry in MAPPED_SPECTRUM_ATTRIBUTES, the value is
    translated through that mapping. Values that are None, either missing or
    without a translation, are not written.

    Parameters:
        file (TextIO): Open text file that receives the lines.
        spectrum (Spectrum): The spectrum whose metadata is written.
    """
    for metadata_key, term in SPECTRUM_ATTRIBUTES.items():
        entry = spectrum.get(metadata_key)

        if term in MAPPED_SPECTRUM_ATTRIBUTES:
            translations = MAPPED_SPECTRUM_ATTRIBUTES[term]
            entry = translations.get(entry)

        if entry is None:
            continue
        print(f'{term}={entry}', file=file)
def _write_peaks(file: TextIO, spectrum: Spectrum) -> None:
    """
    Emit the ``<Peaks>`` section of a spectrum entry.

    One tab-separated line is written per peak: m/z, intensity and comment.
    Intensities are formatted with two decimals and then stripped of trailing
    zeros and a trailing decimal point. The comment is looked up by m/z in the
    spectrum's 'peak_comments' metadata; '?' is written when there is none.

    Parameters:
        file (TextIO): Open text file that receives the section.
        spectrum (Spectrum): The spectrum whose peaks are written.
    """
    print('<Peaks>', file=file)

    comments_by_mz = spectrum.get('peak_comments', {})
    peaks = spectrum.peaks
    for mz, raw_intensity in zip(peaks.mz, peaks.intensities):
        two_decimals = f'{raw_intensity:.2f}'
        intensity_text = two_decimals.rstrip('0').rstrip('.')
        comment = comments_by_mz.get(mz, '?')
        print(f'{mz}\t{intensity_text}\t{comment}', file=file)
def _has_analyte(spectrum: Spectrum) -> bool:
    """
    Tell whether a spectrum carries any analyte metadata.

    Parameters:
        spectrum (Spectrum): The spectrum to inspect.

    Returns:
        bool: True as soon as one key of ANALYTE_ATTRIBUTES has a truthy
            value on the spectrum, False when none has.
    """
    for metadata_key in ANALYTE_ATTRIBUTES:
        if spectrum.get(metadata_key):
            return True
    return False
def _write_header(filename: str, file: TextIO) -> None:
    """
    Write the library header to the file.

    The library name is the base name of ``filename`` without its extension.
    The header ends with three attribute-set declarations, one each for
    Spectrum, Analyte and Interpretation.

    Parameters:
        filename (str): Path of the output file; only used to derive the library name.
        file (TextIO): The file object to write to.
    """
    # os.path.basename honours every separator the platform accepts (both
    # '\\' and '/' on Windows), which splitting on os.path.sep alone does not.
    name, _ = os.path.splitext(os.path.basename(filename))

    header_lines = (
        '<mzSpecLib>',
        'MS:1003186|library format version=1.0',
        f'MS:1003188|library name={name}',
        '<AttributeSet Spectrum=all>',
        '<AttributeSet Analyte=all>',
        '<AttributeSet Interpretation=all>',
    )
    for line in header_lines:
        print(line, file=file)