Source code for matchms.networking.networking_functions

""" Helper functions to build and handle spectral networks
"""
from typing import Tuple
import numpy as np
from matchms import Scores


def _select_top_hits(partner_idx, values, own_idx, top_n, ignore_diagonal):
    """Pick the ``top_n`` highest ``values`` and their partner indices.

    Parameters
    ----------
    partner_idx
        Array with the index of the matching partner for every value.
    values
        Array of scores, aligned with ``partner_idx``.
    own_idx
        Index of the entry itself; used to detect the self-match.
    top_n
        Maximum number of hits to return.
    ignore_diagonal
        If True, entries whose partner index equals ``own_idx`` are removed.

    Returns
    -------
    Tuple of (partner indices, scores), both sorted by descending score.
    """
    order = np.argsort(values)[::-1]
    if ignore_diagonal:
        # Remove the self-match *before* truncating; truncating first would
        # leave only top_n - 1 hits whenever the self-match ranks in the top.
        order = order[partner_idx[order] != own_idx]
    order = order[:top_n]
    return partner_idx[order], values[order]


def get_top_hits(scores: Scores,
                 identifier_key: str = "spectrum_id",
                 top_n: int = 25,
                 search_by: str = "queries",
                 score_name: str = None,
                 ignore_diagonal: bool = False) -> Tuple[dict, dict]:
    """Get top_n highest scores (and indices) for every entry.

    Parameters
    ----------
    scores
        Matchms Scores object containing all similarities.
    identifier_key
        Metadata key for the unique identifier of each spectrum in scores.
        The identifiers are used as keys of the returned dictionaries.
        Default is 'spectrum_id'.
    top_n
        Return the indexes and scores for the top_n highest scores. Scores
        between a spectrum and itself (diagonal of scores.scores) are only
        excluded when `ignore_diagonal` is True.
    search_by
        Choose between 'queries' or 'references' which decides if the top_n
        matches for every spectrum in scores.queries or in scores.references
        will be collected and returned.
    score_name
        Name of the score that should be used (if scores contains multiple
        different scores). If None, the name is guessed from the scores.
    ignore_diagonal
        Set to True if scores.scores is symmetric (i.e. if references and
        queries were the same) and if scores between spectra with themselves
        should be excluded.

    Returns
    -------
    similars_idx
        Dictionary mapping each spectrum identifier to the indices of its
        best matches (row indices when searching by queries, column indices
        when searching by references), sorted by descending score.
    similars_scores
        Dictionary mapping each spectrum identifier to the corresponding
        scores, in the same order as the indices.
    """
    # pylint: disable=protected-access, too-many-arguments
    assert search_by in ["queries", "references"], \
        "search_by must be 'queries' or 'references"

    if score_name is None:
        score_name = scores._scores.guess_score_name()

    similars_idx = {}
    similars_scores = {}

    if search_by == "queries":
        # Queries are addressed along the second axis: take one column each.
        for i, spec in enumerate(scores.queries):
            spec_id = spec.get(identifier_key)
            rows, _, values = scores.scores[:, i, score_name]
            similars_idx[spec_id], similars_scores[spec_id] = _select_top_hits(
                rows, values, i, top_n, ignore_diagonal)
    elif search_by == "references":
        # References are addressed along the first axis: take one row each.
        for i, spec in enumerate(scores.references):
            spec_id = spec.get(identifier_key)
            _, cols, values = scores.scores[i, :, score_name]
            similars_idx[spec_id], similars_scores[spec_id] = _select_top_hits(
                cols, values, i, top_n, ignore_diagonal)
    return similars_idx, similars_scores