Source code for indra_network_search.autocomplete.autocomplete

"""
An API wrapping SortedStringTrie from pytrie (see
https://github.com/gsakkis/pytrie)
"""
from itertools import islice
from typing import List, Optional, Tuple, Union

from networkx import DiGraph, MultiDiGraph
from pytrie import SortedStringTrie
from tqdm import tqdm

# Derived types
Prefixes = List[Tuple[str, str, str]]
DirGraph = Union[DiGraph, MultiDiGraph]

__all__ = ["NodesTrie", "Prefixes"]


[docs]class NodesTrie(SortedStringTrie): """A Trie structure that has case insensitive search methods"""
[docs] @classmethod def from_node_names(cls, graph: DirGraph) -> "NodesTrie": """Produce a NodesTrie instance from a graph with node names as keys Parameters ---------- graph: Graph from which nodes should be searchable. It is assumed the nodes are all keyed by strings Returns ------- : An instance of a NodesTrie containing the node names of the graph as keys and the corresponding (name, ns, id, node degree) tuple as values """ _is_str_nodes(graph) name_indexing = {} for node in tqdm(graph.nodes, desc="Building node name index"): # Get node name in lowercase node_name = node.lower() if node_name in name_indexing: ix = 1 node_name += f"_{ix}" # Increase index until no key is not present while node_name in name_indexing: ix += 1 node_name = node.lower() + f"_{ix}" name_indexing[node_name] = ( node, graph.nodes[node]["ns"], graph.nodes[node]["id"], graph.degree(node), ) return cls(**name_indexing)
[docs] @classmethod def from_node_ns_id(cls, graph: DirGraph) -> "NodesTrie": """Produce a NodesTrie instance from a graph using ns:id as key Parameters ---------- graph: Graph from which nodes should be searchable. It is assumed the nodes have the attributes 'ns' and 'id' accessible via g.nodes[node]['ns'] and g.nodes[node]['id'] Returns ------- : An instance of a NodesTrie containing ns:id of each node of the graph as keys and the corresponding (name, ns, id, node degree) tuple as values """ _is_str_nodes(graph) return cls( **{ f'{graph.nodes[n]["ns"]}:{graph.nodes[n]["id"]}'.lower(): ( n, graph.nodes[n]["ns"], graph.nodes[n]["id"], graph.degree(n), ) for n in tqdm(graph.nodes, desc="Building node grounding autocomplete index") } )
[docs] def case_keys(self, prefix: Optional[str] = None, top_n: Optional[int] = 100) -> List[str]: """Case insensitive wrapper around NodeTrie.keys() Parameters ---------- prefix : The prefix to search top_n : The top ranked entities (by node degree) Returns ------- : Return a list of this trie's keys """ res = [(name, node_degree) for _, (name, _, _, node_degree) in self.items(prefix.lower())] return [name for (name, _) in islice(sorted(res, key=lambda t: (t[1], t[0]), reverse=True), top_n)]
[docs] def case_items(self, prefix: Optional[str] = None, top_n: int = 100) -> Prefixes: """Case insensitive wrapper around NodeTrie.items() Parameters ---------- prefix : The prefix to search Returns ------- : Return a list of (name, namespace, id) tuples """ res = [tup for _, tup in self.items(prefix.lower())] return [ (name, namespace, identifier) for name, namespace, identifier, _ in islice(sorted(res, key=lambda t: (t[3], t[0]), reverse=True), top_n) ]
def _is_str_nodes(g: DirGraph): node = list(islice(g.nodes, 1))[0] if not isinstance(node, str): raise ValueError("Graph nodes are not str, cannot create NodesTrie instance")