Source code for indra_network_search.autocomplete.autocomplete

An API wrapping SortedStringTrie from pytrie.
from itertools import islice
from typing import List, Optional, Tuple, Union

from networkx import DiGraph, MultiDiGraph
from pytrie import SortedStringTrie
from tqdm import tqdm

# Derived types
Prefixes = List[Tuple[str, str, str]]
DirGraph = Union[DiGraph, MultiDiGraph]

__all__ = ["NodesTrie", "Prefixes"]

class NodesTrie(SortedStringTrie):
    """A Trie structure that has case insensitive search methods

    Keys are stored lowercased. Every value is a
    ``(name, ns, id, node degree)`` tuple, which is the shape the search
    methods below unpack and rank on.
    """

    @classmethod
    def from_node_names(cls, graph: DirGraph) -> "NodesTrie":
        """Produce a NodesTrie instance from a graph with node names as keys

        Node names are lowercased to form the keys. When two nodes share the
        same lowercased name, later ones get a ``_1``, ``_2``, ... suffix so
        that no entry is overwritten.

        Parameters
        ----------
        graph :
            Graph from which nodes should be searchable. It is assumed the
            nodes are all keyed by strings and carry the attributes 'ns' and
            'id' accessible via graph.nodes[node]

        Returns
        -------
        :
            An instance of a NodesTrie containing the node names of the
            graph as keys and the corresponding (name, ns, id, node degree)
            tuple as values

        Raises
        ------
        ValueError
            If the graph nodes are not strings
        KeyError
            If a node lacks the 'ns' or 'id' attribute
        """
        _is_str_nodes(graph)
        name_indexing = {}
        for node in tqdm(graph.nodes, desc="Building node name index"):
            base_key = node.lower()
            node_key = base_key
            ix = 0
            # Append an increasing index until the key is unused
            while node_key in name_indexing:
                ix += 1
                node_key = f"{base_key}_{ix}"
            attrs = graph.nodes[node]
            name_indexing[node_key] = (
                node,
                attrs["ns"],
                attrs["id"],
                # Node degree is the ranking signal used by the search methods
                graph.degree(node),
            )
        return cls(**name_indexing)

    @classmethod
    def from_node_ns_id(cls, graph: DirGraph) -> "NodesTrie":
        """Produce a NodesTrie instance from a graph using ns:id as key

        Keys are the lowercased string ``ns:id``. Nodes that produce the same
        key replace each other; the last one iterated is kept.

        Parameters
        ----------
        graph :
            Graph from which nodes should be searchable. It is assumed the
            nodes have the attributes 'ns' and 'id' accessible via
            g.nodes[node]['ns'] and g.nodes[node]['id']

        Returns
        -------
        :
            An instance of a NodesTrie containing ns:id of each node of the
            graph as keys and the corresponding (name, ns, id, node degree)
            tuple as values

        Raises
        ------
        ValueError
            If the graph nodes are not strings
        KeyError
            If a node lacks the 'ns' or 'id' attribute
        """
        _is_str_nodes(graph)
        return cls(
            **{
                f'{graph.nodes[n]["ns"]}:{graph.nodes[n]["id"]}'.lower(): (
                    n,
                    graph.nodes[n]["ns"],
                    graph.nodes[n]["id"],
                    graph.degree(n),
                )
                for n in tqdm(graph.nodes, desc="Building node grounding autocomplete index")
            }
        )

    def _top_ranked(self, prefix: Optional[str], top_n: Optional[int]):
        """Iterate over the best ranked values whose key matches the prefix

        Values are ordered by descending node degree, ties broken by
        descending name, and cut off after top_n entries (no cut off when
        top_n is None).
        """
        # None means "no prefix filter"; it is handed to items() untouched
        # instead of being lowercased, which would raise AttributeError.
        lookup = None if prefix is None else prefix.lower()
        values = (value for _, value in self.items(lookup))
        ranked = sorted(values, key=lambda t: (t[3], t[0]), reverse=True)
        return islice(ranked, top_n)

    def case_keys(self, prefix: Optional[str] = None, top_n: Optional[int] = 100) -> List[str]:
        """Case insensitive wrapper around NodeTrie.keys()

        Parameters
        ----------
        prefix :
            The prefix to search. If None, all entries are considered
        top_n :
            The top ranked entities (by node degree). If None, all matches
            are returned

        Returns
        -------
        :
            Return a list of the names stored for the matching keys, highest
            node degree first
        """
        return [name for name, _, _, _ in self._top_ranked(prefix, top_n)]

    def case_items(self, prefix: Optional[str] = None, top_n: Optional[int] = 100) -> Prefixes:
        """Case insensitive wrapper around NodeTrie.items()

        Parameters
        ----------
        prefix :
            The prefix to search. If None, all entries are considered
        top_n :
            The top ranked entities (by node degree). If None, all matches
            are returned

        Returns
        -------
        :
            Return a list of (name, namespace, id) tuples, highest node
            degree first
        """
        return [
            (name, namespace, identifier)
            for name, namespace, identifier, _ in self._top_ranked(prefix, top_n)
        ]
def _is_str_nodes(g: "DirGraph") -> None:
    """Check that the nodes of a graph are keyed by strings

    Only the first node yielded by ``g.nodes`` is inspected. An empty graph
    has nothing to check and passes.

    Parameters
    ----------
    g :
        The graph whose nodes should be checked

    Raises
    ------
    ValueError
        If the first node of the graph is not a str
    """
    # A private sentinel tells "no nodes at all" apart from any real node
    # value, so an empty graph no longer fails with an IndexError.
    missing = object()
    first_node = next(iter(g.nodes), missing)
    if first_node is missing:
        return
    if not isinstance(first_node, str):
        raise ValueError("Graph nodes are not str, cannot create NodesTrie instance")