From 0314c7d81e64214cc2f06851dd41c35e89efeab9 Mon Sep 17 00:00:00 2001 From: Yunchae Kim Date: Tue, 10 Oct 2023 16:05:05 -0400 Subject: [PATCH] fix: Removed Py2neo-related code [#93] Refer to GitHub issue... This commit removes all code references to the Py2neo package, which was previously removed from the project's dependencies. The changes include: - Deleted Python code that imported and referenced Py2neo. - Commented out the `Graph` class in `graph/graph.py` and the `Neo4jData` class in `graph/io.py`, as both were based on Py2neo. They were commented out rather than deleted so that they remain available for future reference. - Autoformatted several Python files that had not previously been styled according to the project's style guide. - Removed the Py2neo package from `environment.yml`. The Pytest suite passes after these changes. --- comptox_ai/__init__.py | 22 +- comptox_ai/graph/__init__.py | 12 +- comptox_ai/graph/graph.py | 653 ++++++++++++++++++----------------- comptox_ai/graph/io.py | 375 ++++++++++---------- environment.yml | 2 +- 5 files changed, 536 insertions(+), 528 deletions(-) diff --git a/comptox_ai/__init__.py b/comptox_ai/__init__.py index ebc02802..f709aeb2 100644 --- a/comptox_ai/__init__.py +++ b/comptox_ai/__init__.py @@ -8,20 +8,20 @@ import os from pathlib import Path -#from .comptox_ai import ComptoxAI -#from . import graph -#from . import ontology -#from . import aop +# from .comptox_ai import ComptoxAI +# from . import graph +# from . import ontology +# from . import aop -#from . import cypher +# from . 
import cypher -### THIS BREAKS ON SOME BUILD SYSTEMS (e.g., TravisCI): +# THIS BREAKS ON SOME BUILD SYSTEMS (e.g., TravisCI): # package_src_dir = Path(__file__).parent.parent # version_file = open(os.path.join(package_src_dir, 'VERSION'), 'r') # str_version = version_file.read().strip() -##__version__ = str_version +# __version__ = str_version -### INSTEAD: +# INSTEAD: # PEP0440 compatible formatted version, see: # https://www.python.org/dev/peps/pep-0440/ # @@ -39,9 +39,9 @@ # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # __version__ = '0.1.dev0' -## ^^ Will be in dev on master branch until 0.1a is ready to go +# ^^ Will be in dev on master branch until 0.1a is ready to go -from .graph import Graph +# from .graph import Graph from .db import GraphDB -__all__ = ["Graph"] \ No newline at end of file +__all__ = ["Graph"] diff --git a/comptox_ai/graph/__init__.py b/comptox_ai/graph/__init__.py index bb53304e..57972293 100644 --- a/comptox_ai/graph/__init__.py +++ b/comptox_ai/graph/__init__.py @@ -1,9 +1,9 @@ -from .graph import Graph -from .subgraph import Subgraph -from .path import Path -from .vertex import Vertex from .edge import Edge -from .io import Neo4jData, NetworkXData, GraphSAGEData +# from .graph import Graph +from .io import GraphSAGEData, NetworkXData +from .path import Path +from .subgraph import Subgraph +from .vertex import Vertex -__all__ = ["Graph", "Neo4jData", "NetworkXData", "GraphSAGEData"] +__all__ = ["Graph", "NetworkXData", "GraphSAGEData"] diff --git a/comptox_ai/graph/graph.py b/comptox_ai/graph/graph.py index 64e8da20..f2c1aad2 100644 --- a/comptox_ai/graph/graph.py +++ b/comptox_ai/graph/graph.py @@ -10,25 +10,25 @@ >>> GS = G.convert(to='graphsage') """ +import json +import os +from abc import abstractmethod +from collections import defaultdict +from textwrap import dedent +from typing import Iterable, List, Union + +import networkx as nx import numpy as np import scipy.sparse -import networkx as nx from networkx.readwrite import 
json_graph -from collections import defaultdict -from py2neo import Graph - -from abc import abstractmethod -from typing import List, Iterable, Union -import os -import json -from textwrap import dedent from comptox_ai.cypher import queries +from comptox_ai.graph.metrics import ensure_nx_available, vertex_count from comptox_ai.utils import execute_cypher_transaction -from comptox_ai.graph.metrics import vertex_count, ensure_nx_available + from ..utils import load_config +from .io import GraphDataMixin, GraphSAGEData, NetworkXData -from .io import GraphDataMixin, Neo4jData, NetworkXData, GraphSAGEData def _load_neo4j_config(config_file: str = None): config_dict = load_config(config_file) @@ -47,7 +47,7 @@ def _load_neo4j_config(config_file: str = None): return (uri, username, password) -def _convert(data: GraphDataMixin, from_fmt: str, to_fmt: str, safe: bool=True): +def _convert(data: GraphDataMixin, from_fmt: str, to_fmt: str, safe: bool = True): # Initialize the new data structure if to_fmt == 'neo4j': # TODO: Only compatible with default config file for now @@ -68,316 +68,321 @@ def _convert(data: GraphDataMixin, from_fmt: str, to_fmt: str, safe: bool=True): new_data.add_edges(edges) return new_data - - -class Graph(object): - """ - A graph representation of ComptoxAI data. - - The internal data storage can be in several different formats, each of - which has advantages in different scenarios. - - Read more in the :ref:`User Guide `. - - Parameters - ---------- - data : comptox_ai.graph.io.GraphDataMixin - A graph data structure that is of one of the formats compliant with - ComptoxAI's standardized graph API. - - Attributes - ---------- - format : {"graphsage", "networkx", "neo4j"} - Internal format of the graph data. The format determines many aspects - of how you interact with the graph, including the set of methods that - can be called on it and the types of models that you can construct - without first converting to another format. 
- """ - - def __init__(self, data: GraphDataMixin): - self._data = data - - def __repr__(self): - return dedent( - """ - ComptoxAI Graph - --------------- - Format: {0} - Node count: {1} - Edge count: {2} - """ - ).format( - self.format, - len(self._data._nodes), - len(self._data._edges) - ) - - @property - def data(self): - return self._data - - @property - def format(self): - return self._data.format - - def nodes(self): - """ - Get all nodes in the graph and return as an iterable of tuples. - - Returns - ------- - iterable - Iterable over 2-tuples containing graph nodes. The first element is - the node's integer ID and the second is the URI of that node (if - available). - """ - return self._data.nodes - - def edges(self): - """ - Get all edges in the graph and return as an iterable of tuples. - - Returns - ------- - iterable - Iterable over tuples containing graph edge triples. - """ - return self._data.edges - - def add_nodes(self, nodes: Union[List[tuple], tuple]): - """ - Add one or more nodes to the graph. - """ - if isinstance(nodes, tuple): - self._data.add_node(nodes) - elif isinstance(nodes, list): - self._data.add_nodes(nodes) - else: - raise AttributeError("`nodes` must be a node tuple or list of node tuples - got {0}".format(type(nodes))) - - def add_edges(self, edges: Union[List[tuple], tuple]): - """ - Add one or more edges to the graph. - - Parameters - ---------- - edges : tuple or list of tuple - Edge or edges to add to the graph. - """ - if isinstance(edges, tuple): - self._data.add_edge(edges) - elif isinstance(edges, list): - self._data.add_edges(edges) - else: - raise AttributeError("`edges` must be a node tuple or list of node tuples - got {0}".format(type(edges))) - - def node_id_map(self): - return self._data._node_map - - def is_heterogeneous(self): - return self._data._is_heterogeneous - - def classes(self): - """ - Get a list of ontology classes present in the graph. 
- """ - return self._data.node_labels - - def convert(self, to_fmt: str): - """ - Convert the graph data structure into the specified format. - - The actual graph contained in a `comptox_ai.Graph` can be in a variety - of different formats. When the user loads a graph - """ - if to_fmt not in [ - 'neo4j', - 'networkx', - 'graphsage', - 'dgl' - ]: - raise AttributeError("Invalid format provided for graph conversion.") - - from_fmt = self._data.format - - if from_fmt == to_fmt: - return True - - new_graph = _convert(data = self._data, - from_fmt=from_fmt, - to_fmt=to_fmt) - - # Free memory held for old graph - #delattr(self, _data) - - self._data = new_graph - - @classmethod - def from_neo4j(cls, config_file: str = None, verbose: bool = False): - """Load a connection to a Neo4j graph database and use it to - instantiate a comptox_ai.graph.io.Neo4j object. - - NOTE: All we do here is create a driver for the graph database; the - Neo4j constructor handles building the node index and other important - attributes. This is different from most of the other formats, where - the attributes are provided by the constructor - - Parameters - ---------- - config_file : str, default None - Path to a ComptoxAI configuration file. If None, ComptoxAI will - search for a configuration file in the default location. For more - information, refer to http://comptox.ai/docs/guide/building.html. - - Raises - ------ - RuntimeError - If the data in the configuration file does not point to a valid - Neo4j graph database. 
- - See Also - -------- - comptox_ai.graph.Neo4jData - """ - if verbose: - print("Parsing Neo4j configuration...") - uri, username, password = _load_neo4j_config(config_file) - if verbose: - print(" URI:", uri) - - if verbose: - print("Creating database connection via py2neo...") - database = Graph(uri, auth=(username, password)) - if verbose: - print("Connected to database, now reading contents") - neo4j_data = Neo4jData(database = database) - - return cls(data = neo4j_data) - - @classmethod - def from_networkx(cls): - """ - Create a new ComptoxAI graph from a JSON node-link graph file, storing - the data as a NetworkX graph. - - See Also - -------- - comptox_ai.graph.NetworkXData - """ - - print("Reading NetworkX graph from file...") - with open("./test_json.json", 'r') as fp: - graph_text = json.load(fp) - - nx_g = nx.readwrite.json_graph.node_link_graph(graph_text) - - networkx_data = NetworkXData(graph = nx_g) - - return cls(data = networkx_data) - - @classmethod - def from_graphsage(cls, prefix: str, directory: str=None): - """ - Create a new GraphSAGE data structure from files formatted according to - the examples given in https://github.com/williamleif/GraphSAGE. - - - Parameters - ---------- - prefix : str - The prefix used at the beginning of each file name (see above for - format specification). - directory : str, default=None - The directory (fully specified or relative) containing the data - files to load. - - See Also - -------- - comptox_ai.graph.GraphSAGEData - - Notes - ----- - - The parameters should point to files with the following structure: - - {prefix}-G.json - JSON file containing a NetworkX 'node link' instance of the input - graph. GraphSAGE usually expects there to be 'val' and 'test' - attributes on each node indicating if they are part of the - validation and test sets, but this isn't enforced by ComptoxAI (at - least not currently). 
- - {prefix}-id_map.json - A JSON object that maps graph node ids (integers) to consecutive - integers (0-indexed). - - {prefix}-class_map.json (OPTIONAL) - A JSON object that maps graph node ids (integers) to a one-hot list - of binary class membership (e.g., {2: [0, 0, 1, 0, 1]} means that - node 2 is a member of classes 3 and 5). NOTE: While this is shown - as a mandatory component of a dataset in GraphSAGE's documentation, - we don't enforce that. NOTE: The notion of a class in terms of - GraphSAGE is different than the notion of a class in heterogeneous - network theory. Here, a 'class' is a label to be used in a - supervised learning setting (such as classifying chemicals as - likely carcinogens versus likely non-carcinogens). - - {prefix}-feats.npy (OPTIONAL) - A NumPy ndarray containing numerical node features. NOTE: This - serialization is currently not compatible with heterogeneous - graphs, as GraphSAGE was originally implemented for - nonheterogeneous graphs only. - - {prefix}-walks.txt (OPTIONAL) - A text file containing precomputed random walks along the graph. - Each line is a pair of node integers (e.g., the second fields in - the id_map file) indicating an edge included in random walks. The - lines should be arranged in ascending order, starting with the - first item in each pair. 
- """ - - nx_json_file = os.path.join(directory, "".join([prefix, '-G.json'])) - id_map_file = os.path.join(directory, "".join([prefix, '-id_map.json'])) - class_map_file = os.path.join(directory, "".join([prefix, '-class_map.json'])) - feats_map_file = os.path.join(directory, "".join([prefix, '-feats.npy'])) - walks_file = os.path.join(directory, "".join([prefix, '-walks.txt'])) - - G = json_graph.node_link_graph(json.load(open(nx_json_file, 'r'))) - id_map = json.load(open(id_map_file, 'r')) - - try: - class_map = json.load(open(class_map_file, 'r')) - except FileNotFoundError: - class_map = None - - try: - feats_map = np.load(feats_map_file) - except FileNotFoundError: - feats_map = None - - try: - walks = [] - with open(walks_file, 'r') as fp: - for l in fp: - walks.append(l) - except FileNotFoundError: - walks = None - - graph_data = GraphSAGEData(graph=G, node_map=id_map, - node_classes=class_map, - node_features=feats_map) - - return cls(data = graph_data) - - @classmethod - def from_dgl(cls): - """ - Create a ComptoxAI graph, populating the contents from a DGL graph (not - yet implemented). - - Raises - ------ - NotImplementedError - """ - raise NotImplementedError + +# class Graph(object): +# """ +# A graph representation of ComptoxAI data. + +# The internal data storage can be in several different formats, each of +# which has advantages in different scenarios. + +# Read more in the :ref:`User Guide `. + +# Parameters +# ---------- +# data : comptox_ai.graph.io.GraphDataMixin +# A graph data structure that is of one of the formats compliant with +# ComptoxAI's standardized graph API. + +# Attributes +# ---------- +# format : {"graphsage", "networkx", "neo4j"} +# Internal format of the graph data. The format determines many aspects +# of how you interact with the graph, including the set of methods that +# can be called on it and the types of models that you can construct +# without first converting to another format. 
+# """ + +# def __init__(self, data: GraphDataMixin): +# self._data = data + +# def __repr__(self): +# return dedent( +# """ +# ComptoxAI Graph +# --------------- +# Format: {0} +# Node count: {1} +# Edge count: {2} +# """ +# ).format( +# self.format, +# len(self._data._nodes), +# len(self._data._edges) +# ) + +# @property +# def data(self): +# return self._data + +# @property +# def format(self): +# return self._data.format + +# def nodes(self): +# """ +# Get all nodes in the graph and return as an iterable of tuples. + +# Returns +# ------- +# iterable +# Iterable over 2-tuples containing graph nodes. The first element is +# the node's integer ID and the second is the URI of that node (if +# available). +# """ +# return self._data.nodes + +# def edges(self): +# """ +# Get all edges in the graph and return as an iterable of tuples. + +# Returns +# ------- +# iterable +# Iterable over tuples containing graph edge triples. +# """ +# return self._data.edges + +# def add_nodes(self, nodes: Union[List[tuple], tuple]): +# """ +# Add one or more nodes to the graph. +# """ +# if isinstance(nodes, tuple): +# self._data.add_node(nodes) +# elif isinstance(nodes, list): +# self._data.add_nodes(nodes) +# else: +# raise AttributeError( +# "`nodes` must be a node tuple or list of node tuples - got {0}".format(type(nodes))) + +# def add_edges(self, edges: Union[List[tuple], tuple]): +# """ +# Add one or more edges to the graph. + +# Parameters +# ---------- +# edges : tuple or list of tuple +# Edge or edges to add to the graph. 
+# """ +# if isinstance(edges, tuple): +# self._data.add_edge(edges) +# elif isinstance(edges, list): +# self._data.add_edges(edges) +# else: +# raise AttributeError( +# "`edges` must be a node tuple or list of node tuples - got {0}".format(type(edges))) + +# def node_id_map(self): +# return self._data._node_map + +# def is_heterogeneous(self): +# return self._data._is_heterogeneous + +# def classes(self): +# """ +# Get a list of ontology classes present in the graph. +# """ +# return self._data.node_labels + +# def convert(self, to_fmt: str): +# """ +# Convert the graph data structure into the specified format. + +# The actual graph contained in a `comptox_ai.Graph` can be in a variety +# of different formats. When the user loads a graph +# """ +# if to_fmt not in [ +# 'neo4j', +# 'networkx', +# 'graphsage', +# 'dgl' +# ]: +# raise AttributeError( +# "Invalid format provided for graph conversion.") + +# from_fmt = self._data.format + +# if from_fmt == to_fmt: +# return True + +# new_graph = _convert(data=self._data, +# from_fmt=from_fmt, +# to_fmt=to_fmt) + +# # Free memory held for old graph +# # delattr(self, _data) + +# self._data = new_graph + +# @classmethod +# def from_neo4j(cls, config_file: str = None, verbose: bool = False): +# """Load a connection to a Neo4j graph database and use it to +# instantiate a comptox_ai.graph.io.Neo4j object. + +# NOTE: All we do here is create a driver for the graph database; the +# Neo4j constructor handles building the node index and other important +# attributes. This is different from most of the other formats, where +# the attributes are provided by the constructor + +# Parameters +# ---------- +# config_file : str, default None +# Path to a ComptoxAI configuration file. If None, ComptoxAI will +# search for a configuration file in the default location. For more +# information, refer to http://comptox.ai/docs/guide/building.html. 
+ +# Raises +# ------ +# RuntimeError +# If the data in the configuration file does not point to a valid +# Neo4j graph database. + +# See Also +# -------- +# comptox_ai.graph.Neo4jData +# """ +# if verbose: +# print("Parsing Neo4j configuration...") +# uri, username, password = _load_neo4j_config(config_file) +# if verbose: +# print(" URI:", uri) + +# if verbose: +# print("Creating database connection via py2neo...") +# database = Graph(uri, auth=(username, password)) +# if verbose: +# print("Connected to database, now reading contents") +# neo4j_data = Neo4jData(database=database) + +# return cls(data=neo4j_data) + +# @classmethod +# def from_networkx(cls): +# """ +# Create a new ComptoxAI graph from a JSON node-link graph file, storing +# the data as a NetworkX graph. + +# See Also +# -------- +# comptox_ai.graph.NetworkXData +# """ + +# print("Reading NetworkX graph from file...") +# with open("./test_json.json", 'r') as fp: +# graph_text = json.load(fp) + +# nx_g = nx.readwrite.json_graph.node_link_graph(graph_text) + +# networkx_data = NetworkXData(graph=nx_g) + +# return cls(data=networkx_data) + +# @classmethod +# def from_graphsage(cls, prefix: str, directory: str = None): +# """ +# Create a new GraphSAGE data structure from files formatted according to +# the examples given in https://github.com/williamleif/GraphSAGE. + + +# Parameters +# ---------- +# prefix : str +# The prefix used at the beginning of each file name (see above for +# format specification). +# directory : str, default=None +# The directory (fully specified or relative) containing the data +# files to load. + +# See Also +# -------- +# comptox_ai.graph.GraphSAGEData + +# Notes +# ----- + +# The parameters should point to files with the following structure: + +# {prefix}-G.json +# JSON file containing a NetworkX 'node link' instance of the input +# graph. 
GraphSAGE usually expects there to be 'val' and 'test' +# attributes on each node indicating if they are part of the +# validation and test sets, but this isn't enforced by ComptoxAI (at +# least not currently). + +# {prefix}-id_map.json +# A JSON object that maps graph node ids (integers) to consecutive +# integers (0-indexed). + +# {prefix}-class_map.json (OPTIONAL) +# A JSON object that maps graph node ids (integers) to a one-hot list +# of binary class membership (e.g., {2: [0, 0, 1, 0, 1]} means that +# node 2 is a member of classes 3 and 5). NOTE: While this is shown +# as a mandatory component of a dataset in GraphSAGE's documentation, +# we don't enforce that. NOTE: The notion of a class in terms of +# GraphSAGE is different than the notion of a class in heterogeneous +# network theory. Here, a 'class' is a label to be used in a +# supervised learning setting (such as classifying chemicals as +# likely carcinogens versus likely non-carcinogens). + +# {prefix}-feats.npy (OPTIONAL) +# A NumPy ndarray containing numerical node features. NOTE: This +# serialization is currently not compatible with heterogeneous +# graphs, as GraphSAGE was originally implemented for +# nonheterogeneous graphs only. + +# {prefix}-walks.txt (OPTIONAL) +# A text file containing precomputed random walks along the graph. +# Each line is a pair of node integers (e.g., the second fields in +# the id_map file) indicating an edge included in random walks. The +# lines should be arranged in ascending order, starting with the +# first item in each pair. 
+# """ + +# nx_json_file = os.path.join(directory, "".join([prefix, '-G.json'])) +# id_map_file = os.path.join( +# directory, "".join([prefix, '-id_map.json'])) +# class_map_file = os.path.join( +# directory, "".join([prefix, '-class_map.json'])) +# feats_map_file = os.path.join( +# directory, "".join([prefix, '-feats.npy'])) +# walks_file = os.path.join(directory, "".join([prefix, '-walks.txt'])) + +# G = json_graph.node_link_graph(json.load(open(nx_json_file, 'r'))) +# id_map = json.load(open(id_map_file, 'r')) + +# try: +# class_map = json.load(open(class_map_file, 'r')) +# except FileNotFoundError: +# class_map = None + +# try: +# feats_map = np.load(feats_map_file) +# except FileNotFoundError: +# feats_map = None + +# try: +# walks = [] +# with open(walks_file, 'r') as fp: +# for l in fp: +# walks.append(l) +# except FileNotFoundError: +# walks = None + +# graph_data = GraphSAGEData(graph=G, node_map=id_map, +# node_classes=class_map, +# node_features=feats_map) + +# return cls(data=graph_data) + +# @classmethod +# def from_dgl(cls): +# """ +# Create a ComptoxAI graph, populating the contents from a DGL graph (not +# yet implemented). 
+ +# Raises +# ------ +# NotImplementedError +# """ +# raise NotImplementedError diff --git a/comptox_ai/graph/io.py b/comptox_ai/graph/io.py index 6f300a1a..026175d2 100644 --- a/comptox_ai/graph/io.py +++ b/comptox_ai/graph/io.py @@ -21,19 +21,19 @@ # License: MIT License from abc import abstractmethod +from json import JSONEncoder, dump from typing import Iterable, List, Tuple, Union +import neo4j import networkx as nx import numpy as np import pandas as pd -import neo4j -from py2neo import Graph, Subgraph, Node, Relationship -from json import JSONEncoder, dump from networkx.readwrite.json_graph import node_link_data from ..cypher import queries from ..utils import load_config + def _execute_cypher_transaction(tx, query, **kwargs): if kwargs: verbose = kwargs['verbose'] @@ -47,11 +47,12 @@ def _execute_cypher_transaction(tx, query, **kwargs): records.append(record) return records + class GraphDataMixin(object): """ Abstract base class specifying a common interface for all graph data. 
""" - + @property @abstractmethod def nodes(self): @@ -70,7 +71,7 @@ def is_heterogeneous(self): @abstractmethod def add_node(self, node: tuple): pass - + @abstractmethod def add_edge(self, edge: tuple): pass @@ -127,12 +128,12 @@ class GraphSAGEData(GraphDataMixin): """ format = 'graphsage' - - def __init__(self, graph: nx.DiGraph, node_map: Iterable=None, - edge_map: Iterable=None, node_classes: List[str]=None, - edge_classes: List[str]=None, - node_features: Union[np.ndarray, pd.DataFrame]=None, - edge_features: Union[np.ndarray, pd.DataFrame]=None): + + def __init__(self, graph: nx.DiGraph, node_map: Iterable = None, + edge_map: Iterable = None, node_classes: List[str] = None, + edge_classes: List[str] = None, + node_features: Union[np.ndarray, pd.DataFrame] = None, + edge_features: Union[np.ndarray, pd.DataFrame] = None): self._graph = graph self._node_map = node_map @@ -140,7 +141,7 @@ def __init__(self, graph: nx.DiGraph, node_map: Iterable=None, self._node_classes = node_classes self._edge_classes = edge_classes - + self._node_features = node_features self._edge_features = edge_features @@ -164,7 +165,7 @@ def is_heterogeneous(self): def add_node(self, node: int, **kwargs): """ Add a node to GraphSAGE. - + A node is simply an ID corresponding to a node in the Neo4j graph. Node features aren't tied to the NetworkX digraph under GraphSAGE, instead, they are stored in _node_features. @@ -173,7 +174,7 @@ def add_node(self, node: int, **kwargs): ---------- node : int A Neo4j node id - kwargs : + kwargs : """ self._graph.add_node(node, **kwargs) @@ -207,181 +208,182 @@ def add_edge(self, edge: Tuple[int, str, int]): else: self._graph.add_edge(u, v) -class Neo4jData(GraphDataMixin): - """Internal representation of a connection to a Neo4j graph database - containing ComptoxAI data. - - Importantly, this data structure does not load the complete contents of the - database into Python's memory space. 
This places significantly less demand - on system resources when not executing large queries or performing complex - data manipulations. This representation is also able to unload a fair deal - of logic onto Neo4j's standard library in implementing various standardized - operations. - - The recommended way to instantiate this class is by calling - comptox_ai.Graph.from_neo4j(), which handles establishing a database driver - connection. - - Parameters - ---------- - driver : neo4j.Driver - A driver connected to a Neo4j graph database containing ComptoxAI data. - """ +# class Neo4jData(GraphDataMixin): +# """Internal representation of a connection to a Neo4j graph database +# containing ComptoxAI data. + +# Importantly, this data structure does not load the complete contents of the +# database into Python's memory space. This places significantly less demand +# on system resources when not executing large queries or performing complex +# data manipulations. This representation is also able to unload a fair deal +# of logic onto Neo4j's standard library in implementing various standardized +# operations. + +# The recommended way to instantiate this class is by calling +# comptox_ai.Graph.from_neo4j(), which handles establishing a database driver +# connection. + +# Parameters +# ---------- +# driver : neo4j.Driver +# A driver connected to a Neo4j graph database containing ComptoxAI data. +# """ + +# format = 'neo4j' + +# def __init__(self, database: Graph, verbose: bool = False): +# self._graph = database.default_graph + +# n_size = len(self._graph.nodes) +# e_size = len(self._graph.relationships) + +# if verbose: +# if (n_size > 100000) or (e_size > 400000): +# print("Warning: This is a very large graph! 
It may take a long time to load.") + +# if verbose: +# print(" Reading {0} nodes...".format(n_size)) +# self._nodes = list(self._graph.nodes.match("owl__NamedIndividual")) +# if verbose: +# print(" Reading {0} edges...".format(e_size)) +# self._edges = list(self._graph.relationships.match()) +# if verbose: +# print(" Building index of node IDs...") +# self._node_ids = [n.identity for n in self._nodes] +# if verbose: +# print() +# print("Done! The database connection is ready to use.") + +# @staticmethod +# def standardize_node(n: Node): +# return (( +# n.identity, +# list(n.labels - {'Resource', 'owl__NamedIndividual'})[0], +# dict(n) +# )) + +# @staticmethod +# def standardize_edge(e: Relationship): +# return (( +# e.start_node.identity, +# list(e.types())[0], +# e.end_node.identity, +# dict(e) +# )) + +# @property +# def nodes(self): +# """Get a list of all nodes corresponding to a named individual in the +# ComptoxAI ontology. + +# Returns +# ------- +# list of py2neo.Node +# List of all Neo4j nodes corresponding to a named individual. +# """ +# return [self.standardize_node(n) for n in self._nodes] + +# @property +# def edges(self): +# return [self.standardize_edge(e) for e in self._edges] + +# def node_labels(self): +# """ +# Get all node labels from ns0. + +# Returns +# ------- +# set of str +# Set of ontology labels (as strings) present in the graph schema. +# """ +# all_lbl_set = self._graph.schema.node_labels +# filter_lbls = [x for x in all_lbl_set if x[:5] == "ns0__"] +# return set(filter_lbls) + +# def add_node(self, node: tuple): +# """ +# Add a node to the graph and synchronize it to the remote database. + +# Parameters +# ---------- +# node : tuple of (int, label, **props) +# Node to add to the graph. 
+# """ +# n_id, n_label, n_props = node +# n = Node(n_id, n_props) +# n.update_labels([ +# 'owl__NamedIndividual', +# n_label, +# 'Resource' +# ]) + +# self._graph.create(n) + +# def add_nodes(self, nodes: List[tuple]): +# """ +# Add a list of nodes to the graph and synchronize them to the remote +# database. +# """ + +# ns = [] +# # Since we have to synchronize changes as a single chunk, it's not as +# # simple as calling add_node() for every element of `nodes`. +# for n in nodes: +# n_id, n_label, n_props = n +# nn = Node(n_id, n_props) +# nn.update_labels([ +# 'owl__NamedIndividual', +# n_label, +# 'Resource' +# ]) +# ns.append(nn) + +# self._graph.create(Subgraph(ns)) + +# def add_edge(self, edge: tuple): +# """ +# Add an edge to the graph and synchronize it to the remote database. +# """ +# u, rel_type, v, props = edge +# e = Relationship(u, rel_type, v, props) +# self._graph.create(e) + +# def add_edges(self, edges: List[tuple]): +# """ +# Add a list of edges to the graph and synchronize them to the remote +# database. +# """ +# es = [] +# # Since we have to synchronize changes as a single chunk, it's not as +# # simple as calling add_edge() for every element of `edges`. +# for e in edges: +# u, rel_type, v, props = e +# ee = Relationship(u, rel_type, v, props) +# es.append(ee) + +# self._graph.create(Subgraph(es)) + +# def run_query_in_session(self, query: str): +# """Submit a cypher query transaction to the connected graph database +# driver and return the response to the calling function. + +# Parameters +# ---------- +# query : str +# String representation of the cypher query to be executed. 
+ +# Returns +# ------- +# list of neo4j.Record +# """ +# #raise NotImplementedError +# with self._driver.session() as session: +# query_response = session.read_transaction(_execute_cypher_transaction, query) +# return query_response - format = 'neo4j' - - def __init__(self, database: Graph, verbose: bool = False): - self._graph = database.default_graph - - n_size = len(self._graph.nodes) - e_size = len(self._graph.relationships) - - if verbose: - if (n_size > 100000) or (e_size > 400000): - print("Warning: This is a very large graph! It may take a long time to load.") - - if verbose: - print(" Reading {0} nodes...".format(n_size)) - self._nodes = list(self._graph.nodes.match("owl__NamedIndividual")) - if verbose: - print(" Reading {0} edges...".format(e_size)) - self._edges = list(self._graph.relationships.match()) - if verbose: - print(" Building index of node IDs...") - self._node_ids = [n.identity for n in self._nodes] - if verbose: - print() - print("Done! The database connection is ready to use.") - - @staticmethod - def standardize_node(n: Node): - return (( - n.identity, - list(n.labels - {'Resource', 'owl__NamedIndividual'})[0], - dict(n) - )) - - @staticmethod - def standardize_edge(e: Relationship): - return (( - e.start_node.identity, - list(e.types())[0], - e.end_node.identity, - dict(e) - )) - - @property - def nodes(self): - """Get a list of all nodes corresponding to a named individual in the - ComptoxAI ontology. - - Returns - ------- - list of py2neo.Node - List of all Neo4j nodes corresponding to a named individual. - """ - return [self.standardize_node(n) for n in self._nodes] - - @property - def edges(self): - return [self.standardize_edge(e) for e in self._edges] - - def node_labels(self): - """ - Get all node labels from ns0. - - Returns - ------- - set of str - Set of ontology labels (as strings) present in the graph schema. 
- """ - all_lbl_set = self._graph.schema.node_labels - filter_lbls = [x for x in all_lbl_set if x[:5] == "ns0__"] - return set(filter_lbls) - - def add_node(self, node: tuple): - """ - Add a node to the graph and synchronize it to the remote database. - - Parameters - ---------- - node : tuple of (int, label, **props) - Node to add to the graph. - """ - n_id, n_label, n_props = node - n = Node(n_id, n_props) - n.update_labels([ - 'owl__NamedIndividual', - n_label, - 'Resource' - ]) - - self._graph.create(n) - - def add_nodes(self, nodes: List[tuple]): - """ - Add a list of nodes to the graph and synchronize them to the remote - database. - """ - - ns = [] - # Since we have to synchronize changes as a single chunk, it's not as - # simple as calling add_node() for every element of `nodes`. - for n in nodes: - n_id, n_label, n_props = n - nn = Node(n_id, n_props) - nn.update_labels([ - 'owl__NamedIndividual', - n_label, - 'Resource' - ]) - ns.append(nn) - - self._graph.create(Subgraph(ns)) - - def add_edge(self, edge: tuple): - """ - Add an edge to the graph and synchronize it to the remote database. - """ - u, rel_type, v, props = edge - e = Relationship(u, rel_type, v, props) - self._graph.create(e) - - def add_edges(self, edges: List[tuple]): - """ - Add a list of edges to the graph and synchronize them to the remote - database. - """ - es = [] - # Since we have to synchronize changes as a single chunk, it's not as - # simple as calling add_edge() for every element of `edges`. - for e in edges: - u, rel_type, v, props = e - ee = Relationship(u, rel_type, v, props) - es.append(ee) - - self._graph.create(Subgraph(es)) - - def run_query_in_session(self, query: str): - """Submit a cypher query transaction to the connected graph database - driver and return the response to the calling function. - - Parameters - ---------- - query : str - String representation of the cypher query to be executed. 
- - Returns - ------- - list of neo4j.Record - """ - #raise NotImplementedError - with self._driver.session() as session: - query_response = session.read_transaction(_execute_cypher_transaction, query) - return query_response class NetworkXData(GraphDataMixin): - def __init__(self, graph: nx.DiGraph=None): + def __init__(self, graph: nx.DiGraph = None): if graph is not None: self._graph = graph else: @@ -393,6 +395,7 @@ class NetworkxJsonEncoder(JSONEncoder): """ When encoding JSON, sets are converted to lists. """ + def default(self, o): try: iterable = iter(o) diff --git a/environment.yml b/environment.yml index 12438bb7..c0f03d5f 100644 --- a/environment.yml +++ b/environment.yml @@ -1,3 +1,4 @@ +--- # This Conda environment solves the dependencies needed for GPU support # in ComptoxAI. If you can't (or don't want to) use CUDA 10.2, installing # ComptoxAI using setuptools will be good enough to get you up and running. @@ -22,7 +23,6 @@ dependencies: - openpyxl=3.0.8 - owlready2=0.34 - pandas=1.3.3 - - py2neo=2021.1.5 - pytest=6.2.5 - python=3.8 - pytorch::pytorch