GraphStore definition + Data and HeteroData integration (#4816)

mananshah99 · web-flow · commit b274fbdeeeb4 · 2022-06-21T16:12:43.000-07:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ## [2.0.5] - 2022-MM-DD
 ### Added
+- Added `GraphStore` support to `Data` and `HeteroData` ([#4816](https://github.com/pyg-team/pytorch_geometric/pull/4816))
 - Added `FeatureStore` support to `Data` and `HeteroData` ([#4807](https://github.com/pyg-team/pytorch_geometric/pull/4807))
 - Added support for dense aggregations in `global_*_pool` ([#4827](https://github.com/pyg-team/pytorch_geometric/pull/4827))
 - Added Python version requirement ([#4825](https://github.com/pyg-team/pytorch_geometric/pull/4825))
diff --git a/test/data/test_data.py b/test/data/test_data.py
@@ -3,6 +3,7 @@
 import pytest
 import torch
 import torch.multiprocessing as mp
+import torch_sparse
 
 import torch_geometric
 from torch_geometric.data import Data
@@ -264,3 +265,34 @@ def test_basic_feature_store():
     assert 'x' in data.__dict__['_store']
     data.remove_tensor(attr_name='x', index=None)
     assert 'x' not in data.__dict__['_store']
+
+
+# Graph Store #################################################################
+
+
+def test_basic_graph_store():
+    data = Data()
+
+    edge_index = torch.LongTensor([[0, 1], [1, 2]])
+    adj = torch_sparse.SparseTensor(row=edge_index[0], col=edge_index[1])
+
+    def assert_equal_tensor_tuple(expected, actual):
+        assert len(expected) == len(actual)
+        for i in range(len(expected)):
+            assert torch.equal(expected[i], actual[i])
+
+    # We put all three tensor types: COO, CSR, and CSC, and we get them back
+    # to confirm that `GraphStore` works as intended.
+    coo = adj.coo()[:-1]
+    csr = adj.csr()[:-1]
+    csc = adj.csc()[:-1]
+
+    # Put:
+    data.put_edge_index(coo, layout='coo')
+    data.put_edge_index(csr, layout='csr')
+    data.put_edge_index(csc, layout='csc')
+
+    # Get:
+    assert_equal_tensor_tuple(coo, data.get_edge_index('coo'))
+    assert_equal_tensor_tuple(csr, data.get_edge_index('csr'))
+    assert_equal_tensor_tuple(csc, data.get_edge_index('csc'))
diff --git a/test/data/test_feature_store.py b/test/data/test_feature_store.py
@@ -66,7 +66,7 @@ def __init__(self, attr_name=_field_status.UNSET,
 class MyFeatureStoreNoGroupName(MyFeatureStore):
     def __init__(self):
         super().__init__()
-        self._attr_cls = MyTensorAttrNoGroupName
+        self._tensor_attr_cls = MyTensorAttrNoGroupName
 
     @staticmethod
     def key(attr: TensorAttr) -> str:
diff --git a/test/data/test_graph_store.py b/test/data/test_graph_store.py
@@ -0,0 +1,55 @@
+from typing import Optional
+
+import torch
+from torch_sparse import SparseTensor
+
+from torch_geometric.data.graph_store import (
+    EdgeAttr,
+    EdgeLayout,
+    EdgeTensorType,
+    GraphStore,
+)
+
+
+class MyGraphStore(GraphStore):
+    def __init__(self):
+        super().__init__()
+        self.store = {}
+
+    @staticmethod
+    def key(attr: EdgeAttr) -> str:
+        return f"{attr.edge_type or '<default>'}_{attr.layout}"
+
+    def _put_edge_index(self, edge_index: EdgeTensorType,
+                        edge_attr: EdgeAttr) -> bool:
+        self.store[MyGraphStore.key(edge_attr)] = edge_index
+
+    def _get_edge_index(self, edge_attr: EdgeAttr) -> Optional[EdgeTensorType]:
+        return self.store.get(MyGraphStore.key(edge_attr), None)
+
+
+def test_graph_store():
+    graph_store = MyGraphStore()
+    edge_index = torch.LongTensor([[0, 1], [1, 2]])
+    adj = SparseTensor(row=edge_index[0], col=edge_index[1])
+
+    def assert_equal_tensor_tuple(expected, actual):
+        assert len(expected) == len(actual)
+        for i in range(len(expected)):
+            assert torch.equal(expected[i], actual[i])
+
+    # We put all three tensor types: COO, CSR, and CSC, and we get them back
+    # to confirm that `GraphStore` works as intended.
+    coo = adj.coo()[:-1]
+    csr = adj.csr()[:-1]
+    csc = adj.csc()[:-1]
+
+    # Put:
+    graph_store['edge', EdgeLayout.COO] = coo
+    graph_store['edge', 'csr'] = csr
+    graph_store['edge', 'csc'] = csc
+
+    # Get:
+    assert_equal_tensor_tuple(coo, graph_store['edge', 'coo'])
+    assert_equal_tensor_tuple(csr, graph_store['edge', 'csr'])
+    assert_equal_tensor_tuple(csc, graph_store['edge', 'csc'])
diff --git a/test/data/test_hetero_data.py b/test/data/test_hetero_data.py
@@ -2,6 +2,7 @@
 
 import pytest
 import torch
+import torch_sparse
 
 from torch_geometric.data import HeteroData
 from torch_geometric.data.storage import EdgeStorage
@@ -427,3 +428,37 @@ def test_basic_feature_store():
     assert 'x' in data['paper'].__dict__['_mapping']
     data.remove_tensor(group_name='paper', attr_name='x', index=None)
     assert 'x' not in data['paper'].__dict__['_mapping']
+
+
+# Graph Store #################################################################
+
+
+def test_basic_graph_store():
+    data = HeteroData()
+
+    edge_index = torch.LongTensor([[0, 1], [1, 2]])
+    adj = torch_sparse.SparseTensor(row=edge_index[0], col=edge_index[1])
+
+    def assert_equal_tensor_tuple(expected, actual):
+        assert len(expected) == len(actual)
+        for i in range(len(expected)):
+            assert torch.equal(expected[i], actual[i])
+
+    # We put all three tensor types: COO, CSR, and CSC, and we get them back
+    # to confirm that `GraphStore` works as intended.
+    coo = adj.coo()[:-1]
+    csr = adj.csr()[:-1]
+    csc = adj.csc()[:-1]
+
+    # Put:
+    data.put_edge_index(coo, layout='coo', edge_type='1')
+    data.put_edge_index(csr, layout='csr', edge_type='2')
+    data.put_edge_index(csc, layout='csc', edge_type='3')
+
+    # Get:
+    assert_equal_tensor_tuple(coo,
+                              data.get_edge_index(layout='coo', edge_type='1'))
+    assert_equal_tensor_tuple(csr,
+                              data.get_edge_index(layout='csr', edge_type='2'))
+    assert_equal_tensor_tuple(csc,
+                              data.get_edge_index(layout='csc', edge_type='3'))
diff --git a/torch_geometric/data/data.py b/torch_geometric/data/data.py
@@ -24,14 +24,22 @@
     TensorAttr,
     _field_status,
 )
+from torch_geometric.data.graph_store import EdgeAttr, EdgeLayout, GraphStore
 from torch_geometric.data.storage import (
     BaseStorage,
     EdgeStorage,
     GlobalStorage,
     NodeStorage,
 )
 from torch_geometric.deprecation import deprecated
-from torch_geometric.typing import EdgeType, NodeType, OptTensor
+from torch_geometric.typing import (
+    Adj,
+    EdgeTensorType,
+    EdgeType,
+    FeatureTensorType,
+    NodeType,
+    OptTensor,
+)
 from torch_geometric.utils import subgraph
 
 
@@ -316,7 +324,17 @@ def __init__(self, attr_name=_field_status.UNSET,
         super().__init__(None, attr_name, index)
 
 
-class Data(BaseData, FeatureStore):
+@dataclass
+class DataEdgeAttr(EdgeAttr):
+    r"""Edge attribute class for `Data`, which does not require an
+    `edge_type`."""
+    def __init__(self, layout: EdgeLayout, is_sorted: bool = False,
+                 edge_type: EdgeType = None):
+        # Treat edge_type as optional, and move it to the end of the
+        # signature; it is still forwarded to the parent as first argument:
+        super().__init__(edge_type, layout, is_sorted)
+
+
+class Data(BaseData, FeatureStore, GraphStore):
     r"""A data object describing a homogeneous graph.
     The data object can hold node-level, link-level and graph-level attributes.
     In general, :class:`~torch_geometric.data.Data` tries to mimic the
@@ -366,7 +384,11 @@ def __init__(self, x: OptTensor = None, edge_index: OptTensor = None,
                  pos: OptTensor = None, **kwargs):
         # `Data` doesn't support group_name, so we need to adjust `TensorAttr`
         # accordingly here to avoid requiring `group_name` to be set:
-        super().__init__(attr_cls=DataTensorAttr)
+        super().__init__(tensor_attr_cls=DataTensorAttr)
+
+        # `Data` doesn't support edge_type, so we need to adjust `EdgeAttr`
+        # accordingly here to avoid requiring `edge_type` to be set:
+        GraphStore.__init__(self, edge_attr_cls=DataEdgeAttr)
 
         self.__dict__['_store'] = GlobalStorage(_parent=self)
 
@@ -755,9 +777,79 @@ def _remove_tensor(self, attr: TensorAttr) -> bool:
     def __len__(self) -> int:
         return BaseData.__len__(self)
 
+    # GraphStore interface ####################################################
+
+    def _put_edge_index(self, edge_index: EdgeTensorType,
+                        edge_attr: EdgeAttr) -> bool:
+        # Convert the edge index to a recognizable format:
+        attr_name = EDGE_LAYOUT_TO_ATTR_NAME[edge_attr.layout]
+        attr_val = edge_tensor_type_to_adj_type(edge_attr, edge_index)
+        setattr(self, attr_name, attr_val)
+        return True
+
+    def _get_edge_index(self, edge_attr: EdgeAttr) -> Optional[EdgeTensorType]:
+        # Get the requested format and the Adj tensor associated with it:
+        attr_name = EDGE_LAYOUT_TO_ATTR_NAME[edge_attr.layout]
+        attr_val = getattr(self._store, attr_name, None)
+        if attr_val is not None:
+            # Convert from Adj type to Tuple[Tensor, Tensor]
+            attr_val = adj_type_to_edge_tensor_type(edge_attr.layout, attr_val)
+        return attr_val
+
 
 ###############################################################################
 
+# Name of the `Data` attribute under which the edge index of each layout is
+# written by `_put_edge_index` and looked up by `_get_edge_index`:
+EDGE_LAYOUT_TO_ATTR_NAME = {
+    EdgeLayout.COO: 'edge_index',
+    EdgeLayout.CSR: 'adj',
+    EdgeLayout.CSC: 'adj_t',
+}
+
+
+def edge_tensor_type_to_adj_type(
+    attr: EdgeAttr,
+    tensor_tuple: EdgeTensorType,
+) -> Adj:
+    r"""Converts an EdgeTensorType tensor tuple to a PyG Adj tensor."""
+    if attr.layout == EdgeLayout.COO:
+        # COO: (row, col)
+        if (tensor_tuple[0].storage().data_ptr() ==
+                tensor_tuple[1].storage().data_ptr()):
+            # Do not copy if the tensor tuple is constructed from the same
+            # storage (instead, return a view):
+            out = torch.empty(0, dtype=tensor_tuple[0].dtype)
+            out.set_(tensor_tuple[0].storage(), storage_offset=0,
+                     size=tensor_tuple[0].size() + tensor_tuple[1].size())
+            return out.view(2, -1)
+        return torch.stack(tensor_tuple)
+    elif attr.layout == EdgeLayout.CSR:
+        # CSR: (rowptr, col)
+        return SparseTensor(rowptr=tensor_tuple[0], col=tensor_tuple[1],
+                            is_sorted=True)
+    elif attr.layout == EdgeLayout.CSC:
+        # CSC: (row, colptr) this is a transposed adjacency matrix, so rowptr
+        # is the compressed column and col is the uncompressed row.
+        return SparseTensor(rowptr=tensor_tuple[1], col=tensor_tuple[0],
+                            is_sorted=True)
+    raise ValueError(f"Bad edge layout (got '{attr.layout}')")
+
+
+def adj_type_to_edge_tensor_type(layout: EdgeLayout,
+                                 edge_index: Adj) -> EdgeTensorType:
+    r"""Converts a PyG Adj tensor to an EdgeTensorType equivalent."""
+    if isinstance(edge_index, Tensor):
+        return (edge_index[0], edge_index[1])
+    if layout == EdgeLayout.COO:
+        row, col, _ = edge_index.coo()
+        return (row, col)
+    elif layout == EdgeLayout.CSR:
+        rowptr, col, _ = edge_index.csr()
+        return (rowptr, col)
+    else:
+        # CSC is just adj_t.csr():
+        colptr, row, _ = edge_index.csr()
+        return (row, colptr)
+
 
 def size_repr(key: Any, value: Any, indent: int = 0) -> str:
     pad = ' ' * indent
diff --git a/torch_geometric/data/feature_store.py b/torch_geometric/data/feature_store.py
@@ -239,13 +239,13 @@ def __repr__(self) -> str:
 
 
 class FeatureStore(MutableMapping):
-    def __init__(self, attr_cls: Any = TensorAttr):
+    def __init__(self, tensor_attr_cls: Any = TensorAttr):
         r"""Initializes the feature store. Implementor classes can customize
         the ordering and required nature of their :class:`TensorAttr` tensor
         attributes by subclassing :class:`TensorAttr` and passing the subclass
-        as :obj:`attr_cls`."""
+        as :obj:`tensor_attr_cls`."""
         super().__init__()
-        self.__dict__['_attr_cls'] = attr_cls
+        self.__dict__['_tensor_attr_cls'] = tensor_attr_cls
 
     # Core (CRUD) #############################################################
 
@@ -270,7 +270,7 @@ def put_tensor(self, tensor: FeatureTensorType, *args, **kwargs) -> bool:
         Returns:
             bool: Whether insertion was successful.
         """
-        attr = self._attr_cls.cast(*args, **kwargs)
+        attr = self._tensor_attr_cls.cast(*args, **kwargs)
         if not attr.is_fully_specified():
             raise ValueError(f"The input TensorAttr '{attr}' is not fully "
                              f"specified. Please fully specify the input by "
@@ -310,7 +310,7 @@ def to_type(tensor: FeatureTensorType) -> FeatureTensorType:
                 return tensor.numpy()
             return tensor
 
-        attr = self._attr_cls.cast(*args, **kwargs)
+        attr = self._tensor_attr_cls.cast(*args, **kwargs)
         if isinstance(attr.index, slice):
             if attr.index.start == attr.index.stop == attr.index.step is None:
                 attr.index = None
@@ -341,7 +341,7 @@ def remove_tensor(self, *args, **kwargs) -> bool:
         Returns:
             bool: Whether deletion was succesful.
         """
-        attr = self._attr_cls.cast(*args, **kwargs)
+        attr = self._tensor_attr_cls.cast(*args, **kwargs)
         if not attr.is_fully_specified():
             raise ValueError(f"The input TensorAttr '{attr}' is not fully "
                              f"specified. Please fully specify the input by "
@@ -366,7 +366,7 @@ def update_tensor(self, tensor: FeatureTensorType, *args,
         Returns:
             bool: Whether the update was succesful.
         """
-        attr = self._attr_cls.cast(*args, **kwargs)
+        attr = self._tensor_attr_cls.cast(*args, **kwargs)
         self.remove_tensor(attr)
         return self.put_tensor(tensor, attr)
 
@@ -375,7 +375,7 @@ def update_tensor(self, tensor: FeatureTensorType, *args,
     def view(self, *args, **kwargs) -> AttrView:
         r"""Returns an :class:`AttrView` of the feature store, with the defined
         attributes set."""
-        attr = self._attr_cls.cast(*args, **kwargs)
+        attr = self._tensor_attr_cls.cast(*args, **kwargs)
         return AttrView(self, attr)
 
     # Python built-ins ########################################################
@@ -384,7 +384,7 @@ def __setitem__(self, key: TensorAttr, value: FeatureTensorType):
         r"""Supports store[tensor_attr] = tensor."""
         # CastMixin will handle the case of key being a tuple or TensorAttr
         # object:
-        key = self._attr_cls.cast(key)
+        key = self._tensor_attr_cls.cast(key)
         # We need to fully specify the key for __setitem__ as it does not make
         # sense to work with a view here:
         key.fully_specify()
@@ -403,7 +403,7 @@ def __getitem__(self, key: TensorAttr) -> Any:
         """
         # CastMixin will handle the case of key being a tuple or TensorAttr
         # object:
-        attr = self._attr_cls.cast(key)
+        attr = self._tensor_attr_cls.cast(key)
         if attr.is_fully_specified():
             return self.get_tensor(attr)
         # If the view is not fully specified, return a :class:`AttrView`:
@@ -413,7 +413,7 @@ def __delitem__(self, key: TensorAttr):
         r"""Supports del store[tensor_attr]."""
         # CastMixin will handle the case of key being a tuple or TensorAttr
         # object:
-        key = self._attr_cls.cast(key)
+        key = self._tensor_attr_cls.cast(key)
         key.fully_specify()
         self.remove_tensor(key)
 
diff --git a/torch_geometric/data/graph_store.py b/torch_geometric/data/graph_store.py
diff --git a/torch_geometric/data/hetero_data.py b/torch_geometric/data/hetero_data.py
diff --git a/torch_geometric/typing.py b/torch_geometric/typing.py