From 36f0651e64d5c1d51b7502203d7e8a51a0707293 Mon Sep 17 00:00:00 2001 From: Manan Shah Date: Wed, 6 Jul 2022 00:42:36 +0000 Subject: [PATCH 1/4] init --- torch_geometric/data/graph_store.py | 15 +++++++++++---- torch_geometric/loader/neighbor_loader.py | 3 ++- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/torch_geometric/data/graph_store.py b/torch_geometric/data/graph_store.py index 48e66bd17503..95105cfac787 100644 --- a/torch_geometric/data/graph_store.py +++ b/torch_geometric/data/graph_store.py @@ -132,8 +132,12 @@ def get_edge_index(self, *args, **kwargs) -> EdgeTensorType: # Layout Conversion ####################################################### # TODO support `replace` to replace the existing edge index. - def _to_layout(self, layout: EdgeLayout, - store: bool = False) -> ConversionOutputType: + def _to_layout( + self, + layout: EdgeLayout, + store: bool = False, + is_sorted: bool = False, + ) -> ConversionOutputType: # Obtain all edge attributes, grouped by type: edge_attrs = self.get_all_edge_attrs() edge_type_to_attrs: Dict[Any, List[EdgeAttr]] = defaultdict(list) @@ -200,6 +204,8 @@ def _to_layout(self, layout: EdgeLayout, row, col, perm = to_csc(adj, from_attr_copy, device='cpu') else: + # Respect is_sorted override (sorted by col): + from_attr.is_sorted = from_attr.is_sorted or is_sorted adj = edge_tensor_type_to_adj_type(from_attr, from_tuple) # Actually colptr, row, perm @@ -242,10 +248,11 @@ def csr(self, store: bool = False) -> ConversionOutputType: optionally storing the converted edge indices in the graph store.""" return self._to_layout(EdgeLayout.CSR, store) - def csc(self, store: bool = False) -> ConversionOutputType: + def csc(self, store: bool = False, + is_sorted: bool = False) -> ConversionOutputType: r"""Converts the edge indices in the graph store to CSC format, optionally storing the converted edge indices in the graph store.""" - return self._to_layout(EdgeLayout.CSC, store) + return self._to_layout(EdgeLayout.CSC, 
store, is_sorted) # Additional methods ###################################################### diff --git a/torch_geometric/loader/neighbor_loader.py b/torch_geometric/loader/neighbor_loader.py index ff3c0e7b9cfa..5ac23dc06e98 100644 --- a/torch_geometric/loader/neighbor_loader.py +++ b/torch_geometric/loader/neighbor_loader.py @@ -122,7 +122,8 @@ def __init__( self.input_type = input_type # Obtain CSC representations for in-memory sampling: - row_dict, colptr_dict, perm_dict = graph_store.csc() + row_dict, colptr_dict, perm_dict = graph_store.csc( + is_sorted=is_sorted) self.row_dict = { edge_type_to_str(k): v for k, v in row_dict.items() From cc9fe1f90656a1059ec816b834e8647667989eb2 Mon Sep 17 00:00:00 2001 From: Manan Shah Date: Wed, 6 Jul 2022 00:49:05 +0000 Subject: [PATCH 2/4] update --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 55764f4c8505..9be18ab6dd77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,12 +5,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
## [2.0.5] - 2022-MM-DD ### Added -- Added `LinkeNeighborLoader` support to lightning datamodule ([#4868](https://github.com/pyg-team/pytorch_geometric/pull/4868)) +- Added `LinkNeighborLoader` support to lightning datamodule ([#4868](https://github.com/pyg-team/pytorch_geometric/pull/4868)) - Added `predict()` support to the `LightningNodeData` module ([#4884](https://github.com/pyg-team/pytorch_geometric/pull/4884)) - Added `time_attr` argument to `LinkNeighborLoader` ([#4877](https://github.com/pyg-team/pytorch_geometric/pull/4877)) - Added a `filter_per_worker` argument to data loaders to allow filtering of data within sub-processes ([#4873](https://github.com/pyg-team/pytorch_geometric/pull/4873)) - Added a `NeighborLoader` benchmark script ([#4815](https://github.com/pyg-team/pytorch_geometric/pull/4815)) -- Added support for `FeatureStore` and `GraphStore` in `NeighborLoader` ([#4817](https://github.com/pyg-team/pytorch_geometric/pull/4817), [#4851](https://github.com/pyg-team/pytorch_geometric/pull/4851), [#4854](https://github.com/pyg-team/pytorch_geometric/pull/4854), [#4856](https://github.com/pyg-team/pytorch_geometric/pull/4856), [#4857](https://github.com/pyg-team/pytorch_geometric/pull/4857), [#4882](https://github.com/pyg-team/pytorch_geometric/pull/4882), [#4883](https://github.com/pyg-team/pytorch_geometric/pull/4883)) +- Added support for `FeatureStore` and `GraphStore` in `NeighborLoader` ([#4817](https://github.com/pyg-team/pytorch_geometric/pull/4817), [#4851](https://github.com/pyg-team/pytorch_geometric/pull/4851), [#4854](https://github.com/pyg-team/pytorch_geometric/pull/4854), [#4856](https://github.com/pyg-team/pytorch_geometric/pull/4856), [#4857](https://github.com/pyg-team/pytorch_geometric/pull/4857), [#4882](https://github.com/pyg-team/pytorch_geometric/pull/4882), [#4883](https://github.com/pyg-team/pytorch_geometric/pull/4883), [#4922](https://github.com/pyg-team/pytorch_geometric/pull/4922)) - Added a `normalize` parameter to 
`dense_diff_pool` ([#4847](https://github.com/pyg-team/pytorch_geometric/pull/4847)) - Added `size=None` explanation to jittable `MessagePassing` modules in the documentation ([#4850](https://github.com/pyg-team/pytorch_geometric/pull/4850)) - Added documentation to the `DataLoaderIterator` class ([#4838](https://github.com/pyg-team/pytorch_geometric/pull/4838)) From b125c9700909d7b0f0939b5709289a41c3d5430e Mon Sep 17 00:00:00 2001 From: Manan Shah Date: Wed, 6 Jul 2022 19:56:41 +0000 Subject: [PATCH 3/4] update --- test/loader/test_neighbor_loader.py | 9 ++ torch_geometric/data/data.py | 5 ++ torch_geometric/data/graph_store.py | 103 ++++++++++++---------- torch_geometric/data/hetero_data.py | 6 ++ torch_geometric/loader/neighbor_loader.py | 3 +- 5 files changed, 76 insertions(+), 50 deletions(-) diff --git a/test/loader/test_neighbor_loader.py b/test/loader/test_neighbor_loader.py index 789403f2747e..445cfdf840ee 100644 --- a/test/loader/test_neighbor_loader.py +++ b/test/loader/test_neighbor_loader.py @@ -322,6 +322,15 @@ def test_custom_neighbor_loader(FeatureStore, GraphStore): edge_type=('author', 'to', 'paper'), layout='csc', size=(200, 100)) + # COO (sorted): + edge_index = get_edge_index(200, 200, 100) + edge_index = edge_index[:, edge_index[1].argsort()] + data['author', 'to', 'author'].edge_index = edge_index + coo = (edge_index[0], edge_index[1]) + graph_store.put_edge_index(edge_index=coo, + edge_type=('author', 'to', 'author'), + layout='coo', size=(200, 200), is_sorted=True) + # Construct neighbor loaders: loader1 = NeighborLoader(data, batch_size=20, input_nodes=('paper', range(100)), diff --git a/torch_geometric/data/data.py b/torch_geometric/data/data.py index 7894c7b423c8..d3903e8fde6f 100644 --- a/torch_geometric/data/data.py +++ b/torch_geometric/data/data.py @@ -812,6 +812,9 @@ def _put_edge_index(self, edge_index: EdgeTensorType, attr_val = edge_tensor_type_to_adj_type(edge_attr, edge_index) setattr(self, attr_name, attr_val) + # Set edge 
attributes: + setattr(self, f'{attr_name}_edge_attr', edge_attr) + # Set size, if possible: size = edge_attr.size if size is not None: @@ -839,8 +842,10 @@ def get_all_edge_attrs(self) -> List[EdgeAttr]: out = [] for layout, attr_name in EDGE_LAYOUT_TO_ATTR_NAME.items(): if attr_name in self: + attr_val = self[f'{attr_name}_edge_attr'] out.append( EdgeAttr(edge_type=None, layout=layout, + is_sorted=attr_val.is_sorted, size=(self.num_nodes, self.num_nodes))) return out diff --git a/torch_geometric/data/graph_store.py b/torch_geometric/data/graph_store.py index 95105cfac787..380aaabe0aed 100644 --- a/torch_geometric/data/graph_store.py +++ b/torch_geometric/data/graph_store.py @@ -117,9 +117,12 @@ def get_edge_index(self, *args, **kwargs) -> EdgeTensorType: Raises: KeyError: if the edge index corresponding to attr was not found. """ + edge_attr = self._edge_attr_cls.cast(*args, **kwargs) edge_attr.layout = EdgeLayout(edge_attr.layout) # Override is_sorted for CSC and CSR: + # TODO treat is_sorted specially in this function, where is_sorted=True + # returns an edge index sorted by column. 
edge_attr.is_sorted = edge_attr.is_sorted or (edge_attr.layout in [ EdgeLayout.CSC, EdgeLayout.CSR ]) @@ -131,12 +134,57 @@ def get_edge_index(self, *args, **kwargs) -> EdgeTensorType: # Layout Conversion ####################################################### + def _edge_to_layout( + self, + attr: EdgeAttr, + layout: EdgeLayout, + ) -> Tuple[Tensor, Tensor, OptTensor]: + print(attr) + from_tuple = self.get_edge_index(attr) + + if layout == EdgeLayout.COO: + if attr.layout == EdgeLayout.CSR: + col = from_tuple[1] + row = torch.ops.torch_sparse.ptr2ind(from_tuple[0], + col.numel()) + else: + row = from_tuple[0] + col = torch.ops.torch_sparse.ptr2ind(from_tuple[1], + row.numel()) + perm = None + + elif layout == EdgeLayout.CSR: + # We convert to CSR by converting to CSC on the transpose + if attr.layout == EdgeLayout.COO: + adj = edge_tensor_type_to_adj_type( + attr, (from_tuple[1], from_tuple[0])) + else: + adj = edge_tensor_type_to_adj_type(attr, from_tuple).t() + + # NOTE we set is_sorted=False here as is_sorted refers to + # the edge_index being sorted by the destination node + # (column), but here we deal with the transpose + attr_copy = copy.copy(attr) + attr_copy.is_sorted = False + attr_copy.size = None if attr.size is None else (attr.size[1], + attr.size[0]) + + # Actually rowptr, col, perm + row, col, perm = to_csc(adj, attr_copy, device='cpu') + + else: + adj = edge_tensor_type_to_adj_type(attr, from_tuple) + + # Actually colptr, row, perm + col, row, perm = to_csc(adj, attr, device='cpu') + + return row, col, perm + # TODO support `replace` to replace the existing edge index. 
- def _to_layout( + def _all_edges_to_layout( self, layout: EdgeLayout, store: bool = False, - is_sorted: bool = False, ) -> ConversionOutputType: # Obtain all edge attributes, grouped by type: edge_attrs = self.get_all_edge_attrs() @@ -169,47 +217,7 @@ def _to_layout( else: from_attr = edge_attrs[edge_layouts.index(EdgeLayout.CSR)] - from_tuple = self.get_edge_index(from_attr) - - # Convert to the new layout: - if layout == EdgeLayout.COO: - if from_attr.layout == EdgeLayout.CSR: - col = from_tuple[1] - row = torch.ops.torch_sparse.ptr2ind( - from_tuple[0], col.numel()) - else: - row = from_tuple[0] - col = torch.ops.torch_sparse.ptr2ind( - from_tuple[1], row.numel()) - perm = None - - elif layout == EdgeLayout.CSR: - # We convert to CSR by converting to CSC on the transpose - if from_attr.layout == EdgeLayout.COO: - adj = edge_tensor_type_to_adj_type( - from_attr, (from_tuple[1], from_tuple[0])) - else: - adj = edge_tensor_type_to_adj_type( - from_attr, from_tuple).t() - - # NOTE we set is_sorted=False here as is_sorted refers to - # the edge_index being sorted by the destination node - # (column), but here we deal with the transpose - from_attr_copy = copy.copy(from_attr) - from_attr_copy.is_sorted = False - from_attr_copy.size = None if from_attr.size is None else ( - from_attr.size[1], from_attr.size[0]) - - # Actually rowptr, col, perm - row, col, perm = to_csc(adj, from_attr_copy, device='cpu') - - else: - # Respect is_sorted override (sorted by col): - from_attr.is_sorted = from_attr.is_sorted or is_sorted - adj = edge_tensor_type_to_adj_type(from_attr, from_tuple) - - # Actually colptr, row, perm - col, row, perm = to_csc(adj, from_attr, device='cpu') + row, col, perm = self._edge_to_layout(from_attr, layout) row_dict[from_attr.edge_type] = row col_dict[from_attr.edge_type] = col @@ -241,18 +249,17 @@ def _to_layout( def coo(self, store: bool = False) -> ConversionOutputType: r"""Converts the edge indices in the graph store to COO format, optionally 
storing the converted edge indices in the graph store.""" - return self._to_layout(EdgeLayout.COO, store) + return self._all_edges_to_layout(EdgeLayout.COO, store) def csr(self, store: bool = False) -> ConversionOutputType: r"""Converts the edge indices in the graph store to CSR format, optionally storing the converted edge indices in the graph store.""" - return self._to_layout(EdgeLayout.CSR, store) + return self._all_edges_to_layout(EdgeLayout.CSR, store) - def csc(self, store: bool = False, - is_sorted: bool = False) -> ConversionOutputType: + def csc(self, store: bool = False) -> ConversionOutputType: r"""Converts the edge indices in the graph store to CSC format, optionally storing the converted edge indices in the graph store.""" - return self._to_layout(EdgeLayout.CSC, store, is_sorted) + return self._all_edges_to_layout(EdgeLayout.CSC, store) # Additional methods ###################################################### diff --git a/torch_geometric/data/hetero_data.py b/torch_geometric/data/hetero_data.py index 6a0505b2654f..8ed5818c8cd9 100644 --- a/torch_geometric/data/hetero_data.py +++ b/torch_geometric/data/hetero_data.py @@ -701,6 +701,9 @@ def _put_edge_index(self, edge_index: EdgeTensorType, attr_val = edge_tensor_type_to_adj_type(edge_attr, edge_index) setattr(self[edge_attr.edge_type], attr_name, attr_val) + # Set edge attributes: + setattr(self[edge_attr.edge_type], f'{attr_name}_edge_attr', edge_attr) + key = self._to_canonical(edge_attr.edge_type) src, _, dst = key @@ -730,8 +733,11 @@ def get_all_edge_attrs(self) -> List[EdgeAttr]: for edge_type, edge_store in self.edge_items(): for layout, attr_name in EDGE_LAYOUT_TO_ATTR_NAME.items(): if attr_name in edge_store: + attr_val = getattr(self[edge_type], + f'{attr_name}_edge_attr') out.append( EdgeAttr(edge_type=edge_type, layout=layout, + is_sorted=attr_val.is_sorted, size=self[edge_type].size())) return out diff --git a/torch_geometric/loader/neighbor_loader.py 
b/torch_geometric/loader/neighbor_loader.py index 5ac23dc06e98..ff3c0e7b9cfa 100644 --- a/torch_geometric/loader/neighbor_loader.py +++ b/torch_geometric/loader/neighbor_loader.py @@ -122,8 +122,7 @@ def __init__( self.input_type = input_type # Obtain CSC representations for in-memory sampling: - row_dict, colptr_dict, perm_dict = graph_store.csc( - is_sorted=is_sorted) + row_dict, colptr_dict, perm_dict = graph_store.csc() self.row_dict = { edge_type_to_str(k): v for k, v in row_dict.items() From d51ee887eed2cd8227ebaf3b6952a9b764eb2a6c Mon Sep 17 00:00:00 2001 From: Manan Shah Date: Thu, 7 Jul 2022 20:26:45 +0000 Subject: [PATCH 4/4] updates --- torch_geometric/data/data.py | 21 +++++++++++---------- torch_geometric/data/graph_store.py | 1 - torch_geometric/data/hetero_data.py | 20 +++++++++++--------- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/torch_geometric/data/data.py b/torch_geometric/data/data.py index fc723002e237..a2ad1eef9c1f 100644 --- a/torch_geometric/data/data.py +++ b/torch_geometric/data/data.py @@ -843,7 +843,10 @@ def _put_edge_index(self, edge_index: EdgeTensorType, setattr(self, attr_name, attr_val) # Set edge attributes: - setattr(self, f'{attr_name}_edge_attr', edge_attr) + if not hasattr(self, '_edge_attrs'): + self._edge_attrs = {} + + self._edge_attrs[edge_attr.layout.value] = edge_attr # Set size, if possible: size = edge_attr.size @@ -869,15 +872,13 @@ def _get_edge_index(self, edge_attr: EdgeAttr) -> Optional[EdgeTensorType]: def get_all_edge_attrs(self) -> List[EdgeAttr]: r"""Returns `EdgeAttr` objects corresponding to the edge indices stored in `Data` and their layouts""" - out = [] - for layout, attr_name in EDGE_LAYOUT_TO_ATTR_NAME.items(): - if attr_name in self: - attr_val = self[f'{attr_name}_edge_attr'] - out.append( - EdgeAttr(edge_type=None, layout=layout, - is_sorted=attr_val.is_sorted, - size=(self.num_nodes, self.num_nodes))) - return out + if not hasattr(self, '_edge_attrs'): + return [] + + 
edge_attrs = self._edge_attrs.values() + for attr in edge_attrs: + attr.size = (self.num_nodes, self.num_nodes) + return edge_attrs ############################################################################### diff --git a/torch_geometric/data/graph_store.py b/torch_geometric/data/graph_store.py index 380aaabe0aed..8f35792c254f 100644 --- a/torch_geometric/data/graph_store.py +++ b/torch_geometric/data/graph_store.py @@ -139,7 +139,6 @@ def _edge_to_layout( attr: EdgeAttr, layout: EdgeLayout, ) -> Tuple[Tensor, Tensor, OptTensor]: - print(attr) from_tuple = self.get_edge_index(attr) if layout == EdgeLayout.COO: diff --git a/torch_geometric/data/hetero_data.py b/torch_geometric/data/hetero_data.py index 1092a0d91b57..1cdf571ae088 100644 --- a/torch_geometric/data/hetero_data.py +++ b/torch_geometric/data/hetero_data.py @@ -755,7 +755,11 @@ def _put_edge_index(self, edge_index: EdgeTensorType, setattr(self[edge_attr.edge_type], attr_name, attr_val) # Set edge attributes: - setattr(self[edge_attr.edge_type], f'{attr_name}_edge_attr', edge_attr) + if not hasattr(self[edge_attr.edge_type], '_edge_attrs'): + self[edge_attr.edge_type]._edge_attrs = {} + + self[edge_attr.edge_type]._edge_attrs[ + edge_attr.layout.value] = edge_attr key = self._to_canonical(edge_attr.edge_type) src, _, dst = key @@ -784,14 +788,12 @@ def get_all_edge_attrs(self) -> List[EdgeAttr]: indices stored in `HeteroData` and their layouts.""" out = [] for edge_type, edge_store in self.edge_items(): - for layout, attr_name in EDGE_LAYOUT_TO_ATTR_NAME.items(): - if attr_name in edge_store: - attr_val = getattr(self[edge_type], - f'{attr_name}_edge_attr') - out.append( - EdgeAttr(edge_type=edge_type, layout=layout, - is_sorted=attr_val.is_sorted, - size=self[edge_type].size())) + if not hasattr(self[edge_type], '_edge_attrs'): + continue + edge_attrs = self[edge_type]._edge_attrs.values() + for attr in edge_attrs: + attr.size = self[edge_type].size() + out.extend(edge_attrs) return out