cugraph_pyg.loader.neighbor_loader.NeighborLoader#

class cugraph_pyg.loader.neighbor_loader.NeighborLoader(data: Union[torch_geometric.data.Data, torch_geometric.data.HeteroData, Tuple[torch_geometric.data.FeatureStore, torch_geometric.data.GraphStore]], num_neighbors: Union[List[int], Dict[torch_geometric.typing.EdgeType, List[int]]], input_nodes: torch_geometric.typing.InputNodes = None, input_time: torch_geometric.typing.OptTensor = None, replace: bool = False, subgraph_type: Union[torch_geometric.typing.SubgraphType, str] = 'directional', disjoint: bool = False, temporal_strategy: str = 'uniform', time_attr: Optional[str] = None, weight_attr: Optional[str] = None, transform: Optional[Callable] = None, transform_sampler_output: Optional[Callable] = None, is_sorted: bool = False, filter_per_worker: Optional[bool] = None, neighbor_sampler: Optional[torch_geometric.sampler.NeighborSampler] = None, directed: bool = True, batch_size: int = 16, directory: str = None, batches_per_partition=256, format: str = 'parquet', compression: Optional[str] = None, local_seeds_per_call: Optional[int] = None, **kwargs)[source]#

Node loader that implements the neighbor sampling algorithm used in GraphSAGE.

Duck-typed version of torch_geometric.loader.NeighborLoader

__init__(data: Union[torch_geometric.data.Data, torch_geometric.data.HeteroData, Tuple[torch_geometric.data.FeatureStore, torch_geometric.data.GraphStore]], num_neighbors: Union[List[int], Dict[torch_geometric.typing.EdgeType, List[int]]], input_nodes: torch_geometric.typing.InputNodes = None, input_time: torch_geometric.typing.OptTensor = None, replace: bool = False, subgraph_type: Union[torch_geometric.typing.SubgraphType, str] = 'directional', disjoint: bool = False, temporal_strategy: str = 'uniform', time_attr: Optional[str] = None, weight_attr: Optional[str] = None, transform: Optional[Callable] = None, transform_sampler_output: Optional[Callable] = None, is_sorted: bool = False, filter_per_worker: Optional[bool] = None, neighbor_sampler: Optional[torch_geometric.sampler.NeighborSampler] = None, directed: bool = True, batch_size: int = 16, directory: str = None, batches_per_partition=256, format: str = 'parquet', compression: Optional[str] = None, local_seeds_per_call: Optional[int] = None, **kwargs)[source]#
data: Data, HeteroData, or Tuple[FeatureStore, GraphStore]

See torch_geometric.loader.NeighborLoader.

num_neighbors: List[int] or Dict[EdgeType, List[int]]

Fanout values. See torch_geometric.loader.NeighborLoader.

input_nodes: InputNodes

Input nodes for sampling. See torch_geometric.loader.NeighborLoader.

input_time: OptTensor (optional)

See torch_geometric.loader.NeighborLoader.

replace: bool (optional, default=False)

Whether to sample with replacement. See torch_geometric.loader.NeighborLoader.

subgraph_type: Union[SubgraphType, str] (optional, default='directional')

The type of subgraph to return. Currently only ‘directional’ is supported. See torch_geometric.loader.NeighborLoader.

disjoint: bool (optional, default=False)

Whether to perform disjoint sampling. Currently unsupported. See torch_geometric.loader.NeighborLoader.

temporal_strategy: str (optional, default='uniform')

Currently only ‘uniform’ is supported. See torch_geometric.loader.NeighborLoader.

time_attr: str (optional, default=None)

Used for temporal sampling. See torch_geometric.loader.NeighborLoader.

weight_attr: str (optional, default=None)

Used for biased sampling. See torch_geometric.loader.NeighborLoader.

transform: Callable (optional, default=None)

See torch_geometric.loader.NeighborLoader.

transform_sampler_output: Callable (optional, default=None)

See torch_geometric.loader.NeighborLoader.

is_sorted: bool (optional, default=False)

Ignored by cuGraph. See torch_geometric.loader.NeighborLoader.

filter_per_worker: bool (optional, default=None)

Currently ignored by cuGraph, but this may change once in-memory sampling is implemented. See torch_geometric.loader.NeighborLoader.

neighbor_sampler: torch_geometric.sampler.NeighborSampler (optional, default=None)

Not supported by cuGraph. See torch_geometric.loader.NeighborLoader.

directed: bool (optional, default=True)

Deprecated. See torch_geometric.loader.NeighborLoader.

batch_size: int (optional, default=16)

The number of input nodes per output minibatch. See torch.utils.data.DataLoader.

directory: str (optional, default=None)

The directory where samples will be temporarily stored. It is recommended that this be set by the user. Setting it to a tempfile.TemporaryDirectory with a context manager is usually a good option, but depending on the filesystem, you may want to choose an alternative location with fast I/O instead. If not set, this will create a TemporaryDirectory that will persist until this object is garbage collected. See cugraph.gnn.DistSampleWriter.

batches_per_partition: int (optional, default=256)

The number of batches per partition if writing samples to disk. Manually tuning this parameter is not recommended but reducing it may help conserve GPU memory. See cugraph.gnn.DistSampleWriter.

format: str (optional, default='parquet')

If writing samples to disk, they will be written in this file format. See cugraph.gnn.DistSampleWriter.

compression: str (optional, default=None)

The compression type to use if writing samples to disk. If not provided, it is automatically chosen.

local_seeds_per_call: int (optional, default=None)

The number of seeds to process within a single sampling call. Manually tuning this parameter is not recommended but reducing it may conserve GPU memory. The total number of seeds processed per sampling call is equal to the sum of this parameter across all workers. If not provided, it will be automatically calculated. See cugraph.gnn.DistSampler.

**kwargs

Other keyword arguments passed to the superclass.

Methods

__init__(data, num_neighbors[, input_nodes, ...])

data: Data, HeteroData, or Tuple[FeatureStore, GraphStore]