libwholegraph API doc#
Doxygen WholeGraph C API documentation#
For Doxygen documentation, please refer to the Doxygen Documentation
WholeGraph C API documentation#
Library Level APIs#
-
enum wholememory_error_code_t#
WholeMemory Error Code definition.
Defines the error codes of the WholeMemory library.
Values:
-
enumerator WHOLEMEMORY_SUCCESS#
success
-
enumerator WHOLEMEMORY_UNKNOW_ERROR#
unknown error
-
enumerator WHOLEMEMORY_NOT_IMPLEMENTED#
method is not implemented
-
enumerator WHOLEMEMORY_LOGIC_ERROR#
logic error
-
enumerator WHOLEMEMORY_CUDA_ERROR#
CUDA error
-
enumerator WHOLEMEMORY_COMMUNICATION_ERROR#
communication error
-
enumerator WHOLEMEMORY_INVALID_INPUT#
input is invalid, e.g. nullptr
-
enumerator WHOLEMEMORY_INVALID_VALUE#
input value is invalid
-
enumerator WHOLEMEMORY_OUT_OF_MEMORY#
out of memory
-
enumerator WHOLEMEMORY_NOT_SUPPORTED#
not supported
-
enumerator WHOLEMEMORY_SYSTEM_ERROR#
system error
-
wholememory_error_code_t wholememory_init(unsigned int flags, LogLevel log_level = LEVEL_INFO)#
Initialize WholeMemory library
- Parameters:
flags – : reserved, should be 0
log_level – : wholememory log level, the default level is “info”
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_finalize()#
Finalize WholeMemory library
- Returns:
: wholememory_error_code_t
-
int fork_get_device_count()#
Fork a new process and get the device count. Should be called before any other CUDA calls
- Returns:
: CUDA device count, -1 on error
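A minimal lifecycle sketch tying these library-level calls together (assuming the header path wholememory/wholememory.h; error handling omitted)::

    #include <wholememory/wholememory.h>

    int main() {
      // Fork a helper process to count devices, so the CUDA runtime is not
      // initialized in this process before wholememory_init().
      int dev_count = fork_get_device_count();
      if (dev_count <= 0) return 1;

      wholememory_init(0);  // flags is reserved and must be 0

      // ... create communicators, allocate WholeMemory, run ops ...

      wholememory_finalize();
      return 0;
    }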
WholeMemory Communicator APIs#
-
typedef struct wholememory_comm_ *wholememory_comm_t#
Opaque handle to communicator.
-
struct wholememory_unique_id_t#
Unique ID for WholeMemory Communicators.
An opaque handle to WholeMemory Communicators, exposed as a char array. The underlying implementation may be ncclUniqueId_t
-
wholememory_error_code_t wholememory_create_unique_id(wholememory_unique_id_t *unique_id)#
Create UniqueID for WholeMemory Communicator
- Parameters:
unique_id – : returned UniqueID
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_create_communicator(wholememory_comm_t *comm, wholememory_unique_id_t unique_id, int rank, int size)#
Create WholeMemory Communicator
- Parameters:
comm – : returned WholeMemory Communicator
unique_id – : UniqueID
rank – : rank of this process.
size – : number of processes in this Communicator
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_destroy_communicator(wholememory_comm_t comm)#
Destroy WholeMemory Communicator
- Parameters:
comm – : WholeMemory Communicator to destroy
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_communicator_get_rank(int *rank, wholememory_comm_t comm)#
Get the rank of current process in the WholeMemory Communicator
- Parameters:
rank – : returned rank
comm – : WholeMemory Communicator
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_communicator_get_size(int *size, wholememory_comm_t comm)#
Get the size of WholeMemory Communicator
- Parameters:
size – : returned size
comm – : WholeMemory Communicator
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_communicator_barrier(wholememory_comm_t comm)#
Barrier on WholeMemory Communicator
- Parameters:
comm – : WholeMemory Communicator
- Returns:
: wholememory_error_code_t
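A typical bootstrap has rank 0 create the unique ID and distribute it out of band; in the sketch below, app_broadcast is a hypothetical helper standing in for whatever transport the application uses (e.g. MPI_Bcast), and rank and world_size are assumed to be known::

    wholememory_unique_id_t unique_id;
    if (rank == 0) {
      wholememory_create_unique_id(&unique_id);
    }
    // Hypothetical helper: share the ID with every process out of band.
    app_broadcast(&unique_id, sizeof(unique_id), /*root=*/0);

    wholememory_comm_t comm;
    wholememory_create_communicator(&comm, unique_id, rank, world_size);

    int comm_rank = -1, comm_size = -1;
    wholememory_communicator_get_rank(&comm_rank, comm);
    wholememory_communicator_get_size(&comm_size, comm);
    wholememory_communicator_barrier(comm);

    // ... allocate and use WholeMemory ...
    wholememory_destroy_communicator(comm);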
WholeMemoryHandle APIs#
-
enum wholememory_memory_type_t#
Memory Type of WholeMemory.
Memory Type is the Memory Address Mapping Type of WholeMemory
Values:
-
enumerator WHOLEMEMORY_MT_NONE#
Not defined.
-
enumerator WHOLEMEMORY_MT_CONTINUOUS#
Memory from all ranks is mapped into a continuous address space
-
enumerator WHOLEMEMORY_MT_CHUNKED#
Memory from all ranks is mapped into a chunked address space
-
enumerator WHOLEMEMORY_MT_DISTRIBUTED#
Memory from other ranks is not mapped.
-
enum wholememory_memory_location_t#
Memory Location of WholeMemory.
Memory Location of WholeMemory can be host or device.
Values:
-
enumerator WHOLEMEMORY_ML_NONE#
Not defined
-
enumerator WHOLEMEMORY_ML_DEVICE#
Device Memory
-
enumerator WHOLEMEMORY_ML_HOST#
Host Memory
-
typedef struct wholememory_handle_ *wholememory_handle_t#
Opaque handle to WholeMemory.
-
struct wholememory_gref_t#
Global reference of a WholeMemory object.
A global reference is for Continuous or Chunked WholeMemory types; in these types, each rank can directly access all memory from all ranks, and the global reference is used to perform this direct access.
-
wholememory_error_code_t wholememory_malloc(wholememory_handle_t *wholememory_handle_ptr, size_t total_size, wholememory_comm_t comm, wholememory_memory_type_t memory_type, wholememory_memory_location_t memory_location, size_t data_granularity)#
Malloc WholeMemory
- Parameters:
wholememory_handle_ptr – : returned WholeMemory Handle
total_size – : total allocated size in bytes.
comm – : WholeMemory Communicator
memory_type – : WholeMemory type
memory_location – : memory location, host or device
data_granularity – : granularity size of data, which is guaranteed not to be partitioned.
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_free(wholememory_handle_t wholememory_handle)#
Free allocated WholeMemory Handle
- Parameters:
wholememory_handle – : WholeMemory Handle to free
- Returns:
: wholememory_error_code_t
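For example, a 1 GiB continuous, device-resident allocation whose per-rank partitions never split a 256-byte record might look like this (a sketch; comm is an existing WholeMemory Communicator)::

    wholememory_handle_t handle;
    wholememory_error_code_t err =
        wholememory_malloc(&handle, 1ULL << 30, comm,
                           WHOLEMEMORY_MT_CONTINUOUS,
                           WHOLEMEMORY_ML_DEVICE,
                           /*data_granularity=*/256);
    if (err == WHOLEMEMORY_SUCCESS) {
      // ... use the allocation ...
      wholememory_free(handle);
    }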
-
wholememory_error_code_t wholememory_get_communicator(wholememory_comm_t *comm, wholememory_handle_t wholememory_handle)#
Get underlying WholeMemory Communicator from WholeMemory Handle
- Parameters:
comm – : returned WholeMemory Communicator
wholememory_handle – : WholeMemory Handle
- Returns:
: wholememory_error_code_t
-
wholememory_memory_type_t wholememory_get_memory_type(wholememory_handle_t wholememory_handle)#
Get WholeMemory Type
- Parameters:
wholememory_handle – : WholeMemory Handle
- Returns:
: WholeMemory Type
-
wholememory_memory_location_t wholememory_get_memory_location(wholememory_handle_t wholememory_handle)#
Get WholeMemory Location
- Parameters:
wholememory_handle – : WholeMemory Handle
- Returns:
: WholeMemory Location
-
size_t wholememory_get_total_size(wholememory_handle_t wholememory_handle)#
Get total size of WholeMemory
- Parameters:
wholememory_handle – : WholeMemory Handle
- Returns:
: total size
-
size_t wholememory_get_data_granularity(wholememory_handle_t wholememory_handle)#
Get data granularity of WholeMemory Handle
- Parameters:
wholememory_handle – : WholeMemory Handle
- Returns:
: data granularity size
-
wholememory_error_code_t wholememory_get_local_memory(void **local_ptr, size_t *local_size, size_t *local_offset, wholememory_handle_t wholememory_handle)#
Get the local memory of the current rank from a WholeMemory Handle. Local memory can be accessed directly, but it does not have to be on the local GPU.
- Parameters:
local_ptr – : returned local memory pointer
local_size – : returned local memory size
local_offset – : returned local memory offset from WholeMemory
wholememory_handle – : WholeMemory Handle
- Returns:
: wholememory_error_code_t
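A sketch of querying this rank's partition (handle is an existing WholeMemory Handle)::

    void*  local_ptr    = NULL;
    size_t local_size   = 0;
    size_t local_offset = 0;
    wholememory_get_local_memory(&local_ptr, &local_size, &local_offset, handle);
    // local_ptr is directly dereferenceable and covers bytes
    // [local_offset, local_offset + local_size) of the whole allocation,
    // though it may reside in host memory or on another GPU.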
-
wholememory_error_code_t wholememory_get_rank_memory(void **rank_memory_ptr, size_t *rank_memory_size, size_t *rank_memory_offset, int rank, wholememory_handle_t wholememory_handle)#
Get local memory of specified rank from WholeMemory Handle
- Parameters:
rank_memory_ptr – : returned local memory pointer of specified rank
rank_memory_size – : returned local memory size of specified rank
rank_memory_offset – : returned local memory offset of specified rank from WholeMemory
rank – : rank specified
wholememory_handle – : WholeMemory Handle
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_get_global_pointer(void **global_ptr, wholememory_handle_t wholememory_handle)#
Get global memory pointer from WholeMemory Handle. Only the Continuous memory type or Chunked host memory has a global pointer.
- Parameters:
global_ptr – : returned pointer of WholeMemory
wholememory_handle – : WholeMemory Handle
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_get_global_reference(wholememory_gref_t *wholememory_gref, wholememory_handle_t wholememory_handle)#
Get global reference from WholeMemory Handle. A WholeMemory global reference is a common data structure for Continuous and Chunked memory types.
- Parameters:
wholememory_gref – : returned WholeMemory global reference
wholememory_handle – : WholeMemory Handle
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_determine_partition_plan(size_t *size_per_rank, size_t total_size, size_t data_granularity, int world_size)#
Get the partition plan WholeMemory will use
- Parameters:
size_per_rank – : returned size per rank
total_size – : total size
data_granularity – : data granularity
world_size – : communicator world size
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_determine_entry_partition_plan(size_t *entry_per_rank, size_t total_entry_count, int world_size)#
Get the partition plan WholeMemory will use based on entry count. An entry is one unit of data granularity
- Parameters:
entry_per_rank – : returned entry count per rank
total_entry_count – : total entry count
world_size – : communicator world size
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_get_partition_plan(size_t *size_per_rank, wholememory_handle_t wholememory_handle)#
Get the partition plan used in WholeMemory Handle
- Parameters:
size_per_rank – : returned size per rank
wholememory_handle – : WholeMemory Handle
- Returns:
: wholememory_error_code_t
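For example, to preview how an allocation would be split across ranks before creating it (a sketch; the sizes are illustrative)::

    // By bytes: 1 GiB split across 8 ranks with 256-byte granularity.
    size_t size_per_rank = 0;
    wholememory_determine_partition_plan(&size_per_rank,
                                         /*total_size=*/1ULL << 30,
                                         /*data_granularity=*/256,
                                         /*world_size=*/8);

    // By entries: one entry is one data-granularity unit.
    size_t entry_per_rank = 0;
    wholememory_determine_entry_partition_plan(&entry_per_rank,
                                               /*total_entry_count=*/4194304,
                                               /*world_size=*/8);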
-
wholememory_error_code_t wholememory_load_from_file(wholememory_handle_t wholememory_handle, size_t memory_offset, size_t memory_entry_size, size_t file_entry_size, const char **file_names, int file_count, int round_robin_size)#
Load WholeMemory from binary files; must be called by all ranks together
- Parameters:
wholememory_handle – : WholeMemory Handle
memory_offset – : load to memory offset
memory_entry_size – : entry size of WholeMemory
file_entry_size – : entry size in file, should be less than or equal to memory_entry_size
file_names – : file names, all binary files will be logically concatenated and loaded.
file_count – : number of files.
round_robin_size – : continuous embedding number for a rank under round-robin shard mode
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_store_to_file(wholememory_handle_t wholememory_handle, size_t memory_offset, size_t memory_entry_stride, size_t file_entry_size, const char *local_file_name)#
Store local WholeMemory to a file; this should be called by all ranks, each with a different local_file_name.
- Parameters:
wholememory_handle – : WholeMemory Handle
memory_offset – : memory offset to store
memory_entry_stride – : entry stride (size) of WholeMemory
file_entry_size – : entry size in file, should be less than or equal to memory_entry_stride
local_file_name – : local file to store to
- Returns:
: wholememory_error_code_t
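A sketch of a matching load/store pair (the file names, 512-byte entry size, and rank variable are illustrative; every rank participates in the load, and each rank stores its own partition). snprintf needs stdio.h::

    // Load: the part files are logically concatenated.
    const char* file_names[] = {"emb.part0.bin", "emb.part1.bin"};
    wholememory_load_from_file(handle, /*memory_offset=*/0,
                               /*memory_entry_size=*/512,
                               /*file_entry_size=*/512,
                               file_names, /*file_count=*/2,
                               /*round_robin_size=*/0);

    // Store: one output file per rank.
    char local_name[64];
    snprintf(local_name, sizeof(local_name), "emb.rank%d.bin", rank);
    wholememory_store_to_file(handle, /*memory_offset=*/0,
                              /*memory_entry_stride=*/512,
                              /*file_entry_size=*/512, local_name);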
WholeMemoryTensor APIs#
-
enum wholememory_dtype_t#
defines WholeMemory data type for tensors
Values:
-
enumerator WHOLEMEMORY_DT_UNKNOWN#
Unknown type
-
enumerator WHOLEMEMORY_DT_FLOAT#
32-bit float type
-
enumerator WHOLEMEMORY_DT_HALF#
16-bit half float type
-
enumerator WHOLEMEMORY_DT_DOUBLE#
64-bit double type
-
enumerator WHOLEMEMORY_DT_BF16#
16-bit bfloat type
-
enumerator WHOLEMEMORY_DT_INT#
32-bit signed integer type
-
enumerator WHOLEMEMORY_DT_INT64#
64-bit signed integer type
-
enumerator WHOLEMEMORY_DT_INT16#
16-bit signed integer type
-
enumerator WHOLEMEMORY_DT_INT8#
8-bit signed integer type
-
enumerator WHOLEMEMORY_DT_COUNT#
total count of types
-
struct wholememory_array_description_t#
wrapper for array in WholeMemory
-
struct wholememory_matrix_description_t#
wrapper for matrix in WholeMemory
-
struct wholememory_tensor_description_t#
Tensor description in WholeMemory, dimension 0 is the slowest-changing dimension.
-
typedef struct wholememory_tensor_ *wholememory_tensor_t#
Opaque handle to WholeMemoryTensor.
-
size_t wholememory_dtype_get_element_size(wholememory_dtype_t dtype)#
Get element size of wholememory_dtype_t
- Parameters:
dtype – : wholememory_dtype_t
- Returns:
: element size of dtype, -1 on invalid dtype.
-
bool wholememory_dtype_is_floating_number(wholememory_dtype_t dtype)#
Check if dtype is floating number
- Parameters:
dtype – : wholememory_dtype_t
- Returns:
: True if dtype is WHOLEMEMORY_DT_FLOAT, WHOLEMEMORY_DT_HALF, WHOLEMEMORY_DT_DOUBLE or WHOLEMEMORY_DT_BF16. False otherwise.
-
bool wholememory_dtype_is_integer_number(wholememory_dtype_t dtype)#
Check if dtype is integer number
- Parameters:
dtype – : wholememory_dtype_t
- Returns:
: True if dtype is WHOLEMEMORY_DT_INT, WHOLEMEMORY_DT_INT64, WHOLEMEMORY_DT_INT16 or WHOLEMEMORY_DT_INT8, False otherwise.
-
wholememory_array_description_t wholememory_create_array_desc(int64_t size, int64_t storage_offset, wholememory_dtype_t dtype)#
Create wholememory_array_description_t object
- Parameters:
size – : array size in number of elements
storage_offset – : storage offset in number of elements
dtype – : data type of array elements
- Returns:
created wholememory_array_description_t
-
wholememory_matrix_description_t wholememory_create_matrix_desc(int64_t sizes[2], int64_t stride, int64_t storage_offset, wholememory_dtype_t dtype)#
Create wholememory_matrix_description_t object
- Parameters:
sizes – : matrix sizes array, counted in number of elements, sizes[1] changes fastest.
stride – : stride of the first dimension (the slower-changing dimension), counted in number of elements
storage_offset – : storage offset in number of elements
dtype – : data type of matrix elements
- Returns:
created wholememory_matrix_description_t
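For instance, describing a 1,000,000-element float array, and a row-major 1,000,000 x 128 float matrix with no row padding (stride equals the row width)::

    wholememory_array_description_t arr_desc =
        wholememory_create_array_desc(1000000, /*storage_offset=*/0,
                                      WHOLEMEMORY_DT_FLOAT);

    int64_t sizes[2] = {1000000, 128};
    wholememory_matrix_description_t mat_desc =
        wholememory_create_matrix_desc(sizes, /*stride=*/128,
                                       /*storage_offset=*/0,
                                       WHOLEMEMORY_DT_FLOAT);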
-
void wholememory_initialize_tensor_desc(wholememory_tensor_description_t *p_tensor_description)#
Initialize wholememory_tensor_description_t: set sizes and strides to all ones, storage_offset to 0, dtype to WHOLEMEMORY_DT_UNKNOWN, and dim to 0.
- Parameters:
p_tensor_description – : pointer to wholememory_tensor_description_t.
-
void wholememory_copy_array_desc_to_matrix(wholememory_matrix_description_t *p_matrix_description, wholememory_array_description_t *p_array_description)#
Copy array description to matrix description
- Parameters:
p_matrix_description – : pointer to wholememory_matrix_description_t.
p_array_description – : pointer to wholememory_array_description_t.
-
void wholememory_copy_array_desc_to_tensor(wholememory_tensor_description_t *p_tensor_description, wholememory_array_description_t *p_array_description)#
Copy array description to tensor description
- Parameters:
p_tensor_description – : pointer to wholememory_tensor_description_t.
p_array_description – : pointer to wholememory_array_description_t.
-
void wholememory_copy_matrix_desc_to_tensor(wholememory_tensor_description_t *p_tensor_description, wholememory_matrix_description_t *p_matrix_description)#
Copy matrix description to tensor description
- Parameters:
p_tensor_description – : pointer to wholememory_tensor_description_t.
p_matrix_description – : pointer to wholememory_matrix_description_t.
-
bool wholememory_convert_tensor_desc_to_array(wholememory_array_description_t *p_array_description, wholememory_tensor_description_t *p_tensor_description)#
Convert tensor description to array description
- Parameters:
p_array_description – : pointer to wholememory_array_description_t.
p_tensor_description – : pointer to wholememory_tensor_description_t.
- Returns:
: Return true if convertible else false.
-
bool wholememory_convert_tensor_desc_to_matrix(wholememory_matrix_description_t *p_matrix_description, wholememory_tensor_description_t *p_tensor_description)#
Convert tensor description to matrix description
- Parameters:
p_matrix_description – : pointer to wholememory_matrix_description_t.
p_tensor_description – : pointer to wholememory_tensor_description_t.
- Returns:
: Return true if convertible else false.
-
int64_t wholememory_get_memory_element_count_from_array(wholememory_array_description_t *p_array_description)#
Get total element count from array description.
- Parameters:
p_array_description – : pointer to wholememory_array_description_t.
- Returns:
: Return element count.
-
int64_t wholememory_get_memory_size_from_array(wholememory_array_description_t *p_array_description)#
Get total memory size from array description.
- Parameters:
p_array_description – : pointer to wholememory_array_description_t.
- Returns:
: Return memory size.
-
int64_t wholememory_get_memory_element_count_from_matrix(wholememory_matrix_description_t *p_matrix_description)#
Get total element count from matrix description.
- Parameters:
p_matrix_description – : pointer to wholememory_matrix_description_t.
- Returns:
: Return element count.
-
int64_t wholememory_get_memory_size_from_matrix(wholememory_matrix_description_t *p_matrix_description)#
Get total memory size from matrix description.
- Parameters:
p_matrix_description – : pointer to wholememory_matrix_description_t.
- Returns:
: Return memory size.
-
int64_t wholememory_get_memory_element_count_from_tensor(wholememory_tensor_description_t *p_tensor_description)#
Get total element count from tensor description.
- Parameters:
p_tensor_description – : pointer to wholememory_tensor_description_t.
- Returns:
: Return element count.
-
int64_t wholememory_get_memory_size_from_tensor(wholememory_tensor_description_t *p_tensor_description)#
Get total memory size from tensor description.
- Parameters:
p_tensor_description – : pointer to wholememory_tensor_description_t.
- Returns:
: Return memory size.
-
bool wholememory_unsqueeze_tensor(wholememory_tensor_description_t *p_tensor_description, int dim)#
Unsqueeze tensor
- Parameters:
p_tensor_description – : pointer to wholememory_tensor_description_t
dim – : unsqueeze at which dim
- Returns:
: true if success else false
-
wholememory_error_code_t wholememory_create_tensor(wholememory_tensor_t *wholememory_tensor, wholememory_tensor_description_t *tensor_description, wholememory_comm_t comm, wholememory_memory_type_t memory_type, wholememory_memory_location_t memory_location)#
Create WholeMemory Tensor
- Parameters:
wholememory_tensor – : returned WholeMemory Tensor handle
tensor_description – : description of the WholeMemory Tensor, should be 1-D or 2-D continuous tensor without offset.
comm – : WholeMemory Communicator
memory_type – : Memory Type of the underlying WholeMemory
memory_location – : Memory Location of the underlying WholeMemory
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_destroy_tensor(wholememory_tensor_t wholememory_tensor)#
Destroy WholeMemory Tensor
- Parameters:
wholememory_tensor – : WholeMemory Tensor to destroy
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_make_tensor_from_pointer(wholememory_tensor_t *wholememory_tensor, void *storage_ptr, wholememory_tensor_description_t *tensor_description)#
Make WholeMemory Tensor from local memory
- Parameters:
wholememory_tensor – : returned WholeMemory Tensor handle
storage_ptr – : pointer to underlying storage memory. Note: the storage pointer may not be the same as the data pointer.
tensor_description – : description of the WholeMemory Tensor, should be 1-D or 2-D
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_make_tensor_from_handle(wholememory_tensor_t *wholememory_tensor, wholememory_handle_t wholememory_handle, wholememory_tensor_description_t *tensor_description)#
Make WholeMemory Tensor from WholeMemory Handle
- Parameters:
wholememory_tensor – : returned WholeMemory Tensor handle
wholememory_handle – : WholeMemory Handle
tensor_description – : description of the WholeMemory Tensor, should be 1-D or 2-D
- Returns:
: wholememory_error_code_t
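A sketch of wrapping a caller-owned buffer as a 1-D int64 tensor, e.g. to pass indices into gather/scatter ops (indices_buf and num_indices are assumed to exist; the field names follow wholememory_tensor_description_t as initialized by wholememory_initialize_tensor_desc)::

    wholememory_tensor_description_t desc;
    wholememory_initialize_tensor_desc(&desc);
    desc.dim        = 1;
    desc.sizes[0]   = num_indices;
    desc.strides[0] = 1;
    desc.dtype      = WHOLEMEMORY_DT_INT64;

    wholememory_tensor_t indices_tensor;
    wholememory_make_tensor_from_pointer(&indices_tensor, indices_buf, &desc);
    // ... use the tensor, then ...
    wholememory_destroy_tensor(indices_tensor);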
-
bool wholememory_tensor_has_handle(wholememory_tensor_t wholememory_tensor)#
Check if the WholeMemory Tensor has a WholeMemory Handle; a WholeMemory Tensor created by wholememory_make_tensor_from_pointer has no Handle
- Parameters:
wholememory_tensor – : WholeMemory Tensor
- Returns:
: true if it has a WholeMemory Handle, false otherwise
-
wholememory_handle_t wholememory_tensor_get_memory_handle(wholememory_tensor_t wholememory_tensor)#
Get WholeMemory handle from WholeMemory Tensor
- Parameters:
wholememory_tensor – : WholeMemory Tensor
- Returns:
: WholeMemory handle
-
wholememory_tensor_description_t *wholememory_tensor_get_tensor_description(wholememory_tensor_t wholememory_tensor)#
Get tensor description from WholeMemory Tensor
- Parameters:
wholememory_tensor – : WholeMemory Tensor
- Returns:
: pointer to the underlying wholememory_tensor_description_t
-
wholememory_error_code_t wholememory_tensor_get_global_reference(wholememory_tensor_t wholememory_tensor, wholememory_gref_t *wholememory_gref)#
Get global reference from WholeMemory Tensor
- Parameters:
wholememory_tensor – : WholeMemory Tensor
wholememory_gref – : global reference
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_tensor_map_local_tensor(wholememory_tensor_t wholememory_tensor, wholememory_tensor_t *local_tensor)#
Map the local tensor of a WholeMemory Tensor. Only 1-D and 2-D tensors with a WholeMemory Handle are supported. For a 1-D tensor, storage_offset must be 0; for a 2-D tensor, storage_offset + sizes[1] must be <= stride[0].
- Parameters:
wholememory_tensor – : WholeMemory Tensor.
local_tensor – : returned local tensor, which needs to be destroyed by the caller.
- Returns:
: wholememory_error_code_t
-
void *wholememory_tensor_get_data_pointer(wholememory_tensor_t wholememory_tensor)#
Get data pointer from WholeMemory Tensor
- Parameters:
wholememory_tensor – : WholeMemory Tensor
- Returns:
: pointer to the first data element, for CONTINUOUS WholeMemory or tensors not backed by WholeMemory.
-
size_t wholememory_tensor_get_entry_per_partition(wholememory_tensor_t wholememory_tensor)#
Get entry count per rank of a WholeMemory Tensor
- Parameters:
wholememory_tensor – : WholeMemory Tensor
- Returns:
: entry count per rank
-
wholememory_error_code_t wholememory_tensor_get_subtensor(wholememory_tensor_t wholememory_tensor, int64_t *starts, int64_t *ends, wholememory_tensor_t *sub_wholememory_tensor)#
Get sub tensor of a WholeMemory Tensor
- Parameters:
wholememory_tensor – : WholeMemory Tensor
starts – : starts of each dim, length should be the dim of wholememory_tensor.
ends – : ends of each dim, length should be the dim of wholememory_tensor
sub_wholememory_tensor – : pointer to returned sub tensor
- Returns:
: wholememory_error_code_t
-
wholememory_tensor_t wholememory_tensor_get_root(wholememory_tensor_t wholememory_tensor)#
Get root tensor of a WholeMemory Tensor, root means it is not a sub tensor of any WholeMemory Tensor.
- Parameters:
wholememory_tensor – : WholeMemory Tensor
- Returns:
: the root of the current WholeMemory Tensor, which may be the same as wholememory_tensor.
Ops on WholeMemory Tensors#
-
wholememory_error_code_t wholememory_gather(wholememory_tensor_t wholememory_tensor, wholememory_tensor_t indices_tensor, wholememory_tensor_t output_tensor, wholememory_env_func_t *p_env_fns, void *stream, int gather_sms = -1)#
Gather Op
- Parameters:
wholememory_tensor – : WholeMemory Tensor of embedding table.
indices_tensor – : indices to gather from, should NOT be a WholeMemory Tensor
output_tensor – : output tensor to gather to, should NOT be a WholeMemory Tensor
p_env_fns – : pointers to environment functions.
stream – : cudaStream_t to use.
gather_sms – : number of streaming multiprocessors to use in the gather kernel
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_scatter(wholememory_tensor_t input_tensor, wholememory_tensor_t indices_tensor, wholememory_tensor_t wholememory_tensor, wholememory_env_func_t *p_env_fns, void *stream, int scatter_sms = -1)#
Scatter Op
- Parameters:
input_tensor – : input tensor to scatter from, should NOT be a WholeMemory Tensor
indices_tensor – : indices to scatter to, should NOT be WholeMemory Tensor
wholememory_tensor – : WholeMemory Tensor of embedding table.
p_env_fns – : pointers to environment functions.
stream – : cudaStream_t to use.
scatter_sms – : number of streaming multiprocessors to use in the scatter kernel
- Returns:
: wholememory_error_code_t
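A gather sketch: the embedding table is a WholeMemory Tensor, while the indices and output are local tensors (e.g. made with wholememory_make_tensor_from_pointer); p_env_fns is assumed to be supplied by the surrounding integration layer, and the CUDA stream calls need cuda_runtime.h::

    cudaStream_t stream;
    cudaStreamCreate(&stream);
    wholememory_gather(embedding_wm_tensor, indices_tensor, output_tensor,
                       p_env_fns, (void*)stream);  // gather_sms defaults to -1
    cudaStreamSynchronize(stream);
    cudaStreamDestroy(stream);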
WholeTensorEmbedding APIs#
-
typedef struct wholememory_embedding_cache_policy_ *wholememory_embedding_cache_policy_t#
Opaque handle to WholeMemory Embedding Cache Policy.
-
typedef struct wholememory_embedding_optimizer_ *wholememory_embedding_optimizer_t#
Opaque handle to WholeMemory Embedding Optimizer.
-
typedef struct wholememory_embedding_ *wholememory_embedding_t#
Opaque handle to WholeMemory Embedding.
-
enum wholememory_access_type_t#
defines access type of WholeMemory Embedding
Values:
-
enumerator WHOLEMEMORY_AT_NONE#
Not defined
-
enumerator WHOLEMEMORY_AT_READONLY#
Only have readonly access to the WholeMemory
-
enumerator WHOLEMEMORY_AT_READWRITE#
May have write access to the WholeMemory
-
enum wholememory_optimizer_type_t#
defines optimizer type for WholeMemory Embedding
Values:
-
enumerator WHOLEMEMORY_OPT_NONE#
No optimizer needed
-
enumerator WHOLEMEMORY_OPT_SGD#
Use SGD optimizer
-
enumerator WHOLEMEMORY_OPT_LAZY_ADAM#
Use Lazy Adam optimizer
-
enumerator WHOLEMEMORY_OPT_RMSPROP#
Use RMSProp optimizer
-
enumerator WHOLEMEMORY_OPT_ADAGRAD#
Use AdaGrad optimizer
-
wholememory_error_code_t wholememory_create_embedding_optimizer(wholememory_embedding_optimizer_t *optimizer, wholememory_optimizer_type_t optimizer_type)#
Create Optimizer
- Parameters:
optimizer – : Returned wholememory_embedding_optimizer_t
optimizer_type – : Optimizer type
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_optimizer_set_parameter(wholememory_embedding_optimizer_t optimizer, const char *parameter_name, void *value)#
Set parameter for optimizer.
- Parameters:
optimizer – : Optimizer to set parameter
parameter_name – : parameter name
value – : parameter value
- Returns:
: wholememory_error_code_t
-
void wholememory_destroy_embedding_optimizer(wholememory_embedding_optimizer_t optimizer)#
Destroy optimizer
- Parameters:
optimizer – : optimizer to destroy.
-
wholememory_error_code_t wholememory_create_embedding_cache_policy(wholememory_embedding_cache_policy_t *cache_policy, wholememory_comm_t cache_level_comm, wholememory_memory_type_t memory_type, wholememory_memory_location_t memory_location, wholememory_access_type_t access_type, float cache_ratio)#
Create WholeMemory Embedding Cache Policy
- Parameters:
cache_policy – : Returned wholememory_embedding_cache_policy_t
cache_level_comm – : at which level to cache the full embedding. In most cases it should be the same as wholememory_embedding_t’s comm. If access_type is WHOLEMEMORY_AT_READONLY, it can differ to support multiple readonly caches. E.g. a multi-node WHOLEMEMORY_MT_DISTRIBUTED WHOLEMEMORY_AT_READONLY embedding can have an intra-node WHOLEMEMORY_MT_CHUNKED cache, or a multi-node WHOLEMEMORY_MT_DISTRIBUTED cache.
memory_type – : Memory Type of the underlying WholeMemory for cache
memory_location – : Memory Location of the underlying WholeMemory for cache
access_type – : ReadOnly or ReadWrite
cache_ratio – : suggested cache ratio, values should be in range [1.0 / 512, 1.0]
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_destroy_embedding_cache_policy(wholememory_embedding_cache_policy_t cache_policy)#
Destroy WholeMemory Embedding Cache Policy
- Parameters:
cache_policy – : WholeMemory Embedding Cache Policy to destroy.
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_create_embedding(wholememory_embedding_t *wholememory_embedding, wholememory_tensor_description_t *embedding_tensor_description, wholememory_comm_t comm, wholememory_memory_type_t memory_type, wholememory_memory_location_t memory_location, wholememory_embedding_cache_policy_t cache_policy, int user_defined_sms = -1, int round_robin_size = 0)#
Create WholeMemory Embedding
- Parameters:
wholememory_embedding – : Returned wholememory_embedding_t
embedding_tensor_description – : description of the embedding; sizes and dtype are used, stride and storage_offset are ignored. Must be a matrix.
comm – : WholeMemory Communicator
memory_type – : Memory Type of the underlying WholeMemory
memory_location – : Memory Location of the underlying WholeMemory
cache_policy – : cache policy for this embedding; pass nullptr to disable caching
user_defined_sms – : user-defined SM count for raw embedding gather/scatter
round_robin_size – : continuous embedding size in each rank under round-robin shard mode
- Returns:
: wholememory_error_code_t
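A sketch of a read-only cached embedding: the full table is multi-node distributed in host memory, with an intra-node chunked device cache holding roughly 6% of entries (intra_node_comm, global_comm, row_count, and embedding_dim are assumed to exist)::

    wholememory_embedding_cache_policy_t cache_policy;
    wholememory_create_embedding_cache_policy(&cache_policy, intra_node_comm,
                                              WHOLEMEMORY_MT_CHUNKED,
                                              WHOLEMEMORY_ML_DEVICE,
                                              WHOLEMEMORY_AT_READONLY,
                                              /*cache_ratio=*/0.0625f);

    wholememory_tensor_description_t desc;  // 2-D: [row_count, embedding_dim]
    wholememory_initialize_tensor_desc(&desc);
    desc.dim      = 2;
    desc.sizes[0] = row_count;
    desc.sizes[1] = embedding_dim;
    desc.dtype    = WHOLEMEMORY_DT_FLOAT;

    wholememory_embedding_t embedding;
    wholememory_create_embedding(&embedding, &desc, global_comm,
                                 WHOLEMEMORY_MT_DISTRIBUTED,
                                 WHOLEMEMORY_ML_HOST, cache_policy);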
-
wholememory_error_code_t wholememory_destroy_embedding(wholememory_embedding_t wholememory_embedding)#
Destroy WholeMemory Embedding
- Parameters:
wholememory_embedding – : WholeMemory Embedding to destroy
- Returns:
: wholememory_error_code_t
-
wholememory_tensor_t wholememory_embedding_get_embedding_tensor(wholememory_embedding_t wholememory_embedding)#
Get WholeMemory Tensor from WholeMemory Embedding.
- Parameters:
wholememory_embedding – : WholeMemory Embedding
- Returns:
: WholeMemory Tensor
-
wholememory_error_code_t wholememory_embedding_gather(wholememory_embedding_t wholememory_embedding, wholememory_tensor_t indices, wholememory_tensor_t output, bool adjust_cache, wholememory_env_func_t *p_env_fns, int64_t stream_int)#
Gather from WholeMemory Embedding
- Parameters:
wholememory_embedding – : WholeMemory Embedding
indices – : indices to gather
output – : output tensor
adjust_cache – : whether to adjust the cache during this gather
p_env_fns – : env fns
stream_int – : CUDA stream to use
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_embedding_gather_gradient_apply(wholememory_embedding_t wholememory_embedding, wholememory_tensor_t indices, wholememory_tensor_t grads, bool adjust_cache, float lr, wholememory_env_func_t *p_env_fns, int64_t stream_int)#
Gather backward for WholeMemory Embedding
- Parameters:
wholememory_embedding – : WholeMemory Embedding
indices – : indices to gather
grads – : gradient of output tensor
adjust_cache – : whether to adjust the cache during this gather
lr – : learning rate of current step.
p_env_fns – : env fns
stream_int – : CUDA stream to use
- Returns:
: wholememory_error_code_t
-
const char *const *wholememory_embedding_get_optimizer_state_names(wholememory_embedding_t wholememory_embedding)#
Get optimizer internal state names
- Parameters:
wholememory_embedding – : WholeMemory Embedding
- Returns:
: nullptr-terminated array of state names.
-
wholememory_tensor_t wholememory_embedding_get_optimizer_state(wholememory_embedding_t wholememory_embedding, const char *name)#
Get optimizer internal state
- Parameters:
wholememory_embedding – : WholeMemory Embedding
name – : state name
- Returns:
: internal state tensor, or nullptr if the name does not exist.
-
wholememory_error_code_t wholememory_embedding_writeback_cache(wholememory_embedding_t wholememory_embedding, int64_t stream_int)#
Write back all caches of a WholeMemory Embedding
- Parameters:
wholememory_embedding – : WholeMemory Embedding
stream_int – : CUDA stream to use.
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholememory_embedding_drop_all_cache(wholememory_embedding_t wholememory_embedding, int64_t stream_int)#
Drop all caches in a WholeMemory Embedding
- Parameters:
wholememory_embedding – : WholeMemory Embedding
stream_int – : CUDA stream to use.
- Returns:
: wholememory_error_code_t
Ops on graphs stored in WholeMemory#
-
wholememory_error_code_t wholegraph_csr_unweighted_sample_without_replacement(wholememory_tensor_t wm_csr_row_ptr_tensor, wholememory_tensor_t wm_csr_col_ptr_tensor, wholememory_tensor_t center_nodes_tensor, int max_sample_count, wholememory_tensor_t output_sample_offset_tensor, void *output_dest_memory_context, void *output_center_localid_memory_context, void *output_edge_gid_memory_context, unsigned long long random_seed, wholememory_env_func_t *p_env_fns, void *stream)#
Unweighted sample without replacement kernel op
- Parameters:
wm_csr_row_ptr_tensor – : Wholememory Tensor of graph csr_row_ptr
wm_csr_col_ptr_tensor – : Wholememory Tensor of graph csr_col_ptr
center_nodes_tensor – : local (non-WholeMemory) Tensor of center nodes to sample
max_sample_count – : maximum sample count
output_sample_offset_tensor – : pointer to output sample offset
output_dest_memory_context – : memory context to output dest nodes
output_center_localid_memory_context – : memory context to output center local id
output_edge_gid_memory_context – : memory context to output edge global id
random_seed – : random number generator seed
p_env_fns – : pointers to environment functions.
stream – : CUDA stream to use
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t wholegraph_csr_weighted_sample_without_replacement(wholememory_tensor_t wm_csr_row_ptr_tensor, wholememory_tensor_t wm_csr_col_ptr_tensor, wholememory_tensor_t wm_csr_weight_ptr_tensor, wholememory_tensor_t center_nodes_tensor, int max_sample_count, wholememory_tensor_t output_sample_offset_tensor, void *output_dest_memory_context, void *output_center_localid_memory_context, void *output_edge_gid_memory_context, unsigned long long random_seed, wholememory_env_func_t *p_env_fns, void *stream)#
Weighted sample without replacement kernel op
- Parameters:
wm_csr_row_ptr_tensor – : Wholememory Tensor of graph csr_row_ptr
wm_csr_col_ptr_tensor – : Wholememory Tensor of graph csr_col_ptr
wm_csr_weight_ptr_tensor – : Wholememory Tensor of graph edge weight
center_nodes_tensor – : local (non-WholeMemory) Tensor of center nodes to sample
max_sample_count – : maximum sample count
output_sample_offset_tensor – : pointer to output sample offset
output_dest_memory_context – : memory context to output dest nodes
output_center_localid_memory_context – : memory context to output center local id
output_edge_gid_memory_context – : memory context to output edge global id
random_seed – : random number generator seed
p_env_fns – : pointers to environment functions.
stream – : CUDA stream to use
- Returns:
: wholememory_error_code_t
Miscellaneous Ops for graph#
-
wholememory_error_code_t graph_append_unique(wholememory_tensor_t target_nodes_tensor, wholememory_tensor_t neighbor_nodes_tensor, void *output_unique_node_memory_context, wholememory_tensor_t output_neighbor_raw_to_unique_mapping_tensor, wholememory_env_func_t *p_env_fns, void *stream)#
Append Unique op
- Parameters:
target_nodes_tensor – : Tensor of target nodes
neighbor_nodes_tensor – : Tensor of neighbor nodes of the target nodes
output_unique_node_memory_context – : memory context to output unique nodes
output_neighbor_raw_to_unique_mapping_tensor – : Tensor mapping each raw neighbor node to its index among the unique nodes, optional output
p_env_fns – : pointers to environment functions.
stream – : CUDA stream to use
- Returns:
: wholememory_error_code_t
-
wholememory_error_code_t csr_add_self_loop(wholememory_tensor_t csr_row_ptr_tensor, wholememory_tensor_t csr_col_ptr_tensor, wholememory_tensor_t output_csr_row_ptr_tensor, wholememory_tensor_t output_csr_col_ptr_tensor, void *stream)#
CSR Add Self Loop Op
- Parameters:
csr_row_ptr_tensor – : Wholememory Tensor of local graph csr_row_ptr
csr_col_ptr_tensor – : Wholememory Tensor of csr_col_ptr
output_csr_row_ptr_tensor – : Wholememory Tensor of output_csr_row_ptr
output_csr_col_ptr_tensor – : Wholememory Tensor of output_csr_col_ptr
stream – : CUDA stream to use
- Returns:
: wholememory_error_code_t
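A sketch: all four tensors are local tensors wrapped with wholememory_make_tensor_from_pointer. Assuming one self-loop edge is added per node, the output row-pointer buffer needs node_count + 1 entries and the output column buffer needs nnz + node_count entries::

    csr_add_self_loop(csr_row_ptr_tensor, csr_col_ptr_tensor,
                      output_csr_row_ptr_tensor, output_csr_col_ptr_tensor,
                      (void*)stream);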