copying.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2022, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
21 #include <cudf/scalar/scalar.hpp>
24 #include <cudf/table/table.hpp>
25 #include <cudf/types.hpp>
26 
28 
29 #include <memory>
30 #include <vector>
31 
32 namespace cudf {
33 
// Policy to account for possible out-of-bounds indices.
// The underlying type is bool, so the enum has exactly these two states.
48 enum class out_of_bounds_policy : bool {
// Output values corresponding to out-of-bounds indices are null.
49  NULLIFY,
// Presumably no bounds checking is performed (not described in this listing; confirm).
50  DONT_CHECK
51 };
52 
// Gathers the specified rows (including null values) of a set of columns.
// source_table: table whose rows are gathered.
// gather_map: column view used as the gather map.
// bounds_policy: policy for possible out-of-bounds indices; defaults to
//   out_of_bounds_policy::DONT_CHECK.
// mr: device memory resource; defaults to rmm::mr::get_current_device_resource().
// Returns an owning table holding the result.
83 std::unique_ptr<table> gather(
84  table_view const& source_table,
85  column_view const& gather_map,
86  out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK,
87  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
88 
// Reverses the rows within a table.
// mr defaults to rmm::mr::get_current_device_resource().
// Returns an owning table holding the result.
103 std::unique_ptr<table> reverse(
104  table_view const& source_table,
105  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
106 
// Column overload of reverse: takes a column_view and returns an owning column.
// Presumably reverses the elements of source_column (confirm against the full header).
// mr defaults to rmm::mr::get_current_device_resource().
121 std::unique_ptr<column> reverse(
122  column_view const& source_column,
123  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
124 
// Scatters the rows of the source table into a copy of the target table
// according to a scatter map.
// mr defaults to rmm::mr::get_current_device_resource().
// Returns an owning table holding the result.
159 std::unique_ptr<table> scatter(
160  table_view const& source,
161  column_view const& scatter_map,
162  table_view const& target,
163  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
164 
// Scatter overload whose source is a vector of scalar references instead of a table.
// Presumably each scalar is written into the rows of a copy of target selected by
// indices (confirm against the full header).
// mr defaults to rmm::mr::get_current_device_resource().
195 std::unique_ptr<table> scatter(
196  std::vector<std::reference_wrapper<const scalar>> const& source,
197  column_view const& indices,
198  table_view const& target,
199  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
200 
// Indicates when to allocate a mask, based on an existing mask.
204 enum class mask_allocation_policy {
// Do not allocate a null mask, regardless of input.
205  NEVER,
// Presumably allocate a null mask only if the input has one (confirm).
206  RETAIN,
// Presumably allocate a null mask regardless of input (confirm).
207  ALWAYS
208 };
209 
// Initializes and returns an empty column of the same type as the input.
216 std::unique_ptr<column> empty_like(column_view const& input);
217 
// Scalar overload of empty_like: returns an owning column.
// Presumably the column is empty and has the same type as the scalar (confirm).
224 std::unique_ptr<column> empty_like(scalar const& input);
225 
// Creates an uninitialized new column of the same size and type as the input.
// mask_alloc: when to allocate a mask; defaults to mask_allocation_policy::RETAIN.
// mr defaults to rmm::mr::get_current_device_resource().
237 std::unique_ptr<column> allocate_like(
238  column_view const& input,
239  mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN,
240  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
241 
// Overload of allocate_like that additionally takes an explicit size.
// Presumably the new column has that many rows and the same type as input (confirm).
// mask_alloc defaults to mask_allocation_policy::RETAIN.
// mr defaults to rmm::mr::get_current_device_resource().
254 std::unique_ptr<column> allocate_like(
255  column_view const& input,
256  size_type size,
257  mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN,
258  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
259 
// Table overload of empty_like: takes a table_view and returns an owning table.
// Presumably the result has zero rows and the same column types as input_table (confirm).
270 std::unique_ptr<table> empty_like(table_view const& input_table);
271 
// Copies a range of elements in-place from one column to another.
// target is a mutable view and is modified directly; nothing is returned.
// The source range is given by source_begin and source_end, and target_begin
// gives the starting position in target.
// NOTE(review): whether source_end is inclusive or exclusive is not shown here; confirm.
303 void copy_range_in_place(column_view const& source,
304  mutable_column_view& target,
305  size_type source_begin,
306  size_type source_end,
307  size_type target_begin);
308 
// Copies a range of elements out-of-place from one column to another.
// Both source and target are immutable views; the result is returned as a new
// owning column.
// mr defaults to rmm::mr::get_current_device_resource().
338 std::unique_ptr<column> copy_range(
339  column_view const& source,
340  column_view const& target,
341  size_type source_begin,
342  size_type source_end,
343  size_type target_begin,
344  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
345 
// Creates a new column by shifting all values by an offset.
// fill_value: scalar, presumably used for the positions vacated by the shift (confirm).
// mr defaults to rmm::mr::get_current_device_resource().
382 std::unique_ptr<column> shift(
383  column_view const& input,
384  size_type offset,
385  scalar const& fill_value,
386  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
387 
// Slices a column_view into a set of column_views according to a set of indices.
// The results are views, not owning columns.
418 std::vector<column_view> slice(column_view const& input, host_span<size_type const> indices);
// Same operation, with the indices supplied as an initializer list.
423 std::vector<column_view> slice(column_view const& input, std::initializer_list<size_type> indices);
424 
// Table overload of slice: takes a table_view and returns a vector of table_views.
// Presumably the same indices are applied to every column of the table (confirm).
457 std::vector<table_view> slice(table_view const& input, host_span<size_type const> indices);
// Same operation, with the indices supplied as an initializer list.
462 std::vector<table_view> slice(table_view const& input, std::initializer_list<size_type> indices);
463 
// Splits a column_view into a set of column_views according to a set of indices
// derived from the expected splits.
// The results are views, not owning columns.
496 std::vector<column_view> split(column_view const& input, host_span<size_type const> splits);
// Same operation, with the splits supplied as an initializer list.
501 std::vector<column_view> split(column_view const& input, std::initializer_list<size_type> splits);
502 
// Table overload of split: takes a table_view and returns a vector of table_views.
// Presumably the same splits are applied to every column of the table (confirm).
537 std::vector<table_view> split(table_view const& input, host_span<size_type const> splits);
// Same operation, with the splits supplied as an initializer list.
542 std::vector<table_view> split(table_view const& input, std::initializer_list<size_type> splits);
543 
// Column data in a serialized format.
// Holds a host-side metadata buffer and a device-side data buffer, each owned
// through a unique_ptr.
552 struct packed_columns {
// Host-side metadata buffer used for reconstructing columns via unpack.
558  struct metadata {
559  metadata() = default;
560 
// Construct a new metadata object by moving the byte vector v into data_.
566  metadata(std::vector<uint8_t>&& v) : data_(std::move(v)) {}
567 
// Returns pointer to the host-side metadata buffer data.
573  [[nodiscard]] uint8_t const* data() const { return data_.data(); }
574 
// Returns size of the metadata buffer.
580  [[nodiscard]] size_t size() const { return data_.size(); }
581 
582  private:
583  std::vector<uint8_t> data_;
584  };
585 
// Default constructor: both members point at default-constructed objects, so
// neither pointer is null.
586  packed_columns()
587  : metadata_(std::make_unique<metadata>()), gpu_data(std::make_unique<rmm::device_buffer>())
588  {
589  }
590 
// Construct a new packed columns object, taking ownership of md and gd.
597  packed_columns(std::unique_ptr<metadata>&& md, std::unique_ptr<rmm::device_buffer>&& gd)
598  : metadata_(std::move(md)), gpu_data(std::move(gd))
599  {
600  }
601 
// Host-side metadata buffer.
602  std::unique_ptr<metadata> metadata_;
// Device-side data buffer.
603  std::unique_ptr<rmm::device_buffer> gpu_data;
604 };
605 
// The result(s) of a cudf::contiguous_split.
620 struct packed_table {
// Result table_view of a cudf::contiguous_split.
621  cudf::table_view table;
// Column data owned.
622  packed_columns data;
623 };
624 
// Performs a deep-copy split of a table_view into a set of table_views into a
// single contiguous block of memory.
// Returns one packed_table per resulting partition.
// mr defaults to rmm::mr::get_current_device_resource().
666 std::vector<packed_table> contiguous_split(
667  cudf::table_view const& input,
668  std::vector<size_type> const& splits,
669  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
670 
// Deep-copy a table_view into a serialized contiguous memory format.
// mr defaults to rmm::mr::get_current_device_resource().
// Returns the serialized result as a packed_columns object.
683 packed_columns pack(cudf::table_view const& input,
684  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
685 
// Produce the metadata used for packing a table stored in a contiguous buffer.
// table: view of the table stored in the buffer.
// contiguous_buffer: pointer to the start of the contiguous buffer.
// buffer_size: size of the contiguous buffer (presumably in bytes; confirm).
// Returns the metadata as a packed_columns::metadata object.
699 packed_columns::metadata pack_metadata(table_view const& table,
700  uint8_t const* contiguous_buffer,
701  size_t buffer_size);
702 
718 
// Overload of unpack taking raw pointers to a metadata buffer and a gpu_data buffer.
// Returns a table_view, which is a view rather than an owning table.
// NOTE(review): presumably the pointed-to buffers must outlive the returned view; confirm.
736 table_view unpack(uint8_t const* metadata, uint8_t const* gpu_data);
737 
// Returns a new column, where each element is selected from either lhs or rhs
// based on the value of the corresponding element in boolean_mask.
// This overload takes both lhs and rhs as columns.
// mr defaults to rmm::mr::get_current_device_resource().
757 std::unique_ptr<column> copy_if_else(
758  column_view const& lhs,
759  column_view const& rhs,
760  column_view const& boolean_mask,
761  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
762 
// Overload of copy_if_else with a scalar lhs and a column rhs.
// Presumably the scalar stands in for every element on the lhs side (confirm).
// mr defaults to rmm::mr::get_current_device_resource().
781 std::unique_ptr<column> copy_if_else(
782  scalar const& lhs,
783  column_view const& rhs,
784  column_view const& boolean_mask,
785  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
786 
// Overload of copy_if_else with a column lhs and a scalar rhs.
// Presumably the scalar stands in for every element on the rhs side (confirm).
// mr defaults to rmm::mr::get_current_device_resource().
805 std::unique_ptr<column> copy_if_else(
806  column_view const& lhs,
807  scalar const& rhs,
808  column_view const& boolean_mask,
809  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
810 
// Overload of copy_if_else with scalars for both lhs and rhs.
// Presumably the output has one element per element of boolean_mask (confirm).
// mr defaults to rmm::mr::get_current_device_resource().
827 std::unique_ptr<column> copy_if_else(
828  scalar const& lhs,
829  scalar const& rhs,
830  column_view const& boolean_mask,
831  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
832 
// Scatters rows from the input table to rows of the output corresponding to
// true values in a boolean mask.
// mr defaults to rmm::mr::get_current_device_resource().
// Returns an owning table holding the result.
869 std::unique_ptr<table> boolean_mask_scatter(
870  table_view const& input,
871  table_view const& target,
872  column_view const& boolean_mask,
873  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
874 
// Overload of boolean_mask_scatter whose input is a vector of scalar references
// instead of a table.
// Presumably each scalar is written to the rows of its target column where
// boolean_mask is true (confirm against the full header).
// mr defaults to rmm::mr::get_current_device_resource().
906 std::unique_ptr<table> boolean_mask_scatter(
907  std::vector<std::reference_wrapper<const scalar>> const& input,
908  table_view const& target,
909  column_view const& boolean_mask,
910  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
911 
// Get the element at specified index from a column.
// The element is returned as an owning scalar.
// mr defaults to rmm::mr::get_current_device_resource().
925 std::unique_ptr<scalar> get_element(
926  column_view const& input,
927  size_type index,
928  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
929 
// Indicates whether a row can be sampled more than once.
933 enum class sample_with_replacement : bool {
// A row can be sampled only once.
934  FALSE,
// A row can be sampled more than once.
935  TRUE
936 };
937 
// Gather n samples from given input randomly.
// replacement: defaults to sample_with_replacement::FALSE.
// seed: defaults to 0; presumably seeds the random number generation (confirm).
// mr defaults to rmm::mr::get_current_device_resource().
// Returns an owning table holding the sampled rows.
965 std::unique_ptr<table> sample(
966  table_view const& input,
967  size_type const n,
968  sample_with_replacement replacement = sample_with_replacement::FALSE,
969  int64_t const seed = 0,
970  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
971 
// Checks if a column or its descendants have non-empty null rows.
988 bool has_nonempty_nulls(column_view const& input);
989 
1013 
// Copy input into output while purging any non-empty null rows in the column
// or its descendants.
// mr defaults to rmm::mr::get_current_device_resource().
// Returns the purged copy as an owning column.
1081 std::unique_ptr<column> purge_nonempty_nulls(
1082  column_view const& input,
1083  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
1084 
1086 } // namespace cudf
per_device_resource.hpp
cudf::copy_range_in_place
void copy_range_in_place(column_view const &source, mutable_column_view &target, size_type source_begin, size_type source_end, size_type target_begin)
Copies a range of elements in-place from one column to another.
cudf::get_element
std::unique_ptr< scalar > get_element(column_view const &input, size_type index, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Get the element at specified index from a column.
cudf::packed_columns::gpu_data
std::unique_ptr< rmm::device_buffer > gpu_data
Device-side data buffer.
Definition: copying.hpp:603
cudf::unpack
table_view unpack(packed_columns const &input)
Deserialize the result of cudf::pack
cudf::mask_allocation_policy
mask_allocation_policy
Indicates when to allocate a mask, based on an existing mask.
Definition: copying.hpp:204
cudf::pack_metadata
packed_columns::metadata pack_metadata(table_view const &table, uint8_t const *contiguous_buffer, size_t buffer_size)
Produce the metadata used for packing a table stored in a contiguous buffer.
strings_column_view.hpp
Class definition for cudf::strings_column_view.
cudf::packed_columns::metadata::size
size_t size() const
Returns size of the metadata buffer.
Definition: copying.hpp:580
scalar.hpp
Class definitions for cudf::scalar.
cudf::size_type
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:80
cudf::io::NEVER
@ NEVER
Never use dictionary encoding.
Definition: io/types.hpp:107
cudf::packed_table::data
packed_columns data
Column data owned.
Definition: copying.hpp:622
cudf::empty_like
std::unique_ptr< column > empty_like(column_view const &input)
Initializes and returns an empty column of the same type as the input.
cudf::column_view
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
Definition: column_view.hpp:322
cudf::host_span
C++20 std::span with reduced feature set.
Definition: span.hpp:214
types.hpp
Type declarations for libcudf.
rmm
cudf::mutable_column_view
A non-owning, mutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
Definition: column_view.hpp:485
structs_column_view.hpp
Class definition for cudf::structs_column_view.
cudf::table
A set of cudf::column's of the same size.
Definition: table.hpp:40
cudf::slice
std::vector< column_view > slice(column_view const &input, host_span< size_type const > indices)
Slices a column_view into a set of column_views according to a set of indices.
cudf::packed_columns::metadata::data
uint8_t const * data() const
Returns pointer to the host-side metadata buffer data.
Definition: copying.hpp:573
cudf::packed_columns::metadata
Host-side metadata buffer used for reconstructing columns via unpack.
Definition: copying.hpp:558
cudf::packed_columns::metadata::metadata
metadata(std::vector< uint8_t > &&v)
Construct a new metadata object.
Definition: copying.hpp:566
cudf::packed_table
The result(s) of a cudf::contiguous_split.
Definition: copying.hpp:620
cudf::packed_columns::metadata_
std::unique_ptr< metadata > metadata_
Host-side metadata buffer.
Definition: copying.hpp:602
cudf::out_of_bounds_policy::NULLIFY
@ NULLIFY
Output values corresponding to out-of-bounds indices are null.
cudf::gather
std::unique_ptr< table > gather(table_view const &source_table, column_view const &gather_map, out_of_bounds_policy bounds_policy=out_of_bounds_policy::DONT_CHECK, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Gathers the specified rows (including null values) of a set of columns.
cudf::packed_columns::packed_columns
packed_columns(std::unique_ptr< metadata > &&md, std::unique_ptr< rmm::device_buffer > &&gd)
Construct a new packed columns object.
Definition: copying.hpp:597
cudf::purge_nonempty_nulls
std::unique_ptr< column > purge_nonempty_nulls(column_view const &input, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Copy input into output while purging any non-empty null rows in the column or its descendants.
cudf::may_have_nonempty_nulls
bool may_have_nonempty_nulls(column_view const &input)
Approximates if a column or its descendants may have non-empty null elements.
cudf::pack
packed_columns pack(cudf::table_view const &input, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Deep-copy a table_view into a serialized contiguous memory format.
cudf::shift
std::unique_ptr< column > shift(column_view const &input, size_type offset, scalar const &fill_value, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Creates a new column by shifting all values by an offset.
cudf::packed_table::table
cudf::table_view table
Result table_view of a cudf::contiguous_split.
Definition: copying.hpp:621
cudf::scalar
An owning class to represent a singular value.
Definition: scalar.hpp:48
cudf::mask_allocation_policy::NEVER
@ NEVER
Do not allocate a null mask, regardless of input.
cudf::scatter
std::unique_ptr< table > scatter(table_view const &source, column_view const &scatter_map, table_view const &target, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Scatters the rows of the source table into a copy of the target table according to a scatter map.
cudf::table_view
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:187
cudf::split
std::vector< column_view > split(column_view const &input, host_span< size_type const > splits)
Splits a column_view into a set of column_views according to a set of indices derived from expected splits.
cudf::boolean_mask_scatter
std::unique_ptr< table > boolean_mask_scatter(table_view const &input, table_view const &target, column_view const &boolean_mask, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Scatters rows from the input table to rows of the output corresponding to true values in a boolean mask.
cudf::copy_if_else
std::unique_ptr< column > copy_if_else(column_view const &lhs, column_view const &rhs, column_view const &boolean_mask, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Returns a new column, where each element is selected from either lhs or rhs based on the value of the corresponding element in boolean_mask.
cudf::sample_with_replacement::FALSE
@ FALSE
A row can be sampled only once.
cudf
cuDF interfaces
Definition: aggregation.hpp:34
cudf::reverse
std::unique_ptr< table > reverse(table_view const &source_table, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Reverses the rows within a table.
cudf::has_nonempty_nulls
bool has_nonempty_nulls(column_view const &input)
Checks if a column or its descendants have non-empty null rows.
cudf::contiguous_split
std::vector< packed_table > contiguous_split(cudf::table_view const &input, std::vector< size_type > const &splits, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs a deep-copy split of a table_view into a set of table_views into a single contiguous block of memory.
cudf::out_of_bounds_policy
out_of_bounds_policy
Policy to account for possible out-of-bounds indices.
Definition: copying.hpp:48
cudf::sample
std::unique_ptr< table > sample(table_view const &input, size_type const n, sample_with_replacement replacement=sample_with_replacement::FALSE, int64_t const seed=0, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Gather n samples from given input randomly.
table.hpp
Class definition for cudf::table.
cudf::packed_columns
Column data in a serialized format.
Definition: copying.hpp:552
rmm::mr::device_memory_resource
column_view.hpp
column view class definitions
cudf::copy_range
std::unique_ptr< column > copy_range(column_view const &source, column_view const &target, size_type source_begin, size_type source_end, size_type target_begin, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Copies a range of elements out-of-place from one column to another.
cudf::sample_with_replacement
sample_with_replacement
Indicates whether a row can be sampled more than once.
Definition: copying.hpp:933
lists_column_view.hpp
Class definition for cudf::lists_column_view.
cudf::allocate_like
std::unique_ptr< column > allocate_like(column_view const &input, mask_allocation_policy mask_alloc=mask_allocation_policy::RETAIN, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Creates an uninitialized new column of the same size and type as the input.