copying.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2022, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
21 #include <cudf/scalar/scalar.hpp>
24 #include <cudf/table/table.hpp>
25 #include <cudf/types.hpp>
26 
27 #include <memory>
28 #include <vector>
29 
30 namespace cudf {
31 
/**
 * @brief Policy to account for possible out-of-bounds indices.
 */
46 enum class out_of_bounds_policy : bool {
47  NULLIFY,  ///< Output values corresponding to out-of-bounds indices are null.
48  DONT_CHECK  ///< NOTE(review): presumably no bounds checking is performed — confirm
49 };
50 
/**
 * @brief Gathers the specified rows (including null values) of a set of columns.
 *
 * @param source_table Table whose rows are gathered
 * @param gather_map Column view selecting the rows to gather
 * @param bounds_policy How possible out-of-bounds indices are accounted for; defaults to
 * out_of_bounds_policy::DONT_CHECK
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the resulting table
 */
81 std::unique_ptr<table> gather(
82  table_view const& source_table,
83  column_view const& gather_map,
84  out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK,
85  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
86 
/**
 * @brief Reverses the rows within a table. Creates a new table that is the reverse of
 * source_table.
 *
 * @param source_table Table to reverse
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the new table
 */
99 std::unique_ptr<table> reverse(
100  table_view const& source_table,
101  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
102 
/**
 * @brief Column overload of reverse: takes a single column_view and returns a new column.
 *
 * NOTE(review): presumably the elements are returned in reverse order, mirroring the table
 * overload — confirm.
 *
 * @param source_column Column to reverse
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the new column
 */
115 std::unique_ptr<column> reverse(
116  column_view const& source_column,
117  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
118 
/**
 * @brief Scatters the rows of the source table into a copy of the target table according to a
 * scatter map.
 *
 * @param source Table providing the rows to scatter
 * @param scatter_map Column view used as the scatter map
 * @param target Table that is copied and then scattered into
 * @param check_bounds Defaults to false. NOTE(review): presumably enables validation of the
 * scatter map values — confirm
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the resulting table
 */
156 std::unique_ptr<table> scatter(
157  table_view const& source,
158  column_view const& scatter_map,
159  table_view const& target,
160  bool check_bounds = false,
161  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
162 
/**
 * @brief Scalar overload of scatter: the source is a vector of scalar references instead of a
 * table.
 *
 * NOTE(review): presumably one scalar per target column, written at the rows named by
 * `indices` — confirm.
 *
 * @param source References to the scalars to scatter
 * @param indices Column view of row indices
 * @param target Table that receives the scattered values
 * @param check_bounds Defaults to false. NOTE(review): presumably enables validation of
 * `indices` — confirm
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the resulting table
 */
196 std::unique_ptr<table> scatter(
197  std::vector<std::reference_wrapper<const scalar>> const& source,
198  column_view const& indices,
199  table_view const& target,
200  bool check_bounds = false,
201  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
202 
/**
 * @brief Indicates when to allocate a mask, based on an existing mask.
 */
206 enum class mask_allocation_policy {
207  NEVER,  ///< Do not allocate a null mask, regardless of input.
208  RETAIN,  ///< NOTE(review): presumably allocate a mask only if the input has one — confirm
209  ALWAYS  ///< NOTE(review): presumably allocate a mask regardless of input — confirm
210 };
211 
/**
 * @brief Initializes and returns an empty column of the same type as the input.
 *
 * @param input Column view whose type is used
 * @return Owning pointer to the empty column
 */
218 std::unique_ptr<column> empty_like(column_view const& input);
219 
/**
 * @brief Scalar overload of empty_like: takes a scalar and returns a column.
 *
 * NOTE(review): presumably the returned column is empty and has the scalar's type — confirm.
 *
 * @param input Scalar used as the prototype
 * @return Owning pointer to the new column
 */
226 std::unique_ptr<column> empty_like(scalar const& input);
227 
/**
 * @brief Creates an uninitialized new column of the same size and type as the input.
 *
 * NOTE(review): the reference description continues "Supports only fixed-width..." but is
 * truncated in this listing — confirm the supported types.
 *
 * @param input Column view used as the prototype
 * @param mask_alloc Mask allocation policy; defaults to mask_allocation_policy::RETAIN
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the new column
 */
238 std::unique_ptr<column> allocate_like(
239  column_view const& input,
240  mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN,
241  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
242 
/**
 * @brief Overload of allocate_like that additionally takes an explicit `size`.
 *
 * NOTE(review): presumably the new column has `size` elements and the type of `input` —
 * confirm.
 *
 * @param input Column view used as the prototype
 * @param size Requested size, as a size_type
 * @param mask_alloc Mask allocation policy; defaults to mask_allocation_policy::RETAIN
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the new column
 */
254 std::unique_ptr<column> allocate_like(
255  column_view const& input,
256  size_type size,
257  mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN,
258  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
259 
/**
 * @brief Table overload of empty_like: takes a table_view and returns a table.
 *
 * NOTE(review): presumably each output column is empty and matches the type of the
 * corresponding input column — confirm.
 *
 * @param input_table Table view used as the prototype
 * @return Owning pointer to the new table
 */
270 std::unique_ptr<table> empty_like(table_view const& input_table);
271 
/**
 * @brief Copies a range of elements in-place from one column to another.
 *
 * The target is taken as a mutable view and nothing is returned.
 *
 * @param source Column view to copy from
 * @param target Mutable column view to copy into
 * @param source_begin Start of the source range
 * @param source_end End of the source range. NOTE(review): presumably exclusive — confirm
 * @param target_begin Position in `target` where copying starts
 */
303 void copy_range_in_place(column_view const& source,
304  mutable_column_view& target,
305  size_type source_begin,
306  size_type source_end,
307  size_type target_begin);
308 
/**
 * @brief Copies a range of elements out-of-place from one column to another.
 *
 * Unlike copy_range_in_place, the target is an immutable view and the result is returned as a
 * new column.
 *
 * @param source Column view to copy from
 * @param target Column view used as the destination prototype
 * @param source_begin Start of the source range
 * @param source_end End of the source range. NOTE(review): presumably exclusive — confirm
 * @param target_begin Position in the target where copying starts
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the resulting column
 */
338 std::unique_ptr<column> copy_range(
339  column_view const& source,
340  column_view const& target,
341  size_type source_begin,
342  size_type source_end,
343  size_type target_begin,
344  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
345 
/**
 * @brief Creates a new column by shifting all values by an offset.
 *
 * @param input Column view to shift
 * @param offset Shift amount, as a size_type
 * @param fill_value Scalar fill value. NOTE(review): presumably used for positions left
 * without a source element — confirm
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the new column
 */
380 std::unique_ptr<column> shift(
381  column_view const& input,
382  size_type offset,
383  scalar const& fill_value,
384  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
385 
/**
 * @brief Slices a column_view into a set of column_views according to a set of indices.
 *
 * Two overloads: the indices may be passed as a host_span or as a std::initializer_list.
 *
 * @param input Column view to slice
 * @param indices Indices that define the slices
 * @return Vector of column views, one per slice
 */
416 std::vector<column_view> slice(column_view const& input, host_span<size_type const> indices);
421 std::vector<column_view> slice(column_view const& input, std::initializer_list<size_type> indices);
422 
/**
 * @brief Table overloads of slice: take a table_view and return a vector of table_views.
 *
 * Two overloads: the indices may be passed as a host_span or as a std::initializer_list.
 * NOTE(review): presumably the indices are interpreted as in the column overloads — confirm.
 *
 * @param input Table view to slice
 * @param indices Indices that define the slices
 * @return Vector of table views
 */
455 std::vector<table_view> slice(table_view const& input, host_span<size_type const> indices);
460 std::vector<table_view> slice(table_view const& input, std::initializer_list<size_type> indices);
461 
/**
 * @brief Splits a column_view into a set of column_views according to a set of indices derived
 * from `splits`.
 *
 * Two overloads: the split points may be passed as a host_span or as a std::initializer_list.
 * NOTE(review): the reference description is truncated in this listing — confirm how the
 * indices are derived from `splits`.
 *
 * @param input Column view to split
 * @param splits Split points
 * @return Vector of column views
 */
494 std::vector<column_view> split(column_view const& input, host_span<size_type const> splits);
499 std::vector<column_view> split(column_view const& input, std::initializer_list<size_type> splits);
500 
/**
 * @brief Table overloads of split: take a table_view and return a vector of table_views.
 *
 * Two overloads: the split points may be passed as a host_span or as a std::initializer_list.
 * NOTE(review): presumably `splits` is interpreted as in the column overloads — confirm.
 *
 * @param input Table view to split
 * @param splits Split points
 * @return Vector of table views
 */
535 std::vector<table_view> split(table_view const& input, host_span<size_type const> splits);
540 std::vector<table_view> split(table_view const& input, std::initializer_list<size_type> splits);
541 
/**
 * @brief Column data in a serialized format.
 *
 * Owns two buffers through std::unique_ptr: a `metadata` object and an rmm::device_buffer
 * (`gpu_data`).
 */
550 struct packed_columns {
/**
 * @brief Host-side metadata buffer used for reconstructing columns via unpack.
 */
556  struct metadata {
557  metadata() = default;
// Takes ownership of the byte vector by moving it into data_.
558  metadata(std::vector<uint8_t>&& v) : data_(std::move(v)) {}
// Pointer to the first byte of the stored buffer.
559  [[nodiscard]] uint8_t const* data() const { return data_.data(); }
// Number of bytes stored.
560  [[nodiscard]] size_t size() const { return data_.size(); }
561 
562  private:
563  std::vector<uint8_t> data_;
564  };
565 
// Default construction allocates an empty metadata object and an empty device buffer, so
// neither pointer is null afterwards.
566  packed_columns()
567  : metadata_(std::make_unique<metadata>()), gpu_data(std::make_unique<rmm::device_buffer>())
568  {
569  }
// Takes ownership of an existing metadata object and device buffer.
570  packed_columns(std::unique_ptr<metadata>&& md, std::unique_ptr<rmm::device_buffer>&& gd)
571  : metadata_(std::move(md)), gpu_data(std::move(gd))
572  {
573  }
574 
575  std::unique_ptr<metadata> metadata_;  ///< Owned metadata buffer
576  std::unique_ptr<rmm::device_buffer> gpu_data;  ///< Owned rmm::device_buffer
577 };
578 
/**
 * @brief The result(s) of a contiguous_split
 *
 * NOTE(review): this listing jumps from line 593 to line 595, so whatever was declared on
 * line 594 is not shown here — restore it from the original header before relying on this
 * definition.
 */
593 struct packed_table {
595  packed_columns data;  ///< Column data in serialized form
596 };
597 
/**
 * @brief Performs a deep-copy split of a table_view, returning one packed_table per piece.
 *
 * NOTE(review): the reference description mentions "a single contiguous block" but is
 * truncated in this listing — confirm the memory layout guarantee.
 *
 * @param input Table view to split
 * @param splits Split points
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Vector of packed_table results
 */
639 std::vector<packed_table> contiguous_split(
640  cudf::table_view const& input,
641  std::vector<size_type> const& splits,
642  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
643 
/**
 * @brief Deep-copy a table_view into a serialized contiguous memory format.
 *
 * @param input Table view to pack
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return The packed column data
 */
656 packed_columns pack(cudf::table_view const& input,
657  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
658 
/**
 * @brief Produce the metadata used for packing a table stored in a contiguous buffer.
 *
 * @param table Table view that is stored in the contiguous buffer
 * @param contiguous_buffer Pointer to the start of the contiguous buffer
 * @param buffer_size Size of the contiguous buffer. NOTE(review): presumably in bytes — confirm
 * @return The metadata object
 */
672 packed_columns::metadata pack_metadata(table_view const& table,
673  uint8_t const* contiguous_buffer,
674  size_t buffer_size);
675 
/**
 * @brief Deserialize the result of cudf::pack
 *
 * @param input The packed columns to unpack
 * @return Table view over the unpacked columns. NOTE(review): presumably a non-owning view
 * into `input`, which must then outlive it — confirm
 */
690 table_view unpack(packed_columns const& input);
691 
/**
 * @brief Overload of unpack that takes raw pointers to the metadata and data buffers.
 *
 * @param metadata Pointer to the metadata bytes
 * @param gpu_data Pointer to the column data. NOTE(review): presumably device memory — confirm
 * @return Table view over the unpacked columns
 */
709 table_view unpack(uint8_t const* metadata, uint8_t const* gpu_data);
710 
/**
 * @brief Returns a new column, where each element is selected from either lhs or rhs.
 *
 * NOTE(review): the reference description is truncated in this listing; presumably the
 * selection follows the corresponding element of `boolean_mask` — confirm which value selects
 * `lhs`.
 *
 * @param lhs Left-hand column view
 * @param rhs Right-hand column view
 * @param boolean_mask Column view used as the selection mask
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the new column
 */
730 std::unique_ptr<column> copy_if_else(
731  column_view const& lhs,
732  column_view const& rhs,
733  column_view const& boolean_mask,
734  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
735 
/**
 * @brief Overload of copy_if_else with a scalar `lhs` and a column `rhs`.
 *
 * NOTE(review): presumably each output element is either the scalar or the corresponding
 * `rhs` element, chosen by `boolean_mask` — confirm.
 *
 * @param lhs Left-hand scalar
 * @param rhs Right-hand column view
 * @param boolean_mask Column view used as the selection mask
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the new column
 */
754 std::unique_ptr<column> copy_if_else(
755  scalar const& lhs,
756  column_view const& rhs,
757  column_view const& boolean_mask,
758  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
759 
/**
 * @brief Overload of copy_if_else with a column `lhs` and a scalar `rhs`.
 *
 * NOTE(review): presumably each output element is either the corresponding `lhs` element or
 * the scalar, chosen by `boolean_mask` — confirm.
 *
 * @param lhs Left-hand column view
 * @param rhs Right-hand scalar
 * @param boolean_mask Column view used as the selection mask
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the new column
 */
778 std::unique_ptr<column> copy_if_else(
779  column_view const& lhs,
780  scalar const& rhs,
781  column_view const& boolean_mask,
782  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
783 
/**
 * @brief Overload of copy_if_else where both `lhs` and `rhs` are scalars.
 *
 * NOTE(review): presumably each output element is one of the two scalars, chosen by
 * `boolean_mask` — confirm.
 *
 * @param lhs Left-hand scalar
 * @param rhs Right-hand scalar
 * @param boolean_mask Column view used as the selection mask
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the new column
 */
800 std::unique_ptr<column> copy_if_else(
801  scalar const& lhs,
802  scalar const& rhs,
803  column_view const& boolean_mask,
804  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
805 
/**
 * @brief Scatters rows from the input table to rows of the output corresponding to true values
 * in a boolean mask.
 *
 * @param input Table providing the rows to scatter
 * @param target Table that receives the scattered rows
 * @param boolean_mask Column view used as the boolean mask
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the resulting table
 */
842 std::unique_ptr<table> boolean_mask_scatter(
843  table_view const& input,
844  table_view const& target,
845  column_view const& boolean_mask,
846  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
847 
/**
 * @brief Scalar overload of boolean_mask_scatter: the input is a vector of scalar references
 * instead of a table.
 *
 * NOTE(review): presumably one scalar per target column, written to the rows where
 * `boolean_mask` is true — confirm.
 *
 * @param input References to the scalars to scatter
 * @param target Table that receives the scattered values
 * @param boolean_mask Column view used as the boolean mask
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the resulting table
 */
879 std::unique_ptr<table> boolean_mask_scatter(
880  std::vector<std::reference_wrapper<const scalar>> const& input,
881  table_view const& target,
882  column_view const& boolean_mask,
883  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
884 
/**
 * @brief Get the element at specified index from a column.
 *
 * @param input Column view to read from
 * @param index Index of the element
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to a scalar holding the element
 */
898 std::unique_ptr<scalar> get_element(
899  column_view const& input,
900  size_type index,
901  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
902 
/**
 * @brief Indicates whether a row can be sampled more than once.
 */
906 enum class sample_with_replacement : bool {
907  FALSE,  ///< A row can be sampled only once.
908  TRUE  ///< A row can be sampled more than once.
909 };
910 
/**
 * @brief Gather n samples from given input randomly.
 *
 * @param input Table view to sample from
 * @param n Number of samples to gather
 * @param replacement Whether a row can be sampled more than once; defaults to
 * sample_with_replacement::FALSE
 * @param seed Defaults to 0. NOTE(review): presumably seeds the random number generator —
 * confirm
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to a table of the sampled rows
 */
938 std::unique_ptr<table> sample(
939  table_view const& input,
940  size_type const n,
941  sample_with_replacement replacement = sample_with_replacement::FALSE,
942  int64_t const seed = 0,
943  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
944 
/**
 * @brief Checks if a column or its descendants have non-empty null rows.
 *
 * @param input Column view to check
 * @return true if non-empty null rows are found, false otherwise
 */
961 bool has_nonempty_nulls(column_view const& input);
962 
/**
 * @brief Approximates if a column or its descendants may have non-empty null elements.
 *
 * @param input Column view to check
 * @return true if the column or its descendants may have non-empty null elements
 */
985 bool may_have_nonempty_nulls(column_view const& input);
986 
/**
 * @brief Copies input, purging any non-empty null rows in the column or its descendants.
 *
 * @param input Lists column view to copy
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the purged copy
 */
1019 std::unique_ptr<column> purge_nonempty_nulls(
1020  lists_column_view const& input,
1021  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
1022 
/**
 * @brief Strings overload of purge_nonempty_nulls: takes a strings_column_view and returns a
 * new column.
 *
 * NOTE(review): presumably purges non-empty null rows exactly as the lists overload does —
 * confirm.
 *
 * @param input Strings column view to copy
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the new column
 */
1055 std::unique_ptr<column> purge_nonempty_nulls(
1056  strings_column_view const& input,
1057  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
1058 
/**
 * @brief Structs overload of purge_nonempty_nulls: takes a structs_column_view and returns a
 * new column.
 *
 * NOTE(review): presumably purges non-empty null rows exactly as the lists overload does —
 * confirm.
 *
 * @param input Structs column view to copy
 * @param mr Device memory resource; defaults to rmm::mr::get_current_device_resource()
 * @return Owning pointer to the new column
 */
1091 std::unique_ptr<column> purge_nonempty_nulls(
1092  structs_column_view const& input,
1093  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
1094 
1096 } // namespace cudf
cudf::structs_column_view
Definition: structs_column_view.hpp:33
cudf::copy_range_in_place
void copy_range_in_place(column_view const &source, mutable_column_view &target, size_type source_begin, size_type source_end, size_type target_begin)
Copies a range of elements in-place from one column to another.
cudf::get_element
std::unique_ptr< scalar > get_element(column_view const &input, size_type index, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Get the element at specified index from a column.
cudf::unpack
table_view unpack(packed_columns const &input)
Deserialize the result of cudf::pack
cudf::mask_allocation_policy
mask_allocation_policy
Indicates when to allocate a mask, based on an existing mask.
Definition: copying.hpp:206
cudf::pack_metadata
packed_columns::metadata pack_metadata(table_view const &table, uint8_t const *contiguous_buffer, size_t buffer_size)
Produce the metadata used for packing a table stored in a contiguous buffer.
strings_column_view.hpp
Class definition for cudf::strings_column_view.
scalar.hpp
Class definitions for cudf::scalar.
cudf::size_type
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:84
cudf::empty_like
std::unique_ptr< column > empty_like(column_view const &input)
Initializes and returns an empty column of the same type as the input.
cudf::column_view
A non-owning, immutable view of device data as a column of elements, some of which may be null as ind...
Definition: column_view.hpp:300
cudf::host_span
Definition: span.hpp:130
types.hpp
Type declarations for libcudf.
rmm
cudf::mutable_column_view
A non-owning, mutable view of device data as a column of elements, some of which may be null as indic...
Definition: column_view.hpp:448
structs_column_view.hpp
Class definition for cudf::structs_column_view.
cudf::table
A set of cudf::column's of the same size.
Definition: table.hpp:38
cudf::slice
std::vector< column_view > slice(column_view const &input, host_span< size_type const > indices)
Slices a column_view into a set of column_views according to a set of indices.
cudf::packed_columns::metadata
Host-side metadata buffer used for reconstructing columns via unpack.
Definition: copying.hpp:556
cudf::purge_nonempty_nulls
std::unique_ptr< column > purge_nonempty_nulls(lists_column_view const &input, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Copies input, purging any non-empty null rows in the column or its descendants.
cudf::packed_table
The result(s) of a contiguous_split
Definition: copying.hpp:593
cudf::out_of_bounds_policy::NULLIFY
@ NULLIFY
Output values corresponding to out-of-bounds indices are null.
cudf::gather
std::unique_ptr< table > gather(table_view const &source_table, column_view const &gather_map, out_of_bounds_policy bounds_policy=out_of_bounds_policy::DONT_CHECK, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Gathers the specified rows (including null values) of a set of columns.
cudf::may_have_nonempty_nulls
bool may_have_nonempty_nulls(column_view const &input)
Approximates if a column or its descendants may have non-empty null elements.
cudf::pack
packed_columns pack(cudf::table_view const &input, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Deep-copy a table_view into a serialized contiguous memory format.
cudf::shift
std::unique_ptr< column > shift(column_view const &input, size_type offset, scalar const &fill_value, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Creates a new column by shifting all values by an offset.
cudf::scalar
An owning class to represent a singular value.
Definition: scalar.hpp:46
cudf::mask_allocation_policy::NEVER
@ NEVER
Do not allocate a null mask, regardless of input.
cudf::table_view
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:154
cudf::split
std::vector< column_view > split(column_view const &input, host_span< size_type const > splits)
Splits a column_view into a set of column_views according to a set of indices derived from expected s...
cudf::boolean_mask_scatter
std::unique_ptr< table > boolean_mask_scatter(table_view const &input, table_view const &target, column_view const &boolean_mask, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Scatters rows from the input table to rows of the output corresponding to true values in a boolean ma...
cudf::copy_if_else
std::unique_ptr< column > copy_if_else(column_view const &lhs, column_view const &rhs, column_view const &boolean_mask, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Returns a new column, where each element is selected from either lhs or rhs based on the value of the...
cudf::sample_with_replacement::FALSE
@ FALSE
A row can be sampled only once.
cudf
cuDF interfaces
Definition: aggregation.hpp:34
cudf::reverse
std::unique_ptr< table > reverse(table_view const &source_table, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Reverses the rows within a table. Creates a new table that is the reverse of source_table....
cudf::has_nonempty_nulls
bool has_nonempty_nulls(column_view const &input)
Checks if a column or its descendants have non-empty null rows.
cudf::strings_column_view
Given a column-view of strings type, an instance of this class provides a wrapper on this compound co...
Definition: strings_column_view.hpp:36
cudf::contiguous_split
std::vector< packed_table > contiguous_split(cudf::table_view const &input, std::vector< size_type > const &splits, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs a deep-copy split of a table_view into a set of table_views into a single contiguous block o...
cudf::lists_column_view
Given a column-view of lists type, an instance of this class provides a wrapper on this compound colu...
Definition: lists_column_view.hpp:39
cudf::out_of_bounds_policy
out_of_bounds_policy
Policy to account for possible out-of-bounds indices.
Definition: copying.hpp:46
cudf::sample
std::unique_ptr< table > sample(table_view const &input, size_type const n, sample_with_replacement replacement=sample_with_replacement::FALSE, int64_t const seed=0, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Gather n samples from given input randomly.
table.hpp
Class definition for cudf::table.
cudf::scatter
std::unique_ptr< table > scatter(table_view const &source, column_view const &scatter_map, table_view const &target, bool check_bounds=false, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Scatters the rows of the source table into a copy of the target table according to a scatter map.
cudf::packed_columns
Column data in a serialized format.
Definition: copying.hpp:550
rmm::mr::device_memory_resource
column_view.hpp
column view class definitions
cudf::copy_range
std::unique_ptr< column > copy_range(column_view const &source, column_view const &target, size_type source_begin, size_type source_end, size_type target_begin, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Copies a range of elements out-of-place from one column to another.
cudf::sample_with_replacement
sample_with_replacement
Indicates whether a row can be sampled more than once.
Definition: copying.hpp:906
lists_column_view.hpp
Class definition for cudf::lists_column_view.
cudf::allocate_like
std::unique_ptr< column > allocate_like(column_view const &input, mask_allocation_policy mask_alloc=mask_allocation_policy::RETAIN, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Creates an uninitialized new column of the same size and type as the input. Supports only fixed-width...