Transformation Replace#
- group transformation_replace
Enums
-
enum class replace_policy : bool#
Policy to specify the position of replacement values relative to null rows.
PRECEDING means the replacement value is the first non-null value preceding the null row.
FOLLOWING means the replacement value is the first non-null value following the null row.
Values:
-
enumerator PRECEDING#
-
enumerator FOLLOWING#
-
enumerator PRECEDING#
Functions
-
std::unique_ptr<column> replace_nulls(column_view const &input, column_view const &replacement, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())#
Replaces all null values in a column with corresponding values of another column.
If input[i] is NULL, then output[i] will contain replacement[i].
input and replacement must be of the same type and size.
- Parameters:
input – [in] A column whose null values will be replaced
replacement – [in] A cudf::column whose values will replace null values in input
stream – CUDA stream used for device memory operations and kernel launches
mr – [in] Device memory resource used to allocate device memory of the returned column
- Returns:
A copy of input with the null values replaced with corresponding values from replacement.
-
std::unique_ptr<column> replace_nulls(column_view const &input, scalar const &replacement, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())#
Replaces all null values in a column with a scalar.
If input[i] is NULL, then output[i] will contain replacement.
input and replacement must have the same type.
- Parameters:
input – [in] A column whose null values will be replaced
replacement – [in] Scalar used to replace null values in input
stream – CUDA stream used for device memory operations and kernel launches
mr – [in] Device memory resource used to allocate device memory of the returned column
- Returns:
Copy of input with null values replaced by replacement
-
std::unique_ptr<column> replace_nulls(column_view const &input, replace_policy const &replace_policy, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())#
Replaces all null values in a column with the first non-null value that precedes/follows.
If input[i] is NULL, then output[i] will contain the first non-null value that precedes or follows the null value, based on replace_policy.
- Parameters:
input – [in] A column whose null values will be replaced
replace_policy – [in] Specify the position of replacement values relative to null values
stream – CUDA stream used for device memory operations and kernel launches
mr – [in] Device memory resource used to allocate device memory of the returned column
- Returns:
Copy of input with null values replaced based on replace_policy
-
std::unique_ptr<column> replace_nans(column_view const &input, column_view const &replacement, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())#
Replaces all NaN values in a column with corresponding values from another column.
If input[i] is NaN, then output[i] will contain replacement[i].
input       = {1.0, NaN, 4.0}
replacement = {3.0, 9.0, 7.0}
output      = {1.0, 9.0, 4.0}
Note
Nulls are not considered as NaN
- Throws:
cudf::logic_error – If input and replacement are of different type or size.
cudf::logic_error – If input or replacement is not of floating-point dtype.
- Parameters:
input – A column whose NaN values will be replaced
replacement – A cudf::column whose values will replace NaN values in input
stream – CUDA stream used for device memory operations and kernel launches
mr – Device memory resource used to allocate the returned column’s device memory
- Returns:
A copy of input with the NaN values replaced with corresponding values from replacement.
-
std::unique_ptr<column> replace_nans(column_view const &input, scalar const &replacement, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())#
Replaces all NaN values in a column with a scalar.
If input[i] is NaN, then output[i] will contain replacement.
input       = {1.0, NaN, 4.0}
replacement = 7.0
output      = {1.0, 7.0, 4.0}
Note
Nulls are not considered as NaN
- Throws:
cudf::logic_error – If input and replacement are of different type.
cudf::logic_error – If input or replacement is not of floating-point dtype.
- Parameters:
input – A column whose NaN values will be replaced
replacement – A cudf::scalar whose value will replace NaN values in input
stream – CUDA stream used for device memory operations and kernel launches
mr – Device memory resource used to allocate the returned column’s device memory
- Returns:
A copy of input with the NaN values replaced by replacement
-
std::unique_ptr<column> find_and_replace_all(column_view const &input_col, column_view const &values_to_replace, column_view const &replacement_values, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())#
Return a copy of input_col replacing any values_to_replace[i] found with replacement_values[i].
- Parameters:
input_col – The column to find and replace values in
values_to_replace – The values to replace
replacement_values – The values to replace with
stream – CUDA stream used for device memory operations and kernel launches
mr – Device memory resource used to allocate the returned column’s device memory
- Returns:
Copy of input_col with specified values replaced
-
std::unique_ptr<column> clamp(column_view const &input, scalar const &lo, scalar const &lo_replace, scalar const &hi, scalar const &hi_replace, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())#
Replaces values less than lo in input with lo_replace, and values greater than hi with hi_replace.
If lo is invalid, then lo will not be considered while evaluating the input (essentially considered the minimum value of that type).
If hi is invalid, then hi will not be considered while evaluating the input (essentially considered the maximum value of that type).
Example:
input: {1, 2, 3, NULL, 5, 6, 7}

valid lo and hi
lo: 3, hi: 5, lo_replace : 0, hi_replace : 16
output: {0, 0, 3, NULL, 5, 16, 16}

invalid lo
lo: NULL, hi: 5, lo_replace : 0, hi_replace : 16
output: {1, 2, 3, NULL, 5, 16, 16}

invalid hi
lo: 3, hi: NULL, lo_replace : 0, hi_replace : 16
output: {0, 0, 3, NULL, 5, 6, 7}
Note
If lo is valid then lo_replace should be valid. If hi is valid then hi_replace should be valid.
- Throws:
cudf::logic_error – if lo.type() != hi.type()
cudf::logic_error – if lo_replace.type() != hi_replace.type()
cudf::logic_error – if lo.type() != lo_replace.type()
cudf::logic_error – if lo.type() != input.type()
- Parameters:
input – [in] Column whose elements will be clamped
lo – [in] Minimum clamp value. All elements less than lo will be replaced by lo_replace. Ignored if null.
lo_replace – [in] All elements less than lo will be replaced by lo_replace
hi – [in] Maximum clamp value. All elements greater than hi will be replaced by hi_replace. Ignored if null.
hi_replace – [in] All elements greater than hi will be replaced by hi_replace
stream – CUDA stream used for device memory operations and kernel launches
mr – [in] Device memory resource used to allocate device memory of the returned column
- Returns:
Returns a clamped column as per lo and hi boundaries
-
std::unique_ptr<column> clamp(column_view const &input, scalar const &lo, scalar const &hi, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())#
Replaces values less than lo in input with lo, and values greater than hi with hi.
If lo is invalid, then lo will not be considered while evaluating the input (essentially considered the minimum value of that type).
If hi is invalid, then hi will not be considered while evaluating the input (essentially considered the maximum value of that type).
Example:
input: {1, 2, 3, NULL, 5, 6, 7}

valid lo and hi
lo: 3, hi: 5
output: {3, 3, 3, NULL, 5, 5, 5}

invalid lo
lo: NULL, hi: 5
output: {1, 2, 3, NULL, 5, 5, 5}

invalid hi
lo: 3, hi: NULL
output: {3, 3, 3, NULL, 5, 6, 7}
- Throws:
cudf::logic_error – if lo.type() != hi.type()
cudf::logic_error – if lo.type() != input.type()
- Parameters:
input – [in] Column whose elements will be clamped
lo – [in] Minimum clamp value. All elements less than lo will be replaced by lo. Ignored if null.
hi – [in] Maximum clamp value. All elements greater than hi will be replaced by hi. Ignored if null.
stream – CUDA stream used for device memory operations and kernel launches
mr – [in] Device memory resource used to allocate device memory of the returned column
- Returns:
Returns a clamped column as per lo and hi boundaries
-
std::unique_ptr<column> normalize_nans_and_zeros(column_view const &input, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())#
Copies from a column of floating-point elements and replaces -NaN and -0.0 with +NaN and +0.0, respectively.
Converts floating point values from input using the following rules:
Convert -NaN -> NaN
Convert -0.0 -> 0.0
- Throws:
cudf::logic_error – if column does not have floating point data type.
- Parameters:
input – [in] column_view of floating-point elements to copy and normalize
stream – CUDA stream used for device memory operations and kernel launches
mr – [in] device_memory_resource allocator for allocating output data
- Returns:
new column with the modified data
-
void normalize_nans_and_zeros(mutable_column_view &in_out, rmm::cuda_stream_view stream = cudf::get_default_stream())#
Modifies a column of floating-point elements to replace all -NaN and -0.0 with +NaN and +0.0, respectively.
Converts floating point values from in_out using the following rules:
Convert -NaN -> NaN
Convert -0.0 -> 0.0
- Throws:
cudf::logic_error – if column does not have floating point data type.
- Parameters:
in_out – [inout] mutable_column_view of floating-point elements to normalize
stream – CUDA stream used for device memory operations and kernel launches
-
enum class replace_policy : bool#