diff --git a/datafusion/functions/src/unicode/reverse.rs b/datafusion/functions/src/unicode/reverse.rs index 52666cc57059b..da16d3ee37520 100644 --- a/datafusion/functions/src/unicode/reverse.rs +++ b/datafusion/functions/src/unicode/reverse.rs @@ -18,12 +18,14 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::array::{ + Array, ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray, + OffsetSizeTrait, +}; use arrow::datatypes::DataType; - -use datafusion_common::cast::as_generic_string_array; use datafusion_common::{exec_err, Result}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use DataType::{LargeUtf8, Utf8, Utf8View}; use crate::utils::{make_scalar_function, utf8_to_str_type}; @@ -44,7 +46,7 @@ impl ReverseFunc { Self { signature: Signature::uniform( 1, - vec![Utf8, LargeUtf8], + vec![Utf8View, Utf8, LargeUtf8], Volatility::Immutable, ), } @@ -70,8 +72,8 @@ impl ScalarUDFImpl for ReverseFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function(reverse::, vec![])(args), - DataType::LargeUtf8 => make_scalar_function(reverse::, vec![])(args), + Utf8 | Utf8View => make_scalar_function(reverse::, vec![])(args), + LargeUtf8 => make_scalar_function(reverse::, vec![])(args), other => { exec_err!("Unsupported data type {other:?} for function reverse") } @@ -83,10 +85,17 @@ impl ScalarUDFImpl for ReverseFunc { /// reverse('abcde') = 'edcba' /// The implementation uses UTF-8 code points as characters pub fn reverse(args: &[ArrayRef]) -> Result { - let string_array = as_generic_string_array::(&args[0])?; + if args[0].data_type() == &Utf8View { + reverse_impl::(args[0].as_string_view()) + } else { + reverse_impl::(args[0].as_string::()) + } +} - let result = string_array - .iter() +fn reverse_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor>( + string_array: V, +) -> Result { + let result = ArrayIter::new(string_array) .map(|string| string.map(|string: &str| string.chars().rev().collect::())) .collect::>(); @@ -95,8 +104,8 @@ pub fn reverse(args: &[ArrayRef]) -> Result { #[cfg(test)] mod tests { - use arrow::array::{Array, StringArray}; - use arrow::datatypes::DataType::Utf8; + use arrow::array::{Array, LargeStringArray, StringArray}; + use arrow::datatypes::DataType::{LargeUtf8, Utf8}; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; @@ -104,50 +113,49 @@ mod tests { use crate::unicode::reverse::ReverseFunc; use crate::utils::test::test_function; + macro_rules! test_reverse { + ($INPUT:expr, $EXPECTED:expr) => { + test_function!( + ReverseFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8($INPUT))], + $EXPECTED, + &str, + Utf8, + StringArray + ); + + test_function!( + ReverseFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT))], + $EXPECTED, + &str, + LargeUtf8, + LargeStringArray + ); + + test_function!( + ReverseFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT))], + $EXPECTED, + &str, + Utf8, + StringArray + ); + }; + } + #[test] fn test_functions() -> Result<()> { - test_function!( - ReverseFunc::new(), - &[ColumnarValue::Scalar(ScalarValue::from("abcde"))], - Ok(Some("edcba")), - &str, - Utf8, - StringArray - ); - test_function!( - ReverseFunc::new(), - &[ColumnarValue::Scalar(ScalarValue::from("loẅks"))], - Ok(Some("sk̈wol")), - &str, - Utf8, - StringArray - ); - test_function!( - ReverseFunc::new(), - &[ColumnarValue::Scalar(ScalarValue::from("loẅks"))], - Ok(Some("sk̈wol")), - &str, - Utf8, - StringArray - ); - test_function!( - ReverseFunc::new(), - &[ColumnarValue::Scalar(ScalarValue::Utf8(None))], - Ok(None), - &str, - Utf8, - StringArray - ); + test_reverse!(Some("abcde".into()), Ok(Some("edcba"))); + test_reverse!(Some("loẅks".into()), Ok(Some("sk̈wol"))); + test_reverse!(Some("loẅks".into()), Ok(Some("sk̈wol"))); + test_reverse!(None, Ok(None)); #[cfg(not(feature = "unicode_expressions"))] - test_function!( - ReverseFunc::new(), - &[ColumnarValue::Scalar(ScalarValue::from("abcde"))], + test_reverse!( + Some("abcde".into()), internal_err!( "function reverse requires compilation with feature flag: unicode_expressions." ), - &str, - Utf8, - StringArray ); Ok(()) diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index bea3016a21d32..0ffc6c91a0a45 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -234,6 +234,16 @@ SELECT reverse('abcde') ---- edcba +query T +SELECT reverse(arrow_cast('abcde', 'LargeUtf8')) +---- +edcba + +query T +SELECT reverse(arrow_cast('abcde', 'Utf8View')) +---- +edcba + query T SELECT reverse(arrow_cast('abcde', 'Dictionary(Int32, Utf8)')) ---- @@ -244,11 +254,31 @@ SELECT reverse('loẅks') ---- sk̈wol +query T +SELECT reverse(arrow_cast('loẅks', 'LargeUtf8')) +---- +sk̈wol + +query T +SELECT reverse(arrow_cast('loẅks', 'Utf8View')) +---- +sk̈wol + query T SELECT reverse(NULL) ---- NULL +query T +SELECT reverse(arrow_cast(NULL, 'LargeUtf8')) +---- +NULL + +query T +SELECT reverse(arrow_cast(NULL, 'Utf8View')) +---- +NULL + query T SELECT right('abcde', -2) ---- diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 2381bd122bdda..ca4e6699e07bc 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -884,14 +884,13 @@ logical_plan 03)----TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for REVERSE -## TODO file ticket query TT EXPLAIN SELECT REVERSE(column1_utf8view) as c1 FROM test; ---- logical_plan -01)Projection: reverse(CAST(test.column1_utf8view AS Utf8)) AS c1 +01)Projection: reverse(test.column1_utf8view) AS c1 02)--TableScan: test projection=[column1_utf8view]