From 40ba5d1edae3d631bba590a33367d5ee6a596006 Mon Sep 17 00:00:00 2001 From: comphead Date: Wed, 11 Mar 2026 12:29:05 -0700 Subject: [PATCH] feat: correct struct column names for `arrays_zip` return type --- datafusion/functions-nested/src/arrays_zip.rs | 4 +- datafusion/sqllogictest/test_files/array.slt | 62 +++++++++---------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/datafusion/functions-nested/src/arrays_zip.rs b/datafusion/functions-nested/src/arrays_zip.rs index 2ac30d07046e2..5035439286fbb 100644 --- a/datafusion/functions-nested/src/arrays_zip.rs +++ b/datafusion/functions-nested/src/arrays_zip.rs @@ -125,7 +125,7 @@ impl ScalarUDFImpl for ArraysZip { return exec_err!("arrays_zip expects array arguments, got {dt}"); } }; - fields.push(Field::new(format!("c{i}"), element_type, true)); + fields.push(Field::new(format!("{}", i + 1), element_type, true)); } Ok(List(Arc::new(Field::new_list_field( @@ -227,7 +227,7 @@ fn arrays_zip_inner(args: &[ArrayRef]) -> Result { let struct_fields: Fields = element_types .iter() .enumerate() - .map(|(i, dt)| Field::new(format!("c{i}"), dt.clone(), true)) + .map(|(i, dt)| Field::new(format!("{}", i + 1), dt.clone(), true)) .collect::>() .into(); diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 112351c5efa73..83e9c9cc9c409 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -7112,61 +7112,61 @@ from array_distinct_table_2D_fixed; query ? select arrays_zip([1, 2, 3], [2, 3, 4]); ---- -[{c0: 1, c1: 2}, {c0: 2, c1: 3}, {c0: 3, c1: 4}] +[{1: 1, 2: 2}, {1: 2, 2: 3}, {1: 3, 2: 4}] # Spark example: arrays_zip(array(1, 2), array(2, 3), array(3, 4)) query ? select arrays_zip([1, 2], [2, 3], [3, 4]); ---- -[{c0: 1, c1: 2, c2: 3}, {c0: 2, c1: 3, c2: 4}] +[{1: 1, 2: 2, 3: 3}, {1: 2, 2: 3, 3: 4}] # basic: two integer arrays of equal length query ? select arrays_zip([1, 2, 3], [10, 20, 30]); ---- -[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: 30}] +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] # basic: two arrays with different element types (int + string) query ? select arrays_zip([1, 2, 3], ['a', 'b', 'c']); ---- -[{c0: 1, c1: a}, {c0: 2, c1: b}, {c0: 3, c1: c}] +[{1: 1, 2: a}, {1: 2, 2: b}, {1: 3, 2: c}] # three arrays of equal length query ? select arrays_zip([1, 2, 3], [10, 20, 30], [100, 200, 300]); ---- -[{c0: 1, c1: 10, c2: 100}, {c0: 2, c1: 20, c2: 200}, {c0: 3, c1: 30, c2: 300}] +[{1: 1, 2: 10, 3: 100}, {1: 2, 2: 20, 3: 200}, {1: 3, 2: 30, 3: 300}] # four arrays of equal length query ? select arrays_zip([1], [2], [3], [4]); ---- -[{c0: 1, c1: 2, c2: 3, c3: 4}] +[{1: 1, 2: 2, 3: 3, 4: 4}] # mixed element types: float + boolean query ? select arrays_zip([1.5, 2.5], [true, false]); ---- -[{c0: 1.5, c1: true}, {c0: 2.5, c1: false}] +[{1: 1.5, 2: true}, {1: 2.5, 2: false}] # different length arrays: shorter array padded with NULLs query ? select arrays_zip([1, 2], [3, 4, 5]); ---- -[{c0: 1, c1: 3}, {c0: 2, c1: 4}, {c0: NULL, c1: 5}] +[{1: 1, 2: 3}, {1: 2, 2: 4}, {1: NULL, 2: 5}] # different length arrays: first longer query ? select arrays_zip([1, 2, 3], [10]); ---- -[{c0: 1, c1: 10}, {c0: 2, c1: NULL}, {c0: 3, c1: NULL}] +[{1: 1, 2: 10}, {1: 2, 2: NULL}, {1: 3, 2: NULL}] # different length: one single element, other three elements query ? select arrays_zip([1], ['a', 'b', 'c']); ---- -[{c0: 1, c1: a}, {c0: NULL, c1: b}, {c0: NULL, c1: c}] +[{1: 1, 2: a}, {1: NULL, 2: b}, {1: NULL, 2: c}] # empty arrays query ? @@ -7178,19 +7178,19 @@ select arrays_zip([], []); query ? select arrays_zip([], [1, 2, 3]); ---- -[{c0: NULL, c1: 1}, {c0: NULL, c1: 2}, {c0: NULL, c1: 3}] +[{1: NULL, 2: 1}, {1: NULL, 2: 2}, {1: NULL, 2: 3}] # NULL elements inside arrays query ? select arrays_zip([1, NULL, 3], ['a', 'b', 'c']); ---- -[{c0: 1, c1: a}, {c0: NULL, c1: b}, {c0: 3, c1: c}] +[{1: 1, 2: a}, {1: NULL, 2: b}, {1: 3, 2: c}] # all NULL elements query ? select arrays_zip([NULL::int, NULL, NULL], [NULL::text, NULL, NULL]); ---- -[{c0: NULL, c1: NULL}, {c0: NULL, c1: NULL}, {c0: NULL, c1: NULL}] +[{1: NULL, 2: NULL}, {1: NULL, 2: NULL}, {1: NULL, 2: NULL}] # both args are NULL (entire list null) query ? @@ -7202,35 +7202,35 @@ NULL query ? select arrays_zip(NULL::int[], [1, 2, 3]); ---- -[{c0: NULL, c1: 1}, {c0: NULL, c1: 2}, {c0: NULL, c1: 3}] +[{1: NULL, 2: 1}, {1: NULL, 2: 2}, {1: NULL, 2: 3}] # real array + NULL list query ? select arrays_zip([1, 2], NULL::text[]); ---- -[{c0: 1, c1: NULL}, {c0: 2, c1: NULL}] +[{1: 1, 2: NULL}, {1: 2, 2: NULL}] # column-level test with multiple rows query ? select arrays_zip(a, b) from (values ([1, 2], [10, 20]), ([3, 4, 5], [30]), ([6], [60, 70])) as t(a, b); ---- -[{c0: 1, c1: 10}, {c0: 2, c1: 20}] -[{c0: 3, c1: 30}, {c0: 4, c1: NULL}, {c0: 5, c1: NULL}] -[{c0: 6, c1: 60}, {c0: NULL, c1: 70}] +[{1: 1, 2: 10}, {1: 2, 2: 20}] +[{1: 3, 2: 30}, {1: 4, 2: NULL}, {1: 5, 2: NULL}] +[{1: 6, 2: 60}, {1: NULL, 2: 70}] # column-level test with NULL rows query ? select arrays_zip(a, b) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b); ---- -[{c0: 1, c1: 10}, {c0: 2, c1: 20}] -[{c0: NULL, c1: 30}, {c0: NULL, c1: 40}] -[{c0: 5, c1: NULL}, {c0: 6, c1: NULL}] +[{1: 1, 2: 10}, {1: 2, 2: 20}] +[{1: NULL, 2: 30}, {1: NULL, 2: 40}] +[{1: 5, 2: NULL}, {1: 6, 2: NULL}] # alias: list_zip query ? select list_zip([1, 2], [3, 4]); ---- -[{c0: 1, c1: 3}, {c0: 2, c1: 4}] +[{1: 1, 2: 3}, {1: 2, 2: 4}] # column test: total values equal (3 each) but per-row lengths differ # a: [1] b: [10, 20] → row 0: a has 1, b has 2 @@ -7239,14 +7239,14 @@ select list_zip([1, 2], [3, 4]); query ? select arrays_zip(a, b) from (values ([1], [10, 20]), ([2, 3], [30])) as t(a, b); ---- -[{c0: 1, c1: 10}, {c0: NULL, c1: 20}] -[{c0: 2, c1: 30}, {c0: 3, c1: NULL}] +[{1: 1, 2: 10}, {1: NULL, 2: 20}] +[{1: 2, 2: 30}, {1: 3, 2: NULL}] # single element arrays query ? select arrays_zip([42], ['hello']); ---- -[{c0: 42, c1: hello}] +[{1: 42, 2: hello}] # error: too few arguments statement error @@ -7259,7 +7259,7 @@ select arrays_zip( arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)') ); ---- -[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: 30}] +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] # arrays_zip with LargeList different lengths (padding) query ? @@ -7268,7 +7268,7 @@ select arrays_zip( arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)') ); ---- -[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: NULL, c1: 30}] +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: NULL, 2: 30}] # arrays_zip with FixedSizeList inputs query ? @@ -7277,7 +7277,7 @@ select arrays_zip( arrow_cast(make_array(10, 20, 30), 'FixedSizeList(3, Int64)') ); ---- -[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: 30}] +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] # arrays_zip mixing List and LargeList query ? @@ -7286,7 +7286,7 @@ select arrays_zip( arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)') ); ---- -[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: 30}] +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] # arrays_zip mixing List and FixedSizeList with different lengths (padding) query ? @@ -7295,7 +7295,7 @@ select arrays_zip( arrow_cast(make_array(10, 20), 'FixedSizeList(2, Int64)') ); ---- -[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: NULL}] +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: NULL}] # arrays_zip with LargeList and FixedSizeList mixed types query ? @@ -7304,7 +7304,7 @@ select arrays_zip( arrow_cast(make_array('a', 'b'), 'FixedSizeList(2, Utf8)') ); ---- -[{c0: 1, c1: a}, {c0: 2, c1: b}] +[{1: 1, 2: a}, {1: 2, 2: b}] query ??? select array_intersect(column1, column2),